我有一个 idx 文件: https://www.sec.gov/Archives/edgar/daily-index/2020/QTR4/master.20201231.idx
一年前我可以使用以下代码打开 idx 文件,但现在这些代码不起作用。这是为什么?我应该如何修改代码?
import requests
import urllib
from bs4 import BeautifulSoup
# Download one EDGAR daily master index and collect its 10-K filings.
#
# Why the older version stopped working: SEC EDGAR now rejects automated
# requests that do not identify the caller via a User-Agent header (the
# server answers HTTP 403 with an HTML error page).  Without a status
# check that error page was parsed as if it were the index, so the script
# silently produced an empty list.
#
# Result: ``master_data`` is a list of
# [CIK, company name, form type, date filed, absolute filing URL] rows.

ARCHIVES_ROOT = "https://www.sec.gov/Archives/"
file_url = r"https://www.sec.gov/Archives/edgar/daily-index/2020/QTR4/master.20201231.idx"

# NOTE: replace this placeholder with your own name/organisation and a
# contact e-mail address before running; SEC asks every client to declare
# who it is.
request_headers = {"User-Agent": "Sample Company Name admin@example.com"}

# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(file_url, headers=request_headers, timeout=30)
# Fail loudly on 403/404/5xx instead of parsing an error page.
response.raise_for_status()
byte_data = response.content

# errors="replace" keeps a single stray non-UTF-8 byte from aborting the
# whole run.
index_text = byte_data.decode("utf-8", errors="replace")

master_data = []
# Each data row is "CIK|Company Name|Form Type|Date Filed|File Name".
# Parsing row by row (rather than flattening the file into one token
# stream) means a malformed line cannot shift the fields of later rows,
# and the free-text header is skipped because it never has five fields
# with a "10-K" form type.
for line in index_text.splitlines():
    fields = line.split("|")
    if len(fields) != 5:
        continue
    form_type, file_name = fields[2], fields[4]
    if form_type == "10-K" and ".txt" in file_name:
        # The index stores paths relative to the Archives root.
        fields[4] = ARCHIVES_ROOT + file_name
        master_data.append(fields)

print(master_data)