您提到的 URL 是一个动态页面,但该页面的内容实际上是从下面这个静态页面加载的:
https://www.sec.gov/Archives/edgar/data/200406/000020040621000057/jnj-20210704.htm
您可以抓取此页面并提取数据。
这是抓取您需要的数据的代码。
from bs4 import BeautifulSoup
import requests
# Fetch one static SEC EDGAR filing document and print every non-empty row of
# the table that follows the "SALES BY SEGMENT OF BUSINESS" heading, one list
# of cell strings per row.

# A browser-like User-Agent is sent with the request.
# NOTE(review): presumably sec.gov rejects the default client User-Agent —
# confirm against the current EDGAR access policy.
headers = {
    "User-agent": (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36"
    )
}

# Obtain the HTML of the filing document.
base_url = "https://www.sec.gov/Archives/edgar/data/200406/000020040621000057/jnj-20210704.htm"
# The timeout keeps the script from hanging forever on a stalled connection.
edgar_resp = requests.get(base_url, headers=headers, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
edgar_resp.raise_for_status()
edgar_str = edgar_resp.text
soup = BeautifulSoup(edgar_str, 'html.parser')

# The heading is matched by exact string; the trailing space is part of the
# text being searched for and must be kept.
s = soup.find('span', recursive=True, string='SALES BY SEGMENT OF BUSINESS ')
if s is None:
    raise SystemExit("Heading 'SALES BY SEGMENT OF BUSINESS ' not found in the page")

# The data table is the first <table> that appears after the heading.
t = s.find_next('table')
if t is None:
    raise SystemExit("No table found after the 'SALES BY SEGMENT OF BUSINESS ' heading")

trs = t.find_all('tr')
for tr in trs:
    # stripped_strings yields each text fragment with surrounding whitespace
    # removed; rows that contain no text at all are skipped.
    cells = list(tr.stripped_strings)
    if cells:
        print(cells)
输出:
['Fiscal Second Quarter Ended', 'Fiscal Six Months Ended']
['(Dollars in Millions)', 'July 4,', '2021', 'June 28,', '2020', 'Percent', 'Change', 'July 4,', '2021', 'June 28,', '2020', 'Percent Change']
['Consumer Health']
['OTC']
['U.S.', '$', '675', '627', '7.7', '%', '$', '1,274', '1,316', '(', '3.2', ')', '%']
['International', '633', '522', '21.2', '1,208', '1,181', '2.3']
['Worldwide', '1,307', '1,149', '13.8', '2,482', '2,497', '(', '0.6', ')']
['Skin Health/Beauty']
['U.S.', '659', '536', '23.0', '1,293', '1,195', '8.2']
['International', '511', '471', '8.4', '1,040', '929', '12.0']
['Worldwide', '1,170', '1,007', '16.2', '2,333', '2,124', '9.8']
['Oral Care']
['U.S.', '165', '170', '(', '3.1', ')', '328', '346', '(', '5.2', ')']
['International', '260', '227', '14.6', '514', '446', '15.3']
['Worldwide', '426', '397', '7.0', '843', '792', '6.3']
['Baby Care']
['U.S.', '97', '96', '0.8', '193', '188', '2.4']
['International', '290', '260', '11.5', '583', '529', '10.2']
['Worldwide', '387', '356', '8.6', '776', '717', '8.1']
["Women's Health"]
['U.S.', '3', '3', '(', '3.1', ')', '6', '7', '(', '16.0', ')']
['International', '227', '199', '14.2', '446', '427', '4.5']
['Worldwide', '230', '202', '13.9', '452', '434', '4.2']
['Wound Care/Other']
['U.S.', '153', '126', '20.9', '268', '245', '9.3']
['International', '64', '59', '7.3', '125', '111', '12.1']
['Worldwide', '216', '185', '16.6', '393', '356', '10.2']
['TOTAL', 'Consumer Health']
['U.S.', '1,751', '1,557', '12.4', '3,362', '3,297', '2.0']
['International', '1,984', '1,739', '14.1', '3,916', '3,624', '8.1']
['Worldwide', '3,735', '3,296', '13.3', '7,278', '6,921', '5.2']