我目前正在尝试编写一个获取股票信息的脚本。但是,我只能从股票的概览页面(初始页面)检索到数据,而无法从关键统计(key statistics)页面检索数据。这是我试图从中获取数据的页面: https://au.finance.yahoo.com/quote/TICKER/key-statistics?p=TICKER
这是我正在使用的代码:(主要来自我观看的视频)
# -*- coding: utf-8 -*-
import os, sys
import csv
from bs4 import BeautifulSoup
import urllib3
import xlsxwriter
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Row labels to read from the quote summary page.
key_stats_on_main = ['Market cap', 'PE ratio (TTM)', 'EPS (TTM)']
# Row labels to read from the key-statistics page.
key_stats_on_stat = [
    'Enterprise value', 'Trailing P/E', 'Forward P/E',
    'PEG Ratio (5-yr expected)', 'Return on assets',
    'Quarterly revenue growth (yoy)', 'EBITDA', 'Diluted EPS (ttm)',
    'Total debt/equity (mrq)', 'Current ratio (mrq)',
]


def _normalise_label(text):
    """Return *text* lower-cased with every whitespace run collapsed to one space."""
    return ' '.join(text.split()).lower()


def _find_stat_value(soup, label):
    """Return the value shown in the table row labelled *label*, or None.

    The label is matched case-insensitively and ignoring surrounding or
    repeated whitespace, because an exact whole-string match silently fails
    as soon as the page's markup differs from the label by one character.

    Two row layouts are tried, in the same order as before: first the second
    child of the row's second <span>, then the first child of its second <td>.
    """
    wanted = _normalise_label(label)
    label_node = soup.find(
        string=lambda s: s is not None and _normalise_label(s) == wanted
    )
    if label_node is None:
        return None

    row = label_node.find_parent('tr')
    if row is None:
        return None

    try:
        return row.find_all('span')[1].contents[1].get_text(strip=True)
    except (IndexError, AttributeError):
        # Row does not use the nested-span layout; fall through to the <td> one.
        pass

    try:
        return row.find_all('td')[1].contents[0].get_text(strip=True)
    except (IndexError, AttributeError):
        return None


def _scrape_stats(browser, url, labels):
    """Load *url* in *browser* and return one value per entry of *labels*.

    Each value is printed as it is found. A label that cannot be resolved
    prints a diagnostic and contributes the placeholder 'N/A'.
    """
    browser.get(url)
    # NOTE(review): the DOM is read immediately after get(); if values still
    # come back as 'N/A', the page may be rendering its tables later than
    # this point and an explicit wait may be needed - confirm against the
    # live page.
    inner_html = browser.execute_script("return document.body.innerHTML")
    soup = BeautifulSoup(inner_html, 'html.parser')

    values = []
    for label in labels:
        value = _find_stat_value(soup, label)
        if value is None:
            print('Invalid parent for this element')
            value = 'N/A'
        else:
            print(value)
        values.append(value)
    return values


# Each non-blank line of the portfolio file becomes a list of its
# comma-separated fields; the ticker is expected in the first field.
with open("/Users/z_hutcho/Documents/Programming/yfinance/stocks.csv", "r") as pfolio_file:
    stocks_arr = [
        line.strip().split(',') for line in pfolio_file if line.strip()
    ]
print(stocks_arr)

chrome_options = Options()
chrome_options.add_argument("--headless")
# Chrome expects the window size as "width,height".
chrome_options.add_argument("--window-size=2560,1600")
chrome_driver = "/usr/local/bin/chromedriver"
browser = webdriver.Chrome(options=chrome_options, executable_path=chrome_driver)

stock_info_arr = []
try:
    for stock in stocks_arr:
        ticker = stock[0]
        url = "https://au.finance.yahoo.com/quote/{0}?p={0}".format(ticker)
        url2 = "https://au.finance.yahoo.com/quote/{0}/key-statistics?p={0}".format(ticker)

        # One result row per ticker: [ticker, main-page stats..., statistics-page stats...]
        stock_info = [ticker]
        stock_info.extend(_scrape_stats(browser, url, key_stats_on_main))
        stock_info.extend(_scrape_stats(browser, url2, key_stats_on_stat))
        stock_info_arr.append(stock_info)
finally:
    # Always shut the headless browser down, even when a page load fails,
    # so no orphaned Chrome/chromedriver processes are left behind.
    browser.quit()
print(stock_info_arr)
股票代码 AAPL 的结果如下:
1.3T
24.52
11.89
Invalid parent for this element
Invalid parent for this element
Invalid parent for this element
Invalid parent for this element
Invalid parent for this element
Invalid parent for this element
Invalid parent for this element
Invalid parent for this element
Invalid parent for this element
Invalid parent for this element
不太清楚为什么第二个页面没有被正确抓取……我对 BeautifulSoup 不是很有经验。任何帮助我都将非常感激。