1

我一直在使用 python 中的网络抓取工具来抓取 Google Finance,但我无法使用 find() 方法找到我正在寻找的特定标签。最后,我很生气,我决定将返回的数据写入文件并自己查找。所以我把它写到同一目录下的 testing.html 中,然后用 Google Chromium 打开它,这样我就可以使用检查工具了。几分钟之内,我找到了我正在寻找的元素。我究竟做错了什么?我的代码附在下面:

import dryscrape

session = dryscrape.Session()


def get(url):
    """Fetch *url* with the shared dryscrape session and return the body as a string.

    Returns an empty string on failure.  The original left ``data``
    unassigned when ``session.visit`` raised, so the ``return str(data)``
    line crashed with UnboundLocalError right after printing the message.
    """
    global session
    data = ''  # default so the return below is always safe
    try:
        session.visit(url)
        data = session.body()
    except Exception:  # was a bare except; keep best-effort behavior but don't swallow SystemExit/KeyboardInterrupt
        print('Connection Failed')
    return str(data)

def save(price, stockname):
    """Persist *price* for *stockname* — placeholder, not implemented yet."""

def extract(data):
    """Return the index of the price <div> marker in *data*, or -1 if absent.

    Fix: the Google Finance price class is ``YMlKec`` (lowercase l).  The
    original searched for ``YMLKec`` (capital L) and therefore always got -1.
    Note this still returns a character index, not the price text itself.
    """
    return data.find('<div class="YMlKec fxKbKc">')

class following():
    """Watched tickers; instantiating fetches, extracts, and saves each quote.

    Fixes over the original:
    - ``def __init__():`` had no ``self`` parameter, so ``following()``
      raised TypeError the moment the class was instantiated.
    - the six ``global apple`` ... statements were no-ops: the URLs are
      class attributes, not module-level globals.
    """

    apple = "https://www.google.com/finance/quote/AAPL:NASDAQ"
    tesla = "https://www.google.com/finance/quote/TSLA:NASDAQ"
    google = "https://www.google.com/finance/quote/GOOGL:NASDAQ"
    amazon = "https://www.google.com/finance/quote/AMZN:NASDAQ"
    microsoft = "https://www.google.com/finance/quote/MSFT:NASDAQ"
    netflix = "https://www.google.com/finance/quote/NFLX:NASDAQ"

    def __init__(self):
        # Same six save(extract(get(url)), url) calls as before, collapsed
        # into one loop.  Note save()'s second argument is still the URL,
        # exactly as the original passed it.
        for url in (self.apple, self.tesla, self.google,
                    self.amazon, self.microsoft, self.netflix):
            save(extract(get(url)), url)

# Manual sanity check: run extract() over the saved page and print the
# match index.  A `with` block closes the file even if extract() raises,
# instead of relying on a manual close() call.
with open("testing.html") as f:
    print(extract(f.read()))
4

2 回答 2

0

你为什么不尝试使用 requests 和 BeautifulSoup 库呢?下面的代码演示了我的意思。

import requests
from bs4 import BeautifulSoup


class following():
    """Scrape Google Finance quote pages with requests + BeautifulSoup."""

    def __init__(self):
        # A persistent session with a browser User-Agent so Google serves
        # the full page rather than a bot-filtered one.
        self.session = requests.Session()
        self.session.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36'

    def get_last_price(self,link):
        """Download *link* and return its parsed BeautifulSoup tree."""
        response = self.session.get(link)
        soup = BeautifulSoup(response.text,"lxml")
        return soup

    def extract(self,soup):
        """Read the last traded price from the data-last-price attribute."""
        node = soup.select_one("[data-exchange='NASDAQ']")
        return node['data-last-price']


if __name__ == '__main__':
    base = "https://www.google.com/finance/quote/{}:NASDAQ"
    scraper = following()

    tickers = ['AAPL','TSLA','GOOGL','AMZN','MSFT','NFLX']
    # Fetch and print the last price for each watched ticker.
    for ticker in tickers:
        page = scraper.get_last_price(base.format(ticker))
        price = scraper.extract(page)
        print(price)
于 2021-10-24T16:31:25.603 回答
0

发现问题:类名不是 YMLKec 而是 YMlKec——第三个字母是小写的 l,不是大写的 L。

# Demonstrates the typo: searching for the *correct* class name (lowercase l)
# succeeds.  A `with` block closes the file handle, which the original
# open(...).read() one-liner leaked.
with open("testing.html", "r") as fh:
    data = fh.read()
class_ = "YMlKec fxKbKc"
print(data.find(class_))
>>> 992880
于 2021-10-24T16:51:30.670 回答