我从下面给出的网站上抓取了一些数据,但遇到了两个问题:第一,我无法将这些数据输出到 Excel;第二,我把抓取到的表格存成了字典,但字典中的键和值对应不上。请问应该如何修正?
from bs4 import BeautifulSoup as bs
import requests
import pandas as pd
# Site root; the hrefs collected from the index page are appended to it.
BASE_URL = "http://stats.espncricinfo.com"
# Index page whose "recordsTable" links to the individual result pages.
INDEX_URL = BASE_URL + "/ci/content/records/307847.html"
# Workbook that receives every scraped row.
OUTPUT_FILE = "records.xlsx"
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 30


def build_record(headers, cells):
    """Pair the cell texts of one table row with the column headers.

    Headers and cells are matched by position and stripped of surrounding
    whitespace. When the two sequences differ in length, the extra items
    of the longer one are ignored. A blank header is replaced by
    ``column_<index>`` so that several blank headers do not overwrite
    each other in the resulting dict.

    Args:
        headers: Column header texts, in table order.
        cells: Cell texts of a single row, in table order.

    Returns:
        A dict mapping each header to the cell in the same position.
    """
    record = {}
    for index, (header, cell) in enumerate(zip(headers, cells)):
        key = header.strip() or f"column_{index}"
        record[key] = cell.strip()
    return record


def fetch_soup(page_url):
    """Download *page_url* and return it parsed with the lxml parser.

    Raises:
        requests.RequestException: On a network failure, a timeout, or an
            HTTP error status.
    """
    response = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of silently parsing an error page.
    response.raise_for_status()
    return bs(response.text, "lxml")


def collect_links(index_soup):
    """Return the href of every link inside the index page's recordsTable.

    Returns an empty list when the page has no element with that class.
    """
    index_table = index_soup.find(class_="recordsTable")
    if index_table is None:
        return []
    return [anchor["href"] for anchor in index_table.find_all("a", href=True)]


def parse_table(table):
    """Convert a parsed HTML table into a list of per-row dicts.

    All ``th`` cells of the table are taken as the column headers
    (NOTE(review): assumes the table has a single header row - confirm
    against the live page). Rows that contain no ``td`` cells, such as
    the header row itself, are skipped.
    """
    headers = [th.text for th in table.find_all("th")]
    records = []
    for table_row in table.find_all("tr"):
        cells = [td.text for td in table_row.find_all("td")]
        if cells:
            records.append(build_record(headers, cells))
    return records


def main():
    """Scrape every linked result table and save the rows to OUTPUT_FILE."""
    all_records = []
    for href in collect_links(fetch_soup(INDEX_URL)):
        table = fetch_soup(BASE_URL + href).find(class_="engineTable")
        if table is None:
            # Page without a result table: nothing to extract.
            continue
        for record in parse_table(table):
            print(record)
            all_records.append(record)

    # One DataFrame row per scraped table row. Writing .xlsx requires an
    # Excel engine (e.g. openpyxl) to be installed alongside pandas.
    pd.DataFrame(all_records).to_excel(OUTPUT_FILE, index=False)


if __name__ == "__main__":
    main()