感谢您的关注,非常感谢您的帮助。我正在尝试抓取一个简单的 Craigslist 搜索结果列表,但下面的代码不起作用:它返回的是空列表 []。请帮忙看看问题出在哪里,代码如下:
# Import packages
from robobrowser import RoboBrowser
import sys, codecs, locale
import pandas as pd
browser = RoboBrowser(history=True, parser='html.parser')
# Define the function that scrapes listing data from the search results
def getTrips(website):
    """Scrape every result page reachable from ``website``.

    Opens ``website`` with the module-level ``browser``, collects one row per
    element carrying the ``result-info`` class, then follows the ``next >``
    link until there is none.

    Args:
        website: URL of the first search-result page.

    Returns:
        A list of dicts with the keys ``"title"``, ``"url"`` and
        ``"website"`` (the page the row was found on), in page order.
    """
    data = []
    visited = set()
    page_url = website

    # Iterate instead of recursing: the recursive version discarded the rows
    # returned by the nested call, so only the first page was ever returned.
    while page_url and page_url not in visited:
        visited.add(page_url)
        browser.open(page_url)

        for trip in browser.find_all(class_='result-info'):
            data.append({
                "title": get_title(trip),
                "url": get_url(trip),
                "website": page_url,
            })

        next_page = browser.get_link('next >')
        href = next_page.get('href') if next_page else None
        # A "next" link with no href, or one pointing back at a page that was
        # already visited, ends the crawl rather than looping forever.
        page_url = browser._build_url(href) if href else None

    return data
def get_title(trip):
    """Return the title text of one search-result element.

    Args:
        trip: A parsed element exposing ``find(class_=...)``.

    Returns:
        The ``text`` of the element whose class is ``result-title hdrlnk``,
        or the string ``"Title not found"`` when no such element exists.
    """
    # Look the element up once instead of searching the tree twice.
    title_node = trip.find(class_='result-title hdrlnk')
    if not title_node:
        return "Title not found"
    return title_node.text
def get_url(trip):
    """Return the link target of one search-result element.

    Args:
        trip: A parsed element exposing ``find(...)``.

    Returns:
        The ``href`` of the first anchor inside ``trip`` that has one, or the
        string ``"URL not found"`` when there is no such anchor.
    """
    # Search for the anchor directly on `trip`. The previous version looked
    # for a nested 'result-info' element and then read an undefined name
    # (`item`), so it could never return a URL.
    link = trip.find('a', href=True)
    if link is None:
        return "URL not found"
    return link.get('href') or "URL not found"
# Build the combined result list (*final output = a list named "total" holding one dict per listing)
# Collect every scraped row for the photographer search into one list.
total = list(
    getTrips('https://newyork.craigslist.org/search/bbb?query=photographer&sort=rel')
)

# Print the collected rows (for testing purposes).
print(total)

# Export the data to a CSV file.
df = pd.DataFrame(data=total)
df.to_csv('photographer_data.csv', index=False)