0

感谢您的关注,非常感谢您的帮助。我正在尝试抓取简单的 Craigslist 列表,但这段代码不起作用:它返回的是空列表 []。请帮忙看看,代码如下:

导入包

from robobrowser import RoboBrowser
import sys, codecs, locale
import pandas as pd

# Shared browser session used by getTrips(); history is kept and pages are
# parsed with the standard-library HTML parser.
browser = RoboBrowser(
    history=True,
    parser='html.parser',
)

定义函数以从旅行中抓取数据

def getTrips(website):
    """Scrape listings from ``website`` and every following result page.

    Opens each page with the module-level ``browser``, collects every
    element carrying the ``result-info`` class, and follows the
    ``next >`` link until no further page is offered.

    Args:
        website: URL of the first search-results page.

    Returns:
        A list of dicts with the keys ``"title"``, ``"url"`` and
        ``"website"`` (the URL of the page the listing was found on),
        covering all visited pages.
    """
    data = []
    visited = set()
    page_url = website

    # Iterate instead of recursing: the previous recursive call discarded
    # its return value, so listings from page 2 onwards were lost.
    while page_url and page_url not in visited:
        # Remember visited pages so a "next" link that points back to an
        # already-scraped page cannot cause an endless loop.
        visited.add(page_url)
        browser.open(page_url)

        for trip in browser.find_all(class_='result-info'):
            data.append({
                "title": get_title(trip),
                "url": get_url(trip),
                "website": page_url,
            })

        next_page = browser.get_link('next >')
        href = next_page.get('href') if next_page else None
        # NOTE(review): _build_url is a private RoboBrowser helper; it is
        # kept here because the original code relied on it to build the
        # next page's URL.
        page_url = browser._build_url(href) if href else None

    return data

def get_title(trip):
    """Return the text of the listing's title element.

    Looks inside ``trip`` for the element whose class is
    ``result-title hdrlnk``; when none is found the placeholder
    ``"Title not found"`` is returned instead.
    """
    heading = trip.find(class_='result-title hdrlnk')
    return heading.text if heading else "Title not found"

def get_url(trip):
    """Return the ``href`` of the first link inside a listing element.

    Args:
        trip: A parsed listing element (the ``result-info`` node itself).

    Returns:
        The link's ``href`` value, or ``"URL not found"`` when the
        element has no ``<a>`` tag or the tag has no ``href``.
    """
    # ``trip`` already is the result-info element, so look for the anchor
    # directly; searching for a nested result-info never matched, and the
    # old success branch referenced the undefined name ``item``.
    anchor = trip.find('a')
    if anchor is None:
        return "URL not found"

    href = anchor.get('href')
    return href if href else "URL not found"

制作汇总列表(*最终输出 = 名为“total”的字典列表)

# Gather every scraped listing for the photographer search into one list.
total = list(
    getTrips('https://newyork.craigslist.org/search/bbb?query=photographer&sort=rel')
)

打印(测试目的)

# Dump the collected records to stdout for a quick manual check.
print(total)

将数据导出到 csv 文件

# Tabulate the scraped records and write them to a CSV file without the
# DataFrame's index column.
df = pd.DataFrame(data=total)
df.to_csv(path_or_buf='photographer_data.csv', index=False)
4

0 回答 0