我正在尝试编写一个从 Steam 获取市场链接的机器人,但遇到了问题。我能够取回单个页面的全部数据,但当我尝试获取多个页面时,得到的只是第一页的重复内容,尽管我传入的都是有效链接(例如:先是 http://steamcommunity.com/market/search?q=appid%3A753#p1 ,然后是 http://steamcommunity.com/market/search?q=appid%3A753#p2 )。我已经测试过这些链接,它们在我的浏览器中可以正常打开。以下是我的代码。
import urllib2
import random
import time
# Base Steam Community Market search URL; the query string decodes to "appid:753".
start_url = "http://steamcommunity.com/market/search?q=appid%3A753"
# Number of result pages the script requests (pages 1 through end_page).
end_page = 3
# Module-level accumulator: get_market_urls() appends every link it finds here.
urls = []
def get_raw(url):
    """Fetch *url* over HTTP and return the raw response body.

    Args:
        url: Absolute URL to request.

    Returns:
        The complete response body as returned by ``response.read()``.

    Raises:
        urllib2.URLError: If the request cannot be completed (this includes
            ``urllib2.HTTPError`` for error status codes).
    """
    req = urllib2.Request(url)
    response = urllib2.urlopen(req)
    try:
        return response.read()
    finally:
        # Python 2 urllib2 responses are not context managers, so the
        # connection has to be released explicitly, even if read() fails.
        response.close()
def get_market_urls(html, found=None):
    """Extract every market listing link from *html*.

    For each occurrence of the text ``market_listing_row_link`` the next
    substring that starts with ``http`` and runs up to (not including) the
    following double quote is treated as a link. Each link is printed and
    appended to the result sink.

    Args:
        html: Page source to scan.
        found: Optional list that receives the links. When omitted, the
            module-level ``urls`` list is used, as before.

    Returns:
        A new list holding only the links extracted from this *html*, in
        document order. The list is empty when no marker is present.
    """
    marker = "market_listing_row_link"
    sink = urls if found is None else found
    page_links = []

    # Start at offset 0 so a marker at the very beginning is not skipped.
    pos = html.find(marker)
    while pos != -1:
        after_marker = pos + len(marker)
        beg = html.find("http", after_marker)
        end = html.find('"', beg) if beg != -1 else -1
        if end == -1:
            # Marker without a complete quoted link after it: nothing more
            # can be extracted, so stop instead of slicing with -1.
            break
        link = html[beg:end]
        # Single-argument parenthesised print behaves the same whether
        # print is a statement (Python 2) or a function.
        print(link)
        sink.append(link)
        page_links.append(link)
        pos = html.find(marker, after_marker)
    return page_links
def go_to_page(page):
    """Build the search URL for result page *page*.

    The page number is appended to the module-level ``start_url`` as a
    ``#p<page>`` URL fragment.

    NOTE(review): a URL fragment (everything after ``#``) is processed by
    the client and is not transmitted to the server in an HTTP request, so
    fetching these URLs with urllib2 presumably yields the same first-page
    document for every *page*. A browser paginates by running the page's
    own JavaScript. TODO: confirm which server-side URL or query parameters
    carry the page offset before relying on this for multi-page scraping.
    """
    return "".join((start_url, "#p", str(page)))
def wait(min, max):
    """Pause for a random whole number of seconds in ``[min, max]``.

    Both bounds are inclusive. The parameter names shadow the builtins of
    the same name inside this function; they are kept because they are part
    of the callable's interface.
    """
    time.sleep(random.randint(min, max))
# Fetch result pages 1..end_page in order and collect the listing links
# found on each one.
for page_number in range(1, end_page + 1):
    url = go_to_page(page_number)
    raw = get_raw(url)
    get_market_urls(raw)