r - 使用 for 循环抓取 Zillow

Question

我正在抓取 Zillow，并希望抓取所有页面。我使用了如下所示的 for 循环，但似乎只得到了第一页的结果。

# Attempt to scrape 20 result pages of Orlando, FL listings from Zillow.
# NOTE(review): the loop below only rebuilds `zillow_url`; its closing brace
# comes before read_html(), so the download and parsing further down run once,
# after the loop, using the URL from the last iteration (page_result = 20).
# NOTE(review): the URL string literal spans several source lines, so those
# line breaks become part of the string that is passed to read_html().
for (page_result in 1:20) {
  zillow_url = paste0("https://www.zillow.com/orlando-fl/",page_result,"_p/?searchQueryState=%7B%22
pagination%22%3A%7B%22currentPage%22%3A",page_result,"%7D%2C%22usersSearchTerm%22%3A%22
Orlando%2C%20Fl%22%2C%22mapBounds%22%3A%7B%22west%22%3A-81.6603646328125%2C%22east%22%3A-80.8144173671875%2C%22
south%22%3A28.191492307595613%2C%22north%22%3A28.794962421299882%7D%2C%22
regionSelection%22%3A%5B%7B%22regionId%22%3A13121%2C%22
regionType%22%3A6%7D%5D%2C%22isMapVisible%22%3Atrue%2C%22filterState%22%3A%7B%22sort%22%3A%7B%22value%22%3A%22globalrelevanceex
%22%7D%2C%22ah%22%3A%7B%22value%22%3Atrue%7D%7D%2C%22isListVisible%22%3Atrue%7D")
}

# Download and parse the page at `zillow_url` (executed once; see note above).
zpg = read_html(zillow_url)

# Accumulator for the combined results; starts out empty.
res_all <-NULL
# One column per CSS selector, holding the text of every matching node.
# NOTE(review): presumably each selector matches once per listing card so the
# columns line up row by row — verify against the live page markup.
zillow_pg <-tibble(
  addr = zpg %>% html_nodes(".list-card-addr") %>% html_text(),
  price = zpg %>% html_nodes(".list-card-price") %>% html_text(),
  details = zpg %>% html_nodes(".list-card-details") %>% html_text() ,
  heading= zpg %>% html_nodes(".list-card-info a") %>% html_text() ,
  type = zpg %>% html_nodes(".list-card-statusText") %>% html_text())


# Append this page's rows to the accumulator (res_all is still NULL here, and
# this statement runs only once because it sits outside the loop).
res_all <- res_all %>% bind_rows(zillow_pg)

score 0 · Accepted Answer

你可能对这个ZillowR包感兴趣

https://www.rdocumentation.org/packages/ZillowR/versions/0.1.0

Zillow 是一家在线房地产公司，通过 REST API 提供美国的房地产和抵押贷款数据。ZillowR 包为每个 API 服务提供了一个 R 函数，使你可以轻松地调用 API，并将响应处理为方便、适合在 R 中使用的数据结构。Zillow API 文档请参阅：http://www.zillow.com/howto/api/APIOverview.htm

你的代码已经完成了 90%。我无法测试，但我认为下面这些修改会让你朝着正确的方向前进：

# Scrape Zillow search-result pages 1-20 for Orlando, FL and stack the listing
# fields from every page into a single tibble, `res_all`.
# Relies on read_html() / html_nodes() / html_text(), tibble(), bind_rows() and
# %>% already being in scope (presumably via library(rvest) and library(dplyr)
# — confirm against the rest of the script).
pages <- 1:20

# One slot per page; the per-page tibbles are combined once after the loop
# instead of re-binding the growing result on every iteration.
page_tables <- vector("list", length(pages))

for (i in seq_along(pages)) {
  page_result <- pages[[i]]

  # Every piece of the URL is its own single-line string literal. A literal
  # that is wrapped across source lines would carry the line breaks into the
  # URL, so the pieces are joined with paste0() instead.
  # `page_result` appears twice: in the path ("<n>_p/") and as currentPage
  # inside the URL-encoded searchQueryState.
  zillow_url <- paste0(
    "https://www.zillow.com/orlando-fl/", page_result,
    "_p/?searchQueryState=%7B%22",
    "pagination%22%3A%7B%22currentPage%22%3A", page_result,
    "%7D%2C%22usersSearchTerm%22%3A%22",
    "Orlando%2C%20Fl%22%2C%22mapBounds%22%3A%7B%22west%22%3A-81.6603646328125",
    "%2C%22east%22%3A-80.8144173671875%2C%22",
    "south%22%3A28.191492307595613%2C%22north%22%3A28.794962421299882%7D%2C%22",
    "regionSelection%22%3A%5B%7B%22regionId%22%3A13121%2C%22",
    "regionType%22%3A6%7D%5D%2C%22isMapVisible%22%3Atrue%2C%22filterState%22",
    "%3A%7B%22sort%22%3A%7B%22value%22%3A%22globalrelevanceex",
    "%22%7D%2C%22ah%22%3A%7B%22value%22%3Atrue%7D%7D%2C%22isListVisible%22",
    "%3Atrue%7D"
  )

  # Download and parse this page.
  zpg <- read_html(zillow_url)

  # One column per CSS selector, holding the text of every matching node.
  # NOTE(review): tibble() needs the columns to have compatible lengths —
  # presumably each selector matches once per listing card; verify against the
  # live page markup.
  zillow_pg <- tibble(
    addr = zpg %>% html_nodes(".list-card-addr") %>% html_text(),
    price = zpg %>% html_nodes(".list-card-price") %>% html_text(),
    details = zpg %>% html_nodes(".list-card-details") %>% html_text(),
    heading = zpg %>% html_nodes(".list-card-info a") %>% html_text(),
    type = zpg %>% html_nodes(".list-card-statusText") %>% html_text()
  )

  page_tables[[i]] <- zillow_pg
}

# Stack the per-page tibbles row-wise into the final result.
res_all <- bind_rows(page_tables)

score 0 · Accepted Answer

我正在尝试做类似的事情，但我弄不清楚自己哪里做错了（我的 res_all 数据框最终总是 0 个观测值，即 0 行）。我的目标是从罗德岛州普罗维登斯的多户住宅房源列表中抓取不止一页的结果。


# Scrape Zillow search-result pages 1-20 for duplex listings in Providence, RI
# and stack the listing fields from every page into a single tibble, `res_all`.
# Relies on read_html() / html_nodes() / html_text(), tibble(), bind_rows() and
# %>% already being in scope (presumably via library(rvest) and library(dplyr)
# — confirm against the rest of the script).
pages <- 1:20

# One slot per page; the per-page tibbles are combined once after the loop
# instead of re-binding the growing result on every iteration.
page_tables <- vector("list", length(pages))

for (i in seq_along(pages)) {
  page_result <- pages[[i]]

  # `page_result` is used both in the path ("<n>_p/") and as currentPage in the
  # URL-encoded searchQueryState, so the two always name the same page
  # (currentPage was previously fixed at 2 regardless of the loop value).
  zillow_url <- paste0(
    "https://www.zillow.com/providence-ri/duplex/", page_result,
    "_p/?searchQueryState=%7B%22pagination%22%3A%7B%22currentPage%22%3A",
    page_result,
    "%7D%2C%22usersSearchTerm%22%3A%22Providence%2C%20RI%22",
    "%2C%22mapBounds%22%3A%7B%22west%22%3A-71.48892251635742",
    "%2C%22east%22%3A-71.36017648364258",
    "%2C%22south%22%3A41.77131876826507",
    "%2C%22north%22%3A41.862664689400106%7D",
    "%2C%22regionSelection%22%3A%5B%7B%22regionId%22%3A26637",
    "%2C%22regionType%22%3A6%7D%5D",
    "%2C%22isMapVisible%22%3Atrue",
    "%2C%22filterState%22%3A%7B%22sort%22%3A%7B%22value%22%3A%22",
    "globalrelevanceex%22%7D",
    "%2C%22ah%22%3A%7B%22value%22%3Atrue%7D",
    "%2C%22sf%22%3A%7B%22value%22%3Afalse%7D",
    "%2C%22con%22%3A%7B%22value%22%3Afalse%7D",
    "%2C%22manu%22%3A%7B%22value%22%3Afalse%7D",
    "%2C%22land%22%3A%7B%22value%22%3Afalse%7D",
    "%2C%22tow%22%3A%7B%22value%22%3Afalse%7D",
    "%2C%22apa%22%3A%7B%22value%22%3Afalse%7D",
    "%2C%22apco%22%3A%7B%22value%22%3Afalse%7D%7D",
    "%2C%22isListVisible%22%3Atrue%2C%22mapZoom%22%3A13%7D"
  )

  # Download and parse this page.
  zpg <- read_html(zillow_url)

  # One column per CSS selector, holding the text of every matching node.
  # NOTE(review): if none of these selectors match the returned HTML, every
  # column is empty and the page contributes zero rows — verify the class
  # names against the markup that read_html() actually receives.
  zillow_pg <- tibble(
    addr = zpg %>% html_nodes(".list-card-addr") %>% html_text(),
    price = zpg %>% html_nodes(".list-card-price") %>% html_text(),
    details = zpg %>% html_nodes(".list-card-details") %>% html_text(),
    heading = zpg %>% html_nodes(".list-card-info a") %>% html_text(),
    type = zpg %>% html_nodes(".list-card-statusText") %>% html_text()
  )

  page_tables[[i]] <- zillow_pg
}

# Stack the per-page tibbles row-wise into the final result.
res_all <- bind_rows(page_tables)

r - 使用 for 循环抓取 Zillow

2 回答 2

Related

Reference