我一直在从事一个项目,其目标是获取街道地址和邮政编码的两列 CSV,将其读入 R,然后对每一个执行 Zillow 查询(特别是 GetSearchResults),解析输出,然后将解析后的输出存储在要写入 CSV 的数据框中(并放置在现有数据旁边)。
限制:Zillow API 每次调用只能查询一个地址/邮编组合,因此任何违反这一限制的方案都不可行。
到目前为止,我已经完成了大约 85% 的工作。我有 i) 一段代码,可以从数据框中逐个查询这些地址/邮编组合,以及 ii) 一种把查询结果放回数据框的初步做法:
library(ZillowR)
library(rvest)
library(dplyr)
library(DT)
# To load the addresses from a file instead of typing them in below, read the
# CSV once and build the same two-column data frame:
# raw <- read.csv("Addresses.csv", header = FALSE, colClasses = "character")
# data <- data.frame(street = raw$V1, city.state = raw$V2,
#                    stringsAsFactors = FALSE)

# Sample input: three street addresses that share one ZIP code.
# stringsAsFactors = FALSE keeps both columns as character vectors on R < 4.0,
# where data.frame() would otherwise convert them to factors.
data <- data.frame(
  street = c("77 Huntington Ave",
             "85 Prospect St",
             "219 Lincoln St"),
  city.state = rep("01752", 3),
  stringsAsFactors = FALSE
)
# Query Zillow's GetDeepSearchResults service once per row of `df` and bind
# the raw per-row results next to the input columns.
#
# Arguments:
#   df          data frame with a `street` column and a `city.state` column;
#               each row is sent to the API as one address / citystatezip pair.
#   address     unused; kept so existing calls keep working.
#   city.state  unused; kept so existing calls keep working.
#
# Returns `df` with the per-row API results column-bound onto it. A zero-row
# `df` is returned unchanged without contacting the API. If the number of
# result rows does not match `nrow(df)`, an error string is printed and
# returned instead.
get.zillowdata <- function(df, address, city.state) {
  stopifnot(is.data.frame(df))
  n_rows <- nrow(df)

  # Nothing to look up: skip the API entirely (rbind over an empty list would
  # yield NULL and break the row-count check below).
  if (n_rows == 0L) {
    message("No addresses to process")
    return(df)
  }

  # 'API KEY' is a placeholder for a real Zillow web service id.
  ZillowR::set_zillow_web_service_id('API KEY')

  # Fetch one row; any error or warning is reported and turned into NA so a
  # single bad address does not abort the whole run.
  fetch_row <- function(i) {
    # as.character() guards against factor columns (the data.frame() default
    # before R 4.0).
    street <- as.character(df$street[i])
    zip <- as.character(df$city.state[i])
    tryCatch(
      ZillowR::GetDeepSearchResults(
        address = street,
        citystatezip = zip,
        zws_id = getOption("ZillowR-zws_id"),
        url = "http://www.zillow.com/webservice/GetDeepSearchResults.htm"
      ),
      error = function(cond) {
        message(paste("No Data Available:", street, zip))
        NA
      },
      warning = function(cond) {
        message(paste("Zdata caused a warning:", street, zip))
        NA
      },
      # Progress report, emitted whether or not the lookup succeeded.
      finally = {
        message(paste("Processed Address:", street, zip))
        message(paste(i, "of", n_rows, 'processed'))
      }
    )
  }

  fetched <- lapply(seq_len(n_rows), fetch_row)

  # A row counts as missing when the call failed (NA placeholder) or the
  # result carries no `response` element. The bound result has no `amount`
  # column, so counting NAs there would always report zero.
  is_missing <- vapply(
    fetched,
    function(z) !is.list(z) || is.null(z$response),
    logical(1)
  )

  results <- do.call(rbind, fetched)

  if (nrow(results) != n_rows) {
    msg <- "Error: nrows(df) do not match nrows(zdata)"
    print(msg)
    return(msg)
  }

  results <- cbind(df, results)
  print(paste('Original data had', n_rows, 'rows. Returning a dataframe with', nrow(results),
              'rows. Returned dataframe has', sum(is_missing), 'missing zdata values.'))
  results
}
# Run the lookup for every row of `data`; the returned value is not assigned to a variable here.
get.zillowdata(data)
以及 iii) 一个解析器,用于解析通过 Zillow API 查询时返回的 XMLNode 响应,并从中提取指定的子值(zestimate、建筑面积、地块面积等,可按需指定):
library(ZillowR)
library(XML)
library(RCurl)
# 'API KEY' is a placeholder for a real Zillow web service id.
set_zillow_web_service_id('API KEY')

# Single lookup; the address and ZIP strings are placeholders.
# NOTE(review): the call goes through GetDeepSearchResults() but the url
# points at the GetSearchResults endpoint - confirm which one is intended.
output123 <- GetDeepSearchResults(address = 'STREET ADDRESS', citystatezip = '0ZIP CODE', zws_id = getOption("ZillowR-zws_id"), url = "http://www.zillow.com/webservice/GetSearchResults.htm")

# Fail with a clear message when there is no <results> node to parse, instead
# of letting xmlToList() fail on NULL.
results_node <- output123$response[["results"]]
if (is.null(results_node)) {
  # presumably output123$message$text holds Zillow's status text - TODO confirm
  stop("Zillow returned no results: ", output123$message$text, call. = FALSE)
}
results <- xmlToList(results_node)
# Extract one bound of a valuation range.
#
# Arguments:
#   x     matrix-like object whose "text" row holds the values and whose
#         columns are the bounds (e.g. "low", "high"); may be NULL.
#   hilo  name of the column to read, a single string.
#
# Returns the value stored at x["text", hilo], or NA when `x` has no such
# row/column pair (including when `x` is NULL or has no dimnames).
getValRange <- function(x, hilo) {
  dn <- dimnames(x)
  # Check the row and the column separately: testing against all dimnames at
  # once would accept a row name as a column and then fail on the lookup.
  has_cell <- length(dn) == 2L &&
    "text" %in% dn[[1L]] &&
    all(hilo %in% dn[[2L]])
  if (has_cell) {
    x["text", hilo][[1]]
  } else {
    NA
  }
}
# Flatten every column of `results` (one column per property) into a list of
# id, links, address and selected zestimate fields.
# NOTE(review): apply() needs `results` to have dimensions; confirm what
# xmlToList() returns when Zillow sends back a single result.
out <- apply(results, MARGIN = 2, function(property) {
  # Replace an absent field with NA so that unlist() does not drop it and
  # every row keeps the same set of columns.
  or_na <- function(value) if (length(value) == 0) NA else value

  zpid <- property$zpid
  links <- unlist(property$links)
  address <- unlist(property$address)
  z <- property$zestimate
  zestdf <- list(
    amount = if ("text" %in% names(z$amount)) z$amount$text else NA,
    lastupdated = or_na(z$"last-updated"),
    # Keep only the first element of valueChange, as the scalar ifelse() did.
    valueChange = if (length(z$valueChange) == 0) NA else z$valueChange[[1L]],
    valueLow = getValRange(z$valuationRange, "low"),
    valueHigh = getValRange(z$valuationRange, "high"),
    percentile = or_na(z$percentile)
  )
  list(id = or_na(zpid), links, address, zestdf)
})

# One row per property: flatten each nested list, then stack the rows.
data <- as.data.frame(do.call(rbind, lapply(out, unlist)),
                      row.names = seq_along(out))
但我在这一步卡住了。我应该如何把这些部分组合起来,使解析步骤接在 API 调用之后,并确保两者都能遍历完整的地址/邮编列表?我现在的代码没有特定的顺序,所以如果你愿意帮忙解决这个问题,请随意调整代码的位置;如果需要更多信息,我很乐意补充说明!
提前致谢。