这对我来说是一个熟悉 Zillow API 的有趣机会。我参照“如何将 XML 解析为 R 数据框”中的做法，先把响应转换为列表，这样便于检查。比较繁琐的部分是通过检查列表来弄清楚数据的结构，尤其是因为每个房产都可能缺失部分数据。这就是我编写 getValRange 函数来处理 Zestimate 数据解析的原因。
# Convert the "results" node of the API response into a nested R list so that
# its structure can be inspected and traversed by the code below.
# NOTE(review): assumes `xml` already holds a fetched response object and that
# xmlToList() comes from the XML package -- confirm against the calling code.
results <- xmlToList(xml$response[["results"]])
# Extract the "text" entry for one bound of a valuation range.
#
# Args:
#   x:    A matrix-like object with dimnames whose rows include "text" and
#         whose columns are named after the bounds (e.g. "low", "high").
#         May be NULL or lack dimnames when the range is missing.
#         NOTE(review): presumably the `valuationRange` element produced by
#         xmlToList() -- confirm the shape against a real response.
#   hilo: A single column name selecting the bound, e.g. "low" or "high".
#
# Returns:
#   The first element stored at x["text", hilo], or NA when `x` has no "text"
#   row or no `hilo` column.
getValRange <- function(x, hilo) {
  # Check rows and columns separately: pooling all dimnames would let a name
  # that only exists on the other axis pass the test and then fail with
  # "subscript out of bounds" during indexing.
  has_cell <- "text" %in% rownames(x) && hilo %in% colnames(x)
  if (!has_cell) {
    return(NA)
  }
  x["text", hilo][[1]]
}
# Build one record per property by applying the extractor over the columns of
# `results` (MARGIN = 2, spelled out instead of relying on partial argument
# matching of `MAR`).
# NOTE(review): this assumes xmlToList() produced a matrix-like structure with
# one column per property -- confirm for responses with a single result.
#
# Each record is a list holding the zpid (named `id`), the flattened links,
# the flattened address and the Zestimate fields.
out <- apply(results, MARGIN = 2, function(property) {
  zpid <- property$zpid
  links <- unlist(property$links)
  address <- unlist(property$address)
  z <- property$zestimate
  # The scalar ifelse() calls are kept deliberately: with a length-one test,
  # ifelse() yields a length-one result, so only the first element of
  # `z$valueChange` is retained. Switching to if/else would pass the whole
  # element through and change the flattened columns.
  # `lastupdated` and `percentile` are taken as-is, with no NA fallback.
  zestdf <- list(
    amount = ifelse("text" %in% names(z$amount), z$amount$text, NA),
    lastupdated = z$"last-updated",
    valueChange = ifelse(length(z$valueChange) == 0, NA, z$valueChange),
    valueLow = getValRange(z$valuationRange, "low"),
    valueHigh = getValRange(z$valuationRange, "high"),
    percentile = z$percentile
  )
  list(id = zpid, links, address, zestdf)
})
# Flatten every per-property record into a vector, stack the vectors row-wise
# and expose the result as a data frame with sequential integer row names.
# local() keeps the intermediate values out of the enclosing environment.
data <- local({
  flat_records <- lapply(out, unlist)
  stacked <- do.call(rbind, flat_records)
  as.data.frame(stacked, row.names = seq_along(out))
})
样本输出:
> data[,c("id", "street", "zipcode", "amount")]
id street zipcode amount
1 2098001736 120 E 7th St APT 5A 10009 2321224
2 2101731413 120 E 7th St APT 1B 10009 2548390
3 2131798322 120 E 7th St APT 5B 10009 2408860
4 2126480070 120 E 7th St APT 1A 10009 2643454
5 2125360245 120 E 7th St APT 2A 10009 1257602
6 2118428451 120 E 7th St APT 4A 10009 <NA>
7 2125491284 120 E 7th St FRNT 1 10009 <NA>
8 2126626856 120 E 7th St APT 2B 10009 2520587
9 2131542942 120 E 7th St APT 4B 10009 1257676