0

在@SchaunW 的帮助下,我能够弄清楚如何将 XML 解析为 R 数据框

但是在我的数据中,我需要解析多个 XML 数据,我的代码如下,代码在前几个站点运行良好,但如果运行整个 500 个站点,则会弹出错误:

 "Error in temps.i[sapply(temps.i, function(x) any(unlist(x) == "hourly"))] : 
  invalid subscript type 'list'":

请帮忙,谢谢!

data.all = data.frame() 
lat = data.0$lat 
lon = data.0$lon 
head(data.0)
station_id  LocID   lat    lon
   10001    11694  32.82  -86.65
   10079   089214  27.65  -80.23 (node 'temperature' not exit in XML)

data.loop <- lapply(1:length(data.0$station_id), function(i) {
urls.i <- paste("http://forecast.weather.gov/MapClick.php?lat=",lat[i],"&lon=",lon[i],"&FcstType=digitalDWML",sep="")
data.i <- xmlParse(urls.i)
xml_data.i <- xmlToList(data.i)
location.i <- as.list(xml_data.i[["data"]][["location"]][["point"]])
start_time.i <- unlist(xml_data.i[["data"]][["time-layout"]][names(xml_data.i[["data"]][["time-layout"]]) == "start-valid-time"])
temps.i <- xml_data.i[["data"]][["parameters"]]
temps.i <- temps.i[names(temps.i) == "temperature"]
temps.i <- temps.i[sapply(temps.i, function(x) any(unlist(x) == "hourly"))]
temps.i <- unlist(temps.i[[1]][sapply(temps.i, names) == "value"])
data1.i <- data.frame(as.list(location.i), "hh" = start_time.i, "Temp" = temps.i)
 })

data.all <- as.data.frame(do.call(rbind, data.loop))
4

1 回答 1

0

我尝试重新格式化以简化您的代码并添加测试是否有温度向量:

data.0 <- read.table(text='station_id  LocID   lat    lon
                 10001    11694  32.82  -86.65
                 10079   089214  27.65  -80.23',header=TRUE)
library(XML)

res <- apply(data.0,1, function(row) {
  tryCatch({
  url <- paste("http://forecast.weather.gov/MapClick.php?lat=",
               row['lat'],"&lon=",
               row['lon'],"&FcstType=digitalDWML",sep="")
    doc <- xmlParse(url)
    data <- xmlToList(doc)$data
    location <- data$location$point
    start_valid_time <- data$`time-layout`$`start-valid-time`
    if( "temperature" %in% names(data$parameters)){
      templ <- data$parameters$temperature
      temps <- as.numeric(unlist(lapply(seq_along(templ),
                                        function(x)templ[x]$value)))
      }else
        temps <- NA
        data.frame(as.list(location), hh = start_valid_time, Temps = temps)
    },error = function(e)data.frame(row['lat'],row['lon'],temps = NA))

  })

do.call(rbind,res)
于 2013-06-25T19:21:40.343 回答