1

我有一个来自 googleway 包的地理编码输出列表(ggmap 地理编码不适用于 API 密钥)存储在一个列表中,其中每个元素都包含两个列表。但是,对于没有找到结果的地址,列表的结构是不同的,这使我将列表转换为数据框的尝试令人沮丧。

“非缺失”结果(使用 dput() 创建)的结构如下(忽略乱码,RStudio 无法在控制台中正确显示西里尔字母):

# dput() output of one successful geocode response.
# Top level: a list with two elements, `results` and `status` ("OK").
# `results` is a one-row data frame whose columns are address_components
# (list-column holding an 8-row data frame), formatted_address, geometry
# (nested data frame: location, location_type, viewport), place_id and types.
# The address strings are Cyrillic text shown mis-encoded, as noted above.
structure(list(results = structure(list(address_components = list(
    structure(list(long_name = c("11À", "óëèöà Ãîãîëÿ", "Çåëåíîãðàäñêèé àäìèíèñòðàòèâíûé îêðóã", 
    "Çåëåíîãðàä", "Ìîñêâà", "Ìîñêâà", "Ðîññèÿ", "124575"), short_name = c("11À", 
    "óë. Ãîãîëÿ", "Çåëåíîãðàäñêèé àäìèíèñòðàòèâíûé îêðóã", "Çåëåíîãðàä", 
    "Ìîñêâà", "Ìîñêâà", "RU", "124575"), types = list("street_number", 
        "route", c("political", "sublocality", "sublocality_level_1"
        ), c("locality", "political"), c("administrative_area_level_2", 
        "political"), c("administrative_area_level_1", "political"
        ), c("country", "political"), "postal_code")), .Names = c("long_name", 
    "short_name", "types"), class = "data.frame", row.names = c(NA, 
    8L))), formatted_address = "óë. Ãîãîëÿ, 11À, Çåëåíîãðàä, Ìîñêâà, Ðîññèÿ, 124575", 
    geometry = structure(list(location = structure(list(lat = 55.987567, 
        lng = 37.17152), .Names = c("lat", "lng"), class = "data.frame", row.names = 1L), 
        location_type = "ROOFTOP", viewport = structure(list(
            northeast = structure(list(lat = 55.9889159802915, 
                lng = 37.1728689802915), .Names = c("lat", "lng"
            ), class = "data.frame", row.names = 1L), southwest = structure(list(
                lat = 55.9862180197085, lng = 37.1701710197085), .Names = c("lat", 
            "lng"), class = "data.frame", row.names = 1L)), .Names = c("northeast", 
        "southwest"), class = "data.frame", row.names = 1L)), .Names = c("location", 
    "location_type", "viewport"), class = "data.frame", row.names = 1L), 
    place_id = "ChIJzXSgUeQUtUYREIohzQOG--A", types = list("street_address")), .Names = c("address_components", 
"formatted_address", "geometry", "place_id", "types"), class = "data.frame", row.names = 1L), 
    status = "OK"), .Names = c("results", "status"))

“缺失”结果的结构如下:

# dput() output of a response with no match: `results` is an empty list
# (not a data frame) and `status` is "ZERO_RESULTS".
structure(list(results = list(), status = "ZERO_RESULTS"), .Names = c("results", 
"status"))

基本上,问题似乎在于:当函数没有从 Google API 获得结果时,它会创建一个空列表,而不是一个与“非缺失”列表具有相同元素、只是取值为 NA 的列表。当您将这些列表传递给 data.frame() 时,就会产生错误,因为它无法凭空创建数据框。

在将结果子列表提取到自己的列表中后,我在这里尝试了解决方案:Converting nested list (unequal length) to data frame。它应该填写 NA 并创建等长列表,从而可以转换为数据框:

# Asker's attempt, kept verbatim because it reproduces the error quoted below.

# Pull the `results` element out of each of the first 100 geocode responses
# into a preallocated list of the same length.
first100geocode.results.l <- vector("list", 100)
for(i in 1:length(first100geocode.results.l)){
     first100geocode.results.l[[i]] <- first100geocode[[i]]$results
}

# Length of each extracted result; an empty `results` list has length 0.
indx <- sapply(first100geocode.results.l, length)
# Pad every result to the longest length with `length<-`, row-bind them and
# convert to a data frame. This is the statement reported to raise the
# rbind() error.
res <- as.data.frame(do.call(rbind,lapply(first100geocode.results.l, 
`length<-`, max(indx))))
# Name the columns after the element names of the longest result.
colnames(res) <- names(first100geocode.results.l[[which.max(indx)]])

但是,创建“res”对象的那一行会引发错误:`Error in rbind(deparse.level, ...) : invalid list argument: all variables should have the same length`。

是否有其他方法可以为缺失的结果填写 NA,以便我可以将其转换为数据框?

(注意:我不能简单地删除丢失的结果,我需要将其绑定回原始地址列表)。

4

1 回答 1

1

我们让 jsonlite::flatten 完成大部分工作:

将您的两个示例结果放在一个列表中(希望这忠实于您的实际数据结构):

# Minimal stand-in for the real geocoding output: a list of two responses,
# each a list with `results` and `status`.
first100geocode <- list(
  # Element 1: successful response (status "OK"); `results` is a one-row data
  # frame with nested data frames (geometry) and list-columns
  # (address_components, types).
  structure(list(results = structure(list(address_components = list(
    structure(list(long_name = c(
      "11À", "óëèöà Ãîãîëÿ", "Çåëåíîãðàäñêèé àäìèíèñòðàòèâíûé îêðóã", 
      "Çåëåíîãðàä", "Ìîñêâà", "Ìîñêâà", "Ðîññèÿ", "124575"), short_name = c(
        "11À", "óë. Ãîãîëÿ", "Çåëåíîãðàäñêèé àäìèíèñòðàòèâíûé îêðóã", "Çåëåíîãðàä", 
        "Ìîñêâà", "Ìîñêâà", "RU", "124575"), types = list(
          "street_number", "route", c("political", "sublocality", "sublocality_level_1"
          ), c("locality", "political"), c(
            "administrative_area_level_2", 
            "political"), c("administrative_area_level_1", "political"
            ), c("country", "political"), "postal_code")), .Names = c(
              "long_name", 
              "short_name", "types"), class = "data.frame", row.names = c(NA, 8L))), 
    formatted_address = "óë. Ãîãîëÿ, 11À, Çåëåíîãðàä, Ìîñêâà, Ðîññèÿ, 124575", 
    geometry = structure(list(location = structure(
      list(lat = 55.987567, lng = 37.17152), 
      .Names = c("lat", "lng"), class = "data.frame", row.names = 1L), 
      location_type = "ROOFTOP", viewport = structure(list(
        northeast = structure(list(
          lat = 55.9889159802915, lng = 37.1728689802915), .Names = c("lat", "lng"
          ), class = "data.frame", row.names = 1L), southwest = structure(list(
            lat = 55.9862180197085, lng = 37.1701710197085), .Names = c("lat", "lng"), 
            class = "data.frame", row.names = 1L)), .Names = c("northeast", "southwest"), 
        class = "data.frame", row.names = 1L)), 
      .Names = c("location", "location_type", "viewport"), 
      class = "data.frame", row.names = 1L), 
    place_id = "ChIJzXSgUeQUtUYREIohzQOG--A", types = list("street_address")), 
    .Names = c("address_components", 
               "formatted_address", "geometry", "place_id", "types"), 
    class = "data.frame", row.names = 1L), 
    status = "OK"), .Names = c("results", "status")),
  # Element 2: response with no match; `results` is an empty list.
  structure(list(results = list(), status = "ZERO_RESULTS"), 
            .Names = c("results", "status"))
)

执行实际的展平(并过滤掉 address_components 和 types:这两列处理起来比较棘手,而且你并不需要它们):

# Flatten the `results` data frame of one geocode response into a data frame
# of plain columns.
#
# df: a data frame, e.g. the `results` element of a geocode response.
#
# Returns a data frame in which nested data-frame columns have been expanded
# by jsonlite::flatten() (dot-separated names such as `geometry.location.lat`)
# and the columns named `address_components` and `types` have been removed.
flatten_googleway <- function(df) {
  res <- jsonlite::flatten(df)
  keep <- !names(res) %in% c("address_components", "types")
  # drop = FALSE: always return a data frame, even when a single column
  # survives the filter; otherwise `[` would collapse it to a bare vector.
  res[, keep, drop = FALSE]
}

准备一个用于“缺失”结果的模板数据框,并将其应用于这些缺失的结果:

# Take the column layout from the first response that actually has results.
# The first address may itself be a ZERO_RESULTS response, whose empty
# `results` list cannot be flattened.
first_hit <- Find(function(x) length(x$results) > 0, first100geocode)
if (is.null(first_hit)) {
  stop("no geocode response contains results; cannot build a template",
       call. = FALSE)
}
# Zero-row data frame that keeps only the flattened column names and types.
template_res <- flatten_googleway(first_hit$results)[FALSE, ]
# One data frame per response, stacked in input order so the rows can be bound
# back to the original addresses. Indexing row 1 of the zero-row template
# yields a single all-NA row, which stands in for a response without results.
do.call(rbind, lapply(first100geocode, function(x) {
  if (length(x$results) == 0) template_res[1, ] else flatten_googleway(x$results)
}))
#                                      formatted_address                    place_id
# 1  óë. Ãîãîëÿ, 11À, Çåëåíîãðàä, Ìîñêâà, Ðîññèÿ, 124575 ChIJzXSgUeQUtUYREIohzQOG--A
# NA                                                <NA>                        <NA>
#    geometry.location_type geometry.location.lat geometry.location.lng
# 1                 ROOFTOP              55.98757              37.17152
# NA                   <NA>                    NA                    NA
#    geometry.viewport.northeast.lat geometry.viewport.northeast.lng
# 1                         55.98892                        37.17287
# NA                              NA                              NA
#    geometry.viewport.southwest.lat geometry.viewport.southwest.lng
# 1                         55.98622                        37.17017
# NA                              NA                              NA
于 2017-06-09T19:33:50.177 回答