0
library(jsonlite)
# Collect every file under the working directory (searched recursively) whose
# name matches the regular expression "data.json"; paths keep their directory
# prefix.
paths <- list.files(pattern = "data.json", recursive = TRUE, full.names = TRUE)

# Column labels for the CSV output, one per field extracted by trimData(),
# listed in the same order.
colNames <- c(
  "BillType", "Congress", "IntroducedAt", "OfficialTitle",
  "Number", "Status", "SubjectsTopTerm", "UpdatedAt"
)
# Reduce one parsed record to the eight fields needed for the CSV output.
#
# Args:
#   x: a named list (or data frame) holding the parsed fields; presumably the
#      result of fromJSON() for a single data.json file -- verify against the
#      caller.
#
# Returns:
#   A data frame with one column per field, named after the source field
#   (bill_type, congress, introduced_at, official_title, number, status,
#   subjects_top_term, updated_at). A field that is absent from `x` (NULL or
#   zero-length) is returned as NA.
trimData <- function(x) {
  fields <- c("bill_type", "congress", "introduced_at", "official_title",
              "number", "status", "subjects_top_term", "updated_at")
  values <- lapply(fields, function(field) {
    # `[[` matches names exactly, unlike `$`, which partial-matches on lists.
    value <- x[[field]]
    # A missing field would give data.frame() a zero-row column and trigger
    # "arguments imply differing number of rows"; substitute NA instead.
    if (length(value) == 0L) NA else value
  })
  # Fixed names keep the columns identical across records so that the
  # per-record results can be combined with rbind().
  names(values) <- fields
  as.data.frame(values, stringsAsFactors = FALSE)
}
# Parse each JSON file into its own record and keep the records in a list.
# rbind-ing the raw parse results first (as before) produced one combined
# structure, and lapply() then walked its individual cells rather than whole
# records, so trimData() received objects lacking most of the expected fields.
rawData <- lapply(paths, function(x) fromJSON(txt = x, simplifyDataFrame = TRUE))

# Trim every record, label its columns, then stack the rows. Assigning the
# names per record guarantees that rbind() sees identical column names.
# (The previous `colnames(test) <- colNames` referred to an undefined object.)
prunedData <- do.call(
    "rbind",
    lapply(rawData, function(x) setNames(trimData(x), colNames))
)
write.csv(prunedData, "test3.csv")

我编写此脚本的目标是把已转换为数据框的 JSON 数据精简成一个更小的数据框,以便输出为 CSV。rawData 变量最终大约有 100 列。在 RStudio 中执行此脚本时,出现以下错误:

> prunedData <- do.call("rbind", lapply(rawData, function(x) trimData(x)))
Error in data.frame(NULL, NULL, NULL, NULL, NULL, c(NA, "PASS_OVER:HOUSE",  : 
  arguments imply differing number of rows: 0, 4

我并不精通 R 和 SQL 这类声明式语言,所以,如果你能用通俗易懂的方式解释一下,那将对我大有帮助!

4

1 回答 1

0

可以考虑这种使用嵌套 do.call() 和 lapply() 调用、把 JSON 转换为数据框的方法。外层 do.call 跨文件绑定数据,内层 do.call 绑定每个文件内的 JSON 数据。如果您的 JSON 文件是格式化的多行输出、而不是全部压缩在一行上,paste() 会把 readLines() 读入的各行折叠成单个字符串,从而去掉行尾换行符。

library(jsonlite)

paths <- list.files(pattern = "data.json", full.names = TRUE, recursive = TRUE)

# Output labels and the JSON field names they correspond to, paired by position.
colNames <- c("BillType", "Congress", "IntroducedAt", "OfficialTitle",
              "Number", "Status", "SubjectsTopTerm", "UpdatedAt")
fields <- c("bill_type", "congress", "introduced_at", "official_title",
            "number", "status", "subjects_top_term", "updated_at")

# For each file: read all lines, collapse them into a single string (so that
# pretty-printed, multi-line JSON parses as one document), parse it, and bind
# the result; the outer do.call() then binds the per-file results together.
# NOTE(review): rbind() needs every file to yield the same set of columns --
# confirm that all data.json files share the same top-level fields.
rawData <- do.call(rbind,
                   lapply(paths,
                          function(x)
                          do.call(rbind,
                                  lapply(paste(readLines(x, warn = FALSE),
                                               collapse = ""),
                                         jsonlite::fromJSON)
                          )
                   )
           )

# TRIM TO NEEDED COLUMNS
# Select by the source field names (the object is `rawData`; R is
# case-sensitive, so `rawdata` was undefined), then apply the output labels.
# The `[ , cols, drop = FALSE]` form works for both a matrix and a data frame.
prunedData <- rawData[, fields, drop = FALSE]
colnames(prunedData) <- colNames
于 2016-02-15T02:09:41.577 回答