2

我正在尝试将 JSON 正确转换为具有 3 列的 data.frame。

这是我的数据的简化

# Simplified stand-in for the real data: a JSON object keyed by bag name,
# where each value is an array of [fruit, quantity] pairs.
my_data <- '{"Bag 1": [["bananas", 1], ["oranges", 2]],"Bag 2": [["bananas", 3], ["oranges", 4], ["apples", 5]]}'

library(jsonlite)

# Parse the JSON string. Note that my_data is overwritten: from here on it is
# the parsed result (a named list with one character matrix per bag, as the
# printed output below shows), no longer the JSON text.
my_data <- fromJSON(my_data)

> my_data
$`Bag 1`
     [,1]      [,2]
[1,] "bananas" "1" 
[2,] "oranges" "2" 

$`Bag 2`
     [,1]      [,2]
[1,] "bananas" "3" 
[2,] "oranges" "4" 
[3,] "apples"  "5" 

我尝试将其转换为 data.frame

# This fails with "arguments imply differing number of rows: 2, 3" because
# the two bags' matrices have different numbers of rows (2 and 3).
my_data <- as.data.frame(my_data)

> my_data <- as.data.frame(my_data)
Error in (function (..., row.names = NULL, check.rows = FALSE, check.names = TRUE,  : 
  arguments imply differing number of rows: 2, 3

这是我创建 data.frame 的解决方案

# Attempted solution: build the data frame by hand.
# do.call(c, my_data) concatenates every cell of each bag's matrix into one
# vector, so fruit names and quantities end up mixed in the single `fruit`
# column (see the printed result below) instead of in separate columns.
my_data <- data.frame(fruit = do.call(c, my_data),
  # Repeat each bag's index once per matrix cell: length() of a matrix counts
  # all cells, not rows, hence 4 rows for bag 1 and 6 rows for bag 2.
  bag_number = rep(1:length(my_data), 
  sapply(my_data, length)))

# Print the result.
my_data

> my_data
         fruit bag_number
Bag 11 bananas          1
Bag 12 oranges          1
Bag 13       1          1
Bag 14       2          1
Bag 21 bananas          2
Bag 22 oranges          2
Bag 23  apples          2
Bag 24       3          2
Bag 25       4          2
Bag 26       5          2

但我的想法是得到如下形式的结果,这样在我想使用 ggplot2 等其他包时,就不必再写类似 my_data[a:b, 1] 这样的代码。

fruit   | quantity | bag_number
oranges | 2        | 1
bananas | 1        | 1
oranges | 4        | 2
bananas | 3        | 2
apples  | 5        | 2
4

2 回答 2

2
library(plyr)

# Parse the JSON text with jsonlite (the rjson package parses this differently
# than jsonlite does). `my_data` must still be the original JSON string here.
parsed_bags <- jsonlite::fromJSON(my_data)

# Row-bind the per-bag matrices into one data frame; ldply() stores the list
# names ("Bag 1", "Bag 2") in a leading id column.
df <- ldply(parsed_bags, rbind)

# Give the three columns meaningful names.
names(df) <- c("bag_number", "fruit", "quantity")

  bag_number   fruit quantity
1      Bag 1 bananas        1
2      Bag 1 oranges        2
3      Bag 2 bananas        3
4      Bag 2 oranges        4
5      Bag 2  apples        5
于 2016-10-02T16:20:13.613 回答
2

purrr / tidyverse version. You also get proper types with this and get rid of the "Bag " prefix:

library(jsonlite)
library(purrr)
library(readr)

# Parse the JSON, turn each bag into a data frame and stack them; `.id` keeps
# the bag name in a leading "bag" column. type_convert() then re-guesses the
# column types from the character data before the columns are renamed.
# `my_data` must be the original JSON string here.
df <- fromJSON(my_data, flatten = TRUE) %>%
  map_df(function(bag) as.data.frame(bag, stringsAsFactors = FALSE),
         .id = "bag") %>%
  type_convert() %>%
  setNames(c("bag_number", "fruit", "quantity"))

# Strip the "Bag " prefix. gsub() returns character, so bag_number remains a
# character column ("1", "2") after this step.
df$bag_number <- gsub("Bag ", "", df$bag_number)
于 2016-10-02T16:50:02.123 回答