1

我有一个看起来像这样的数据集

A    B    1960 1970 1980
x    a    1    2    3
x    b    1.1  2.1  NA
y    a    2    3    4
y    b    1    NA   1

我想按 B 列的取值对数据进行重塑:把各个年份列转成行,把 B 列的每个取值变成单独的一列,使其看起来像这样

A    year       a    b
x    1960    1   1.1    
x    1970    2   2.1
x    1980    3   NA    
y    1960    2   1
y    1970    3   NA
y    1980    4   1

我不知道该怎么做。我知道我可以使用 t() 或使用 tidyverse 中的 row_to_columns() 进行完全转置,但结果不是我想要的。原始数据大约有 60 列,并且 B 列中有 165 个不同的取值。

4

3 回答 3

2

你可以先用 pivot_longer(),再用 pivot_wider();不过把年份列重新命名为“B”可能不是个好主意:

library(dplyr)
library(tidyr)

# Reshape in two passes: first stack every column except A and B into
# name/value pairs, then spread the values of B out into their own columns.
df %>%
  pivot_longer(cols = -c(A, B), names_to = "name", values_to = "value") %>%
  pivot_wider(names_from = B, values_from = value) %>%
  # B was consumed by pivot_wider(), so the former column names take its label.
  rename(B = name)

    # A tibble: 6 x 4
  A     B         a     b
  <fct> <chr> <dbl> <dbl>
1 x     1960      1   1.1
2 x     1970      2   2.1
3 x     1980      3  NA  
4 y     1960      2   1  
5 y     1970      3  NA  
6 y     1980      4   1 

# Example input: A and B are factors, the remaining columns hold one
# measurement per year. check.names = FALSE keeps the numeric column names
# ("1960", ...) from being rewritten to syntactic names.
df <- data.frame(
  A = factor(c("x", "x", "y", "y"), levels = c("x", "y")),
  B = factor(c("a", "b", "a", "b"), levels = c("a", "b")),
  `1960` = c(1, 1.1, 2, 1),
  `1970` = c(2, 2.1, 3, NA),
  `1980` = c(3L, NA, 4L, 1L),
  check.names = FALSE
)
于 2020-05-04T14:21:25.980 回答
0
library(data.table)

# Parse the whitespace-separated example table from an inline string.
dt <- fread('A    B    1960 1970 1980
x    a    1    2    3
x    b    1.1  2.1  NA
y    a    2    3    4
y    b    1    NA   1')

# Promote the first parsed row to column names and drop it from the data.
# NOTE(review): presumably fread() did not treat the first line as a header
# because it contains numeric-looking fields -- confirm.
setnames(dt, as.character(dt[1, ]))
dt <- dt[-1, ]

# Coerce the three year columns (positions 3 to 5) to numeric in place.
dt[, (3:5) := lapply(.SD, as.numeric), .SDcols = 3:5]

# Stack the year columns into long form, then spread the values of B into
# separate columns, keeping every other column as a row identifier.
dcast(
  melt(dt, measure.vars = 3:5),
  ... ~ B,
  value.var = "value"
)
#>    A variable a   b
#> 1: x     1960 1 1.1
#> 2: x     1970 2 2.1
#> 3: x     1980 3  NA
#> 4: y     1960 2 1.0
#> 5: y     1970 3  NA
#> 6: y     1980 4 1.0

由 reprex 包(v0.3.0)于 2020 年 5 月 5 日创建

于 2020-05-04T14:01:00.817 回答
0

基础 R 解决方案:

# Base R reshape in two steps: stack the year columns into long format, then
# spread the values of B back out into one column per value.

# Every column other than the identifiers A and B is treated as a year column.
year_cols <- names(df)[!names(df) %in% c("A", "B")]

# Long format: one row per (A, B, year) with the measurement in `value`.
# ids = NULL is kept from the original call; new.row.names supplies plain
# sequential row names (one per output row) in place of the generated ones.
long_df <- reshape(
  df,
  direction = "long",
  varying = year_cols,
  v.names = "value",
  timevar = "year",
  times = year_cols,
  ids = NULL,
  new.row.names = seq_len(length(year_cols) * nrow(df))
)

# Wide format: one row per (A, year) and one value column per value of B.
# The column names are reset so the result reads A, year, <values of B>;
# as.character() keeps the labels (not the integer codes) when B is a factor.
wide_df <- setNames(
  reshape(
    long_df,
    direction = "wide",
    idvar = c("A", "year"),
    timevar = "B"
  ),
  c("A", "year", as.character(unique(df$B)))
)

数据:

# Example input with character identifier columns and one measurement column
# per year. check.names = FALSE preserves the numeric column names, and the
# row names 2:5 are kept as in the original dump.
df <- data.frame(
  A = c("x", "x", "y", "y"),
  B = c("a", "b", "a", "b"),
  `1960` = c(1, 1.1, 2, 1),
  `1970` = c(2, 2.1, 3, NA),
  `1980` = c(3L, NA, 4L, 1L),
  row.names = 2:5,
  check.names = FALSE,
  stringsAsFactors = FALSE
)
于 2020-05-04T14:20:18.340 回答