2

我有如下数据:

df <- structure(list(x1 = c(0.544341260178568, 0.412555423655238, -0.013600925280521, 
-0.947831642260442, -0.705819557090343, -0.440052278478676, 0.583360907624305, 
-0.548217106316072, -0.381271093402877, 1.66078031000975), x2 = c(-2.17595468838955, 
3.73045998213455, 7.88166053118859, 0.295257601073637, -0.503260811313588, 
0.118118179398699, 3.77037347523743, 2.92758197923041, 3.40618904087335, 
1.45012335878481), x3 = c(14.1085074738418, 9.46630939737492, 
7.30026032988652, 10.1473062197382, 11.0336174184083, 7.09744336163716, 
16.6871358354018, 13.5030856142587, 14.8384334167838, 1.82381360524456
), x4 = c(-2.78166486821977, -3.14368874900826, -3.70425316743753, 
-4.34268218961615, -3.03557313652054, -2.74059520574829, -4.10826186695405, 
-1.97243713944283, -3.88803755426516, -2.56315085425652), x5 = c(-0.279614449281486, 
-0.480466773938402, -1.43353886424161, 0.286937906279445, 0.701999608919316, 
0.591932833840325, 0.994266002713824, 1.03424778687263, 0.462618513817936, 
-3.08491622131441)), row.names = c(NA, -10L), class = c("tbl_df", 
"tbl", "data.frame"))

现在我想创建每对列的总和、乘积和差异的列。有了总和就很容易了:

combn(df, 2, function(x) {
  x %>% transmute(!!paste(names(.), collapse = '+') := rowSums(.))}, 
simplify = FALSE)

但现在我需要计算产品和差异。由于没有等同rowSums于差异或产品,我的方法在这里失败了。我在想类似的东西

combn(df, 2, function(x) {
  x %>% transmute(!!paste(names(.), collapse = '-') := apply(., 1, `-`)}, 
simplify = FALSE)

但它不起作用。

4

1 回答 1

1

这是一种“整洁”的方法。它依赖于将数据转换为长格式,其中原始 df 中的每一行都被分配一个 id 并收集列。

这允许我们将数据框与自身进行完全连接。这样您就可以获得列的所有成对组合。一旦采用这种格式,应用总和、乘积和差异就变得非常容易。

更新:重新格式化输出

library(tidyverse)
df <-
  structure(
    list(
      x1 = c(
        0.544341260178568,
        0.412555423655238,
        -0.013600925280521,-0.947831642260442,
        -0.705819557090343,
        -0.440052278478676,
        0.583360907624305,-0.548217106316072,
        -0.381271093402877,
        1.66078031000975
      ),
      x2 = c(
        -2.17595468838955,
        3.73045998213455,
        7.88166053118859,
        0.295257601073637,
        -0.503260811313588,
        0.118118179398699,
        3.77037347523743,
        2.92758197923041,
        3.40618904087335,
        1.45012335878481
      ),
      x3 = c(
        14.1085074738418,
        9.46630939737492,
        7.30026032988652,
        10.1473062197382,
        11.0336174184083,
        7.09744336163716,
        16.6871358354018,
        13.5030856142587,
        14.8384334167838,
        1.82381360524456
      ),
      x4 = c(
        -2.78166486821977,
        -3.14368874900826,
        -3.70425316743753,-4.34268218961615,
        -3.03557313652054,
        -2.74059520574829,
        -4.10826186695405,-1.97243713944283,
        -3.88803755426516,
        -2.56315085425652
      ),
      x5 = c(
        -0.279614449281486,-0.480466773938402,
        -1.43353886424161,
        0.286937906279445,
        0.701999608919316,
        0.591932833840325,
        0.994266002713824,
        1.03424778687263,
        0.462618513817936,-3.08491622131441
      )
    ),
    row.names = c(NA,-10L),
    class = c("tbl_df",
              "tbl", "data.frame")
  )

# Add an id for each observation and covert to long format
df_wrangled <- df %>%
  mutate(id = 1:n()) %>%
  gather(col, val, -id)

pairs <- full_join(df_wrangled, df_wrangled, by = "id") %>%
  mutate(
    sum = val.x + val.y,
    prod = val.x * val.y,
    diff = val.x - val.y
  )

head(pairs)
#> # A tibble: 6 x 8
#>      id col.x val.x col.y   val.y     sum   prod    diff
#>   <int> <chr> <dbl> <chr>   <dbl>   <dbl>  <dbl>   <dbl>
#> 1     1 x1    0.544 x1      0.544   1.09   0.296   0    
#> 2     1 x1    0.544 x2     -2.18   -1.63  -1.18    2.72 
#> 3     1 x1    0.544 x3     14.1    14.7    7.68  -13.6  
#> 4     1 x1    0.544 x4     -2.78   -2.24  -1.51    3.33 
#> 5     1 x1    0.544 x5     -0.280   0.265 -0.152   0.824
#> 6     2 x1    0.413 x1      0.413   0.825  0.170   0

pairs_wrangled <- pairs %>%
  filter(col.x != col.y) %>%
  gather(operation, val, sum, prod, diff) %>%
  mutate(
    label = paste0(
      col.x,
      case_when(operation == "sum" ~ "+", operation == "diff" ~ "-", operation == "prod" ~ "*"),
      col.y
    )
  ) %>%
  select(id, label, val) %>%
  spread(label, val)

head(pairs_wrangled)
#> # A tibble: 6 x 61
#>      id `x1-x2` `x1-x3` `x1-x4` `x1-x5` `x1*x2` `x1*x3` `x1*x4` `x1*x5`
#>   <int>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
#> 1     1   2.72   -13.6     3.33   0.824 -1.18    7.68   -1.51   -0.152 
#> 2     2  -3.32    -9.05    3.56   0.893  1.54    3.91   -1.30   -0.198 
#> 3     3  -7.90    -7.31    3.69   1.42  -0.107  -0.0993  0.0504  0.0195
#> 4     4  -1.24   -11.1     3.39  -1.23  -0.280  -9.62    4.12   -0.272 
#> 5     5  -0.203  -11.7     2.33  -1.41   0.355  -7.79    2.14   -0.495 
#> 6     6  -0.558   -7.54    2.30  -1.03  -0.0520 -3.12    1.21   -0.260 
#> # … with 52 more variables: `x1+x2` <dbl>, `x1+x3` <dbl>, `x1+x4` <dbl>,
#> #   `x1+x5` <dbl>, `x2-x1` <dbl>, `x2-x3` <dbl>, `x2-x4` <dbl>,
#> #   `x2-x5` <dbl>, `x2*x1` <dbl>, `x2*x3` <dbl>, `x2*x4` <dbl>,
#> #   `x2*x5` <dbl>, `x2+x1` <dbl>, `x2+x3` <dbl>, `x2+x4` <dbl>,
#> #   `x2+x5` <dbl>, `x3-x1` <dbl>, `x3-x2` <dbl>, `x3-x4` <dbl>,
#> #   `x3-x5` <dbl>, `x3*x1` <dbl>, `x3*x2` <dbl>, `x3*x4` <dbl>,
#> #   `x3*x5` <dbl>, `x3+x1` <dbl>, `x3+x2` <dbl>, `x3+x4` <dbl>,
#> #   `x3+x5` <dbl>, `x4-x1` <dbl>, `x4-x2` <dbl>, `x4-x3` <dbl>,
#> #   `x4-x5` <dbl>, `x4*x1` <dbl>, `x4*x2` <dbl>, `x4*x3` <dbl>,
#> #   `x4*x5` <dbl>, `x4+x1` <dbl>, `x4+x2` <dbl>, `x4+x3` <dbl>,
#> #   `x4+x5` <dbl>, `x5-x1` <dbl>, `x5-x2` <dbl>, `x5-x3` <dbl>,
#> #   `x5-x4` <dbl>, `x5*x1` <dbl>, `x5*x2` <dbl>, `x5*x3` <dbl>,
#> #   `x5*x4` <dbl>, `x5+x1` <dbl>, `x5+x2` <dbl>, `x5+x3` <dbl>,
#> #   `x5+x4` <dbl>

reprex 包(v0.2.1)于 2019 年 4 月 2 日创建

于 2019-04-01T14:51:00.673 回答