这是一种“整洁”的方法。它依赖于将数据转换为长格式,其中原始 df 中的每一行都被分配一个 id 并收集列。
这允许我们将数据框与自身进行完全连接。这样您就可以获得列的所有成对组合。一旦采用这种格式,应用总和、乘积和差异就变得非常容易。
更新:重新格式化输出
library(tidyverse)
df <-
structure(
list(
x1 = c(
0.544341260178568,
0.412555423655238,
-0.013600925280521,-0.947831642260442,
-0.705819557090343,
-0.440052278478676,
0.583360907624305,-0.548217106316072,
-0.381271093402877,
1.66078031000975
),
x2 = c(
-2.17595468838955,
3.73045998213455,
7.88166053118859,
0.295257601073637,
-0.503260811313588,
0.118118179398699,
3.77037347523743,
2.92758197923041,
3.40618904087335,
1.45012335878481
),
x3 = c(
14.1085074738418,
9.46630939737492,
7.30026032988652,
10.1473062197382,
11.0336174184083,
7.09744336163716,
16.6871358354018,
13.5030856142587,
14.8384334167838,
1.82381360524456
),
x4 = c(
-2.78166486821977,
-3.14368874900826,
-3.70425316743753,-4.34268218961615,
-3.03557313652054,
-2.74059520574829,
-4.10826186695405,-1.97243713944283,
-3.88803755426516,
-2.56315085425652
),
x5 = c(
-0.279614449281486,-0.480466773938402,
-1.43353886424161,
0.286937906279445,
0.701999608919316,
0.591932833840325,
0.994266002713824,
1.03424778687263,
0.462618513817936,-3.08491622131441
)
),
row.names = c(NA,-10L),
class = c("tbl_df",
"tbl", "data.frame")
)
# Add an id for each observation and covert to long format
df_wrangled <- df %>%
mutate(id = 1:n()) %>%
gather(col, val, -id)
pairs <- full_join(df_wrangled, df_wrangled, by = "id") %>%
mutate(
sum = val.x + val.y,
prod = val.x * val.y,
diff = val.x - val.y
)
head(pairs)
#> # A tibble: 6 x 8
#> id col.x val.x col.y val.y sum prod diff
#> <int> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 1 x1 0.544 x1 0.544 1.09 0.296 0
#> 2 1 x1 0.544 x2 -2.18 -1.63 -1.18 2.72
#> 3 1 x1 0.544 x3 14.1 14.7 7.68 -13.6
#> 4 1 x1 0.544 x4 -2.78 -2.24 -1.51 3.33
#> 5 1 x1 0.544 x5 -0.280 0.265 -0.152 0.824
#> 6 2 x1 0.413 x1 0.413 0.825 0.170 0
pairs_wrangled <- pairs %>%
filter(col.x != col.y) %>%
gather(operation, val, sum, prod, diff) %>%
mutate(
label = paste0(
col.x,
case_when(operation == "sum" ~ "+", operation == "diff" ~ "-", operation == "prod" ~ "*"),
col.y
)
) %>%
select(id, label, val) %>%
spread(label, val)
head(pairs_wrangled)
#> # A tibble: 6 x 61
#> id `x1-x2` `x1-x3` `x1-x4` `x1-x5` `x1*x2` `x1*x3` `x1*x4` `x1*x5`
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 2.72 -13.6 3.33 0.824 -1.18 7.68 -1.51 -0.152
#> 2 2 -3.32 -9.05 3.56 0.893 1.54 3.91 -1.30 -0.198
#> 3 3 -7.90 -7.31 3.69 1.42 -0.107 -0.0993 0.0504 0.0195
#> 4 4 -1.24 -11.1 3.39 -1.23 -0.280 -9.62 4.12 -0.272
#> 5 5 -0.203 -11.7 2.33 -1.41 0.355 -7.79 2.14 -0.495
#> 6 6 -0.558 -7.54 2.30 -1.03 -0.0520 -3.12 1.21 -0.260
#> # … with 52 more variables: `x1+x2` <dbl>, `x1+x3` <dbl>, `x1+x4` <dbl>,
#> # `x1+x5` <dbl>, `x2-x1` <dbl>, `x2-x3` <dbl>, `x2-x4` <dbl>,
#> # `x2-x5` <dbl>, `x2*x1` <dbl>, `x2*x3` <dbl>, `x2*x4` <dbl>,
#> # `x2*x5` <dbl>, `x2+x1` <dbl>, `x2+x3` <dbl>, `x2+x4` <dbl>,
#> # `x2+x5` <dbl>, `x3-x1` <dbl>, `x3-x2` <dbl>, `x3-x4` <dbl>,
#> # `x3-x5` <dbl>, `x3*x1` <dbl>, `x3*x2` <dbl>, `x3*x4` <dbl>,
#> # `x3*x5` <dbl>, `x3+x1` <dbl>, `x3+x2` <dbl>, `x3+x4` <dbl>,
#> # `x3+x5` <dbl>, `x4-x1` <dbl>, `x4-x2` <dbl>, `x4-x3` <dbl>,
#> # `x4-x5` <dbl>, `x4*x1` <dbl>, `x4*x2` <dbl>, `x4*x3` <dbl>,
#> # `x4*x5` <dbl>, `x4+x1` <dbl>, `x4+x2` <dbl>, `x4+x3` <dbl>,
#> # `x4+x5` <dbl>, `x5-x1` <dbl>, `x5-x2` <dbl>, `x5-x3` <dbl>,
#> # `x5-x4` <dbl>, `x5*x1` <dbl>, `x5*x2` <dbl>, `x5*x3` <dbl>,
#> # `x5*x4` <dbl>, `x5+x1` <dbl>, `x5+x2` <dbl>, `x5+x3` <dbl>,
#> # `x5+x4` <dbl>
由reprex 包(v0.2.1)于 2019 年 4 月 2 日创建