0

我有一个这样的数据集

# Example data, written as a structure() literal (presumably dput() output).
# It is a tibble grouped by Idm: 24 rows = 8 Idm groups x 3 rows each
# (name G, A, B with n = 1, 2, 3), with columns Idm, name, value, sd and n.
# The `groups` attribute stores, per Idm, the row indices of that group.
df <- structure(list(Idm = c("AAA", "AAA", "AAA", "AAAA", "AAAA", "AAAA", 
"AAAAA", "AAAAA", "AAAAA", "BB", "BB", "BB", "BBB", "BBB", "BBB", 
"BBBBB", "BBBBB", "BBBBB", "CCCC", "CCCC", "CCCC", "CCCCC", "CCCCC", 
"CCCCC"), name = c("G", "A", "B", "G", "A", "B", "G", "A", "B", 
"G", "A", "B", "G", "A", "B", "G", "A", "B", "G", "A", "B", "G", 
"A", "B"), value = c(2506.3, 5306.7, 6558.1, 2270.1, 5449.3, 
5790.2, 334.1, 947, 1128.2, 809, 1944, 2539, 1302.3, 3447, 4107.7, 
2562.7, 5127.6, 4585.8, 911, 5121.9, 6313.4, 832.8, 1230.2, 1180.8
), sd = c(1865.19913950227, 2221.04246770145, 5885.17898538354, 
1273.08845332915, 2008.35456364989, 3037.90616433973, 181.270083944741, 
446.8334626383, 490.805504587442, 633.895459309604, 961.277571776227, 
2444.30575487874, 1012.39068051815, 1393.79545127684, 5826.31668323421, 
1476.91924739755, 1508.60484223007, 4258.95203228838, 838.051710815031, 
2911.84582696268, 4510.54727758543, 507.433227134369, 562.122249455875, 
1674.86096835926), n = c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L)), row.names = c(NA, 
-24L), groups = structure(list(Idm = c("AAA", "AAAA", "AAAAA", 
"BB", "BBB", "BBBBB", "CCCC", "CCCCC"), .rows = structure(list(
    1:3, 4:6, 7:9, 10:12, 13:15, 16:18, 19:21, 22:24), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, -8L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

例如,我想知道其中哪一个 Idm 的变化最小

AAAAA   G   334.1   181.2700839
AAAAA   A   947     446.8334626
AAAAA   B   1128.2  490.8055046

这一个在 3 次重复中的变化较小

CCCCC   G   832.8   507.4332271 1
CCCCC   A   1230.2  562.1222495 2
CCCCC   B   1180.8  1674.860968 3

我可以通过下面这张图看出来

在此处输入图像描述

我的想法是:对每个 Idm 的 3 个重复,分别计算 value 的平均值和 sd 的平均值;这两个平均值都较低的那个 Idm,就是变异最小的那一个

4

1 回答 1

0

这个问题还需要进一步澄清,但如果您只想要每个组中标准差(sd)最小的那一行,可以使用 dplyr。

library(dplyr)

# Within every Idm keep only the row whose sd is the smallest,
# then list those rows from the lowest sd to the highest.
min_sd_rows <- slice(dplyr::group_by(df, Idm), which.min(sd))
arrange(min_sd_rows, sd)

输出

# A tibble: 8 × 5
# Groups:   Idm [8]
  Idm   name  value    sd     n
  <chr> <chr> <dbl> <dbl> <int>
1 AAAAA G      334.  181.     1
2 CCCCC G      833.  507.     1
3 BB    G      809   634.     1
4 CCCC  G      911   838.     1
5 BBB   G     1302. 1012.     1
6 AAAA  G     2270. 1273.     1
7 BBBBB G     2563. 1477.     1
8 AAA   G     2506. 1865.     1

或者,您也可以保留所有数据,只对行进行排序(而不是每组只取一行):

# Keep every row and rank all of them by sd, smallest first.
# arrange() ignores grouping unless .by_group = TRUE, so this is a single
# ordering across all Idm values, which is what the printed output shows.
# arrange(Idm, sd) would not reorder this data at all: df is already ordered
# by Idm and by ascending sd within each Idm.
df %>%
  dplyr::group_by(Idm) %>%
  arrange(sd)

输出

# A tibble: 24 × 5
# Groups:   Idm [8]
   Idm   name  value    sd     n
   <chr> <chr> <dbl> <dbl> <int>
 1 AAAAA G      334.  181.     1
 2 AAAAA A      947   447.     2
 3 AAAAA B     1128.  491.     3
 4 CCCCC G      833.  507.     1
 5 CCCCC A     1230.  562.     2
 6 BB    G      809   634.     1
 7 CCCC  G      911   838.     1
 8 BB    A     1944   961.     2
 9 BBB   G     1302. 1012.     1
10 AAAA  G     2270. 1273.     1
# … with 14 more rows
于 2021-07-27T20:42:55.247 回答