1

我有一个数据框,记录了在 2 个地点采集的 11 种植物的多个变量。对于每个物种,我想使用 t.test(或 Wilcoxon 检验)比较两个地点之间各变量的均值。

这是我的数据的前几行

 SPECIES   LOCATION X.COLONIZATION SPORE_DENSITY   pH  NO3  NH4    P Organic_C      K   Cu    Mn   Zn   BD X.Sand
1   C. comosa    Gauteng             90           387 5.40 8.24 1.35 1.10      0.95  94.40 3.36 84.40 4.72 1.45   68.0
2   C. comosa    Gauteng             84           270 5.25 8.36 1.37 1.20      0.99  94.87 3.39 84.87 4.77 1.36   76.0
3   C. comosa    Gauteng             96           404 5.55 8.19 1.32 1.11      0.94  94.01 3.35 84.01 4.68 1.54   78.0
4   C. comosa Mpumalanga             79           382 5.84 4.05 3.46 3.04      1.55 130.40 0.28 25.43 2.00 1.66   73.6
5   C. comosa Mpumalanga             82           383 5.49 4.45 3.48 3.09      1.53 131.36 0.27 25.35 2.12 1.45   76.5
6   C. comosa Mpumalanga             86           371 6.19 4.43 3.44 3.04      1.58 129.95 0.29 25.45 2.14 1.87   74.9
7  C. distans    Gauteng             80           334 5.48 8.88 1.96 3.33      0.99 130.24 0.99 40.01 3.94 1.55   70.0
8  C. distans    Gauteng             75           409 5.29 8.54 1.99 3.28      0.99 130.28 0.95 40.25 3.89 1.48   79.0
9  C. distans    Gauteng             85           259 5.67 8.63 1.93 3.39      1.02 130.30 0.98 40.12 3.97 1.62   79.0
10 C. distans Mpumalanga             65           326 5.61 6.02 2.65 4.45      2.58 163.25 1.79 53.11 6.11 1.68   72.0
11 C. distans Mpumalanga             79           351 5.43 6.58 2.55 4.49      2.59 163.55 1.78 52.89 6.04 1.63   78.0
12 C. distans Mpumalanga             71           251 5.79 6.24 2.59 4.41      2.59 163.27 1.75 53.03 6.19 1.73   75.0
   X.Silt X.Clay
1      12      9
2      16     13
3      14     14
4       9     10
5      11     16
6      13     16
7       8     11
8      12     15
9      10     16
10      8     10
11     15     14
12     16     12

例如,对于每个物种,我想比较豪登省(Gauteng)和姆普马兰加省(Mpumalanga)之间孢子密度(SPORE_DENSITY)的均值,并检验二者是否存在显著差异。请问该如何实现?

4

1 回答 1

1

我们先按 'SPECIES' 分组,然后在 summarise 中配合 across 处理所有数值列:对每一列,分别取出 'LOCATION' 为 'Gauteng' 和 'Mpumalanga' 的值,对这两组值应用 t.test,并提取 p 值(p.value)

library(dplyr) #1.0.0
# For every species, compare the Gauteng values against the Mpumalanga values
# of each numeric column with a two-sample t.test (default settings) and keep
# only the resulting p-value. The result has one row per species.
df1 %>%
  group_by(SPECIES) %>%
  summarise(
    across(
      where(is.numeric),
      function(values) {
        # LOCATION is looked up in the current species group, so these
        # masks split the column into the two sites being compared.
        gauteng <- values[LOCATION == 'Gauteng']
        mpumalanga <- values[LOCATION == 'Mpumalanga']
        t.test(gauteng, mpumalanga)$p.value
      }
    )
  )
# A tibble: 2 x 16
#  SPECIES   X.COLONIZATION SPORE_DENSITY    pH      NO3        NH4        P  Organic_C        K       Cu        Mn       Zn     BD X.Sand X.Silt X.Clay
#  <chr>              <dbl>         <dbl> <dbl>    <dbl>      <dbl>    <dbl>      <dbl>    <dbl>    <dbl>     <dbl>    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
#1 C. comosa          0.146         0.614 0.149 0.000269    7.27e-8  1.35e-5 0.00000970  2.15e-6  3.12e-7   1.35e-5  7.23e-6 0.219   0.779  0.140  0.474
#2 C. dista…          0.177         0.667 0.438 0.000624    1.94e-4  2.04e-5 0.00000670  4.48e-6  1.22e-6   1.90e-8  2.07e-5 0.0653  0.791  0.363  0.359

数据

# Example data: 2 species x 2 locations x 3 replicates (12 rows), with the
# measured variables as columns. Integer-valued columns keep the `L` suffix so
# their type matches the original dput() output; row names are the character
# strings "1".."12".
df1 <- data.frame(
  SPECIES = rep(c("C. comosa", "C. distans"), each = 6L),
  LOCATION = rep(rep(c("Gauteng", "Mpumalanga"), each = 3L), times = 2L),
  X.COLONIZATION = c(
    90L, 84L, 96L, 79L, 82L, 86L,
    80L, 75L, 85L, 65L, 79L, 71L
  ),
  SPORE_DENSITY = c(
    387L, 270L, 404L, 382L, 383L, 371L,
    334L, 409L, 259L, 326L, 351L, 251L
  ),
  pH = c(
    5.4, 5.25, 5.55, 5.84, 5.49, 6.19,
    5.48, 5.29, 5.67, 5.61, 5.43, 5.79
  ),
  NO3 = c(
    8.24, 8.36, 8.19, 4.05, 4.45, 4.43,
    8.88, 8.54, 8.63, 6.02, 6.58, 6.24
  ),
  NH4 = c(
    1.35, 1.37, 1.32, 3.46, 3.48, 3.44,
    1.96, 1.99, 1.93, 2.65, 2.55, 2.59
  ),
  P = c(
    1.1, 1.2, 1.11, 3.04, 3.09, 3.04,
    3.33, 3.28, 3.39, 4.45, 4.49, 4.41
  ),
  Organic_C = c(
    0.95, 0.99, 0.94, 1.55, 1.53, 1.58,
    0.99, 0.99, 1.02, 2.58, 2.59, 2.59
  ),
  K = c(
    94.4, 94.87, 94.01, 130.4, 131.36, 129.95,
    130.24, 130.28, 130.3, 163.25, 163.55, 163.27
  ),
  Cu = c(
    3.36, 3.39, 3.35, 0.28, 0.27, 0.29,
    0.99, 0.95, 0.98, 1.79, 1.78, 1.75
  ),
  Mn = c(
    84.4, 84.87, 84.01, 25.43, 25.35, 25.45,
    40.01, 40.25, 40.12, 53.11, 52.89, 53.03
  ),
  Zn = c(
    4.72, 4.77, 4.68, 2, 2.12, 2.14,
    3.94, 3.89, 3.97, 6.11, 6.04, 6.19
  ),
  BD = c(
    1.45, 1.36, 1.54, 1.66, 1.45, 1.87,
    1.55, 1.48, 1.62, 1.68, 1.63, 1.73
  ),
  X.Sand = c(
    68, 76, 78, 73.6, 76.5, 74.9,
    70, 79, 79, 72, 78, 75
  ),
  X.Silt = c(
    12L, 16L, 14L, 9L, 11L, 13L,
    8L, 12L, 10L, 8L, 15L, 16L
  ),
  X.Clay = c(
    9L, 13L, 14L, 10L, 16L, 16L,
    11L, 15L, 16L, 10L, 14L, 12L
  ),
  row.names = as.character(seq_len(12L)),
  stringsAsFactors = FALSE
)
于 2020-06-25T23:27:34.230 回答