1

我是 R 新手。我有一个列表,其中每个元素都是一个只包含数值数据的数据框(data frame)。我想在这些数据框之间做 Kruskal-Wallis 检验,但总是报错。

列表中的每个数据框都只有一列,但行数各不相同(因此,据我所知,它们无法直接合并成同一个数据框)。

数据:

# Reproducible version of the question's data: a named list whose elements are
# one-column data frames with unequal numbers of rows (10, 7, 5 and 6).
# NOTE(review): the column name `value` is a placeholder -- the post does not
# show the real column headers; confirm against the asker's actual data.
data_list <- list(
  tumor = data.frame(value = c(
    0.004255040, 0.002703172, 0.007478089, 0.003554968, 0.003803952,
    0.005225325, 0.004816366, 0.005674340, 0.003474605, 0.004784456
  )),
  t = data.frame(value = c(
    0.004326186, 0.008126497, 0.009110830, 0.004030094, 0.005784066,
    0.006752136, 0.009840556
  )),
  b = data.frame(value = c(
    0.004872971, 0.009066809, 0.005964638, 0.003622466, 0.011660714
  )),
  caf = data.frame(value = c(
    0.003618611, 0.007463386, 0.007463134, 0.005453387, 0.010409640,
    0.012020965
  ))
)

所以它看起来像这样:

$tumor
1                                               0.004255040
2                                               0.002703172
3                                               0.007478089
4                                               0.003554968
5                                               0.003803952
6                                               0.005225325
7                                               0.004816366
8                                               0.005674340
9                                               0.003474605
10                                              0.004784456

$t
1                                         0.004326186
2                                         0.008126497
3                                         0.009110830
4                                         0.004030094
5                                         0.005784066
6                                         0.006752136
7                                         0.009840556

$b
1                                         0.004872971
2                                         0.009066809
3                                         0.005964638
4                                         0.003622466
5                                         0.011660714

$caf
1                                           0.003618611
2                                           0.007463386
3                                           0.007463134
4                                           0.005453387
5                                           0.010409640
6                                           0.012020965

我尝试了很多种写法,但全都报错,没有一个成功:

> kruskal.test(data_list)
    Error in `[.data.frame`(u, complete.cases(u)) : 
     undefined columns selected
> kruskal.test(list(data_list$tumor,data_list$t,data_list$b,data_list$caf))
    Error in `[.data.frame`(u, complete.cases(u)) : 
     undefined columns selected
> kruskal.test(list(data_list$tumor[,1],data_list$t,data_list$b[,1],data_list$caf[,1]))
    Error in `[.data.frame`(u, complete.cases(u)) : 
     undefined columns selected
> kruskal.test(unlist(data_list))
    Error in kruskal.test.default(unlist(data_list)) : 
     argument "g" is missing, with no default

谢谢!:)

4

2 回答 2

2

您可以把由数值向量组成的列表作为第一个参数传给 kruskal.test。注意:列表的元素必须是数值向量,而不是数据框;如果元素是单列数据框,可以先用 lapply(data_list, function(d) d[[1]]) 把每一列提取为向量。

# One numeric vector per group. The groups have different sizes, so they are
# kept as separate elements of a named list.
data_list <- list(
  tumor = c(
    0.00425504, 0.002703172, 0.007478089, 0.003554968, 0.003803952,
    0.005225325, 0.004816366, 0.00567434, 0.003474605, 0.004784456
  ),
  t = c(
    0.004326186, 0.008126497, 0.00911083, 0.004030094, 0.005784066,
    0.006752136, 0.009840556
  ),
  b = c(
    0.004872971, 0.009066809, 0.005964638, 0.003622466, 0.011660714
  ),
  caf = c(
    0.003618611, 0.007463386, 0.007463134, 0.005453387, 0.01040964,
    0.012020965
  )
)

# The list is passed as the first argument; no separate grouping argument is
# supplied, so each list element serves as one group's sample.
kruskal.test(data_list)

# Kruskal-Wallis rank sum test

# data:  data_list
# Kruskal-Wallis chi-squared = 7.0828, df = 3, p-value = 0.0693
于 2018-01-07T20:03:17.420 回答
1
# One numeric vector per group; group sizes differ.
data_list <- list(
  tumor = c(
    0.004255040, 0.002703172, 0.007478089, 0.003554968, 0.003803952,
    0.005225325, 0.004816366, 0.005674340, 0.003474605, 0.004784456
  ),
  t = c(
    0.004326186, 0.008126497, 0.009110830, 0.004030094, 0.005784066,
    0.006752136, 0.009840556
  ),
  b = c(
    0.004872971, 0.009066809, 0.005964638, 0.003622466, 0.011660714
  ),
  caf = c(
    0.003618611, 0.007463386, 0.007463134, 0.005453387, 0.010409640,
    0.012020965
  )
)

# Transform the list into a long data frame with one row per observation:
# `type` holds the group name and `value` the measurement. Base R is enough
# here, so no extra package has to be attached.
df <- data.frame(
  type = rep(names(data_list), times = lengths(data_list)), # name per value
  value = unlist(data_list, use.names = FALSE),             # all values, in order
  stringsAsFactors = FALSE
)

# Apply the test through the formula interface: response ~ grouping factor.
kruskal.test(value ~ factor(type), data = df)

# Kruskal-Wallis rank sum test
# 
# data:  value by factor(type)
# Kruskal-Wallis chi-squared = 7.0828, df = 3, p-value = 0.0693
于 2018-01-07T14:53:52.583 回答