0

我有一个看起来像这样的表:

Index   Treatement   Y(0)   Y(1)
1       0            10     ?
2       0            20     ?
3       0            15     ?
4       1            ?      5
5       1            ?      9

我想置换所有分配机制,使 3 个分配给控制,2 个分配给治疗。换句话说,我不希望集合全是 1 或全 0,或者是 4 个 1 或 0 和 1 个 1 或 1 0。我希望每个排列都有 3 个 0 和 2 个 1,但该组中的项目不同. 然后我想看看这些分配的哪些版本(例如,如果 1 分配给治疗,2 控制,3 治疗,4 继续,5 治疗)导致结果与观察到的一样极端。我将如何在 R 中做到这一点?

4

3 回答 3

1

你可以用 来很好地做到这一点ri2,你可以用它来安装install.packages("ri2")

library(ri2)

dat <- data.frame(Y = c(10, 20, 15, 5, 9),
                  Z = c(0, 0, 0, 1, 1))

declaration <- declare_ra(N = 5, m = 2)

# All 10 possibilities
obtain_permutation_matrix(declaration)
#>      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
#> [1,]    0    0    0    0    0    0    1    1    1     1
#> [2,]    0    0    0    1    1    1    0    0    0     1
#> [3,]    0    1    1    0    0    1    0    0    1     0
#> [4,]    1    0    1    0    1    0    0    1    0     0
#> [5,]    1    1    0    1    0    0    1    0    0     0

# Do randomization inference
ri_out <- conduct_ri(formula = Y ~ Z, declaration = declaration, data = dat)

# check out the 10 possibilities
ri_out$sims_df
#>         est_sim est_obs coefficient
#> Z.1  -8.0000000      -8           Z
#> Z.2   0.3333333      -8           Z
#> Z.3  -3.0000000      -8           Z
#> Z.4   4.5000000      -8           Z
#> Z.5   1.1666667      -8           Z
#> Z.6   9.5000000      -8           Z
#> Z.7  -3.8333333      -8           Z
#> Z.8  -7.1666667      -8           Z
#> Z.9   1.1666667      -8           Z
#> Z.10  5.3333333      -8           Z

# Get a p-value
summary(ri_out)
#>   coefficient estimate two_tailed_p_value null_ci_lower null_ci_upper
#> 1           Z       -8                0.2       -7.8125        8.5625
于 2018-02-05T17:42:01.963 回答
0

这很简单:

n <- 1000 # iterations
replicate(n, diff( by(df$Y[sample(1:nrow(df),nrow(df),FALSE)], 
                      df$Treatment,
                      mean) ) )

输出是均值差向量。

于 2013-10-23T14:48:31.037 回答
0

我假设您想彻底检查所有 120 个排列 -

library(data.table)
library(reshape2)

y <- c(10,20,15,5,9)

#getting all combinations    
allwduplicate <- data.table(expand.grid(p1 = 1:5, p2 = 1:5, p3 = 1:5, p4 = 1:5, p5 = 1:5) )
perms <- allwduplicate[(p1+p2+p3+p4+p5 == sum(1:5)) & (p1*p2*p3*p4*p5 == prod(1:5))] 

#melting dataset into easier structure
perms[,permid := 1:prod(1:5)]
perms <- data.table(melt(perms, id.vars = 'permid'))

# assigning treatement values
perms[,yvalue := y[value]]

# assigning whether treated or not
perms[,treated := 1]
perms[variable %in% c('p1','p2'),treated := 0]

# calculating means of treated 3 vs. non treated 2
perms <- merge(
   perms[treated == 1,list(yvmean1 = mean(yvalue)), by = c('permid')],
   perms[treated == 0,list(yvmean0 = mean(yvalue)), by = c('permid')],
   by = 'permid'
   )

# treatementdiff is the value you want, I think
perms[,treatementdiff := ymean1 - ymean0]
于 2013-10-23T15:11:23.043 回答