大编辑 - 试图简化输出和问题**
我在从之前的一组组合中提取变量组合时遇到问题(可能有一种更简单的方法,但这是我用 grepl 和循环完成这项工作的笨拙方法)。
它可以工作,但是使用更大的数据集需要很长时间,而且我找不到And
没有循环的标准答案。
在此示例中,我试图从“comb1”中删除包含 3 个“comb”组合的每个组。
如果有人可以提供帮助,这是我的代码。非常感激
mydata <- read.csv("Test.csv", sep=",", header=FALSE)
n1 <- 4
comb1 <- combn(mydata,n1,simplify=FALSE)
tofind <- c("V1 ","V3 ","V4","V6")
n2 <- 3
comb2 <- combn(tofind,n2)
temp <- list(data=NA,ncol=dim(comb2)[2])
for (j in 1:dim(comb2)[2]){
temp[[j]] <- which(grepl(comb2[1,j],comb1) & grepl(comb2[2,j],comb1) & grepl(comb2[3,j],comb1))
}
Remove <- sort(unique(unlist(temp)),decreasing=FALSE)
comb <- comb1[-Remove]
mydata
:
> mydata
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
1 A B C D E F G H I J
2 0.0908 0.0937 0.0871 0.0557 0.0144 0.0005 0.0189 0.0053 0.0154 -0.0021
3 0.0603 0.05 0.1312 -0.0204 -0.0186 -0.0256 0.0033 -0.0178 -0.014 -0.0686
4 0.0234 0.0144 -0.0955 -0.0076 -0.0101 0.0152 0.0215 0.0036 -0.0031 0.006
5 0.0385 -0.004 -0.1423 0.0479 -0.0013 -0.0109 0.0172 0.0277 0.005 0.0296
编辑 - 谢谢@MLavoie,这是dput
我的缩短mydata
> dput(mydata)
structure(list(V1 = structure(c(5L, 4L, 3L, 1L, 2L), .Label = c("0.0234",
"0.0385", "0.0603", "0.0908", "A"), class = "factor"), V2 = structure(c(5L,
4L, 3L, 2L, 1L), .Label = c("-0.004", "0.0144", "0.05", "0.0937",
"B"), class = "factor"), V3 = structure(c(5L, 3L, 4L, 1L, 2L), .Label = c("-0.0955",
"-0.1423", "0.0871", "0.1312", "C"), class = "factor"), V4 = structure(c(5L,
4L, 2L, 1L, 3L), .Label = c("-0.0076", "-0.0204", "0.0479", "0.0557",
"D"), class = "factor"), V5 = structure(c(5L, 4L, 3L, 2L, 1L), .Label = c("-0.0013",
"-0.0101", "-0.0186", "0.0144", "E"), class = "factor"), V6 = structure(c(5L,
3L, 2L, 4L, 1L), .Label = c("-0.0109", "-0.0256", "0.0005", "0.0152",
"F"), class = "factor"), V7 = structure(c(5L, 3L, 1L, 4L, 2L), .Label = c("0.0033",
"0.0172", "0.0189", "0.0215", "G"), class = "factor"), V8 = structure(c(5L,
3L, 1L, 2L, 4L), .Label = c("-0.0178", "0.0036", "0.0053", "0.0277",
"H"), class = "factor"), V9 = structure(c(5L, 4L, 2L, 1L, 3L), .Label = c("-0.0031",
"-0.014", "0.005", "0.0154", "I"), class = "factor"), V10 = structure(c(5L,
1L, 2L, 3L, 4L), .Label = c("-0.0021", "-0.0686", "0.006", "0.0296",
"J"), class = "factor")), .Names = c("V1", "V2", "V3", "V4",
"V5", "V6", "V7", "V8", "V9", "V10"), class = "data.frame", row.names = c(NA,
-5L))
该代码适用于我获取数据及其格式的方式,但问题在于顺序:which(grepl(combM[1,j],comb1) & grepl(combM[2,j],comb1) & grepl(combM[3,j],comb1))
在这个小示例中它很快,但对于更大的数据集则需要很长时间