0

大编辑 - 试图简化输出和问题**

我在从之前的一组组合中提取变量组合时遇到问题(可能有一种更简单的方法,但这是我用 grepl 和循环完成这项工作的笨拙方法)。

它可以工作,但是使用更大的数据集需要很长时间,而且我找不到And没有循环的标准答案。

在此示例中,我试图从“comb1”中删除包含 3 个“comb”组合的每个组。

如果有人可以提供帮助,这是我的代码。非常感激

mydata <- read.csv("Test.csv", sep=",", header=FALSE)
n1 <- 4
comb1 <- combn(mydata,n1,simplify=FALSE)
tofind <- c("V1 ","V3 ","V4","V6") 
n2 <- 3
comb2 <- combn(tofind,n2)
temp <- list(data=NA,ncol=dim(comb2)[2])
for (j in 1:dim(comb2)[2]){
  temp[[j]] <- which(grepl(comb2[1,j],comb1) & grepl(comb2[2,j],comb1) & grepl(comb2[3,j],comb1))
}
Remove <- sort(unique(unlist(temp)),decreasing=FALSE)
comb <- comb1[-Remove]

mydata

> mydata
      V1     V2      V3      V4      V5      V6     V7      V8      V9     V10
1      A      B       C       D       E       F      G       H       I       J
2 0.0908 0.0937  0.0871  0.0557  0.0144  0.0005 0.0189  0.0053  0.0154 -0.0021
3 0.0603   0.05  0.1312 -0.0204 -0.0186 -0.0256 0.0033 -0.0178  -0.014 -0.0686
4 0.0234 0.0144 -0.0955 -0.0076 -0.0101  0.0152 0.0215  0.0036 -0.0031   0.006
5 0.0385 -0.004 -0.1423  0.0479 -0.0013 -0.0109 0.0172  0.0277   0.005  0.0296

编辑 - 谢谢@MLavoie,这是dput我的缩短mydata

> dput(mydata)
structure(list(V1 = structure(c(5L, 4L, 3L, 1L, 2L), .Label = c("0.0234", 
"0.0385", "0.0603", "0.0908", "A"), class = "factor"), V2 = structure(c(5L, 
4L, 3L, 2L, 1L), .Label = c("-0.004", "0.0144", "0.05", "0.0937", 
"B"), class = "factor"), V3 = structure(c(5L, 3L, 4L, 1L, 2L), .Label = c("-0.0955", 
"-0.1423", "0.0871", "0.1312", "C"), class = "factor"), V4 = structure(c(5L, 
4L, 2L, 1L, 3L), .Label = c("-0.0076", "-0.0204", "0.0479", "0.0557", 
"D"), class = "factor"), V5 = structure(c(5L, 4L, 3L, 2L, 1L), .Label = c("-0.0013", 
"-0.0101", "-0.0186", "0.0144", "E"), class = "factor"), V6 = structure(c(5L, 
3L, 2L, 4L, 1L), .Label = c("-0.0109", "-0.0256", "0.0005", "0.0152", 
"F"), class = "factor"), V7 = structure(c(5L, 3L, 1L, 4L, 2L), .Label = c("0.0033", 
"0.0172", "0.0189", "0.0215", "G"), class = "factor"), V8 = structure(c(5L, 
3L, 1L, 2L, 4L), .Label = c("-0.0178", "0.0036", "0.0053", "0.0277", 
"H"), class = "factor"), V9 = structure(c(5L, 4L, 2L, 1L, 3L), .Label = c("-0.0031", 
"-0.014", "0.005", "0.0154", "I"), class = "factor"), V10 = structure(c(5L, 
1L, 2L, 3L, 4L), .Label = c("-0.0021", "-0.0686", "0.006", "0.0296", 
"J"), class = "factor")), .Names = c("V1", "V2", "V3", "V4", 
"V5", "V6", "V7", "V8", "V9", "V10"), class = "data.frame", row.names = c(NA, 
-5L))

该代码适用于我获取数据及其格式的方式,但问题在于顺序:which(grepl(combM[1,j],comb1) & grepl(combM[2,j],comb1) & grepl(combM[3,j],comb1))在这个小示例中它很快,但对于更大的数据集则需要很长时间

4

0 回答 0