0

背景: 我正在尝试递归地选择行并根据组合检查条件。我可以使用 iterpc 函数转储组合。但是,当我在更大的行数据集(输入文件)> 200 上重复 for 循环时,我收到错误 - “无法分配大小为 n GB 的向量”。

代码

带有 PARA 到 PARL 的 DATA 文件(提供的参数,主要是数字)
            data1 <- file.choose(read.csv(), stringasFactors =FALSE)



    #### Combination prediction using iterpc and a loop 
         to check condition success of subset rows #####

    require(iterpc)
                getComboChunks <- function(n, k, chunkSize, totalCombos, myFile, myTestFile) {
                 myIter <- iterpc(n, k)

                  ## initialized myFile
                  myCombs <- getnext(myIter, chunkSize)
                  write.table(myCombs, file = myFile, sep = ",", col.names = FALSE)

                  maxIteration <- (totalCombos - chunkSize) %/% chunkSize

                  for (i in 1:maxIteration) {
                    ## get the next "chunkSize" of combinations
                    myCombs <- getnext(myIter, chunkSize)

                    ## append the above combinations to your file
                    write.table(myCombs, file = myFile, sep = ",",
                                col.names = FALSE , append = TRUE)


                      o <- 1
                      namee <-  subset(data1, SNO %in% myCombs)

                      a <- sum(namee$Weight)
                      h <- (sum(namee$PARA*namee$Weight))/a
                      f <- (sum(namee$PARB*namee$Weight))/a
                      g <- (sum(namee$PARC*namee$Weight))/a
                      l <- (sum(namee$PARE*namee$Weight))/a
                      m <- (sum(namee$PARF*namee$Weight))/a
                      n <- (sum(namee$PARD*namee$Weight))/a
                      p <- (sum(namee$PARG*namee$Weight))/a
                      q <- (sum(namee$ParH*namee$Weight))/a
                      r <- (sum(namee$PARI)) 
                      pr <- (sum(namee$pr))
                      le <- (sum(namee$PARJ*namee$Weight))/a
                      PM[is.na(PM)] <- 0

                      k <- ifelse(aska <= a && askle <= le  
                           && askh <= h  && askf <= f  
                           &&  askg <= g  && askl <= l && askm <= m 
                           && askn <= n  && askp <= p  && askq <= q  
                           && askr <= r && pr >=askpr && a <aska2 
                           && le < askle2 && g <askg2 && f <askf2 
                           && h <askh2&& l <askl2 && m <askm2 
                           && n <askn2 && p <askp2 && q <askq2 
                           && r <askr2, "Success","Failure")

                      if (k == "Success")
                              {
                        PM$SNO <- as.character(PM$SNO)
                        Masterlist$SNO <- as.character(Masterlist$SNO)
                        PM[is.na(PM)] <- 0
                        List   <- rbind(List, as.vector(c(i,a,h,f,g,l,m,n,p,q,le,r,k)))
                        print("Success")
                        }
                      if (k == "Failure"){
                        print("Failure")
                        print(i)
                      }


                      rm(namee)
                      o <- o+1
                      gc() 
                        }


                    myTests <- List

                    ## append the above combinations to your file
                    write.table(myTests, file = myTestFile, sep = ",",
                                col.names = FALSE , append = TRUE)
                   }
包含 200 行的数据集
getComboChunks(200, 5, 1, 2535650040, "myCombos1.csv", "myTests.csv")

解决方案 通过顺序刷新内存来递归检查条件以处理更大的数据集(200 或更多行)。我相信我收到错误“无法分配大小为 n GB 的向量”,因为以下行

 List   <- rbind(List, as.vector(c(i,a,h,f,g,l,m,n,p,q,le,r,k)))

这可以通过预先分配一个巨大的向量并转储值而不是 rbind 函数来消除。另外,每次运行后是否可以刷新内存?

4

0 回答 0