我研究缺失,我试图在 R 中现有的缺失之上随机降级数据集。基本上我想要做的是随机选择我的数据框中的位置并将它们更改为 NA,但前提是它们已经不是 NA。我已经编写了下面的函数,但是在较大的数据帧上它确实很慢,即使运行了几个小时后它也不会生成解决方案。有没有人有更好的方法来随机降级数据(使用 NA 检查)或知道提高功能效率的方法?非常感谢您!
degradefunction<- function(x,nrow1, ncol1,del.amount){
repeat{
#Generate random row numbers
rowsample<-sample(1:nrow1,del.amount,replace=TRUE)
#Generate random column numbers
colsample<-sample(1:ncol1, del.amount, replace=TRUE)
x.del<-x
#For each deletion, with a randomly selected row and column location
for (i in 1:del.amount){
#Test if the randomly selected location is not an NA
if(is.na(x.del[rowsample[i],colsample[i]])==FALSE)
#If it is not NA but is a number make it NA now
{x.del[rowsample[i],colsample[i]]<-NA}
#If our randomly selected location IS an NA
else {
#Get a new randomly selected location (row and column)
resamplenrow1<-sample(1:nrow1,1,replace=TRUE)
resamplencol1<-sample(1:ncol1,1,replace=TRUE)
#See if the new location is an NA
if(is.na(x.del[resamplenrow1,resamplencol1])==FALSE){
#If the randomly selected location is not an NA, make it an NA
x.del[resamplenrow1,resamplencol1]<-NA}}}
#Break the loop when the amount of data in the dataframe has the number of
#missing values we want (in a case starting with 0 NAs we want it to break
#when the number of NAs in x.del is equal to the del.amount, but when we
#do this with data including missingness we want the number of missing
#values to equal the deletion amount plus the number of NAs previously
#occurring in the data.
if(sum(is.na(x.del))==(del.amount + (sum(is.na(x))))){break}
}
#Give back the degraded data
return(x.del)
}
#Test the degrade function
#On data with 0 previous missingness
mypractice<-matrix(c(1,2,3, 4), nrow=10, ncol=4)
mypractice
mypractice<-as.data.frame(mypractice)
str(mypractice)
sum(is.na(mypractice))
newvarx<-degradefunction(mypractice, 10,4,16)
newvarx
sum(is.na(newvarx))
#The sum of missingness is now 16
#On data with previous missingness
mypractice<-matrix(c(1,2,3,NA), nrow=10, ncol=4)
mypractice
mypractice<-as.data.frame(mypractice)
str(mypractice)
sum(is.na(mypractice))
newvarx<-degradefunction(mypractice, 10,4,16)
newvarx
sum(is.na(newvarx))
#We get a total missingness (26) of the missingness we induced (16)
#and the missingness in the original data (10)