5

我运行了一个循环 200 次,其中我:

  • 将我的数据集随机分为训练集和测试集

  • nnet()在训练集上使用 R 的命令拟合神经网络模型

  • 评估测试集的性能

我将每个模型保存到一个列表中。

现在我想使用组合模型进行样本外预测。combine为此,我在对象上使用了该功能randomForest对象是否有类似的组合命令nnet

我无法上传数据集,但下面是我目前正在使用的代码。它按原样工作,除了我寻求组合模型的命令的最后一行。

    n <- 200
    nnet_preds <- matrix(NA,  ncol = 1,  nrow = n)
    nnet_predstp <- matrix(NA,  ncol = 1,  nrow = n)
    nnet_predstn <- matrix(NA,  ncol = 1,  nrow = n)
    nnet_predsfptp <- matrix(NA,  ncol = 1,  nrow = n)
    nnet_predsfntp <- matrix(NA,  ncol = 1,  nrow = n)
    NN_predictions <- matrix(NA,  ncol = 1,  nrow = 10) 
    outcome_2010_NN <- testframe2[, "ytest"] 
    nn_model <- vector("list", n)

    data<-testframe2[, c("sctownpop", "sctownhh", "scnum_tribes", "sctownmusl_percap", "scmuslim", "scprop_stranger", "sctownstrg_percap", "scprop_domgroup", "scexcom", "sctownexcm_percap", "sctownretn_percap", "scprop_under30", "scprop_male", "scedulevel", "scprop_noeduc", "scprop_anypeace", "scgroup_prtcptn", "scpubcontr", "scsafecommdum", "scciviccommdum", "scoll_action_add", "scngodependent", "scgovtdependent", "scpolicourtscorr", "screlmarry", "scmslmnolead", "sccrime_scale", "scviol_scale", "sclandconf_scale", "sctownnlnd_percap", "scnolandnofarm", "scfarmocc", "scunemployed", "scwealthindex", "scwealth_inequality", "scviol_experienced", "scviol_part", "scanylndtake", "scdisp_ref", "sfacilities", "sfreq_visits", "sctot_resources", "scmeanprice_above75", "scmosquesdum", "scmnrt_ldrshp", "scany_majorconf", "sstate_presence", "sremote", "scmobilec", "scradio_low")]

    data = cbind(outcome_2010_NN, data)

    sampleSplit = round(nrow(data)-(nrow(data)/5))

    for(i in 1:n) {     

set.seed(06511+i)
    data_randomization <- data[sample(1:nrow(data),  dim(data)[1],  replace=FALSE), ]

    train <- data_randomization[1:sampleSplit, ]
    test  <- data_randomization[(sampleSplit+1):nrow(data), ]

    nn_model[[i]] <- nnet(outcome_2010_NN ~ sctownpop +           sctownhh+ scnum_tribes+ sctownmusl_percap+ scmuslim+          scprop_stranger+   sctownstrg_percap+ scprop_domgroup+     scexcom+  sctownexcm_percap+   sctownretn_percap+   scprop_under30 +  scprop_male+         scedulevel+          scprop_noeduc+       scprop_anypeace+     scgroup_prtcptn+     scpubcontr+          scsafecommdum+       scciviccommdum+      scoll_action_add+    scngodependent+      scgovtdependent+     scpolicourtscorr+    screlmarry+          scmslmnolead+        sccrime_scale+       scviol_scale+        sclandconf_scale+    sctownnlnd_percap+   scnolandnofarm+      scfarmocc+           scunemployed+        scwealthindex+       scwealth_inequality+ scviol_experienced+  scviol_part+         scanylndtake+        scdisp_ref+          sfacilities+         sfreq_visits+        sctot_resources+     scmeanprice_above75+ scmosquesdum+        scmnrt_ldrshp+       scany_majorconf+     sstate_presence+     sremote+             scmobilec+           scradio_low, 
    data=train,  size = 3,  decay = 0.1)# size=number of units/nodes in the (single_hidden layer); decay=parameter for weight decay. Default 0.

    predictions <- predict(nn_model[[i]],  test)

    nnpredorder<-rank(predictions)
    nncvpredictionsA50 <- ifelse( nnpredorder > 24,  1,  0 )    # manually optimized

    errors <- table(test[, "outcome_2010_NN"],  nncvpredictionsA50)             

    accuracy.rate <- (errors[1, 1]+errors[2, 2])/sum(errors)
    true.pos.rate <- (errors[2, 2]/(errors[2, 2]+errors[2, 1]))
    true.neg.rate <- (errors[1, 1]/(errors[1, 1]+errors[1, 2]))
    FPTP <- (errors[1, 2]/errors[2, 2])
    FNTP <- (errors[2, 1]/errors[2, 2])

    nnet_preds[i, ] <- accuracy.rate
    nnet_predstp[i, ] <- true.pos.rate
    nnet_predstn[i, ] <- true.neg.rate
    nnet_predsfptp[i, ] <- FPTP
    nnet_predsfntp[i, ] <- FNTP
}

    mean(nnet_preds); sd(nnet_preds)
    mean(nnet_predstp); sd(nnet_predstp)

    NN_predictions[1, ] <- mean(nnet_predstp) # TP accuracy rate (sensitivity)
    NN_predictions[2, ] <- sd(nnet_predstp) # TP accuracy rate (sensitivity)
    NN_predictions[3, ] <- mean(nnet_predstn)  # TN accuracy rate (specificity)
    NN_predictions[4, ] <- sd(nnet_predstn) # TN accuracy rate (specificity)
    NN_predictions[5, ] <- mean(nnet_preds)  # Accuracy rate
    NN_predictions[6, ] <- sd(nnet_preds) # Accuracy rate
    NN_predictions[7, ] <- mean(nnet_predsfptp)  # Ratio FP:TP
    NN_predictions[8, ] <- sd(nnet_predsfptp) # Ratio FP:TP
    NN_predictions[9, ] <- mean(nnet_predsfntp)  # Ratio FN:TP
    NN_predictions[10, ] <- sd(nnet_predsfntp) # Ratio FN:TP

    print(NN_predictions)

### Combine NN models  #Where `combine` is the randomForest command     
aggNNmodel <- do.call(combine, nn_model)
4

1 回答 1

1

您不应该使用随机森林的 combine 方法,因为它是用于决策树的。但是随机森林是增强算法,因此您应该能够使用增强算法来组合神经网络模型。

Boosting是一种结合弱学习器的方法,但没有规则反对使用像神经网络这样的强学习器进行提升。

Can a set of weak learners create a single strong learner?

使用像 AdaBoost 这样的提升算法,将神经网络作为弱学习器。谷歌搜索显示了 R 中的几个增强包。

于 2013-11-03T20:50:33.773 回答