0

我试图通过插入符号的confusionMatrix() 函数来理解预测对象的结果,该函数需要根据http://artax.karlin.mff.cuni.cz/r-help/library/caret/html/confusionMatrix 输入表格。在index.html中,我的 table() 创建了我理解的结果,但它对 chaosMatrix() 函数不友好。

这是相关的代码片段:

#MODEL CREATION
#convert categorical A to E values , into numeric 1 to 5 in order to be regression friendly
training_data_subset_numeric <- training_data_subset;
testing_data_subset_numeric <- testing_data_subset;
training_data_subset_numeric$classe <- as.numeric(training_data_subset$classe)
testing_data_subset_numeric$classe <- as.numeric(testing_data_subset$classe)
#model
exercise.model <- glm(formula = classe ~ ., data = training_data_subset_numeric)
#MODEL EVALUATION
exercise.prediction <- predict(exercise.model,newdata = testing_data_subset_numeric)
eval_table <- table(exercise.prediction,testing_data_subset$classe)
tail(eval_table)

exercise.prediction A B C D E
   4.35504232913594 1 0 0 0 0
   4.47219097065568 1 0 0 0 0
   4.50838854075835 1 0 0 0 0
   4.6173551930011  0 1 0 0 0
   4.69261223447305 0 1 0 0 0
   4.73297946213265 0 1 0 0 0

基本上我需要将上述输出转换为具有 1 col 的数据帧,对应于遵循此规则的预测值:

如果 A 列是 1 ,则预测值为 1

如果 B 列是 1 ,则预测值为 2

如果列 C 为 1 ,则预测值为 3

如果列 D 为 1 ,则预测值为 4

如果列 E 为 1 ,则预测值为 5

因此,我编写了这个函数来完成工作:

getPredictResults<- function(x)
{
# create 1 column & n row data frame
num <- data.frame(matrix(0, ncol = 1, nrow = nrow(x)));

for (r in 1:nrow(x) ) {

for (c in 1:ncol(x) ) {
    #if column A has value 1 than num[1,r] <- 1
     if (x[r,'A']== 1)
    {
        num[1,r] <- 1;
    }
    #if column B has value 1 than num[1,r] <- 2
    else if (x[r,'B']== 1)
    {
        num[1,r] <- 2;
    }
    #if column C has value 1 than num[1,r] <- 3
    else if (x[r,'C']== 1)
    {
        num[1,r] <- 3;
    }
    #if column D has value 1 than num[1,r] <- 4
    else if (x[r,'D']== 1)
    {
        num[1,r] <- 4;
    }
    #if column E has value 1 than num[1,r] <- 5
    else if (x[r,'E']== 1)
    {
        num[1,r] <- 5;
    }
    else
    {
    }

}#end inner for 

}#end outer for 

return (num);
}#end function

exercise.prediction_df <- getPredictResults(eval_table)

但是,在键入时:

head(exercise.prediction_df)

我得到一个不寻常的输出,这是底部的片段:

2    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
  V4840 V4841 V4842 V4843 V4844 V4845 V4846 V4847 V4848 V4849 V4850 V4851 V4852 V4853 V4854 V4855 V4856 V4857
1     5     1     4     5     2     2     5     5     1     2     5     4     5     5     1     5     5     4
2    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
  V4858 V4859 V4860 V4861 V4862 V4863 V4864 V4865 V4866 V4867 V4868 V4869 V4870 V4871 V4872 V4873 V4874 V4875
1     4     2     1     2     5     1     4     5     2     1     4     5     2     4     2     4     4     2
2    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
  V4876 V4877 V4878 V4879 V4880 V4881 V4882 V4883 V4884 V4885 V4886 V4887 V4888 V4889 V4890 V4891 V4892 V4893
1     5     1     1     4     1     2     2     1     1     5     1     4     1     1     1     1     1     1
2    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
  V4894 V4895 V4896 V4897 V4898 V4899 V4900 V4901 V4902 V4903 V4904
1     1     1     1     1     1     1     1     1     2     2     2
2    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
 [ reached getOption("max.print") -- omitted 4 rows ]

进一步调查显示:

> ncol(exercise.prediction_df)
[1] 4904
> nrow(exercise.prediction_df)
[1] 4904

其中 ncol() 应该只返回 1 & nrow() 显然可以是任何整数值。

如何修复此函数,以便创建正确的数据框作为confusionMatrix() 函数的输入?

谢谢。

4

1 回答 1

0
classe <- cut(runif(100), seq(0, 1, length.out = 5))
levels(a) <- c("A", "B", "C", "D", "E")
exercise.prediction <- rnorm(100)
eval_table <- table(exercise.prediction, classe)

eval_matrix <- as.matrix(tab)

transform <- apply(eval_matrix, 1, function(x) sum(x * c(1:5)))
head(as.data.frame(transform))
于 2015-07-21T03:37:35.187 回答