1

我在 Knime 中使用 R Learner。我想离散化一个矩阵,如下所示:

> my_matrix= as(knime.in,"matrix");
> dput(head(my_matrix, 5))
structure(c("KS", "OH", "NJ", "OH", "OK", "128", "107", "137", 
" 84", " 75", "415", "415", "415", "408", "415", "No", "No", 
"No", "Yes", "Yes", "Yes", "Yes", "No", "No", "No", "25", "26", 
" 0", " 0", " 0", "265.1", "161.6", "243.4", "299.4", "166.7", 
"110", "123", "114", " 71", "113", "45.07", "27.47", "41.38", 
"50.90", "28.34", "197.4", "195.5", "121.2", " 61.9", "148.3", 
" 99", "103", "110", " 88", "122", "16.78", "16.62", "10.30", 
" 5.26", "12.61", "244.7", "254.4", "162.6", "196.9", "186.9", 
" 91", "103", "104", " 89", "121", "11.01", "11.45", " 7.32", 
" 8.86", " 8.41", "10.0", "13.7", "12.2", " 6.6", "10.1", " 3", 
" 3", " 5", " 7", " 3", "2.70", "3.70", "3.29", "1.78", "2.73", 
"1", "1", "0", "2", "3", "False", "False", "False", "False", 
"False"), .Dim = c(5L, 20L), .Dimnames = list(c("Row0", "Row1", 
"Row2", "Row3", "Row4"), c("State", "Account length", "Area code", 
"International plan", "Voice mail plan", "Number vmail messages", 
"Total day minutes", "Total day calls", "Total day charge", "Total eve minutes", 
"Total eve calls", "Total eve charge", "Total night minutes", 
"Total night calls", "Total night charge", "Total intl minutes", 
"Total intl calls", "Total intl charge", "Customer service calls", 
"Churn")))

我正在使用以下代码来离散化矩阵:

require(arules)
#require(arulesViz)
my_matrix= as(knime.in,"matrix");
my_rows= nrow(my_matrix);
my_cols= ncol(my_matrix);
#discretize(x, method="interval", categories = 3, labels = NULL,     
#  ordered=FALSE, onlycuts=FALSE, ...)
typeof(my_matrix)
vector = my_matrix[,2]
my_matrix[,2] = discretize(vector, method="interval", categories = 3, labels=c("length0","length1","length2"))
my_matrix[,3] = ...
etc...

在对应的代码行中:

my_matrix[,2] = discretize(vector, method="interval", categories = 3, labels=c("length0","length1","length2"))

我收到以下错误:

Error in seq.default(from = min(x, na.rm = TRUE), to = max(x, na.rm = TRUE),  : 'from' cannot be NA, NaN or infinite

如果我在这里加上 sum(is.na(vector)):

vector = my_matrix[,2]
sum(is.na(vector))
my_matrix[,2] = discretize(vector, method="interval", categories = 3, labels=c("length0","length1","length2"))

我得到:

> sum(is.na(vector))
[1] 0

所以我的向量中没有 NA 元素。不过,typeof(my_matrix) 的结果是 "character"。如果我打印该向量,会得到以下内容:

> vector = my_matrix[,2]
> sum(is.na(vector))
[1] 0
> head(vector, 20)
 Row0  Row1  Row2  Row3  Row4  Row5  Row6  Row7  Row8  Row9 Row10 Row11 Row12 
"128" "107" "137" " 84" " 75" "118" "121" "147" "117" "141" " 65" " 74" "168" 
Row13 Row14 Row15 Row16 Row17 Row18 Row19 
" 95" " 62" "161" " 85" " 93" " 76" " 73" 
4

1 回答 1

0

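问题在于您的向量是由字符串组成的,而不是数值。discretize 需要数值型输入;对字符向量调用 min()/max() 得到的仍是字符串,seq() 无法用它作为区间端点,因此报出 'from' 的错误(这与向量中是否含有 NA 无关)。理想情况下,您应该在 KNIME 中解决此问题:KNIME 中确实存在用于这种类型转换的节点。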

但是,您也可以在 R 中将

vector = my_matrix[,2]

替换为

vector = as.numeric(my_matrix[,2])
于 2016-01-10T20:00:15.933 回答