我正在尝试把每个训练好的决策树模型所对应的 PipelineModel 存储到一个 Array 中。
虽然我已经创建了一个用于存放 PipelineModel 的数组,
但在把模型赋值给数组元素时,出现了以下类型不匹配错误:
<console>:96: error: type mismatch;
found : model.type (with underlying type org.apache.spark.ml.PipelineModel)
required: org.apache.spark.ml.PipelineModel.type
bestModels(i) = model // Here is the problem!!!
有人能帮帮我吗?鸢尾花数据集(libsvm 格式)可以在这里找到: https://1drv.ms/u/s!Antm9EMPXrQmgP9zQhgdAdxUBSAtSA 。以下是示例代码:
import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.ml.classification.DecisionTreeClassificationModel
import org.apache.spark.ml.classification.DecisionTreeClassifier
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
import org.apache.spark.ml.feature.{IndexToString, StringIndexer, VectorIndexer}
import org.apache.spark.sql.types._
// Script setup for k-fold cross-validation of a decision-tree pipeline.
// NOTE(review): assumes `spark` and `rand` are already in scope, as they are in spark-shell
// (which imports org.apache.spark.sql.functions._ on startup) — confirm when run elsewhere.

// Number of cross-validation folds; also sizes the per-fold result arrays below.
val folds = 10
val data = spark.read.format("libsvm").load("/home/vitrion/Documents/iris.libsvm")
// Accuracy measured on the held-out fold of each iteration; filled in by the loop.
var accuracies = Array.fill(folds)(0.0)
// One fitted PipelineModel per fold.
// `Array.fill(folds)(PipelineModel)` would store the PipelineModel *companion object* in every
// slot, giving the array element type `PipelineModel.type`, so assigning a fitted model to it
// fails with "type mismatch". Allocate by element type instead; slots are null until assigned.
var bestModels = Array.ofDim[PipelineModel](folds)
// Shuffle, then keep 70% for training / cross-validation and 30% as a final hold-out set.
val Array(trainData, testData) = data.orderBy(rand()).randomSplit(Array(0.7, 0.3), seed = 1234L)
// Split the training data into `folds` equally weighted parts. The number of weights is tied to
// `folds` (previously a hard-coded 10) so the split and the loop bounds cannot drift apart.
val foldedData = trainData.orderBy(rand()).randomSplit(Array.fill(folds)(1.0 / folds))
val evaluator1 = new MulticlassClassificationEvaluator().setLabelCol("indexedLabel").setPredictionCol("prediction").setMetricName("accuracy")
// The pipeline stages do not depend on the fold: both indexers are fit on the full `trainData`,
// and the classifier / label converter are plain configuration. Build them once instead of
// refitting identical indexers on every iteration.
val labelIndexer = new StringIndexer().setInputCol("label").setOutputCol("indexedLabel").fit(trainData)
val featureIndexer = new VectorIndexer().setInputCol("features").setOutputCol("indexedFeatures").setMaxCategories(6).fit(trainData)
val dt = new DecisionTreeClassifier().setLabelCol("indexedLabel").setFeaturesCol("indexedFeatures")
val labelConverter = new IndexToString().setInputCol("prediction").setOutputCol("predictedLabel").setLabels(labelIndexer.labels)
val pipeline = new Pipeline().setStages(Array(labelIndexer, featureIndexer, dt, labelConverter))

// k-fold cross-validation: fold `i` is held out for scoring, all other folds are used to fit.
for (i <- 0 until folds) {
  // Union of every fold except `i`, starting from an empty frame with the schema of `data`.
  val provTrainData = (0 until folds)
    .filter(_ != i)
    .foldLeft(data.limit(0))((acc, j) => acc.union(foldedData(j)))
  val provTestData = foldedData(i)

  // Visual layout of this iteration: "T" for a training fold, "S" for the held-out fold.
  val foldStr = (0 until folds).map(j => if (j == i) "S" else "T").mkString
  println(foldStr)

  // Each fit returns a new PipelineModel, so reusing `pipeline` across folds is safe.
  val model = pipeline.fit(provTrainData)
  val provPredictions = model.transform(provTestData)

  accuracies(i) = evaluator1.evaluate(provPredictions)
  // Keep the fitted pipeline of this fold alongside its accuracy.
  bestModels(i) = model
  println(s"FOLD $i\nAccuracy: ${accuracies(i)}")
}