
I am trying to run Spark MLlib algorithms in Scala from Eclipse. The code compiles without problems, but at runtime it fails with a "NoSuchMethodError".

Here is my code:

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.regression.LinearRegressionWithSGD

object LinearRegression {

  // Remove the element at index n (the label column) so it is not fed back in as a feature
  def truncate(k: Array[String], n: Int): List[String] =
    (k.take(n) ++ k.drop(n + 1)).toList

  def main(args: Array[String]): Unit = {

    val sc = new SparkContext(new SparkConf().setAppName("linear regression").setMaster("local"))

    // Load the data
    val data = sc.textFile("D://Innominds//DataSets//Regression//Regression Dataset.csv")
    println("Total no of instances: " + data.count())

    // Split the data into training and testing sets
    val split = data.randomSplit(Array(0.8, 0.2))
    val train = split(0).cache()
    println("Training instances: " + train.count())
    val test = split(1).cache()
    println("Testing instances: " + test.count())

    // Map each CSV line to a LabeledPoint: column 5 is the label, the remaining columns are features
    val trainingRDD = train.map { line =>
      val parts = line.split(',')
      LabeledPoint(parts(5).toDouble, Vectors.dense(truncate(parts, 5).map(_.toDouble).toArray))
    }
    val testingRDD = test.map { line =>
      val parts = line.split(',')
      LabeledPoint(parts(5).toDouble, Vectors.dense(truncate(parts, 5).map(_.toDouble).toArray))
    }

    val model = LinearRegressionWithSGD.train(trainingRDD, 20)

    // Score the test set as (prediction, actual label) pairs
    val predict = testingRDD.map { x =>
      val score = model.predict(x.features)
      (score, x.label)
    }

    // Sum of squared errors over the test set
    val loss = predict.map {
      case (p, l) =>
        val err = p - l
        err * err
    }.reduce(_ + _)

    val rmse = math.sqrt(loss / test.count())

    println("Test RMSE = " + rmse)

    sc.stop()
  }
}

The error occurs when the model is built, i.e. at

val model = LinearRegressionWithSGD.train(trainingRDD, 20)

The print statements before this line print their values to the console just fine.

The dependencies in pom.xml are:

<dependencies>
    <dependency>
        <groupId>org.scala-lang</groupId>
        <artifactId>scala-library</artifactId>
        <version>${scala.version}</version>
    </dependency>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.4</version>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>org.specs</groupId>
        <artifactId>specs</artifactId>
        <version>1.2.5</version>
        <scope>test</scope>
    </dependency>

    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.11</artifactId>
        <version>1.2.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-mllib_2.11</artifactId>
        <version>1.3.0</version>
    </dependency>
    <dependency>
        <groupId>com.google.guava</groupId>
        <artifactId>guava</artifactId>
        <version>14.0.1</version>
    </dependency>
</dependencies>

The error in Eclipse:

15/03/19 15:11:32 INFO SparkContext: Created broadcast 6 from broadcast at GradientDescent.scala:185
Exception in thread "main" java.lang.NoSuchMethodError: org.apache.spark.rdd.RDD.treeAggregate$default$4(Ljava/lang/Object;)I
    at org.apache.spark.mllib.optimization.GradientDescent$$anonfun$runMiniBatchSGD$1.apply$mcVI$sp(GradientDescent.scala:189)
    at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:166)
    at org.apache.spark.mllib.optimization.GradientDescent$.runMiniBatchSGD(GradientDescent.scala:184)
    at org.apache.spark.mllib.optimization.GradientDescent.optimize(GradientDescent.scala:107)
    at org.apache.spark.mllib.regression.GeneralizedLinearAlgorithm.run(GeneralizedLinearAlgorithm.scala:263)
    at org.apache.spark.mllib.regression.GeneralizedLinearAlgorithm.run(GeneralizedLinearAlgorithm.scala:190)
    at org.apache.spark.mllib.regression.LinearRegressionWithSGD$.train(LinearRegression.scala:150)
    at org.apache.spark.mllib.regression.LinearRegressionWithSGD$.train(LinearRegression.scala:184)
    at Algorithms.LinearRegression$.main(LinearRegression.scala:46)
    at Algorithms.LinearRegression.main(LinearRegression.scala)

1 Answer


You are using spark-core 1.2.1 together with spark-mllib 1.3.0. spark-mllib 1.3.0 invokes RDD.treeAggregate with a signature that does not exist in spark-core 1.2.1, which is exactly the "NoSuchMethodError" in your stack trace. Make sure you use the same version for both dependencies.
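A minimal sketch of the fix, assuming you standardize on Spark 1.3.0: factor the version into a single Maven property and reference it from both Spark artifacts (the spark.version property name is a convention of my own here, not something your pom already defines). Your other dependencies stay as they are.

<properties>
    <!-- assumed property: pick one Spark release and use it everywhere -->
    <spark.version>1.3.0</spark.version>
</properties>

<dependencies>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.11</artifactId>
        <version>${spark.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-mllib_2.11</artifactId>
        <version>${spark.version}</version>
    </dependency>
</dependencies>

With a single property the two artifacts cannot drift apart again. Running mvn dependency:tree should then show one consistent Spark version; if you build through m2e in Eclipse, also refresh the project (Maven > Update Project) so the old 1.2.1 jars are dropped from the build path.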

answered 2015-03-19T12:25:10.507