我想在 apache beam 提供的 spark 集群中运行 wordcount 示例。但是当我将这个项目提交给 spark 时,它会抛出这个异常。
与stackoverflow中的相同问题相同的问题 但是我的spark集群是v2.0.0,无法通过将spark版本更改为v1.6.0来解决。有什么想法可以在不更改 spark 版本的情况下解决这个问题吗?
17/11/15 09:51:38 WARN SparkConf: In Spark 1.0 and later spark.local.dir will be overridden by the value set by the cluster manager (via SPARK_LOCAL_DIRS in mesos/standalone and LOCAL_DIRS in YARN).
Exception in thread "main" java.lang.RuntimeException: java.lang.AssertionError: assertion failed: copyAndReset must return a zero value copy
at org.apache.beam.runners.spark.SparkPipelineResult.runtimeExceptionFrom(SparkPipelineResult.java:55)
at org.apache.beam.runners.spark.SparkPipelineResult.beamExceptionFrom(SparkPipelineResult.java:72)
at org.apache.beam.runners.spark.SparkPipelineResult.waitUntilFinish(SparkPipelineResult.java:99)
at org.apache.beam.runners.spark.SparkPipelineResult.waitUntilFinish(SparkPipelineResult.java:87)
at org.apache.beam.examples.WordCount.main(WordCount.java:219)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:729)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:185)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:210)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:124)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.AssertionError: assertion failed: copyAndReset must return a zero value copy
at scala.Predef$.assert(Predef.scala:170)
at org.apache.spark.util.AccumulatorV2.writeReplace(AccumulatorV2.scala:157)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at java.io.ObjectStreamClass.invokeWriteReplace(ObjectStreamClass.java:1118)
at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1136)
at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1548)
at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1509)
at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1432)
at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1178)
at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1548)
at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1509)
at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1432)
at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1178)
at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1548)
at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1509)
at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1432)
at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1178)
at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:348)
at org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:43)
at org.apache.spark.serializer.JavaSerializerInstance.serialize(JavaSerializer.scala:100)
at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:295)
at org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:288)
at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:108)
at org.apache.spark.SparkContext.clean(SparkContext.scala:2037)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1.apply(RDD.scala:763)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1.apply(RDD.scala:762)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:358)
at org.apache.spark.rdd.RDD.mapPartitions(RDD.scala:762)
at org.apache.spark.api.java.JavaRDDLike$class.mapPartitionsToPair(JavaRDDLike.scala:187)
at org.apache.spark.api.java.AbstractJavaRDDLike.mapPartitionsToPair(JavaRDDLike.scala:45)
at org.apache.beam.runners.spark.translation.TransformTranslator$6.evaluate(TransformTranslator.java:393)
at org.apache.beam.runners.spark.translation.TransformTranslator$6.evaluate(TransformTranslator.java:351)
at org.apache.beam.runners.spark.SparkRunner$Evaluator.doVisitTransform(SparkRunner.java:412)
at org.apache.beam.runners.spark.SparkRunner$Evaluator.visitPrimitiveTransform(SparkRunner.java:398)
at org.apache.beam.sdk.runners.TransformHierarchy$Node.visit(TransformHierarchy.java:602)
at org.apache.beam.sdk.runners.TransformHierarchy$Node.visit(TransformHierarchy.java:594)
at org.apache.beam.sdk.runners.TransformHierarchy$Node.visit(TransformHierarchy.java:594)
at org.apache.beam.sdk.runners.TransformHierarchy$Node.visit(TransformHierarchy.java:594)
at org.apache.beam.sdk.runners.TransformHierarchy$Node.access$500(TransformHierarchy.java:276)
at org.apache.beam.sdk.runners.TransformHierarchy.visit(TransformHierarchy.java:210)
at org.apache.beam.sdk.Pipeline.traverseTopologically(Pipeline.java:440)
at org.apache.beam.runners.spark.SparkRunner$2.run(SparkRunner.java:211)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)