我在独立(standalone)模式下运行 Spark,并使用 Alluxio 进行数据访问。更具体地说,我有 1 个 Spark master 和 1 个 Spark worker。
运行我的作业时,我收到以下错误:
17/03/22 14:35:43 WARN TaskSetManager: Lost task 0.0 in stage 0.0 (TID 0, 10.254.22.6): java.io.IOException: Frame size (67108864) larger than max length (16777216)!
at alluxio.AbstractClient.checkVersion(AbstractClient.java:112)
at alluxio.AbstractClient.connect(AbstractClient.java:175)
at alluxio.AbstractClient.retryRPC(AbstractClient.java:322)
at alluxio.client.file.FileSystemMasterClient.getStatus(FileSystemMasterClient.java:183)
at alluxio.client.file.BaseFileSystem.getStatus(BaseFileSystem.java:175)
at alluxio.client.file.BaseFileSystem.getStatus(BaseFileSystem.java:167)
at alluxio.hadoop.HdfsFileInputStream.<init>(HdfsFileInputStream.java:86)
at alluxio.hadoop.AbstractFileSystem.open(AbstractFileSystem.java:514)
at alluxio.hadoop.FileSystem.open(FileSystem.java:25)
at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:766)
at net.atos.hadoop.ImageRecordReader.initialize(ImageRecordReader.java:47)
at org.apache.spark.rdd.NewHadoopRDD$$anon$1.<init>(NewHadoopRDD.scala:153)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:124)
at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:65)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:300)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:300)
at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:69)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:262)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:300)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:300)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:88)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
环境:
- Spark 1.5.2
- Alluxio 1.3.0