
I am using Spark Job Server 0.6.2 with Spark 1.6.0, and on some job submission attempts I get the following exception:

[ERROR] 2016-11-16 08:01:59,595 spark.jobserver.context.DefaultSparkContextFactory$$anon$1 logError - Error initializing SparkContext.
java.lang.NullPointerException
at org.apache.spark.scheduler.TaskSchedulerImpl.<init>(TaskSchedulerImpl.scala:106)
at org.apache.spark.scheduler.TaskSchedulerImpl.<init>(TaskSchedulerImpl.scala:60)
at org.apache.spark.SparkContext$.org$apache$spark$SparkContext$$createTaskScheduler(SparkContext.scala:2630)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:522)
at spark.jobserver.context.DefaultSparkContextFactory$$anon$1.<init>(SparkContextFactory.scala:53)
at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:53)
at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48)
at spark.jobserver.context.SparkContextFactory$class.makeContext(SparkContextFactory.scala:37)
at spark.jobserver.context.DefaultSparkContextFactory.makeContext(SparkContextFactory.scala:48)
at spark.jobserver.JobManagerActor.createContextFromConfig(JobManagerActor.scala:378)
at spark.jobserver.JobManagerActor$$anonfun$wrappedReceive$1.applyOrElse(JobManagerActor.scala:122)

What could be the cause?


1 Answer


It looks like the jobserver cannot find the Spark context configuration in your configuration file. Please use a valid configuration file, for example:

spark {
  # spark.master will be passed to each job's JobContext
  # master = "local[4]"
  # master = "mesos://vm28-hulk-pub:5050"
   master = "yarn-client"

  # Default # of CPUs for jobs to use for Spark standalone cluster
  job-number-cpus = 2

  jobserver {
    port = 8090
    jar-store-rootdir = /opt/test/jobserver/jars

    jobdao = spark.jobserver.io.JobFileDAO

    filedao {
      rootdir = /opt/test/jobserver/data
    }
  }

  # predefined Spark contexts
  contexts {
  #   my-low-latency-context {
  #     num-cpu-cores = 1           # Number of cores to allocate.  Required.
  #     memory-per-node = 512m         # Executor memory per node, -Xmx style eg 512m, 1G, etc.
  #   }

    # define additional contexts here
    shared {
      num-cpu-cores = 1                         # shared tasks work best in parallel
      memory-per-node = 1024M                   # per-node executor memory found by trial and error
      spark.yarn.executor.memoryOverhead = 512
      spark.yarn.am.memory = 1024m
      spark.yarn.am.memoryOverhead = 512

      spark.executor.instances = 14             # 4 r3.xlarge instances with 4 cores each = 16 + 1 master
      spark.scheduler.mode = "FAIR"
    }

  }

  # universal context configuration.  These settings can be overridden, see README.md
  context-settings {
    num-cpu-cores = 2           # Number of cores to allocate.  Required.
    memory-per-node = 512m         # Executor memory per node, -Xmx style eg 512m, 1G, etc.

    # in case spark distribution should be accessed from HDFS (as opposed to being installed on every mesos slave)
    # spark.executor.uri = "hdfs://namenode:8020/apps/spark/spark.tgz"

    # uris of jars to be loaded into the classpath for this context. Uris is a string list, or a string separated by commas ','
    # dependent-jar-uris = ["file:///some/path/present/in/each/mesos/slave/somepackage.jar"]

    # If you wish to pass any settings directly to the sparkConf as-is, add them here in passthrough,
    # such as hadoop connection settings that don't use the "spark." prefix
    passthrough {
      #es.nodes = "192.1.1.1"
    }
  }

  # This needs to match SPARK_HOME for cluster SparkContexts to be created successfully
  # home = "/home/spark/spark"
}
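
For reference, a job that runs against the predefined "shared" context above could look like the following minimal sketch. It is modeled on the word-count example that ships with spark-jobserver 0.6.x; the object name and the input.string parameter are illustrative placeholders, not part of the original answer.

import com.typesafe.config.Config
import org.apache.spark.SparkContext
import spark.jobserver.{SparkJob, SparkJobInvalid, SparkJobValid, SparkJobValidation}

// Hypothetical example job: counts word occurrences in a string passed via the job config.
object SharedContextWordCount extends SparkJob {

  // Reject the request up front if the expected parameter is missing.
  override def validate(sc: SparkContext, config: Config): SparkJobValidation =
    if (config.hasPath("input.string")) SparkJobValid
    else SparkJobInvalid("No input.string config param")

  // Runs inside the long-lived "shared" context configured above.
  override def runJob(sc: SparkContext, config: Config): Any =
    sc.parallelize(config.getString("input.string").split(" ").toSeq).countByValue()
}

Once the server is started with a valid configuration file, the job jar is uploaded through the /jars endpoint and the job is submitted with a POST to /jobs with appName, classPath, and context=shared query parameters, so it reuses the pre-created context instead of creating a new SparkContext per request.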
answered 2017-01-12T05:53:42.853