我正在编写一个将数据从 Elasticsearch 加载到 HDFS 的 Spark 程序,但运行作业时,在创建 SparkSession(初始化 SparkContext)的过程中出现了错误。
Hadoop:3.2.1
Spark:2.4.4
Elasticsearch Spark(适用于 Spark 2.X)» 7.5.1
EMR:6.0.0
代码:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, to_date
spark = SparkSession \
.builder \
.getOrCreate()
错误日志:
20/10/22 10:09:12 ERROR SparkContext: Error initializing SparkContext.
java.util.ServiceConfigurationError: org.apache.spark.deploy.yarn.security.ServiceCredentialProvider: Provider org.elasticsearch.spark.deploy.yarn.security.EsServiceCredentialProvider could not be instantiated
at java.util.ServiceLoader.fail(ServiceLoader.java:232)
at java.util.ServiceLoader.access$100(ServiceLoader.java:185)
at java.util.ServiceLoader$LazyIterator.nextService(ServiceLoader.java:384)
at java.util.ServiceLoader$LazyIterator.next(ServiceLoader.java:404)
at java.util.ServiceLoader$1.next(ServiceLoader.java:480)
at scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:44)
at scala.collection.Iterator.foreach(Iterator.scala:941)
at scala.collection.Iterator.foreach$(Iterator.scala:941)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
at scala.collection.IterableLike.foreach(IterableLike.scala:74)
at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
at scala.collection.generic.Growable.$plus$plus$eq(Growable.scala:62)
at scala.collection.generic.Growable.$plus$plus$eq$(Growable.scala:53)
at scala.collection.mutable.ListBuffer.$plus$plus$eq(ListBuffer.scala:184)
at scala.collection.mutable.ListBuffer.$plus$plus$eq(ListBuffer.scala:47)
at scala.collection.TraversableLike.to(TraversableLike.scala:678)
at scala.collection.TraversableLike.to$(TraversableLike.scala:675)
at scala.collection.AbstractTraversable.to(Traversable.scala:108)
at scala.collection.TraversableOnce.toList(TraversableOnce.scala:299)
at scala.collection.TraversableOnce.toList$(TraversableOnce.scala:299)
at scala.collection.AbstractTraversable.toList(Traversable.scala:108)
at org.apache.spark.deploy.yarn.security.YARNHadoopDelegationTokenManager.loadCredentialProviders(YARNHadoopDelegationTokenManager.scala:82)
at org.apache.spark.deploy.yarn.security.YARNHadoopDelegationTokenManager.getCredentialProviders(YARNHadoopDelegationTokenManager.scala:73)
at org.apache.spark.deploy.yarn.security.YARNHadoopDelegationTokenManager.<init>(YARNHadoopDelegationTokenManager.scala:46)
at org.apache.spark.deploy.yarn.Client.setupSecurityToken(Client.scala:308)
at org.apache.spark.deploy.yarn.Client.createContainerLaunchContext(Client.scala:1013)
at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:178)
at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:57)
at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:183)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:501)
at org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:58)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:247)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:238)
at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.NoClassDefFoundError: org/apache/spark/deploy/yarn/security/ServiceCredentialProvider$class
at org.elasticsearch.spark.deploy.yarn.security.EsServiceCredentialProvider.<init>(EsServiceCredentialProvider.scala:63)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at java.lang.Class.newInstance(Class.java:442)
at java.util.ServiceLoader$LazyIterator.nextService(ServiceLoader.java:380)
... 40 more
Caused by: java.lang.ClassNotFoundException: org.apache.spark.deploy.yarn.security.ServiceCredentialProvider$class
at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
at java.lang.ClassLoader.loadClass(ClassLoader.java:419)
at java.lang.ClassLoader.loadClass(ClassLoader.java:352)
... 47 more
20/10/22 10:09:12 INFO SparkUI: Stopped Spark web UI at http://ip-172-31-1-155.us-east-2.test:4040
20/10/22 10:09:12 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Attempted to request executors before the AM has registered!
20/10/22 10:09:12 INFO YarnClientSchedulerBackend: Stopped
20/10/22 10:09:12 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
20/10/22 10:09:12 INFO MemoryStore: MemoryStore cleared
20/10/22 10:09:12 INFO BlockManager: BlockManager stopped
20/10/22 10:09:12 INFO BlockManagerMaster: BlockManagerMaster stopped
20/10/22 10:09:12 WARN MetricsSystem: Stopping a MetricsSystem that is not running
20/10/22 10:09:12 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
20/10/22 10:09:12 INFO SparkContext: Successfully stopped SparkContext
20/10/22 10:09:13 INFO ShutdownHookManager: Shutdown hook called
20/10/22 10:09:13 INFO ShutdownHookManager: Deleting directory /vol1/tmp/spark-b39bb8cc-5bc7-4721-89bd-8bd62b9e527e
20/10/22 10:09:13 INFO ShutdownHookManager: Deleting directory /vol1/tmp/spark-d94995f0-05b6-476f-935e-8ba501acbed3
at com.company.utils.ResourceScriptUtils.executeScript(ResourceScriptUtils.java:114)
at com.company.utils.ResourceScriptUtils.executeScript(ResourceScriptUtils.java:135)
at com.company.loader.impl.realTimeProcessing.RealTimeEsLoader.processJob(RealTimeEsLoader.java:232)
at com.company.loader.App.main(App.java:37)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.util.RunJar.run(RunJar.java:323)
at org.apache.hadoop.util.RunJar.main(RunJar.java:236)
org.apache.commons.exec.ExecuteException: Process exited with an error: 1 (Exit value: 1)
at org.apache.commons.exec.DefaultExecutor.executeInternal(DefaultExecutor.java:404)
at org.apache.commons.exec.DefaultExecutor.execute(DefaultExecutor.java:166)
at org.apache.commons.exec.DefaultExecutor.execute(DefaultExecutor.java:153)
at com.company.multijob.MultiJob$HadoopJob.call(MultiJob.java:50)
at com.company.multijob.MultiJob$HadoopJob.call(MultiJob.java:38)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
有人可以帮忙吗?谢谢。