
I am trying to run Spark jobs in my Kubernetes environment using the Spark operator. All of my application jars and dependencies are stored in S3. When I use a simple spark-submit command, the job runs fine. However, when I submit the operator YAML below, the job does not start.

apiVersion: "sparkoperator.k8s.io/v1beta2"
kind: SparkApplication
metadata:
  name: sparkapplication-25
spec:
  type: Scala
  mode: cluster
  image: "artifactory-sdf.com/spark/spark-py:v3.1.1-h3-2"
  imagePullPolicy: IfNotPresent
  mainClass: com.sdf.test.SparkOperator
  mainApplicationFile: "s3a://test/jars/sparkoperatortest.jar"
  arguments:
    - "s3a://test/json/batch_job.json"
  deps:
    jars:
    - "s3a:///test/jars/dep-jar.jar" 
  sparkVersion: "3.1.1"
  restartPolicy:
    type: Never
  volumes:
    - name: "test-volume"
      hostPath:
        path: "/tmp"
        type: Directory
  driver:
    cores: 1
    coreLimit: "1200m"
    memory: "512m"
    labels:
      version: 3.1.1
    serviceAccount: spark-user
    volumeMounts:
      - name: "test-volume"
        mountPath: "/tmp"
  executor:
    cores: 1
    instances: 2
    memory: "512m"
    labels:
      version: 3.1.1
    volumeMounts:
      - name: "test-volume"
        mountPath: "/tmp"
  sparkConf:
    "spark.kubernetes.file.upload.path": "s3a://spark/jars/tmp"
    "spark.hadoop.fs.s3a.access.key": "user"
    "spark.hadoop.fs.s3a.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem"
    "spark.hadoop.fs.s3a.fast.upload": "true"
    "spark.hadoop.fs.s3a.secret.key": "user@123"
    "spark.hadoop.fs.s3a.endpoint": "http://[457b:206:654e:m757:2:1:0]:32333"
    "spark.hadoop.fs.s3a.path.style.access": "true"
    "spark.kubernetes.executor.volumes.persistentVolumeClaim.data.options.claimName": "OnDemand"
    "spark.kubernetes.executor.volumes.persistentVolumeClaim.data.options.storageClass": "robin"
    "spark.kubernetes.executor.volumes.persistentVolumeClaim.data.options.sizeLimit": "200Gi"
    "spark.kubernetes.executor.volumes.persistentVolumeClaim.data.mount.path": "/tmp/spark-local-dir"
    "spark.kubernetes.executor.volumes.persistentVolumeClaim.data.mount.readOnly": "false"
    "spark.ui.port": "4041"

When I describe the SparkApplication with the following command:

kubectl describe sparkapplications sparkapplication-25 -n spark-jobs

it shows the following error:

Name:         sparkapplication-25
Namespace:    spark-jobs
Labels:       <none>
Annotations:  API Version:  sparkoperator.k8s.io/v1beta2
Kind:         SparkApplication
Metadata:
  Creation Timestamp:  2021-08-24T06:03:24Z
  Generation:          1
  Managed Fields:
    API Version:  sparkoperator.k8s.io/v1beta2
    Fields Type:  FieldsV1
    fieldsV1:
      f:metadata:
        f:annotations:
          .:
          f:kubectl.kubernetes.io/last-applied-configuration:
      f:spec:
        .:
        f:arguments:
        f:deps:
          .:
          f:jars:
        f:driver:
          .:
          f:coreLimit:
          f:cores:
          f:labels:
            .:
            f:version:
          f:memory:
          f:serviceAccount:
          f:volumeMounts:
        f:executor:
          .:
          f:cores:
          f:instances:
          f:labels:
            .:
            f:version:
          f:memory:
          f:volumeMounts:
        f:image:
        f:imagePullPolicy:
        f:mainApplicationFile:
        f:mainClass:
        f:mode:
        f:restartPolicy:
          .:
          f:type:
        f:sparkConf:
          .:
          f:spark.hadoop.fs.s3a.access.key:
          f:spark.hadoop.fs.s3a.endpoint:
          f:spark.hadoop.fs.s3a.fast.upload:
          f:spark.hadoop.fs.s3a.impl:
          f:spark.hadoop.fs.s3a.path.style.access:
          f:spark.hadoop.fs.s3a.secret.key:
          f:spark.kubernetes.executor.volumes.persistentVolumeClaim.data.mount.path:
          f:spark.kubernetes.executor.volumes.persistentVolumeClaim.data.mount.readOnly:
          f:spark.kubernetes.executor.volumes.persistentVolumeClaim.data.options.claimName:
          f:spark.kubernetes.executor.volumes.persistentVolumeClaim.data.options.sizeLimit:
          f:spark.kubernetes.executor.volumes.persistentVolumeClaim.data.options.storageClass:
          f:spark.kubernetes.file.upload.path:
          f:spark.ui.port:
        f:sparkVersion:
        f:type:
        f:volumes:
    Manager:      kubectl
    Operation:    Update
    Time:         2021-08-24T06:03:24Z
    API Version:  sparkoperator.k8s.io/v1beta2
    Fields Type:  FieldsV1
    fieldsV1:
      f:status:
        .:
        f:applicationState:
          .:
          f:errorMessage:
          f:state:
        f:driverInfo:
        f:lastSubmissionAttemptTime:
        f:submissionAttempts:
        f:terminationTime:
    Manager:         spark-operator
    Operation:       Update
    Time:            2021-08-24T06:03:27Z
  Resource Version:  155238262
  UID:               f4db9dfe-3339-4923-a771-6720ca59c511
spec:
  type: Scala
  mode: cluster
  image: "artifactory-sdf.com/spark/spark-py:v3.1.1-h3-2"
  imagePullPolicy: IfNotPresent
  mainClass: com.sdf.test.SparkOperator
  mainApplicationFile: "s3a://test/jars/sparkoperatortest.jar"
  arguments:
    - "s3a://test/json/batch_job.json"
  deps:
    jars:
    - "s3a:///test/jars/dep-jar.jar" 
  sparkVersion: "3.1.1"
  restartPolicy:
    type: Never
  volumes:
    - name: "test-volume"
      hostPath:
        path: "/tmp"
        type: Directory
  driver:
    cores: 1
    coreLimit: "1200m"
    memory: "512m"
    labels:
      version: 3.1.1
    serviceAccount: spark-user
    volumeMounts:
      - name: "test-volume"
        mountPath: "/tmp"
  executor:
    cores: 1
    instances: 2
    memory: "512m"
    labels:
      version: 3.1.1
    volumeMounts:
      - name: "test-volume"
        mountPath: "/tmp"
  sparkConf:
    "spark.kubernetes.file.upload.path": "s3a://spark/jars/tmp"
    "spark.hadoop.fs.s3a.access.key": "user"
    "spark.hadoop.fs.s3a.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem"
    "spark.hadoop.fs.s3a.fast.upload": "true"
    "spark.hadoop.fs.s3a.secret.key": "user@123"
    "spark.hadoop.fs.s3a.endpoint": "http://[457b:206:654e:m757:2:1:0]:32333"
    "spark.hadoop.fs.s3a.path.style.access": "true"
    "spark.kubernetes.executor.volumes.persistentVolumeClaim.data.options.claimName": "OnDemand"
    "spark.kubernetes.executor.volumes.persistentVolumeClaim.data.options.storageClass": "robin"
    "spark.kubernetes.executor.volumes.persistentVolumeClaim.data.options.sizeLimit": "200Gi"
    "spark.kubernetes.executor.volumes.persistentVolumeClaim.data.mount.path": "/tmp/spark-local-dir"
    "spark.kubernetes.executor.volumes.persistentVolumeClaim.data.mount.readOnly": "false"
    "spark.ui.port": "4041"

Status:
  Application State:
    Error Message:  failed to run spark-submit for SparkApplication spark-jobs/sparkapplication-25: WARNING: An illegal reflective access operation has occurred
WARNING: Illegal reflective access by org.apache.spark.unsafe.Platform (file:/opt/spark/jars/spark-unsafe_2.12-3.1.1.jar) to constructor java.nio.DirectByteBuffer(long,int)
WARNING: Please consider reporting this to the maintainers of org.apache.spark.unsafe.Platform
WARNING: Use --illegal-access=warn to enable warnings of further illegal reflective access operations
WARNING: All illegal access operations will be denied in a future release
21/08/24 06:03:26 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Exception in thread "main" java.lang.RuntimeException: java.lang.ClassNotFoundException: Class org.apache.hadoop.fs.s3a.S3AFileSystem not found
  at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2197)
  at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:2654)
  at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2667)
  at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:94)
  at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2703)
  at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2685)
  at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:373)
  at org.apache.spark.deploy.DependencyUtils$.resolveGlobPath(DependencyUtils.scala:191)
  at org.apache.spark.deploy.DependencyUtils$.$anonfun$resolveGlobPaths$2(DependencyUtils.scala:147)
  at org.apache.spark.deploy.DependencyUtils$.$anonfun$resolveGlobPaths$2$adapted(DependencyUtils.scala:145)
  at scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:245)
  at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
  at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
  at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:38)
  at scala.collection.TraversableLike.flatMap(TraversableLike.scala:245)
  at scala.collection.TraversableLike.flatMap$(TraversableLike.scala:242)
  at scala.collection.AbstractTraversable.flatMap(Traversable.scala:108)
  at org.apache.spark.deploy.DependencyUtils$.resolveGlobPaths(DependencyUtils.scala:145)
  at org.apache.spark.deploy.SparkSubmit.$anonfun$prepareSubmitEnvironment$4(SparkSubmit.scala:363)
  at scala.Option.map(Option.scala:230)
  at org.apache.spark.deploy.SparkSubmit.prepareSubmitEnvironment(SparkSubmit.scala:363)
  at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:894)
  at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)
  at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)
  at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)
  at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1030)
  at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1039)
  at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.ClassNotFoundException: Class org.apache.hadoop.fs.s3a.S3AFileSystem not found
  at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2101)
  at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2195)
  ... 27 more

    State:  FAILED
  Driver Info:
  Last Submission Attempt Time:  2021-08-24T06:03:27Z
  Submission Attempts:           1
  Termination Time:              <nil>
Events:
  Type     Reason                            Age   From            Message
  ----     ------                            ----  ----            -------
  Normal   SparkApplicationAdded             90s   spark-operator  SparkApplication sparkapplication-25 was added, enqueuing it for submission
  Warning  SparkApplicationSubmissionFailed  87s   spark-operator  failed to submit SparkApplication sparkapplication-25: failed to run spark-submit for SparkApplication spark-jobs/sparkapplication-25: WARNING: An illegal reflective access operation has occurred
  Warning  SparkApplicationFailed  87s  spark-operator  SparkApplication sparkapplication-25 failed: failed to run spark-submit for SparkApplication spark-jobs/sparkapplication-25: WARNING: An illegal reflective access operation has occurred

All of the hadoop-aws dependency jars are already present in our image, and other jobs that do not supply dependency jars and files run fine, even though their application jars are also stored in S3.

Am I missing a property here?


1 Answer


We eventually solved this by building an image with all the necessary dependencies baked in and referencing them with local:// URLs, e.g. local:///path/to/jar/in/container/dep.jar.
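
For illustration, a minimal sketch of how the relevant part of the SparkApplication spec can look once the jars are baked into the image. The /opt/spark/jars location and the jar names below are assumptions for the example, not paths taken from the original manifest:

# Sketch only: assumes the application jar and its dependency jar were copied
# into the image at build time (e.g. a Dockerfile step such as
#   COPY sparkoperatortest.jar dep-jar.jar /opt/spark/jars/
# ); adjust the paths to wherever the jars actually live in your container.
spec:
  mainApplicationFile: "local:///opt/spark/jars/sparkoperatortest.jar"
  deps:
    jars:
      - "local:///opt/spark/jars/dep-jar.jar"

Since the exception above was raised from spark-submit's DependencyUtils.resolveGlobPaths while it tried to resolve the s3a:// URIs at submission time, switching to local:// URLs sidesteps the need for the S3A filesystem classes during submission.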

answered 2022-01-13T14:47:31.900