我得到以下错误
Error::java.lang.RuntimeException: 配置对象时出错。Spark 1.6.1 和 scala 2.10.5
使用 spark-shell --packages com.databricks:spark-csv_2.10:1.5.0 --driver-class-path /usr/iop/4.2.0.0/spark/lib/sqljdbc4.jar 启动 spark-shell
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.sql.SQLContext
import sqlContext.implicits._
val conf = new SparkConf().setAppName("test").setMaster("local").set("spark.driver.allowMultipleContexts", "true");
val sc = new SparkContext(conf)
val sqlContext = new SQLContext(sc)
val df = sqlContext.read().format("com.databricks.spark.csv").option("inferScheme","true").option("header","true").load("path_to/data.csv");
df.show()
下面是完整的跟踪
./test.sh
Ivy Default Cache set to: /home/xxxx/at732615/.ivy2/cache
The jars for the packages stored in: /home/xxxx/at732615/.ivy2/jars
:: loading settings :: url = jar:file:/usr/iop/4.2.0.0/spark/lib/spark-assembly- 1.6.1_IBM_4-hadoop2.7.2-IBM-12.jar!/org/apache/ivy/core/settings/ivysettings.xml
com.databricks#spark-csv_2.10 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent;1.0
confs: [default]
found com.databricks#spark-csv_2.10;1.5.0 in central
found org.apache.commons#commons-csv;1.1 in central
found com.univocity#univocity-parsers;1.5.1 in central
:: resolution report :: resolve 331ms :: artifacts dl 5ms
:: modules in use:
com.databricks#spark-csv_2.10;1.5.0 from central in [default]
com.univocity#univocity-parsers;1.5.1 from central in [default]
org.apache.commons#commons-csv;1.1 from central in [default]
---------------------------------------------------------------------
| | modules || artifacts |
| conf | number| search|dwnlded|evicted|| number|dwnlded|
---------------------------------------------------------------------
| default | 3 | 0 | 0 | 0 || 3 | 0 |
---------------------------------------------------------------------
:: retrieving :: org.apache.spark#spark-submit-parent
confs: [default]
0 artifacts copied, 3 already retrieved (0kB/8ms)
Welcome to
____ __
/ __/__ ___ _____/ /__
_\ \/ _ \/ _ `/ __/ '_/
/___/ .__/\_,_/_/ /_/\_\ version 1.6.1
/_/
Using Scala version 2.10.5 (OpenJDK 64-Bit Server VM, Java 1.8.0_77)
Type in expressions to have them evaluated.
Type :help for more information.
spark.yarn.driver.memoryOverhead is set but does not apply in client mode.
Spark context available as sc.
SQL context available as sqlContext.
Loading csv.scala...
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.sql.SQLContext
import sqlContext.implicits._
conf: org.apache.spark.SparkConf = org.apache.spark.SparkConf@4ccfd904
sc: org.apache.spark.SparkContext = org.apache.spark.SparkContext@32a53a59
sqlContext: org.apache.spark.sql.SQLContext = org.apache.spark.sql.SQLContext@219acda9
java.lang.RuntimeException: Error in configuring object
at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:112)
at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:78)
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:136)
at org.apache.spark.rdd.HadoopRDD.getInputFormat(HadoopRDD.scala:185)
at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:198)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
at org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1307)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
at org.apache.spark.rdd.RDD.take(RDD.scala:1302)
at org.apache.spark.rdd.RDD$$anonfun$first$1.apply(RDD.scala:1342)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
at org.apache.spark.rdd.RDD.first(RDD.scala:1341)
at com.databricks.spark.csv.CsvRelation.firstLine$lzycompute(CsvRelation.scala:269)
at com.databricks.spark.csv.CsvRelation.firstLine(CsvRelation.scala:265)
at com.databricks.spark.csv.CsvRelation.inferSchema(CsvRelation.scala:242)
at com.databricks.spark.csv.CsvRelation.<init>(CsvRelation.scala:74)
at com.databricks.spark.csv.DefaultSource.createRelation(DefaultSource.scala:171)
at com.databricks.spark.csv.DefaultSource.createRelation(DefaultSource.scala:44)
at org.apache.spark.sql.execution.datasources.ResolvedDataSource$.apply(ResolvedDataSource.scala:158)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:119)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:109)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:37)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:42)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:44)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:46)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:48)
at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:50)
at $iwC$$iwC$$iwC$$iwC.<init>(<console>:52)
at $iwC$$iwC$$iwC.<init>(<console>:54)
at $iwC$$iwC.<init>(<console>:56)
at $iwC.<init>(<console>:58)
at <init>(<console>:60)
at .<init>(<console>:64)
at .<clinit>(<console>)
at .<init>(<console>:7)
at .<clinit>(<console>)
at $print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1346)
at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657)
at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665)
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$interpretAllFrom$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply(SparkILoop.scala:680)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$interpretAllFrom$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$mcV$sp$2.apply(SparkILoop.scala:677)
at scala.reflect.io.Streamable$Chars$class.applyReader(Streamable.scala:104)
at scala.reflect.io.File.applyReader(File.scala:82)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$interpretAllFrom$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(SparkILoop.scala:677)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$interpretAllFrom$1$$anonfun$apply$mcV$sp$1.apply(SparkILoop.scala:677)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$interpretAllFrom$1$$anonfun$apply$mcV$sp$1.apply(SparkILoop.scala:677)
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$savingReplayStack(SparkILoop.scala:162)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$interpretAllFrom$1.apply$mcV$sp(SparkILoop.scala:676)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$interpretAllFrom$1.apply(SparkILoop.scala:676)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$interpretAllFrom$1.apply(SparkILoop.scala:676)
at org.apache.spark.repl.SparkILoop.savingReader(SparkILoop.scala:167)
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$interpretAllFrom(SparkILoop.scala:675)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$loadCommand$1.apply(SparkILoop.scala:740)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$loadCommand$1.apply(SparkILoop.scala:739)
at org.apache.spark.repl.SparkILoop.withFile(SparkILoop.scala:733)
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loadCommand(SparkILoop.scala:739)
at org.apache.spark.repl.SparkILoop$$anonfun$standardCommands$7.apply(SparkILoop.scala:344)
at org.apache.spark.repl.SparkILoop$$anonfun$standardCommands$7.apply(SparkILoop.scala:344)
at scala.tools.nsc.interpreter.LoopCommands$LineCmd.apply(LoopCommands.scala:81)
at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:809)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$loadFiles$1.apply(SparkILoop.scala:910)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$loadFiles$1.apply(SparkILoop.scala:908)
at scala.collection.immutable.List.foreach(List.scala:318)
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loadFiles(SparkILoop.scala:908)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:995)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059)
at org.apache.spark.repl.Main$.main(Main.scala:31)
at org.apache.spark.repl.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:109)
... 110 more
Caused by: java.lang.IllegalArgumentException: Compression codec com.hadoop.compression.lzo.LzopCodec not found.
at org.apache.hadoop.io.compress.CompressionCodecFactory.getCodecClasses(CompressionCodecFactory.java:139)
at org.apache.hadoop.io.compress.CompressionCodecFactory.<init>(CompressionCodecFactory.java:180)
at org.apache.hadoop.mapred.TextInputFormat.configure(TextInputFormat.java:45)
... 115 more
Caused by: java.lang.ClassNotFoundException: Class com.hadoop.compression.lzo.LzopCodec not found
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2101)
at org.apache.hadoop.io.compress.CompressionCodecFactory.getCodecClasses(CompressionCodecFactory.java:132)
... 117 more
<console>:32: error: not found: value df
df.show()