0

我在 apache spark 中读取本地文件时出错。scala> val f=sc.textFile("/home/cloudera/Downloads/sample.txt")

f: org.apache.spark.rdd.RDD[String] = /home/cloudera/Downloads/sample.txt MapPartitionsRDD[9] at textFile at <console>:27

scala> f.count()

org.apache.hadoop.mapred.InvalidInputException: 输入路径不存在: hdfs://quickstart.cloudera:8020/home/cloudera/Downloads/sample.txt at org.apache.hadoop.mapred.FileInputFormat.singleThreadedListStatus(FileInputFormat. java:287) 在 org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:229) 在 org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:315) 在 org.apache.spark.rdd .HadoopRDD.getPartitions(HadoopRDD.scala:202) at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239) at org.apache.spark.rdd.RDD$$anonfun$ partitions$2.apply(RDD.scala:237) 在 org.apache.spark.rdd.RDD.partitions(RDD.scala:237) 在 org.apache.spark 的 scala.Option.getOrElse(Option.scala:120)。 rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35) 在 org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239) 在 org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237) 在 scala。 Option.getOrElse(Option.scala:120) at org.apache.spark.rdd.RDD.partitions(RDD.scala:237) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1959) at org.apache .spark.rdd.RDD.count(RDD.scala:1157) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:30) at $iwC$$iwC $$iwC$$iwC$$iwC$$iwC$$iwC.(:35) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:37) at $iwC$$iwC$ $iwC$$iwC$$iwC.(:39) at $iwC$$iwC$$iwC$$iwC.(:41) at $iwC$$iwC$$iwC.(:43) at $iwC$$iwC .(:45) at $iwC.(:47) at (:49) at .(:53) at .() at .(:7) at .() at $print() at sun.reflect.NativeMethodAccessorImpl。在 sun.reflect.NativeMethodAccessorImpl 处调用0(本机方法)。invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.spark.repl.SparkIMain $ReadEvalPrint.call(SparkIMain.scala:1045) at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1326) at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:第821章.reallyInterpret$1(SparkILoop.scala:857) at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902) at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814) at org .apache.spark.repl。SparkILoop.processLine$1(SparkILoop.scala:657) at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665) at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$ SparkILoop$$loop(SparkILoop.scala:670) at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997 ) at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945) at org.apache.spark.repl.SparkILoop$$anonfun $org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945) 在 scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135) 在 org.apache.spark。 repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945) 在 org.apache.spark.repl.SparkILoop。进程(SparkILoop.scala:1064)在 org.apache.spark.repl.Main$.main(Main.scala:35) 在 org.apache.spark.repl.Main.main(Main.scala) 在 sun.reflect。 NativeMethodAccessorImpl.invoke0(Native Method) 在 sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) 在 sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 在 java.lang.reflect.Method.invoke(Method. java:606) 在 org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:730) 在 org.apache.spark.deploy.SparkSubmit$.doRunMain$1( SparkSubmit.scala:181) 在 org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206) 在 org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121) 在 org.apache .spark.deploy.SparkSubmit.main(SparkSubmit.scala)第1064章) 在 sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) 在 sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 在 java.lang.reflect.Method.invoke(Method.java:606) 在 org .apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:730) 在 org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)在 org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206) 在 org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121) 在 org.apache.spark.deploy.SparkSubmit .main(SparkSubmit.scala)第1064章) 在 sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) 在 sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 在 java.lang.reflect.Method.invoke(Method.java:606) 在 org .apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:730) 在 org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)在 org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206) 在 org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121) 在 org.apache.spark.deploy.SparkSubmit .main(SparkSubmit.scala)main(Main.scala:35) 在 org.apache.spark.repl.Main.main(Main.scala) 在 sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 在 sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java: 57) 在 sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 在 java.lang.reflect.Method.invoke(Method.java:606) 在 org.apache.spark.deploy.SparkSubmit$.org$apache$ spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:730) at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181) at org.apache.spark.deploy.SparkSubmit$.submit( SparkSubmit.scala:206) 在 org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121) 在 org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)main(Main.scala:35) 在 org.apache.spark.repl.Main.main(Main.scala) 在 sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 在 sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java: 57) 在 sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 在 java.lang.reflect.Method.invoke(Method.java:606) 在 org.apache.spark.deploy.SparkSubmit$.org$apache$ spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:730) at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181) at org.apache.spark.deploy.SparkSubmit$.submit( SparkSubmit.scala:206) 在 org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121) 在 org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)reflect.NativeMethodAccessorImpl.invoke0(Native Method) 在 sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) 在 sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 在 java.lang.reflect.Method.invoke( Method.java:606) 在 org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:730) 在 org.apache.spark.deploy.SparkSubmit$.doRunMain $1(SparkSubmit.scala:181) at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121) at org .apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)reflect.NativeMethodAccessorImpl.invoke0(Native Method) 在 sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) 在 sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 在 java.lang.reflect.Method.invoke( Method.java:606) 在 org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:730) 在 org.apache.spark.deploy.SparkSubmit$.doRunMain $1(SparkSubmit.scala:181) at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121) at org .apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$ runMain(SparkSubmit.scala:730) at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181) at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121) 在 org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$ runMain(SparkSubmit.scala:730) at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181) at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121) 在 org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)181) 在 org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206) 在 org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121) 在 org.apache.spark.deploy .SparkSubmit.main(SparkSubmit.scala)181) 在 org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206) 在 org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121) 在 org.apache.spark.deploy .SparkSubmit.main(SparkSubmit.scala)

4

1 回答 1

1

您必须指定文件路径。当您设置了 hadoop 路径时,需要指定路径。

sc.textFile("file:///home/cloudera/Downloads/sample.txt")

希望这可以帮助!

于 2017-05-30T03:51:37.820 回答