SparkSql.scala
import org.apache.spark.sql.SparkSession
object SparkSql
{
def main(args: Array[String]): Unit = {
val spark = SparkSession.builder().master("local[*]").appName("SparkSql1").getOrCreate()
//val data = spark.sparkContext.textFile(("C:\\Users\\blaskar\\Desktop\\Biswajit\\data\\sample.txt"))
val data = spark.read.format("csv").option("inferSchema","true").option("header","true").load("C:\\Users\\blaskar\\Desktop\\Biswajit\\data\\SampleCSVFile.csv")
val columnsRenamed = Seq("S.no", "Eldonbase", "platnium","Macintyre","count1","count2","count3","Nunavut","Storage","count4")
val original_data = data.toDF(columnsRenamed:_*).persist()
original_data.show(10)
}
}
构建.sbt
name := "Spark-Sample1"
version := "1.0"
scalaVersion := "2.11.8"
libraryDependencies ++= Seq(
"org.apache.spark" % "spark-core_2.11" % "2.1.0" exclude("org.apache.hadoop", "hadoop-yarn-server-web-proxy"),
"org.apache.spark" % "spark-sql_2.11" % "2.1.0" exclude("org.apache.hadoop", "hadoop-yarn-server-web-proxy"))
系统使用的软件版本 spark:2.1.0 scala:2.11.8
我正在使用 IntelliJ。代码运行良好,但是当我将作业提交到我的独立集群时,它显示以下错误:
火花提交错误:
spark-submit --class "SparkSql" C:\Users\blaskar\Desktop\sbt_projects\Spark-Sample1
\out\artifacts\Spark_Sample1_jar\Spark-Sample1.jar
错误
java.lang.ClassNotFoundException: SparkSql
at java.net.URLClassLoader.findClass(Unknown Source)
at java.lang.ClassLoader.loadClass(Unknown Source)
at java.lang.ClassLoader.loadClass(Unknown Source)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Unknown Source)
at org.apache.spark.util.Utils$.classForName(Utils.scala:229)
at
org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:695)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:187)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:212)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:126)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)