I am trying to process a CSV file with the spark-csv package in the spark-shell of Spark 1.4.1.
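Before worrying about types, the read I start from looks roughly like this (cars.csv is only a placeholder file name for my data, and the spark-csv jar is attached to the shell separately):

val df = sqlContext.read
  .format("com.databricks.spark.csv")  // the spark-csv data source
  .option("header", "true")            // treat the first line as column names
  .load("cars.csv")                    // placeholder path

To control the column types, I then tried building a schema explicitly in the shell: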
scala> import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.hive.HiveContext
scala> import org.apache.spark.sql.hive.orc._
import org.apache.spark.sql.hive.orc._
scala> import org.apache.spark.sql.types.{StructType, StructField, StringType, IntegerType};
import org.apache.spark.sql.types.{StructType, StructField, StringType, IntegerType}
scala> val hiveContext = new org.apache.spark.sql.hive.HiveContext(sc)
15/12/21 02:06:24 WARN SparkConf: The configuration key 'spark.yarn.applicationMaster.waitTries' has been deprecated as of Spark 1.3 and and may be removed in the future. Please use the new key 'spark.yarn.am.waitTime' instead.
15/12/21 02:06:24 INFO HiveContext: Initializing execution hive, version 0.13.1
hiveContext: org.apache.spark.sql.hive.HiveContext = org.apache.spark.sql.hive.HiveContext@74cba4b
scala> val customSchema = StructType(Seq(StructField("year", IntegerType, true),StructField("make", StringType, true),StructField("model", StringType, true),StructField("comment", StringType, true),StructField("blank", StringType, true)))
customSchema: org.apache.spark.sql.types.StructType = StructType(StructField(year,IntegerType,true), StructField(make,StringType,true), StructField(model,StringType,true), StructField(comment,StringType,true), StructField(blank,StringType,true))
scala> val customSchema = (new StructType).add("year", IntegerType, true).add("make", StringType, true).add("model", StringType, true).add("comment", StringType, true).add("blank", StringType, true)
<console>:24: error: not enough arguments for constructor StructType: (fields: Array[org.apache.spark.sql.types.StructField])org.apache.spark.sql.types.StructType. Unspecified value parameter fields.
val customSchema = (new StructType).add("year", IntegerType, true).add("make", StringType, true).add("model", StringType,true).add("comment", StringType, true).add("blank", StringType, true)
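I don't see why the second, builder-style way of constructing the schema fails with this constructor error. For completeness, this is how I intend to feed the schema into the spark-csv reader once it builds (cars.csv is again just a placeholder path):

val df = hiveContext.read
  .format("com.databricks.spark.csv")
  .option("header", "true")   // first line of the file is a header row
  .schema(customSchema)       // use the schema built above instead of letting spark-csv infer types
  .load("cars.csv")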