0

在查看了所有解决方案之后,我问了这个问题。我一直在尝试从数据框创建视图。我能够连接数据框,但无法在此之上创建视图。我无法使用最新版本的 Intellij 连接到本地元存储。我正在使用 Hadoop 3.0.0。下面是我的 POM。

<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>2.12.12</version>
</dependency>

<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.12</artifactId>
<version>3.0.1</version>
</dependency>

<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.12</artifactId>
<version>3.0.1</version>
</dependency>

我的 Spark 配置如下所示。

def getSparkSession(): SparkSession = {​​​​​
var session: SparkSession = null
var flag: Boolean = false
try {​​​​​
System.setProperty("HADOOP_HOME", constants.HADOOP_HOME)
System.setProperty("hadoop.home.dir", constants.HADOOP_HOME_DIR)
session = SparkSession
.builder()
.appName("Validation").master("local[1]")
.config("hive.metastore.uris", "thrift://localhost:9083")
.config("fs.s3.impl", constants.S3_IMPL)
.config("fs.s3.awsAccessKeyId", prop.keys.get(constants.S3_FS_ACCESS_KEY))
.config("fs.s3.awsSecretAccessKey", prop.keys.get(constants.S3_FS_SECRET_KEY))
.enableHiveSupport()
.getOrCreate()

println("-:Spark session created successfully:-" + session)
flag = true
}​​​​​ catch {​​​​​
case t: Exception =>
log.info(t.printStackTrace())
}​​​​​
session
}​​​​​

下面是读取数据框和创建视图的代码。

val jdbcDF = session.read
.format("jdbc")
.option("url", "jdbc:postgresql://hostname/dbname?user=xxxxxxx&password=xxxxxxx")
.option("dbtable","offer")
.option("driver","org.postgresql.Driver")
.load().cache()
println(jdbcDF.show(20, false))
jdbcDF.createOrReplaceGlobalTempView("abc")

请在下面找到日志。

21/03/26 15:05:27 WARN metastore: Failed to connect to the MetaStore Server...
21/03/26 15:05:27 INFO metastore: Waiting 1 seconds before next connection attempt.
21/03/26 15:05:27 WARN ProcfsMetricsGetter: Exception when trying to compute pagesize, as a result reporting of ProcessTree metrics is stopped
21/03/26 15:05:28 WARN Hive: Failed to register all functions.
java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1709)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.<init>(RetryingMetaStoreClient.java:83)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:133)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:104)
at org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:3600)
at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3652)
at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3632)
at org.apache.hadoop.hive.ql.metadata.Hive.getAllFunctions(Hive.java:3894)
at org.apache.hadoop.hive.ql.metadata.Hive.reloadFunctions(Hive.java:248)
at org.apache.hadoop.hive.ql.metadata.Hive.registerAllFunctionsOnce(Hive.java:231)
at org.apache.hadoop.hive.ql.metadata.Hive.<init>(Hive.java:388)
at org.apache.hadoop.hive.ql.metadata.Hive.create(Hive.java:332)
at org.apache.hadoop.hive.ql.metadata.Hive.getInternal(Hive.java:312)
at org.apache.hadoop.hive.ql.metadata.Hive.get(Hive.java:288)
at org.apache.spark.sql.hive.client.HiveClientImpl.client(HiveClientImpl.scala:260)
at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$withHiveState$1(HiveClientImpl.scala:286)
at org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:227)
at org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:226)
at org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:276)
at org.apache.spark.sql.hive.client.HiveClientImpl.databaseExists(HiveClientImpl.scala:389)
at org.apache.spark.sql.hive.HiveExternalCatalog.$anonfun$databaseExists$1(HiveExternalCatalog.scala:225)
at scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.java:23)
at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:103)
at org.apache.spark.sql.hive.HiveExternalCatalog.databaseExists(HiveExternalCatalog.scala:225)
at org.apache.spark.sql.internal.SharedState.externalCatalog$lzycompute(SharedState.scala:137)
at org.apache.spark.sql.internal.SharedState.externalCatalog(SharedState.scala:127)
at org.apache.spark.sql.internal.SharedState.globalTempViewManager$lzycompute(SharedState.scala:157)
at org.apache.spark.sql.internal.SharedState.globalTempViewManager(SharedState.scala:155)
at org.apache.spark.sql.hive.HiveSessionStateBuilder.$anonfun$catalog$2(HiveSessionStateBuilder.scala:60)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.globalTempViewManager$lzycompute(SessionCatalog.scala:93)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.globalTempViewManager(SessionCatalog.scala:93)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.getGlobalTempView(SessionCatalog.scala:587)
at org.apache.spark.sql.execution.command.CreateViewCommand.run(views.scala:120)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)
at org.apache.spark.sql.Dataset.$anonfun$logicalPlan$1(Dataset.scala:229)
at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3618)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:100)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3616)
at org.apache.spark.sql.Dataset.<init>(Dataset.scala:229)
at org.apache.spark.sql.Dataset$.$anonfun$ofRows$1(Dataset.scala:92)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:89)
at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withPlan(Dataset.scala:3646)
at org.apache.spark.sql.Dataset.createOrReplaceGlobalTempView(Dataset.scala:3294)
at com.amp.eolas.aim.validation.checks.dataqualitychecksoperations$.createTempTables(dataqualitycheckoperations.scala:262)
at com.amp.eolas.aim.validation.checks.dataqualitychecksoperations$.$anonfun$allDqandTruncateReloadFunctions$1(dataqualitycheckoperations.scala:303)
at com.amp.eolas.aim.validation.checks.dataqualitychecksoperations$.$anonfun$allDqandTruncateReloadFunctions$1$adapted(dataqualitycheckoperations.scala:300)
at scala.collection.Iterator.foreach(Iterator.scala:943)
at scala.collection.Iterator.foreach$(Iterator.scala:943)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
at scala.collection.IterableLike.foreach(IterableLike.scala:74)
at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
at com.amp.eolas.aim.validation.checks.dataqualitychecksoperations$.allDqandTruncateReloadFunctions(dataqualitycheckoperations.scala:300)
at com.amp.eolas.aim.validation.jobs.jobs$.main(jobs.scala:10)
at com.amp.eolas.aim.validation.jobs.jobs.main(jobs.scala)
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1707)
... 61 more
Caused by: MetaException(message:Could not connect to meta store using any of the URIs provided. Most recent failure: org.apache.thrift.transport.TTransportException: java.net.ConnectException: Connection refused: connect
at org.apache.thrift.transport.TSocket.open(TSocket.java:226)
at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.open(HiveMetaStoreClient.java:480)
at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:247)
at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.<init>(SessionHiveMetaStoreClient.java:70)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1707)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.<init>(RetryingMetaStoreClient.java:83)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:133)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:104)
at org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:3600)
at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3652)
at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3632)
at org.apache.hadoop.hive.ql.metadata.Hive.getAllFunctions(Hive.java:3894)
at org.apache.hadoop.hive.ql.metadata.Hive.reloadFunctions(Hive.java:248)
at org.apache.hadoop.hive.ql.metadata.Hive.registerAllFunctionsOnce(Hive.java:231)
at org.apache.hadoop.hive.ql.metadata.Hive.<init>(Hive.java:388)
at org.apache.hadoop.hive.ql.metadata.Hive.create(Hive.java:332)
at org.apache.hadoop.hive.ql.metadata.Hive.getInternal(Hive.java:312)
at org.apache.hadoop.hive.ql.metadata.Hive.get(Hive.java:288)
at org.apache.spark.sql.hive.client.HiveClientImpl.client(HiveClientImpl.scala:260)
at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$withHiveState$1(HiveClientImpl.scala:286)
at org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:227)
at org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:226)
at org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:276)
at org.apache.spark.sql.hive.client.HiveClientImpl.databaseExists(HiveClientImpl.scala:389)
at org.apache.spark.sql.hive.HiveExternalCatalog.$anonfun$databaseExists$1(HiveExternalCatalog.scala:225)
at scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.java:23)
at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:103)
at org.apache.spark.sql.hive.HiveExternalCatalog.databaseExists(HiveExternalCatalog.scala:225)
at org.apache.spark.sql.internal.SharedState.externalCatalog$lzycompute(SharedState.scala:137)
at org.apache.spark.sql.internal.SharedState.externalCatalog(SharedState.scala:127)
at org.apache.spark.sql.internal.SharedState.globalTempViewManager$lzycompute(SharedState.scala:157)
at org.apache.spark.sql.internal.SharedState.globalTempViewManager(SharedState.scala:155)
at org.apache.spark.sql.hive.HiveSessionStateBuilder.$anonfun$catalog$2(HiveSessionStateBuilder.scala:60)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.globalTempViewManager$lzycompute(SessionCatalog.scala:93)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.globalTempViewManager(SessionCatalog.scala:93)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.getGlobalTempView(SessionCatalog.scala:587)
at org.apache.spark.sql.execution.command.CreateViewCommand.run(views.scala:120)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)
at org.apache.spark.sql.Dataset.$anonfun$logicalPlan$1(Dataset.scala:229)
at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3618)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:100)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3616)
at org.apache.spark.sql.Dataset.<init>(Dataset.scala:229)
at org.apache.spark.sql.Dataset$.$anonfun$ofRows$1(Dataset.scala:92)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:89)
at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withPlan(Dataset.scala:3646)
at org.apache.spark.sql.Dataset.createOrReplaceGlobalTempView(Dataset.scala:3294)
at com.amp.eolas.aim.validation.checks.dataqualitychecksoperations$.createTempTables(dataqualitycheckoperations.scala:262)
at com.amp.eolas.aim.validation.checks.dataqualitychecksoperations$.$anonfun$allDqandTruncateReloadFunctions$1(dataqualitycheckoperations.scala:303)
at com.amp.eolas.aim.validation.checks.dataqualitychecksoperations$.$anonfun$allDqandTruncateReloadFunctions$1$adapted(dataqualitycheckoperations.scala:300)
at scala.collection.Iterator.foreach(Iterator.scala:943)
at scala.collection.Iterator.foreach$(Iterator.scala:943)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
at scala.collection.IterableLike.foreach(IterableLike.scala:74)
at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
at com.amp.eolas.aim.validation.checks.dataqualitychecksoperations$.allDqandTruncateReloadFunctions(dataqualitycheckoperations.scala:300)
at com.amp.eolas.aim.validation.jobs.jobs$.main(jobs.scala:10)
at com.amp.eolas.aim.validation.jobs.jobs.main(jobs.scala)
Caused by: java.net.ConnectException: Connection refused: connect
at java.net.DualStackPlainSocketImpl.waitForConnect(Native Method)
at java.net.DualStackPlainSocketImpl.socketConnect(DualStackPlainSocketImpl.java:81)
at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:476)
at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:218)
at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:200)
at java.net.PlainSocketImpl.connect(PlainSocketImpl.java:162)
at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:394)
at java.net.Socket.connect(Socket.java:606)
at org.apache.thrift.transport.TSocket.open(TSocket.java:221)
... 69 more
)

任何帮助表示赞赏。

4

0 回答 0