在 SparkSQL 中是否可以连接来自 mysql 和 Oracle 数据库的数据?我尝试加入他们,但在 SPARK_CLASSPATH 中设置多个 jar(用于 mysql 和 Oracle 的 jdbc 驱动程序)时遇到了一些麻烦。这是我的代码:
import os
import sys

os.environ['SPARK_HOME'] = "/home/x/spark-1.5.2"
sys.path.append("/home/x/spark-1.5.2/python/")

# Jar locations and connection strings for the two databases.
MYSQL_DRIVER_PATH = "/home/x/spark-1.5.2/python/lib/mysql-connector-java-5.1.38-bin.jar"
MYSQL_CONNECTION_URL = "jdbc:mysql://192.111.333.999:3306/db?user=us&password=pasw"
ORACLE_DRIVER_PATH = "/home/x/spark-1.5.2/python/lib/ojdbc6.jar"
Oracle_CONNECTION_URL = "jdbc:oracle:thin:user/pasw@192.111.333.999:1521:xe"

try:
    from pyspark import SparkContext, SparkConf
    from pyspark.sql import SQLContext

    # Define Spark configuration.
    conf = SparkConf()
    conf.setMaster("local")
    conf.setAppName("MySQL_Oracle_imp_exp")

    # FIX: both JDBC driver jars must be on the *driver JVM* classpath
    # before the SparkContext starts. SPARK_CLASSPATH is deprecated in
    # Spark 1.x; the supported way is spark.driver.extraClassPath with
    # the jars joined by ':' (the JVM classpath separator on Linux).
    conf.set("spark.driver.extraClassPath",
             ":".join([MYSQL_DRIVER_PATH, ORACLE_DRIVER_PATH]))

    # Initialize a SparkContext and SQLContext.
    sc = SparkContext(conf=conf)
    sqlContext = SQLContext(sc)

    # Oracle table via the DataFrameReader JDBC source.
    ora_tmp = sqlContext.read.format('jdbc').options(
        url=Oracle_CONNECTION_URL,
        dbtable="TABLE1",
        driver="oracle.jdbc.OracleDriver"
    ).load()
    ora_tmp.show()

    # MySQL side: use the same read.format('jdbc') API as above.
    # The deprecated sqlContext.load(source=..., path=...) call does NOT
    # take a jar path -- the driver jar must already be on the classpath
    # (configured above), so the bogus path= argument is gone.
    tmp2 = sqlContext.read.format('jdbc').options(
        url=MYSQL_CONNECTION_URL,
        dbtable="(select city,zip from TABLE2 limit 10) as tmp2",
        driver="com.mysql.jdbc.Driver"
    ).load()
    c_rows = tmp2.collect()
    # ... join / further processing of ora_tmp and tmp2 goes here ...
except Exception as e:
    # Surface the failure and exit non-zero so callers see the error.
    print(e)
    sys.exit(1)
有人可以帮我解决这个问题吗?提前致谢 :)