1

我已经在 Spark 中安装了 GraphFrames 包,并已按照此链接中的说明进行操作:https://www.datareply.co.uk/blog/2016/9/20/running-graph-analytics-with-spark-graphframes-a-simple-example

当我尝试执行以下代码时,我收到一条错误消息:

from pyspark import SparkContext
from pyspark import SparkConf 
from pyspark.sql import SQLContext
from pyspark.graphframes import graphframe as gf
import pandas as pd
import os 
import sys
# Ask the PySpark launcher to pull in the GraphFrames package. This is set
# before the SparkContext is created further down.
os.environ["PYSPARK_SUBMIT_ARGS"] = (
    "--packages graphframes:graphframes:0.2.0-spark2.0-s_2.11 pyspark-shell"
)

# Add the Spark Python sources and the py4j archive under SPARK_HOME to the
# module search path.
# NOTE(review): these appends run after the pyspark imports above, so they
# cannot influence how those imports were resolved -- confirm this is intended.
SPARK_HOME = "/usr/local/spark/"
sys.path.append(SPARK_HOME + "/python")
sys.path.append(SPARK_HOME + "/python" + "/lib/py4j-0.10.1-src.zip")

conf = SparkConf()
SC = SparkContext(conf=conf)
sqlcontext = SQLContext(SC)

# Vertex table: one row per person with columns id, name, age.
v = sqlcontext.createDataFrame(
    [
        ("a", "Alice", 34),
        ("b", "Bob", 36),
        ("c", "Charlie", 30),
    ],
    ["id", "name", "age"]
)

# Edge table: one row per relationship with columns src, dst, relationship.
e = sqlcontext.createDataFrame(
    [
        ("a", "b", "friend"),
        ("b", "c", "follow"),
        ("c", "b", "follow"),
    ],
    ["src", "dst", "relationship"]
)

# Build the graph from the vertex and edge DataFrames.
g = gf.GraphFrame(v, e)

# Show the in-degrees, then count the "follow" edges (the count is computed
# but not stored or printed).
g.inDegrees.show()
g.edges.filter("relationship = 'follow'").count()

# Run PageRank and display the resulting score for each vertex id.
results = g.pageRank(resetProbability=0.01, maxIter=20)
results.vertices.select("id", "pagerank").show()

这是错误消息:

Traceback (most recent call last):
  File "TP2.py", line 35, in <module>
    g = gf.GraphFrame(v, e)
  File "/usr/local/spark/python/pyspark/graphframes/graphframe.py", line 63, in __init__
    self._jvm_graph = self._jvm_gf_api.createGraph(v._jdf, e._jdf)
  File "/Users/Khaled/anaconda2/lib/python2.7/site-packages/py4j/java_gateway.py", line 1160, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "/usr/local/spark/python/pyspark/sql/utils.py", line 63, in deco
    return f(*a, **kw)
  File "/Users/Khaled/anaconda2/lib/python2.7/site-packages/py4j/protocol.py", line 320, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o54.createGraph.
: java.lang.IncompatibleClassChangeError: Implementing class
    at java.lang.ClassLoader.defineClass1(Native Method)
    at java.lang.ClassLoader.defineClass(ClassLoader.java:763)
    at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
    at java.net.URLClassLoader.defineClass(URLClassLoader.java:467)
    at java.net.URLClassLoader.access$100(URLClassLoader.java:73)
    at java.net.URLClassLoader$1.run(URLClassLoader.java:368)
    at java.net.URLClassLoader$1.run(URLClassLoader.java:362)
    at java.security.AccessController.doPrivileged(Native Method)
    at java.net.URLClassLoader.findClass(URLClassLoader.java:361)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
    at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
    at java.lang.ClassLoader.defineClass1(Native Method)
    at java.lang.ClassLoader.defineClass(ClassLoader.java:763)
    at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
    at java.net.URLClassLoader.defineClass(URLClassLoader.java:467)
    at java.net.URLClassLoader.access$100(URLClassLoader.java:73)
    at java.net.URLClassLoader$1.run(URLClassLoader.java:368)
    at java.net.URLClassLoader$1.run(URLClassLoader.java:362)
    at java.security.AccessController.doPrivileged(Native Method)
    at java.net.URLClassLoader.findClass(URLClassLoader.java:361)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
    at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
    at org.graphframes.GraphFrame$.apply(GraphFrame.scala:556)
    at org.graphframes.GraphFramePythonAPI.createGraph(GraphFramePythonAPI.scala:9)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
    at py4j.Gateway.invoke(Gateway.java:280)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.GatewayConnection.run(GatewayConnection.java:214)
    at java.lang.Thread.run(Thread.java:745)
4

1 回答 1

-1

每当您遇到 java.lang.IncompatibleClassChangeError 时,您就知道依赖项存在冲突。在这种情况下,您有两个不同的包被用于 GraphFrame 功能:pyspark.graphframes 和 graphframes。修改调用和导入应该可以解决问题:

# Import GraphFrame from the graphframes package instead of
# pyspark.graphframes.
from graphframes import GraphFrame

# ... (the rest of the script that defines the DataFrames v and e is elided) ...

# Call the imported class directly rather than through the gf alias.
g = GraphFrame(v, e)
于 2019-01-04T16:27:41.013 回答