在 Python/PySpark/Jupyter 环境中，我使用 networkx 库的绘图功能。诀窍是从 GraphFrame 图创建一个 networkx 图：
import networkx as nx
from graphframes import GraphFrame
def PlotGraph(edge_list, max_edges=1000):
    """Draw an undirected networkx graph built from a Spark edge DataFrame.

    The ``src``/``dst`` columns of ``edge_list`` are pulled to the driver and
    every row becomes one edge of an ``nx.Graph``, which is then drawn with
    node labels on the current matplotlib figure.

    Args:
        edge_list: Spark DataFrame with ``src`` and ``dst`` columns, e.g. the
            ``edges`` attribute of a GraphFrame.
        max_edges: Upper bound on the number of edge rows collected to the
            driver. Defaults to 1000, the limit that used to be hard-coded.
            Pass ``None`` to collect every row.

    Returns:
        None. The drawing is a side effect on the current matplotlib figure.
    """
    endpoints = edge_list.select('src', 'dst')
    # Plotting happens on the driver, so cap how many rows are brought back
    # unless the caller explicitly asks for all of them.
    if max_edges is None:
        rows = endpoints.collect()
    else:
        rows = endpoints.take(max_edges)

    # nx.Graph is undirected: reciprocal rows such as a->b and b->a end up
    # as a single edge in the drawing.
    Gplot = nx.Graph()
    Gplot.add_edges_from((row['src'], row['dst']) for row in rows)

    # NOTE(review): `plt` is not imported in the visible part of the file;
    # presumably it is matplotlib.pyplot from an earlier cell - confirm.
    # subplot(121) draws into the left cell of a 1x2 grid, leaving the right
    # cell free for a second plot.
    plt.subplot(121)
    nx.draw(Gplot, with_labels=True, font_weight='bold')
# Demo: build a small GraphFrame from in-memory rows and plot its edges.
# NOTE(review): SparkSession and SQLContext are not imported in the visible
# part of the file; presumably they come from pyspark.sql - confirm.

# Reuse the active Spark session if there is one, otherwise start a new one.
spark = (
    SparkSession.builder
    .appName("PlotAPp")
    .getOrCreate()
)
sqlContext = SQLContext(spark)

# Vertex table: one row per person, columns (id, name, age).
vertices = sqlContext.createDataFrame(
    [
        ("a", "Alice", 34),
        ("b", "Bob", 36),
        ("c", "Charlie", 30),
        ("d", "David", 29),
        ("e", "Esther", 32),
        ("e1", "Esther2", 32),
        ("f", "Fanny", 36),
        ("g", "Gabby", 60),
        ("h", "Mark", 61),
        ("i", "Gunter", 62),
        ("j", "Marit", 63),
    ],
    schema=["id", "name", "age"],
)

# Edge table: columns (src, dst, relationship); src/dst hold vertex ids.
edges = sqlContext.createDataFrame(
    [
        ("a", "b", "friend"),
        ("b", "a", "follow"),
        ("c", "a", "follow"),
        ("c", "f", "follow"),
        ("g", "h", "follow"),
        ("h", "i", "friend"),
        ("h", "j", "friend"),
        ("j", "h", "friend"),
        ("e", "e1", "friend"),
    ],
    schema=["src", "dst", "relationship"],
)

# Combine both tables into a GraphFrame and draw its edge DataFrame.
g = GraphFrame(vertices, edges)
PlotGraph(g.edges)
示例图的绘制结果