0

我有一个如下的数据框

    Quote ID    Quote Date       Email                Phone       VIN
0   1410095416  6/6/2021    DAMIONADAE@GMAIL.COM    4049366688  1C4RJFBG9EC2267
1   1410143058  6/6/2021    BEEZZZHAPPY@YAHOO.COM   3122340791  NaN
2   1408893417  6/3/2021    MONEYKAY38@YAHOO.COM    2149004015  1J8HG48NX6C2470
3   1408764243  6/2/2021    TIFFANYLESTER419@GMAIL.COM  5024647900  JN8AZ08W57W6527
4   1408639003  6/2/2021    MONEYTEAM799@YAHOO.COM  2149001015  1ZVBP8AM3E52605

我正在使用一个函数来生成网络图。下面的函数使用数据框、节点和边来生成图表。

def create_network(df, node, column_edge, column_edge1=None, column_edge2=None):

    #  select columns, remove NaN
    df_edge1 = df[[node, column_edge]].dropna(subset=[column_edge]).drop_duplicates()

    # To create connections between "node" who have the same "edge",
    # join data with itself on the "node" column.
    df_edge1 = df_edge1.merge(
                              df_edge1[[node, column_edge]].rename(columns={node:node+"_2"}), 
                              on=column_edge
                              )

    # By joining the data with itself, node will have a connection with themselves.
    # Remove self connections, to keep only connected nodes which are different.
    edge1 = df_edge1[~(df_edge1[node]==df_edge1[node+"_2"])].dropna()[[node, node +"_2", column_edge]]
        
    # To avoid counting twice the connections (person 1 connected to person 2 and person 2 connected to person 1)
    # we force the first ID to be "lower" then ID_2
    edge1.drop(edge1.loc[edge1[node+"_2"]<edge1[node]].index.tolist(), inplace=True)

    G = nx.from_pandas_edgelist(df=edge1, source=node, target=node + '_2', edge_attr=column_edge)

    G.add_nodes_from(nodes_for_adding=df[node].tolist())


    if column_edge1:


        df_edge2 = df[[node, column_edge1]].dropna(subset=[column_edge1]).drop_duplicates()

        df_edge2 = df_edge2.merge(
            df_edge2[[node, column_edge1]].rename(columns={node:node+"_2"}), 
            on=column_edge1
        )

        edge2 = df_edge2[~(df_edge2[node]==df_edge2[node+"_2"])].dropna()[[node, node+"_2", column_edge1]]

        edge2.drop(edge2.loc[edge2[node+"_2"]<edge2[node]].index.tolist(), inplace=True)

        # Create the connections in the graph
        links_attributes = {tuple(row[[node, node+"_2"]]): {column_edge1: row[column_edge1]} for i,row in edge2.iterrows()}

        # create the connection, without attribute.
        G.add_edges_from(links_attributes)
        # adds the attribute.
        nx.set_edge_attributes(G=G, values=links_attributes)


    if column_edge2:

        df_edge3 = df[[node, column_edge2]].dropna(subset=[column_edge2]).drop_duplicates()

        df_edge3 = df_edge3.merge(
                                  df_edge3[[node, column_edge2]].rename(columns={node:node+"_2"}), 
                                  on=column_edge2
                                  )


        edge3 = df_edge3[~(df_edge3[node]==df_edge3[node+"_2"])].dropna()[[node, node+"_2", column_edge2]]

        edge3.drop(edge3.loc[edge3[node+"_2"]<edge3[node]].index.tolist(), inplace=True)

        # Create the connections in the graph
        links_attributes2 = {tuple(row[[node, node+"_2"]]): {column_edge2: row[column_edge2]} for i,row in edge3.iterrows()}

        # create the connection, without attribute.
        G.add_edges_from(links_attributes2) 
        # adds the attribute.
        nx.set_edge_attributes(G=G, values=links_attributes2)
    
    return G

调用上述函数

GE3 = create_network(data, 'Quote ID', "Email", column_edge1="Phone", column_edge2="VIN")

图表信息

Name: 
Type: Graph
Number of nodes: 2441
Number of edges: 8374
Average degree:   6.8611
# import pyvis
from pyvis.network import Network

# # create vis network
net = Network(notebook=True, width=1000, height=600)
# load the networkx graph
net.from_nx(GE3)
# show
net.show("pyvis_example.html")

当我尝试使用pyvis. 我收到 TypeError:int64 类型的对象不是 JSON 可序列化的

以下是完整的错误

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-35-b34699ea995d> in <module>()
     17 net.from_nx(GE3)
     18 # show
---> 19 net.show("pyvis_example.html")

10 frames
/usr/local/lib/python3.7/dist-packages/pyvis/network.py in show(self, name)
    474         check_html(name)
    475         if self.template is not None:
--> 476             return self.write_html(name, notebook=True)
    477         else:
    478             self.write_html(name)

/usr/local/lib/python3.7/dist-packages/pyvis/network.py in write_html(self, name, notebook)
    457                                     bgcolor=self.bgcolor,
    458                                     conf=self.conf,
--> 459                                     tooltip_link=use_link_template)
    460 
    461         with open(name, "w+") as out:

/usr/local/lib/python3.7/dist-packages/jinja2/environment.py in render(self, *args, **kwargs)
   1088             return concat(self.root_render_func(self.new_context(vars)))
   1089         except Exception:
-> 1090             self.environment.handle_exception()
   1091 
   1092     def render_async(self, *args, **kwargs):

/usr/local/lib/python3.7/dist-packages/jinja2/environment.py in handle_exception(self, source)
    830         from .debug import rewrite_traceback_stack
    831 
--> 832         reraise(*rewrite_traceback_stack(source=source))
    833 
    834     def join_path(self, template, parent):

/usr/local/lib/python3.7/dist-packages/jinja2/_compat.py in reraise(tp, value, tb)
     26     def reraise(tp, value, tb=None):
     27         if value.__traceback__ is not tb:
---> 28             raise value.with_traceback(tb)
     29         raise value
     30 

<template> in top-level template code()

/usr/local/lib/python3.7/dist-packages/jinja2/filters.py in do_tojson(eval_ctx, value, indent)
   1258         options = dict(options)
   1259         options["indent"] = indent
-> 1260     return htmlsafe_json_dumps(value, dumper=dumper, **options)
   1261 
   1262 

/usr/local/lib/python3.7/dist-packages/jinja2/utils.py in htmlsafe_json_dumps(obj, dumper, **kwargs)
    617         dumper = json.dumps
    618     rv = (
--> 619         dumper(obj, **kwargs)
    620         .replace(u"<", u"\\u003c")
    621         .replace(u">", u"\\u003e")

/usr/lib/python3.7/json/__init__.py in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)
    236         check_circular=check_circular, allow_nan=allow_nan, indent=indent,
    237         separators=separators, default=default, sort_keys=sort_keys,
--> 238         **kw).encode(obj)
    239 
    240 

/usr/lib/python3.7/json/encoder.py in encode(self, o)
    197         # exceptions aren't as detailed.  The list call should be roughly
    198         # equivalent to the PySequence_Fast that ''.join() would do.
--> 199         chunks = self.iterencode(o, _one_shot=True)
    200         if not isinstance(chunks, (list, tuple)):
    201             chunks = list(chunks)

/usr/lib/python3.7/json/encoder.py in iterencode(self, o, _one_shot)
    255                 self.key_separator, self.item_separator, self.sort_keys,
    256                 self.skipkeys, _one_shot)
--> 257         return _iterencode(o, 0)
    258 
    259 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,

/usr/lib/python3.7/json/encoder.py in default(self, o)
    177 
    178         """
--> 179         raise TypeError(f'Object of type {o.__class__.__name__} '
    180                         f'is not JSON serializable')
    181 

TypeError: Object of type int64 is not JSON serializable

我认为错误来自 Graph 网络,但我不确定这到底发生在哪里以及如何解决这个问题......

任何人都可以解决这个问题吗?

4

2 回答 2

0

尝试转换为 int 所有你知道是 int 的变量。在 python 中,你可以这样转换:int( int64var )

如果您不想强制转换所有变量,则可以调试代码并查看每个变量的变量(如果变量类型正确)。

于 2021-10-03T15:37:30.230 回答
0

确保您的节点索引是int类型而不是int64or int32。您的边缘构造也是如此。我建议检查type你的df[node]熊猫系列。

于 2021-07-16T14:35:43.863 回答