1

我正在尝试实现一个函数,以基于初始 lcc(每一步中 1%)删除部分高度节点,直到 lcc 将被完全破坏(大小为零)。为了测试我的代码,我正在使用 DBLP coauthor ship network,该网络已经表明,在删除 1.5% 的高度节点后,该图的 lcc 将被破坏。但是我的代码没有做到这一点。我不确定的一件事是,当我们屏蔽一些节点然后使用 GraphView(G,vfilt=mask) 时,屏蔽后不会使用孤立的顶点。所以我不得不手动检查它们。感谢您的任何提示!

  def _breake_lcc(self, G, remove_ratio):
        '''
        G:graph
        remove_ratio = ratio for removing high degree nodes 
        '''
        #lcc of complete graph
        l = gt.label_largest_component(G)
        lcc_graph = gt.GraphView(G,vfilt=l)
        lcc_org_size = float(lcc_graph.num_vertices())
        lcc_size = [lcc_org_size/lcc_org_size]
        #print 'lcc_size_zero: ', lcc_graph.num_vertices()
        #extract the vertices inside lcc
        node_ind = np.where(l.a==1)[0] 
        #compute the degree of all nodes 
        degree = lcc_graph.degree_property_map('total') 
        #extract the degreee of nodes in the lcc
        lcc_vertex_degree =  degree.a[node_ind]
        #sort the nodes based on degree
        sorted_idx = np.argsort(lcc_vertex_degree)[::-1]
        sorted_vertex = set(node_ind[sorted_idx])
        #number of nodes 
        n = len(node_ind)
        #number of top x% degree nodes to be removed -ratio to size
        remove_size = int((n*remove_ratio)/100.)
        print 'remove size',remove_size
        step = 1
        #extract top x% vertices to remove
        nodes_to_be_removed = list(sorted_vertex)[:remove_size]
        #removed the above vertices from the list of valid vertices
        valid_vertices = list(sorted_vertex - set(nodes_to_be_removed))
        #continue until there is no other node to remove
        while len(valid_vertices)>0:
            print step * remove_ratio
#            print len(valid_vertices)
            #filter out those nodes from lcc.
            #THIS IS AUTOMATICALLY UPDATE LCC_GRAPH
            l.a[np.array(nodes_to_be_removed)] = False
            #I REALIZED GRAPHVIEW DOES NOT TAKE CARE OF ISOLATED NODE AUTOMATICALLY
            #SO I CHECK FOR THEM MANUALLY
            ind_remove = [int(i) for i in lcc_graph.vertices() if i.out_degree()==0]
            if len(ind_remove)>0:
                print len(ind_remove)
                mask = lcc_graph.new_vertex_property("bool") 
                #mask.a[::] = True
                mask.a = l.a.copy()
                #l.a[np.array(ind_remove)] = False
                mask.a[np.array(ind_remove)] = False
                #mask.a[nodes_to_be_removed] = False
                #print np.all(mask.a==l.a)
                lcc_graph = gt.GraphView(lcc_graph, vfilt=mask)
                #update the valid nodes
                valid_vertices = list(set(valid_vertices) - set(ind_remove))

            print len(valid_vertices), len(sorted_vertex),lcc_graph.num_vertices()
            lcc_temp_size = float(lcc_graph.num_vertices())
            #print 'valid', len(valid_vertices),lcc_temp_size
            valid_vertices = list(sorted_vertex -set(nodes_to_be_removed))

             nodes_to_be_removed = valid_vertices[:int(remove_size)]

            print lcc_temp_size,lcc_org_size
            rel_size = lcc_temp_size/lcc_org_size 
            lcc_size.append(rel_size)
            print 'lcc size', rel_size
            print '------------'
            step+=1
        return lcc_size
4

0 回答 0