0

我正在实现 Kosaraju 的两遍算法,该算法可以计算有向图中的强连通分量。

我可以用小输入数据得到正确的结果,但是当输入数据较大时

( 70M txt ,警告!!这个文本文件有将近70M的大小,用这个url的下载软件下载这个大文件。如果你没有下载软件,你可以复制这个url到你的浏览器中http://pan.baidu.com/s/1i5Hmf5N 下载它),

大约 1 小时后,它显示“pythonw.exe 已停止工作”。Python 应该运行以获得正确的答案。

我该如何解决?是不是内存有问题?请帮我一个忙。

这是大数据结果:

我的代码在这里:

import time
import datetime
import sys


start = time.time()
print datetime.datetime.now()

with open('test.txt') as f:
    #SCC
    #a = [[int(x) for x in ln.split()] for ln in f]
    data_set_u = []
    data_set_v = []
    for ln in f:
        #print ln
        #print type(ln)
        #print len(ln)
        if len(ln) >1:
            u,v = ln.split()
            u = int(u)
            v = int(v)            
            data_set_u.append(u)
            data_set_v.append(v)
f.close()

print 'open file time: '+ str(time.time() - start) + 's'
print datetime.datetime.now()

sys.setrecursionlimit((max(data_set_u+data_set_v)+ len(data_set_u))*100)

def DFS_Loop():
    num = max(data_set_u+data_set_v)

    start_time_DFS_Loop = time.time()
    global t
    t = 0
    global s
    s = None
    global visited
    visited = [False]* num
    global leader
    leader = [None] * num
    global f
    f = [None] * num


    for i in range(num,0,-1):
        #print i
        #print (i in visited)
        #if (i in visited)==False:
        if visited[i-1] == False:
            s = i
            #print s
            DFS(i)
    print 'end with func DFS_Loop() time: '+ str(time.time() - start_time_DFS_Loop)+ 's'
    print 'end with func DFS_Loop() whole time: '+ str(time.time() - start)+ 's'    
#print data_set_u
#print data_set_v


def DFS(node):
    start_time_DFS = time.time()

    global t
    visited[node-1] = True
    #print visited
    #print visited
    leader[node-1] = s
    #print leader
    arc = []
    arc = [data_set_v[i] for i,x in enumerate(data_set_u) if x==node] 
    #print arc
    for i in arc:
        #print arc
        #print i
        if visited[i-1]==0:
            #print i
            DFS(i)

    t+=1
    #print t
    f[node-1] = t
    #print f
    print 'end with func DFS time: '+ str(time.time() - start_time_DFS)+ 's'
    print 'end with func DFS whole time: '+ str(time.time() - start)+ 's'

DFS_Loop()
print 'DFS_Loop time: '+ str(time.time() - start)+ 's'


##reverse tail and head data
##
##

rev_u,rev_v = data_set_v,data_set_u
new_u = [None] * (len(rev_u))
new_v = [None] * (len(rev_v))
#print rev_v
#print rev_u
for i,val in enumerate(f):
    #rev_u[rev_u.index(i+1)] = val
    #print i+1,val
    #rev_v[rev_v.index(i+1,0,len(rev_v))] = val
    #print rev_v
    #print i,val
    for i_v,val_v in enumerate(rev_v):
        if val_v == i+1:
            #print val_v
            new_v[i_v] = val

    for i_u,val_u in enumerate(rev_u):
        if val_u == i+1:
            #print i_u,val_u
            new_u[i_u] = val    

#print new_u
#print new_v
data_set_u = new_u
data_set_v = new_v
#print data_set_u
#print data_set_v

print 'reverse data time: '+ str(time.time() - start)+ 's'

DFS_Loop()
print 'DFS_Loop time: '+ str(time.time() - start)+ 's'

#print leader


##calculate repeated times appearancing in leader list
##
##


count_list = [0]*len(leader)
indices = [0]*len(leader)

#for i_lea,val_lea in enumerate(leader):
i_count_list = 0
while len(leader) > 0:
    #print i_lea,val_lea

    count_list[i_count_list] = leader.count(leader[0])
    #print 'count_list: '+ str(count_list)
    indices = [i for i, x in enumerate(leader) if x == leader[0]]
    #print 'indices: '+ str(indices)
    for i in xrange(len(indices)):
        #print 'leader before del: '+ str(leader)
        del leader[leader.index(leader[0])]
        #print 'leader after del: '+ str(leader)
    #print 'leader: '+ str(leader)
    i_count_list = i_count_list+1
    #print 'i_count_list: ' + str(i_count_list)
print 'calc time: '+ str(time.time() - start)+ 's'

sorted_count_list = sorted(count_list, key=int, reverse=True)
print sorted_count_list[0:5]
print datetime.datetime.now()

这是小测试文件:

1 4

2 8

3 6

4 7

5 2

6 9

7 1

8 5

8 6

9 7

9 3

这是小测试文件的正确部分结果:

calc time: 0.121000051498s
[3, 3, 3, 0, 0]
2017-01-19 08:07:44.802000
4

0 回答 0