我正在编写一个脚本,该脚本生成数百万个项目的列表,然后根据第一个列表生成另一个列表。它很快填满内存,脚本无法继续。我认为将列表直接存储在文件中然后直接在文件行上循环可能是个好主意。最有效的方法是什么?
编辑:
我正在尝试逐行(逐层)生成一棵树。row5_nodes 可能包含上百万个项目,但我不能删除它,因为我需要用它来生成 row6_nodes。
import random
class Node(object):
    """One node of the generated tree.

    Attributes:
        id: Identifier of this node (the callers in this file pass the
            running global counter).
        name: Display name of the node.
        parent: ``id`` of the parent node, or ``None`` when no parent was
            given (as is the case for root nodes).
    """

    # The script creates a very large number of nodes; declaring slots removes
    # the per-instance ``__dict__`` and noticeably cuts memory per node.
    __slots__ = ("id", "name", "parent")

    def __init__(self, id, name, parent=None):
        self.id = id
        self.name = name
        self.parent = parent

    def __repr__(self):
        return "Node(id=%r, name=%r, parent=%r)" % (
            self.id,
            self.name,
            self.parent,
        )
def write_roots(root_nodes, roots):
    """Create the root nodes, collect them and write one CSV line per node.

    Relies on two module-level names: ``f`` (an open, writable file object)
    and ``index`` (the running node-id counter). This function resets
    ``index`` to 0 before creating nodes, so after it returns ``index``
    equals ``roots``.

    Args:
        root_nodes: List that receives the newly created ``Node`` objects
            (mutated in place).
        roots: Number of root nodes to create.
    """
    global index
    index = 0
    for x in range(roots):
        node = Node(index, "root" + str(x))
        root_nodes.append(node)
        # Line format: "<id>,<name>,<parent>"; parent is "None" for roots.
        f.write("%s,%s,%s\n" % (node.id, node.name, node.parent))
        index += 1
def write_row(parent_nodes, new_nodes, children):
    """Create ``children`` child nodes under every parent and write them out.

    Relies on two module-level names: ``f`` (an open, writable file object)
    and ``index`` (the running node-id counter, which must already exist;
    in this file ``write_roots`` initialises it). ``index`` is advanced by
    one for every node created.

    Args:
        parent_nodes: Iterable of ``Node`` objects forming the previous row.
        new_nodes: List that receives the newly created ``Node`` objects
            (mutated in place).
        children: Number of children to create for each parent; with 0 no
            nodes are created and ``new_nodes`` is left untouched.
    """
    global index
    # The same child numbering is used for every parent, so build it once.
    child_numbers = range(children)
    for parent_node in parent_nodes:
        for x in child_numbers:
            name = "cat" + str(parent_node.id) + "-" + str(x)
            node = Node(index, name, parent_node.id)
            new_nodes.append(node)
            # Line format: "<id>,<name>,<parent id>".
            f.write("%s,%s,%s\n" % (node.id, node.name, node.parent))
            index += 1
# Generate the tree level by level and stream every node to data.csv.
#
# ``f`` stays a module-level name because write_roots()/write_row() write
# through it; the ``with`` block guarantees the file is closed even if
# generation fails part-way (e.g. with a MemoryError).
# NOTE(review): mode "wb" combined with str writes matches Python 2 usage;
# a Python 3 port would need text mode instead.
with open("data.csv", "wb") as f:
    roots = 1000
    root_nodes = []
    write_roots(root_nodes, roots)
    print("1")

    # Building a row only needs the row directly above it, so keep just the
    # parent row and the row under construction. Rebinding ``parent_nodes``
    # drops the last reference to older rows and lets their memory be
    # reclaimed, instead of holding every level in its own list.
    parent_nodes = root_nodes
    for level in range(2, 7):
        new_nodes = []
        # NOTE(review): randrange(0, 10) may return 0, in which case this row
        # and every following row stays empty - confirm that is intended.
        write_row(parent_nodes, new_nodes, random.randrange(0, 10))
        print(str(level))
        parent_nodes = new_nodes