0

我有一个文件,其中有许多以“|”(管道)字符结尾的字段。我想读取这个文件,并根据某个特定字段的不同取值创建相应数量的文件。这里有一个例子:

L219| |791|P|PIPPO|PLUTO|1|18081926|I262|XYZXCV12D35F345S||
L219| |1241|P|PAPERINO|TOPOLINO|2|21041937|F335|FVGHWU54G56S456U||
L219| |437793|G|TOPOLANDIA SAS|L219|12345678910|
L219| |437794|G|PAPERANDIA|L219|10987654321|

如果第四个字段等于“G”,则记录进入“file_pg.txt”,否则如果等于“P”,则进入“file_pf.txt”。

我写了下面的代码(我是 Python 新手),但是处理大文件(300 MB)时需要很长时间,你有什么改进的建议吗?

# Split soggetti.txt into file_pf.txt ("P" records) and file_pg.txt ("G"
# records) by inspecting the character just before the fourth "|" of each line.
file = open('D:\\mydirectory\\soggetti.txt','r')
# Opening in "w" mode and closing right away leaves both output files empty
# before the loop below appends to them.
file_pf = open("D:\\mydirectory\\file_pf.txt","w")
file_pg = open("D:\\mydirectory\\file_pg.txt","w")
file_pf.close()
file_pg.close()

i = 0
with file:
    for line in file:
        i = 0  # index of the character being examined
        c = 0  # number of "|" separators seen so far on this line
        while i < len(line):
            carattere = line[i]
            if carattere == "|":
                c = c + 1
                if c == 4:
                    # The single character before the fourth "|" selects the output file.
                    if line[i-1] == "P":
                        # The output file is reopened in append mode and closed
                        # again for every matching line.
                        file_pf = open("D:\\mydirectory\\file_pf.txt","a")
                        file_pf.write(line)
                        file_pf.close()
                        break
                    elif line[i-1] == "G":
                        file_pg = open("D:\\mydirectory\\file_pg.txt","a")
                        file_pg.write(line)
                        file_pg.close()
                        break
            i = i + 1
file.close()  # the with block above has already closed the file

谢谢!

阿尔贝托

4

5 回答 5

1

我会这样做:

# Stream soggetti.txt once and copy each record to file_pf.txt or file_pg.txt
# according to its fourth "|"-separated field ("P" or "G").
# All three files are opened once and closed together when the block exits.
with open('D:\\mydirectory\\soggetti.txt','r') as source_file, \
        open("D:\\mydirectory\\file_pf.txt","w") as file_pf, \
        open("D:\\mydirectory\\file_pg.txt","w") as file_pg:

    for line in source_file:
        # Split only once per line; maxsplit=4 stops after the field we need.
        fields = line.split("|", 4)
        if len(fields) < 4:
            # Blank or truncated line: there is no fourth field to test.
            continue
        record_type = fields[3]
        if record_type == "P":
            file_pf.write(line)
        elif record_type == "G":
            file_pg.write(line)

如果您关心速度,最好这样做:

# Read soggetti.txt once, collecting the "P" and "G" records in memory, then
# write each group to its own output file in a single call.
listP = []
listG = []
with open('D:\\mydirectory\\soggetti.txt','r') as source_file:
    for line in source_file:
        # maxsplit=4 stops splitting once the fourth field has been isolated.
        fields = line.split("|", 4)
        if len(fields) < 4:
            # Blank or truncated line: there is no fourth field to test.
            continue
        char = fields[3]
        if char == "P":
            listP.append(line)
        elif char == "G":
            listG.append(line)

# The output files are opened only after reading has finished, so nothing is
# written inside the loop above.
with open("D:\\mydirectory\\file_pf.txt","w") as file_pf:
    file_pf.writelines(listP)

with open("D:\\mydirectory\\file_pg.txt","w") as file_pg:
    file_pg.writelines(listG)
于 2013-10-10T13:42:50.653 回答
0

打开和关闭文件是相对较慢的操作。如果可能,您应该只打开和关闭一个文件一次。在您的情况下,您可以将 p 和 g 行存储在列表中,然后在循环结束后一次写入所有行。

# Same character scan as the question's script, but matching lines are
# collected in lists and each output file is written once at the end.
file = open('D:\\mydirectory\\soggetti.txt','r')
# Opening in "w" mode and closing immediately leaves both output files empty.
file_pf = open("D:\\mydirectory\\file_pf.txt","w")
file_pg = open("D:\\mydirectory\\file_pg.txt","w")
file_pf.close()
file_pg.close()


# Matching lines are accumulated here instead of being written one by one.
p_lines = []
g_lines = []
i = 0
with file:
    for line in file:
        i = 0  # index of the character being examined
        c = 0  # number of "|" separators seen so far on this line
        while i < len(line):
            carattere = line[i]
            if carattere == "|":
                c = c + 1
                if c == 4:
                    # Classify by the single character just before the fourth "|".
                    if line[i-1] == "P":
                        p_lines.append(line)
                        break
                    elif line[i-1] == "G":
                        g_lines.append(line)
                        break
            i = i + 1
file.close()  # the with block above has already closed the file

# Each output file is opened once and receives all of its lines in one call.
file_pf = open("D:\\mydirectory\\file_pf.txt","w")
file_pf.writelines(p_lines)
file_pf.close()

file_pg = open("D:\\mydirectory\\file_pg.txt","w")
file_pg.writelines(g_lines)
file_pg.close()

您还可以使用 split 更轻松地识别每行中各字段的内容:

# Variant that finds the record type with str.split instead of scanning the
# line character by character; output is still written once at the end.
file = open('D:\\mydirectory\\soggetti.txt','r')
# Opening in "w" mode and closing immediately leaves both output files empty.
file_pf = open("D:\\mydirectory\\file_pf.txt","w")
file_pg = open("D:\\mydirectory\\file_pg.txt","w")
file_pf.close()
file_pg.close()


p_lines = []
g_lines = []
with file:
    for line in file:
        fields = line.split("|")
        # fields[3] is the fourth "|"-separated field of the line.
        if fields[3] == "P":
            p_lines.append(line)
        elif fields[3] == "G":
            g_lines.append(line)
file.close()  # the with block above has already closed the file

# Each output file is opened once and receives all of its lines in one call.
file_pf = open("D:\\mydirectory\\file_pf.txt","w")
file_pf.writelines(p_lines)
file_pf.close()

file_pg = open("D:\\mydirectory\\file_pg.txt","w")
file_pg.writelines(g_lines)
file_pg.close()

顺便说一句,严格来说,处理完文件后,您不需要既使用 with 又显式调用 close() 来关闭它,二者选其一即可。并且没有必要在脚本开始时打开 file_pf 和 file_pg 后又立即将它们关闭。

# Read soggetti.txt once, grouping lines by their fourth "|"-separated field,
# then write the "P" lines to file_pf.txt and the "G" lines to file_pg.txt.
p_lines = []
g_lines = []
with open('D:\\mydirectory\\soggetti.txt','r') as file:
    for line in file:
        fields = line.split("|")
        if len(fields) < 4:
            # Blank or truncated line: there is no fourth field to test.
            continue
        if fields[3] == "P":
            p_lines.append(line)
        elif fields[3] == "G":
            g_lines.append(line)

# with closes each output file even if writelines raises.
with open("D:\\mydirectory\\file_pf.txt","w") as file_pf:
    file_pf.writelines(p_lines)

with open("D:\\mydirectory\\file_pg.txt","w") as file_pg:
    file_pg.writelines(g_lines)

如果您预计将来会有除“p”和“g”之外的更多行类型,把每种类型的行存放在字典中可能会省去一些工作:

from collections import defaultdict

# Group every line of soggetti.txt by its lower-cased fourth "|"-separated
# field, then write one output file per distinct value found.
lines_to_write = defaultdict(list)
with open('D:\\mydirectory\\soggetti.txt','r') as source_file:
    for line in source_file:
        fields = line.split("|")
        if len(fields) < 4:
            # Blank or truncated line: there is no fourth field to group by.
            continue
        lineType = fields[3].lower()
        lines_to_write[lineType].append(line)

# items() works on both Python 2 and Python 3 dictionaries.
for lineType, lines in lines_to_write.items():
    # The type letter is inserted before the fixed "f", so "p" gives
    # file_pf.txt and "g" gives file_gf.txt.
    filename = "D:\\mydirectory\\file_{}f.txt".format(lineType)
    with open(filename,"w") as output_file:
        output_file.writelines(lines)

您可以通过跟踪您所在的行号并定期打印消息来向用户报告已处理了多少行。

# Print a progress message every how_often_to_report lines while reading.
how_often_to_report = 100 #prints message every one hundred lines
with open('D:\\mydirectory\\soggetti.txt','r') as file:
    for line_number, line in enumerate(file):
        if line_number % how_often_to_report == 0:
            # format() substitutes the count into the "{}" placeholder; the
            # single-argument call form prints the same on Python 2 and 3.
            print("{} lines processed".format(line_number))
        #do rest of processing work here
于 2013-10-10T13:40:00.703 回答
0
Read line from file
split on |
P = empty list
G = empty list
if splitted_line[index] is equal to P
 add line to P
elif splitted_line[index] is equal to G
 add line to G
open file for P
write all lines in P
close file for P
open file for G
write all lines in G
close file for G
于 2013-10-10T13:40:29.853 回答
0

我没有对此进行测试,但类似下面的内容应该更快

# Stream soggetti.txt once and copy each record to file_pf.txt or file_pg.txt
# according to its fourth "|"-separated field.
# "w" mode truncates the output files, so running the script again does not
# append duplicate records; with closes all three files even on error.
with open('D:\\mydirectory\\soggetti.txt','r') as file, \
        open("D:\\mydirectory\\file_pf.txt","w") as file_pf, \
        open("D:\\mydirectory\\file_pg.txt","w") as file_pg:

    for line in file:
        bits = line.split("|")
        if len(bits) < 4:
            # Blank or truncated line: there is no fourth field to test.
            continue
        if bits[3] == "P":
            file_pf.write(line)
        elif bits[3] == "G":
            file_pg.write(line)
于 2013-10-10T13:44:14.120 回答
0

下面的代码应该比您现在的做法更快,因为:

  1. 您不会遍历每个字符。
  2. 您不必每次都打开文件。
  3. 需要判断的条件更少。

# Stream soggetti.txt once, sending each line to one of two output files.
# Opening in "w" mode truncates the outputs, so no separate
# truncate-then-reopen step is needed; with closes all three files on exit.
with open('D:\\mydirectory\\soggetti.txt','r') as file, \
        open("D:\\mydirectory\\file_pf.txt","w") as file_pf, \
        open("D:\\mydirectory\\file_pg.txt","w") as file_pg:
    for line in file:
        switch = line.split('|')[3]
        # Only one condition is evaluated per line: any line whose fourth
        # field does not contain "P" is written to file_pg.
        write = file_pf.write if 'P' in switch else file_pg.write
        write(line)
于 2013-10-10T13:47:27.267 回答