

import sys, bz2, string, os
# Ask the user for the path of the bz2-compressed log instead of hardcoding it.
filename = raw_input("Enter the path of bz2 document e.g. files/access_log-20130301.bz2: ")
print "Using file : " + filename
source_file = bz2.BZ2File(filename, "r") 
for line in source_file:
    # Date: characters 1-11 of the fourth whitespace-separated field.
    logdate = string.split(line)[3][1:12]
    # Movie name: the seventh whitespace-separated field.
    movie = string.split(line)[6]
    # User who read the movie: the third whitespace-separated field.
    usernames = string.split(line)[2]
    # Only process the line if 'media/movies' occurs in the movie field
    # (find() > 0 accepts a match at index 1 or later, not at index 0).
    if movie.find('media/movies') > 0:
        # Echo the processed record to the console.
        print "User:" + usernames + " On:" +  logdate + " Was watching:"+ movie
        # NOTE(review): this open() runs once per matching line; mode "wb+"
        # truncates record.txt every time and the handle is never explicitly
        # closed, so earlier records are not reliably kept. Open the file once,
        # before the loop, and close it when done.
        fp=open(filename+"record.txt", "wb+")
        fp.write("User: " + usernames + " On: " +  logdate + " Was watching: "+ movie+" File from:"+filename+"\n")

2 个回答



  1. 在主 for 循环之前打开要写入的文件。这样,您将只有一个文件句柄,缺少刷新也不会再导致此行为。确保完成后关闭文件。(考虑使用 with 块,它会在块结束时自动关闭文件:with open(filename + "record.txt", "wb+") as f:)
  2. 调用 fp.write() 后立即关闭 fp,这将强制刷新任何缓冲的输出,至少刷新到内核 I/O 缓存。(注意:若采用此方案,应改用追加模式(例如 "ab")打开文件,否则 "wb+" 会在每次重新打开时截断已写入的内容。)

我更喜欢选项 1,因为在这种情况下没有理由多次打开和关闭文件。(如果你在文件中写了很多行,这些打开/刷新/关闭循环最终会浪费很多时间!)

选项 1 看起来像这样:

import sys, bz2, string, os
#instead of hardcoding filename, get it from arguments
#filename = os.getcwd()
filename = raw_input("Enter the path of bz2 document e.g. files/access_log-20130301.bz2: ")
print "Using file : " + filename
with open(filename+"record.txt", "wb+") as fp:
    source_file = bz2.BZ2File(filename, "r") 
    for line in source_file:
        #Extract the date and put into a variable 
        logdate = string.split(line)[3][1:12]
        #Extract movie name and put into variable movie
        movie = string.split(line)[6]
        #extract who read the movie username = 
        usernames = string.split(line)[2]
        #Only process the movie line if we have /media/movie in it. 
        if movie.find('media/movies') > 0:
            #Prints all things prosscesed
            print "User:" + usernames + " On:" +  logdate + " Was watching:"+ movie
            #p=open(filename+"record.txt", "w")
            fp.write("User: " + usernames + " On: " +  logdate + " Was watching: "+ movie+" File from:"+filename+"\n")

# The with block has ended at this point, so the file will already be closed here.

于 2013-04-12T18:33:09.810 回答



# Open the output file once, before iterating over the log lines.
with open(filename + "record.txt", "wb+") as fp:
    for line in source_file:
        # NOTE(review): the snippet stops here; presumably the loop body from
        # the question follows, writing through fp without reopening the file.


于 2013-04-12T18:34:14.793 回答