python - 读取文件十秒钟

Question

我现在正在处理日志文件。我的需求是：在指定的时间段内（比如 10 秒）逐行读取文件。Python 中有办法实现这一点吗？有人可以帮帮我吗？

score 1 · Accepted Answer

使用 Popen 运行 tail 或 tac，然后迭代其输出，直到遇到需要停止的那一行为止。下面是一个示例片段。

import csv
import datetime as dt
import sys
from subprocess import PIPE, Popen

filename = '/var/log/nginx/access.log'
# Read the file from the end: `tail -r` on macOS, `tac` everywhere else.
if sys.platform == 'darwin':
    cmd = ['tail', '-r', filename]
else:
    cmd = ['tac', filename]
# To read the file from the beginning instead, use the following:
# cmd = ['cat', filename]

# stderr is deliberately left attached to the parent's stderr: a pipe that is
# never read hides the child's error messages and can stall the child once
# the pipe buffer fills up.  universal_newlines=True makes stdout yield text
# lines, which csv.reader requires on Python 3.
proc = Popen(cmd, close_fds=True, stdout=PIPE, universal_newlines=True)
output = proc.stdout

# Field names for one log line, in column order.  Every name is commented
# out here, so extract_log_data() returns an empty dict until it is filled in.
FORMAT = [
    # 'foo',
    # 'bar',
]


def extract_log_data(line):
    """Map the fields of one parsed log line onto the names in FORMAT.

    ``line`` is the sequence of column values for a single row.  ``zip``
    stops at the shorter input, so surplus columns (or surplus names) are
    silently dropped.
    """
    return dict(zip(FORMAT, line))


csv.register_dialect('nginx', delimiter=' ', quoting=csv.QUOTE_MINIMAL)
lines = csv.reader(output, dialect='nginx')
started_at = dt.datetime.utcnow()
# Compute the cut-off once instead of rebuilding the timedelta per line.
deadline = started_at + dt.timedelta(seconds=10)
try:
    for line in lines:
        data = extract_log_data(line)
        print(data)
        # The clock is only checked after a line has been handled, so the
        # loop cannot stop while it is blocked waiting for the next line.
        if dt.datetime.utcnow() >= deadline:
            break
finally:
    # Always release the pipe and the child, even if the loop body raised.
    output.close()
    proc.terminate()
    # Reap the child so it does not linger as a zombie process.
    proc.wait()

score 1 · Accepted Answer

代码

from multiprocessing import Process
import time

def read_file(path):
    """Read the text file at ``path`` line by line.

    Each line is currently discarded; replace the ``pass`` with the real
    per-line work.  If the file cannot be opened or read, a message is
    printed and the function returns normally instead of raising.
    """
    try:
        # open the file for reading; `with` closes it on every exit path
        with open(path, "r") as f:
            for line in f:
                # do something
                pass
    except IOError:
        # (path,) keeps the formatting correct even if path is itself a tuple
        print("Failed to open/read from file '%s'" % (path,))

def read_file_limited_time(path, max_seconds):
    """Run ``read_file(path)`` in a child process for at most ``max_seconds``.

    Returns as soon as the child finishes; if it is still running when the
    time limit expires, it is terminated.  ``max_seconds`` may be an int or
    a float; negative values are treated as zero.
    """
    # init and start the worker process
    p = Process(target=read_file, args=(path,))
    p.start()

    # join() with a timeout returns the moment the child exits, so no time
    # is spent waiting once the read has completed, and no polling is needed.
    p.join(max(max_seconds, 0))

    # if the process is still alive after max_seconds, kill it!
    if p.is_alive():
        p.terminate()
        # reap the terminated child so it does not linger as a zombie
        p.join()

def main():
    """Script entry point: read ``f1.txt`` with a 10-second time limit."""
    target, limit = "f1.txt", 10
    read_file_limited_time(target, limit)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

笔记

我们之所以每隔 1 秒“唤醒”一次并检查所启动的进程是否仍然存活，只是为了避免在进程已经结束之后还继续休眠。否则，如果进程在 1 秒后就结束了，我们就会白白多休眠 9 秒。

python - 读取文件十秒钟

2 回答 2

代码

笔记

Related

Reference