0

我是 Python 菜鸟。

我正在尝试读取文本文件中的关键字列表,打开并读取许多文本文件以搜索关键字列表,为每个关键字的每个实例提取文本切片,然后将所有文本切片输出到 csv 文件。

我有下面的代码,但是当我运行它时,它只针对第一个文件搜索第一个关键字,写入 csv,然后关闭。我已经搜索了一个答案,但还没有完全找到我正在寻找的东西。

关于我在哪里搞砸的任何想法?

import os
import re

def formatText(s):
    s = s.replace("[", "")
    s = s.replace("]", "")
    s = s.replace(",", "")
    s = s.replace("[", "")
    s = s.replace("\n", " ")
    s = s.replace("\r", " ")
    return s

fdoc = wlitems = None
termct = None

f = open(r'filenames.txt')
fdoc = f.read()
flist = fdoc.split('\n')
if f: f.close()

wlist = open(r'keywords.txt')
try:
    wltxt = wlist.read()
    wlitems = wltxt.split('\n')
finally:
    if wlist: wlist.close()

if wlitems and fdoc:
    op = open(r'output.csv', 'w')
    try:
        termct = {}
        for chnk in flist:
            if not chnk == "":
                chnknum = flist.index(chnk)
                curfile = open(chnk)
                curfilecnts = curfile.read()
                ucurfilecnts = curfilecnts.upper()
                filelength = len(ucurfilecnts)
                for kwrd in wlitems:
                    if len(kwrd)>0:
                        curpos = 0
#                   print keyword
                        keyd = re.compile(kwrd,re.IGNORECASE | re.MULTILINE | re.DOTALL)
                        while curpos <= filelength:
                            sumline = keyd.search(ucurfilecnts,curpos)
#                   print sumline
                            if sumline:
                                curpos = sumline.end()
                                keydtxt = curfilecnts[sumline.start()-20: sumline.end()+200]
                                op.write(chnk + "," + kwrd + "," + str(curpos) + "," + formatText(keydtxt) + "\n")
                            else:
                                curpose = filelength +1
    finally:
            if op: op.close()
4

0 回答 0