我是 Python 菜鸟。
我正在尝试读取文本文件中的关键字列表,打开并读取许多文本文件以搜索关键字列表,为每个关键字的每个实例提取文本切片,然后将所有文本切片输出到 csv 文件。
我有下面的代码,但是当我运行它时,它只针对第一个文件搜索第一个关键字,写入 csv,然后关闭。我已经搜索了一个答案,但还没有完全找到我正在寻找的东西。
关于我在哪里搞砸的任何想法?
import os
import re
def formatText(s):
s = s.replace("[", "")
s = s.replace("]", "")
s = s.replace(",", "")
s = s.replace("[", "")
s = s.replace("\n", " ")
s = s.replace("\r", " ")
return s
fdoc = wlitems = None
termct = None
f = open(r'filenames.txt')
fdoc = f.read()
flist = fdoc.split('\n')
if f: f.close()
wlist = open(r'keywords.txt')
try:
wltxt = wlist.read()
wlitems = wltxt.split('\n')
finally:
if wlist: wlist.close()
if wlitems and fdoc:
op = open(r'output.csv', 'w')
try:
termct = {}
for chnk in flist:
if not chnk == "":
chnknum = flist.index(chnk)
curfile = open(chnk)
curfilecnts = curfile.read()
ucurfilecnts = curfilecnts.upper()
filelength = len(ucurfilecnts)
for kwrd in wlitems:
if len(kwrd)>0:
curpos = 0
# print keyword
keyd = re.compile(kwrd,re.IGNORECASE | re.MULTILINE | re.DOTALL)
while curpos <= filelength:
sumline = keyd.search(ucurfilecnts,curpos)
# print sumline
if sumline:
curpos = sumline.end()
keydtxt = curfilecnts[sumline.start()-20: sumline.end()+200]
op.write(chnk + "," + kwrd + "," + str(curpos) + "," + formatText(keydtxt) + "\n")
else:
curpose = filelength +1
finally:
if op: op.close()