我正在尝试计算从 PDF 中提取的一系列单词,但我只得到 0 并且它不正确。
total_number_of_keywords = 0
pdf_file = "CapitalCorp.pdf"
tables=[]
words = ['blank','warrant ','offering','combination ','SPAC','founders']
count={} # is a dictionary data structure in Python
with pdfplumber.open(pdf_file) as pdf:
pages = pdf.pages
for i,pg in enumerate(pages):
tbl = pages[i].extract_tables()
for elem in words:
count[elem] = 0
for line in f'{i} --- {tbl}' :
elements = line.split()
for word in words:
count[word] = count[word]+elements.count(word)
print (count)