有没有可以工作的代码片段?我已经尝试过将pdf转换为html
from pdfminer.pdfinterp import PDFResourceManager
from pdfminer.pdfpage import PDFPage
from pdfminer.converter import HTMLConverter, TextConverter
from pdfminer.layout import LAParams
import os
import contextlib
import tempfile
rsrcmgr = PDFResourceManager()
laparams = LAParams()
converter = HTMLConverter if format == 'html' else TextConverter
out_file = "A:\folder"
in_file = "A:\folder\pyhtml.html"
pdf_filename = 'insurance.pdf'
device = converter(rsrcmgr, out_file, codec='utf-8', laparams=laparams)
PDFPage.get_pages(rsrcmgr, device, in_file, pagenos=[1], maxpages=1)
with contextlib.closing(tempfile.NamedTemporaryFile(mode='r', suffix='.xml')) as xmlin:
cmd = 'pdftohtml -xml -nodrm -zoom 1.5 -enc UTF-8 -noframes "%s" "%s"' % (
pdf_filename, xmlin.name.rpartition('.')[0])
os.system(cmd + " >/dev/null 2>&1")
result = xmlin.read().decode('utf-8')
当我运行上面的代码时,它给了我以下错误
Traceback (most recent call last):
File "a:\folder\new - Copy.py", line 14, in <module>
device = converter(rsrcmgr, out_file, codec='utf-8', laparams=laparams)
AttributeError: 'str' object has no attribute 'write'