您可以使用下面的代码，通过 PyPDF2 提取 PDF 中所有页面的文本：
import PyPDF2
def convert_pdf_to_text(document):
    """Extract the text of every page of a PDF as a single string.

    Args:
        document: The PDF to read, passed unchanged to the PyPDF2 reader
            (the accompanying example passes a file path).

    Returns:
        The extracted text of all pages concatenated in page order, with
        every newline character removed.
    """
    if hasattr(PyPDF2, "PdfReader"):
        # Newer PyPDF2 releases expose snake_case names; the camelCase API
        # used in the fallback branch was removed in PyPDF2 3.0.
        reader = PyPDF2.PdfReader(document, strict=False)
        page_texts = [page.extract_text() for page in reader.pages]
    else:
        # Legacy PyPDF2 releases that only provide the camelCase API.
        reader = PyPDF2.PdfFileReader(document, strict=False)
        page_texts = [
            reader.getPage(page_number).extractText()
            for page_number in range(reader.getNumPages())
        ]
    # Join once rather than growing a string page by page, then drop the
    # line breaks embedded in the extracted text.
    return "".join(page_texts).replace("\n", "")
# Example call on a sample file; presumably run in an interactive session,
# where the returned string is echoed as shown in the output below.
convert_pdf_to_text ('pdf_test.pdf')
输出：
'A Simple PDF File This is a small demonstration .pdf file - just for use in the Virtual Mechanics tutorials. More text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. Boring, zzzzz. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. And more text. Even more. Continued on page 2 ... Details State: State_name City: City_name Country: Country_name Rig No: 4455555 Source Id: k4-3k44 '