我尝试将目录的所有 pdf 转换为图像,但我有一个问题,因为它只转换一个 pdf 而不是全部。
import matplotlib
import pytesseract
import os
import argparse
import cv2
from PIL import Image
import PyPDF2
from wand.image import Image as wi
for filename in os.listdir(src_path):
count = count + 1
# apply tesseract OCR and write to text file in specified target directory
target_path = args.trg_dir
# check if a file is a directory
if os.path.isdir(filename):
pass
else:
# check if a file is a pdf
try:
PyPDF2.PdfFileReader(open(os.path.join(src_path, filename), "rb"))
except PyPDF2.utils.PdfReadError:
else:
pdf = wi(filename=filename, resolution=300)
pdfimage = pdf.convert("jpeg")
i=1
for img in pdfimage.sequence:
page = wi(image=img)
page.save(filename=str(i)+".jpg")
i +=1