我使用 fitz(PyMuPDF 模块)通过以下代码提取 PDF 中的图像,但它会提取所有图像,包括小图标。我需要避免提取这些小图标,只获取真正的图像。
import fitz
# Extract every image referenced by each page of a PDF and save it as a PNG.
#
# Output naming (kept exactly as in the original script):
#   * gray / RGB images        -> "<page>_<index-on-page>.png"
#   * images converted to RGB  -> "<xref>_<page>.png"

# Open the document exactly once; `file` holds the path, `pdf` the document.
file = "example.pdf"
pdf = fitz.open(file)
page = len(pdf)  # number of pages in the document

for pic in range(page):
    # Each entry describes one image used on page `pic`; item 0 is its xref.
    image_list = pdf.getPageImageList(pic)
    j = 1  # 1-based running index of the image on the current page
    for image in image_list:
        xref = image[0]
        pix = fitz.Pixmap(pdf, xref)
        # NOTE(review): if small icons must be skipped, this looks like the
        # place to test the pixmap's dimensions (pix.width / pix.height)
        # against a minimum size -- the threshold is not defined here.

        # Count only colour components (exclude alpha): fewer than 4 means
        # gray or RGB, which can be written to PNG directly.
        if pix.n - pix.alpha < 4:
            pix.writePNG(f'{pic}_{j}.png')
        else:
            # CMYK (or similar): PNG cannot store it, so convert to RGB
            # first. The converting constructor is fitz.Pixmap, not fitz.open.
            pix1 = fitz.Pixmap(fitz.csRGB, pix)
            pix1.writePNG(f'{xref}_{pic}.png')
            pix1 = None  # drop the converted pixmap promptly
        pix = None  # release the pixmap before loading the next image
        j = j + 1
    print(f'Total images on page {pic} are {len(image_list)}')