import os
import io
from PIL import Image
import pytesseract
from wand.image import Image as wi
import gc
# Open the PDF through Wand with resolution=300. This is the call that
# fails with the PolicyError quoted just below the snippet.
# NOTE(review): that error presumably comes from an ImageMagick security
# policy that denies reading PDF files -- confirm against the local policy.xml.
pdfim = wi(filename="salem-father.pdf", resolution=300)
PolicyError: not authorized `salem-father.pdf' @ error/constitute.c/ReadImage/412
import os
import io
from PIL import Image
import pytesseract
from wand.image import Image as wi
import gc
# Load the PDF via Wand, passing resolution=300. The PolicyError shown
# right after this snippet is raised by this statement.
# NOTE(review): the error presumably means ImageMagick's security policy
# forbids reading PDF files -- verify in the installed policy.xml.
pdfim = wi(filename="salem-father.pdf", resolution=300)
PolicyError: not authorized `salem-father.pdf' @ error/constitute.c/ReadImage/412
You can extract the images embedded in a PDF file and save them either in their original format or with a .jpg extension, using the code below.
requirements.txt :
PyMuPDF==1.16.5
python-dateutil==2.8.0
pytz==2019.3
six==1.12.0
code:
import fitz
import random, string
# Extract every image embedded in one page of a PDF and write each image to
# disk twice: once under the extension PyMuPDF reports for it and once under
# a ".jpg" extension.
doc = "mypdf.pdf"  # path to the PDF file
doc = fitz.open(doc)
pno = doc.loadPage(4)  # page to read; PyMuPDF page numbers are 0-based
text = pno.getText('dict')  # page content in dict form
blocks = text["blocks"]
# Keep only the blocks whose "type" is 1, i.e. the image blocks.
imgblocks = [b for b in blocks if b["type"] == 1]
# Random 16-character alphanumeric prefix shared by all files of this run.
x = ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase +
                          string.digits) for _ in range(16))
if imgblocks:
    for index, img in enumerate(imgblocks):
        # File name using the extension stored with the image block.
        img_name1 = "%s-%s.%s" % (x, index, img['ext'])
        # File name with a fixed ".jpg" extension.
        img_name2 = "%s-%s.jpg" % (x, index)
        with open(img_name1, 'wb') as f:
            f.write(img['image'])
        # NOTE: the same bytes are written here unchanged; the data is not
        # re-encoded, only the file extension differs.
        with open(img_name2, 'wb') as f:
            f.write(img['image'])