我想在 Windows 环境中检查 PDF 文件是否损坏,并编写了以下 Python 代码。
想请教一下:这是检测损坏 PDF 文件的最佳方法吗?还是有其他更简单的方法?
注意:C:\Temp\python\sample-map (1).pdf 是一个已损坏的 PDF 文件。
这是示例代码
import os
import subprocess
import re
from subprocess import Popen, PIPE
def checkFile(fullfile):
    """Run ``file -b`` on *fullfile* and return the command's result.

    Args:
        fullfile: Path of the file to inspect.

    Returns:
        A ``(exitcode, out, err)`` tuple. ``out`` and ``err`` are the raw
        bytes captured from the command's stdout and stderr. If the ``file``
        executable cannot be started, the tuple is
        ``(127, b"", <OS error message encoded as UTF-8>)``.
    """
    # -b, --brief : do not prepend filenames to output lines
    # The argument list is executed without a shell so a path containing
    # spaces or parentheses reaches ``file`` as one argument. Combining a
    # list with shell=True hands only the first element to the shell on
    # POSIX, silently dropping "-b" and the path.
    try:
        proc = subprocess.run(
            ["file", "-b", fullfile],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except OSError as exc:
        # Without a shell a missing executable raises instead of producing a
        # non-zero exit status; map it back onto the tuple callers expect.
        return 127, b"", str(exc).encode("utf-8", "replace")
    return proc.returncode, proc.stdout, proc.stderr
def searchFiles(dirpath):
    """Check every entry of *dirpath* with ``checkFile`` and print a verdict.

    Prints the directory containing this script, then either
    "Path is not valid" (when *dirpath* is not readable) or one line per
    directory entry: ``OK <path>``, ``ERROR <path>`` or
    ``<path> : File not readable``.

    An entry is reported as ERROR when ``checkFile`` returns a non-zero exit
    code, any stderr output, or a description that does not start with
    ``PDF`` followed by a whitespace character.

    Args:
        dirpath: Directory whose entries are checked (not recursive).
    """
    pwdpath = os.path.dirname(os.path.realpath(__file__))
    print("running path : %s" % pwdpath)

    if not os.access(dirpath, os.R_OK):
        print("Path is not valid")
        return

    print("Path %s validation OK \n" % dirpath)
    for name in os.listdir(dirpath):
        fullfile = os.path.join(dirpath, name)
        if not os.access(fullfile, os.R_OK):
            # "%s" (not "$s"): the original placeholder made this line raise
            # TypeError instead of printing the message.
            print("%s : File not readable" % fullfile)
            continue

        code, out, error = checkFile(fullfile)
        # Decode leniently: the tool's output encoding is not guaranteed to
        # be UTF-8, and an undecodable byte must not abort the whole scan.
        description = str(out, "utf-8", "replace")
        looks_like_pdf = re.match(r"PDF\s", description) is not None
        if code != 0 or error or not looks_like_pdf:
            print("ERROR " + fullfile + "\n################")
        else:
            print("OK " + fullfile + "\n################")
if __name__ == "__main__":
    # Raw string: "\T" and "\p" are not valid escape sequences, so the plain
    # literal only worked by accident and triggers an invalid-escape warning.
    # The resulting path value is unchanged.
    searchFiles(r'C:\Temp\python')
样本输出:
$ "C:/Program Files (x86)/Python37-32/python.exe" c:/Users/myuser/python/check_pdf_file.py
running path : c:\Users\myuser\python
Path C:\Temp\python validation OK
OK C:\Temp\python\Induction Guide.pdf
################
ERROR C:\Temp\python\sample-map (1).pdf
################
OK C:\Temp\python\sample-map.pdf
################