我正在寻找用数据库中的数据填写预制 PDF 表单并将其“展平”的最佳方法。目前我使用 pdftk,但它无法正确处理各国语言的特殊字符(非 ASCII 字符)。
是否有 Python 库或示例可以演示如何填写 PDF 表单并将其渲染为不可编辑的 PDF?
试试 fillpdf 库,它使这个过程非常简单(使用 `pip install fillpdf` 安装,并使用 `conda install -c conda-forge poppler` 安装其依赖 poppler)。
基本用法:
from fillpdf import fillpdfs
# Look up the fields of the blank form; the call returns them as a dictionary.
fillpdfs.get_form_fields("blank.pdf")

# Take the field names from that dictionary and give each one the value to
# write. For radio boxes, 'Off' means not filled and 'Yes' means filled.
data_dict = {
    'Text2': 'Name',
    'Text4': 'LastName',
    'box': 'Yes',
}

# Write the values into a copy of the blank form.
fillpdfs.write_fillable_pdf('blank.pdf', 'new.pdf', data_dict)

# If you want it flattened, also write a flattened copy of the filled form.
fillpdfs.flatten_pdf('new.pdf', 'newflat.pdf')
更多信息在这里:https://github.com/t-houssian/fillpdf
好像填得很好。
有关更多信息,请参见此处的答案:https://stackoverflow.com/a/66809578/13537359
我们也可以考虑使用 API 而不是导入包来处理 PDF。这种方式有它自己的优点/缺点,但是它给了我们新的视角来增强我们的应用程序!
一个例子是使用 PDF.co API 来填写 PDF 表单。您还可以考虑其他替代方案,例如 Adobe API、DocSpring、pdfFiller 等。以下代码片段在演示使用预定义 JSON 有效负载填充 PDF 表单时可能很有用。
import os
import requests # pip install requests
# The authentication key (API Key) sent with every PDF.co request.
# Get your own by registering at https://app.pdf.co/documentation/api
# The key is read from the PDFCO_API_KEY environment variable so that a real
# key does not have to be stored in the source file; the masked placeholder
# below is used only when that variable is not set.
API_KEY = os.environ.get("PDFCO_API_KEY", "**************************************")

# Base URL for PDF.co Web API requests
BASE_URL = "https://api.pdf.co/v1"
def main(args=None):
    """Script entry point: run the PDF form filling example.

    ``args`` is accepted but not used.
    """
    fillPDFForm()
def fillPDFForm(destinationFile="result.pdf"):
    """Fill PDF form using PDF.co Web API and save the result locally.

    Posts a fixed set of field values for a sample form to the
    ``/pdf/edit/add`` endpoint and, when the service reports success,
    downloads the generated file.

    Args:
        destinationFile: Path the downloaded PDF is written to.

    Returns:
        None. Success and every failure (HTTP error on either request, or an
        error reported by the service) are reported with ``print``.
    """
    # (field name, value) pairs to write; every field is on page 1.
    field_values = [
        ("topmostSubform[0].Page1[0].FilingStatus[0].c1_01[1]", "True"),
        ("topmostSubform[0].Page1[0].f1_02[0]", "John A."),
        ("topmostSubform[0].Page1[0].f1_03[0]", "Doe"),
        ("topmostSubform[0].Page1[0].YourSocial_ReadOrderControl[0].f1_04[0]", "123456789"),
        ("topmostSubform[0].Page1[0].YourSocial_ReadOrderControl[0].f1_05[0]", "Joan B."),
        ("topmostSubform[0].Page1[0].YourSocial_ReadOrderControl[0].f1_06[0]", "Doe"),
        ("topmostSubform[0].Page1[0].YourSocial_ReadOrderControl[0].f1_07[0]", "987654321"),
    ]

    # Prepare requests params as JSON
    # See documentation: https://apidocs.pdf.co
    payload = {
        "async": False,
        "encrypt": False,
        "name": "f1040-filled",
        "url": "https://bytescout-com.s3-us-west-2.amazonaws.com/files/demo-files/cloud-api/pdf-form/f1040.pdf",
        "fields": [
            {"fieldName": name, "pages": "1", "text": text}
            for name, text in field_values
        ],
        "annotations": [
            {
                "text": "Sample Filled with PDF.co API using /pdf/edit/add. Get fields from forms using /pdf/info/fields",
                "x": 10,
                "y": 10,
                "size": 12,
                "pages": "0-",
                "color": "FFCCCC",
                "link": "https://pdf.co",
            },
        ],
        "images": [],
    }

    # Prepare URL for 'Fill PDF' API request
    url = f"{BASE_URL}/pdf/edit/add"
    headers = {"x-api-key": API_KEY, "Content-Type": "application/json"}

    # Execute request and get response as JSON
    response = requests.post(url, json=payload, headers=headers)
    if response.status_code != 200:
        print(f"Request error: {response.status_code} {response.reason}")
        return

    result = response.json()
    if result["error"]:
        # Show service reported error
        print(result["message"])
        return

    # Download the result file from the URL returned by the service
    download = requests.get(result["url"], stream=True)
    if download.status_code != 200:
        # Report the status of the download request itself, not of the
        # earlier (successful) fill request.
        print(f"Request error: {download.status_code} {download.reason}")
        return

    with open(destinationFile, 'wb') as file:
        for chunk in download.iter_content(chunk_size=8192):
            file.write(chunk)
    print(f"Result file saved as \"{destinationFile}\" file.")
# Run the example only when this file is executed as a script.
if __name__ == '__main__':
    main()
展平 PDF 并不需要专门的库。根据 Adobe 文档,只需将可编辑表单字段的字段标志(/Ff)设置为 1(即位位置 1 的 ReadOnly 标志),即可使该字段变为只读。我在这里提供了一个完整的解决方案,不过它使用了 Django:
https://stackoverflow.com/a/55301804/8382028
Adobe Docs(第 552 页):
https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/pdf_reference_archives/PDFReference.pdf
使用 PyPDF2 填充字段,然后循环注释以更改位位置:
from io import BytesIO
import PyPDF2
from PyPDF2.generic import BooleanObject, NameObject, IndirectObject, NumberObject
def set_need_appearances_writer(writer):
    """Make sure the writer's catalog has an /AcroForm with /NeedAppearances.

    According to the original author, this is used so that form fields do
    not overlap, which makes printing hard.

    Args:
        writer: The PyPDF2 ``PdfFileWriter`` whose catalog is updated in
            place.

    Returns:
        The same ``writer``. Any exception raised while updating the catalog
        is printed and otherwise ignored (best effort).
    """
    try:
        catalog = writer._root_object
        # get the AcroForm tree and add the "/NeedAppearances" attribute
        if "/AcroForm" not in catalog:
            # NOTE(review): the reference below uses the current object count
            # as its id and no new object is registered with the writer here;
            # confirm that it resolves to the intended dictionary.
            catalog.update({
                NameObject("/AcroForm"): IndirectObject(len(writer._objects), 0, writer)})
        need_appearances = NameObject("/NeedAppearances")
        catalog["/AcroForm"][need_appearances] = BooleanObject(True)
    except Exception as e:
        print('set_need_appearances_writer() catch : ', repr(e))
    return writer


data_dict = {}  # this is a dict of your DB form values

output_stream = BytesIO()

# Open the pdf. The reader pulls objects from the file lazily, so the file
# stays open until the writer has written everything out.
with open("YourPDF.pdf", "rb") as input_stream:
    pdf_reader = PyPDF2.PdfFileReader(input_stream, strict=False)
    if "/AcroForm" in pdf_reader.trailer["/Root"]:
        pdf_reader.trailer["/Root"]["/AcroForm"].update(
            {NameObject("/NeedAppearances"): BooleanObject(True)})

    pdf_writer = PyPDF2.PdfFileWriter()
    # AcroForm holds the form fields; set /NeedAppearances on the writer to
    # fix printing issues.
    set_need_appearances_writer(pdf_writer)

    pdf_writer.addPage(pdf_reader.getPage(0))
    page = pdf_writer.getPage(0)

    # update form fields
    pdf_writer.updatePageFormFieldValues(page, data_dict)

    # A page without form widgets may have no /Annots entry at all.
    if "/Annots" in page:
        for annot_ref in page["/Annots"]:
            writer_annot = annot_ref.getObject()
            field_name = writer_annot.get("/T")
            if any(field_name == key for key in data_dict):
                # Set the ReadOnly bit (bit position 1) while keeping any
                # other field flags that are already set.
                flags = int(writer_annot["/Ff"]) if "/Ff" in writer_annot else 0
                writer_annot.update({
                    NameObject("/Ff"): NumberObject(flags | 1)  # make ReadOnly
                })

    pdf_writer.write(output_stream)

# output_stream now holds the filled PDF with the matched fields read-only