# Import the necessary classes from the PyPDF2 library
from PyPDF2 import PdfFileReader, PdfFileWriter
from PyPDF2.pdf import ContentStream
from PyPDF2.generic import TextStringObject, NameObject
from PyPDF2.utils import b_
# The watermark reads "SAMPLE", so different capitalizations were tried below.
# Prefix that identifies the watermark's text run (the match is case-sensitive).
wm_text = 'Sample'
# Text written in place of the watermark; an empty string blanks it out.
replace_with = ''

# Load the PDF into PyPDF2. The input file is kept open until the output has
# been written, because the pages added to the writer still refer back to
# objects owned by the reader.
with open('input.pdf', "rb") as input_stream:
    source = PdfFileReader(input_stream)
    output = PdfFileWriter()

    # For each page
    for page_number in range(source.getNumPages()):
        # Get the current page and its contents
        page = source.getPage(page_number)

        # A page without a content stream has nothing to scan; copy it as-is.
        if "/Contents" not in page:
            output.addPage(page)
            continue

        content_object = page["/Contents"].getObject()
        content = ContentStream(content_object, source)

        # Loop over all PDF elements
        for operands, operator in content.operations:
            # NOTE: this part may need adjusting for the specific PDF file
            # (for example, matching the "Tj" operator instead of "TJ").
            if operator != b_("TJ") or not operands:
                continue

            # The TJ operand is an array of strings and kerning offsets.
            # Skip anything that is not a non-empty array.
            text_array = operands[0]
            if not isinstance(text_array, list) or not text_array:
                continue

            text = text_array[0]
            if isinstance(text, TextStringObject) and text.startswith(wm_text):
                # Replace the array's contents in place so the operand stays
                # an array; a bare string operand is not valid for TJ.
                text_array[:] = [TextStringObject(replace_with)]

        # Set the modified content as the content object on the page
        page[NameObject('/Contents')] = content

        # Add the page to the output
        output.addPage(page)

    # Write the output stream
    with open("output.pdf", "wb") as outputStream:
        output.write(outputStream)