1

如果我们假设一个 PDF 文档“doc.pdf”包含“hello world”作为一个简单的字符串。

让我们考虑这段代码:

// Rewrites the content stream of the first page of "doc.pdf": every string
// shown by a Tj operator is wrapped in "3 Tr ... 0 Tr" (rendering mode 3, i.e.
// invisible), then a new string is appended at position (15 31) and the result
// is saved as "modified_doc.pdf". Written against the PDFBox 1.x API.
//read the document
    PDDocument doc;
    doc= PDDocument.load("doc.pdf");
//extract all the pages from the document and put them in a list
    List pages = doc.getDocumentCatalog().getAllPages(); 
//extract the page number 0
    PDPage page = (PDPage) pages.get(0);
//analyse the content stream
    PDStream contents = page.getContents();
    PDFStreamParser parser = new PDFStreamParser(contents.getStream());
//parsing the extracted contents
    parser.parse(); 
    List tokens = parser.getTokens();
//the token list is edited in place, so o is advanced past every inserted token
    for (int o = 0; o < tokens.size(); o++)  
    {
    Object next = tokens.get(o); 
//if this content is an operator
    if (next instanceof PDFOperator)  {
    PDFOperator op = (PDFOperator) next;
//and if this operator is a Tj
    if (op.getOperation().equals("Tj"))
    {
//now i want to access to this string
    COSString previous = (COSString) tokens.get(o - 1);  
    String string = previous.getString();
//rendering mode invisible the string in the document
//"(string) Tj" becomes "3 Tr (string) Tj 0 Tr () Tj"
    tokens.set(o-1, COSInteger.get(3));
    tokens.set(o, PDFOperator.getOperator("Tr"));
    tokens.add(++o, new COSString(string));
    tokens.add(++o, PDFOperator.getOperator("Tj"));
    tokens.add(++o, COSInteger.get(0));
    tokens.add(++o, PDFOperator.getOperator("Tr"));
    tokens.add(++o, new COSString(""));
    tokens.add(++o, PDFOperator.getOperator("Tj"));
    }
//update the modified stream
//NOTE(review): as posted, this still sits inside the "instanceof PDFOperator"
//branch of the loop, so it runs for every operator token - confirm intended nesting
        PDStream updatedStream = new PDStream(doc);  
        OutputStream out = updatedStream.createOutputStream();  
        ContentStreamWriter tokenWriter = new ContentStreamWriter(out);  
        tokenWriter.writeTokens(tokens);  
        page.setContents(updatedStream);
        }
//construct a new object that contains the string "My name is Liszt" and take (15 31) as a specific position
        PDPageContentStream content = new PDPageContentStream(doc, page, true, false);
        PDFont font= PDType1Font.HELVETICA;
        content.setFont(font, 12);
        content.beginText();
        content.appendRawCommands("15 31 Td");
        content.appendRawCommands("(My name is Liszt)Tj\n");
//NOTE(review): close() is issued before endText() here, exactly as posted
        content.close();
        content.endText();
        doc.save("modified_doc.pdf");          
            }
        }

现在,让我们考虑同一个文档“doc.pdf”,但我想编写另一个代码来验证文档是否还包含 TJ 运算符,而不仅仅是 Tj。

所以我尝试编写第二个代码,但我需要帮助来编辑它,以解决我的所有错误并获得与第一个代码相同的结果。

// Second attempt: same token rewrite as the first listing, extended with a
// branch for the TJ operator. Code is kept exactly as posted.
// Load the document and take its first page (index 0).
PDDocument doc;
doc= PDDocument.load("doc.pdf");
List pages = doc.getDocumentCatalog().getAllPages(); 
PDPage page = (PDPage) pages.get(0);
// Parse the page's content stream into a list of tokens.
PDStream contents = page.getContents();
PDFStreamParser parser = new PDFStreamParser(contents.getStream());
parser.parse(); 
List tokens = parser.getTokens();
// Walk the tokens; the list is modified in place and o is advanced by each ++o below.
for (int o = 0; o < tokens.size(); o++)  
{
Object next = tokens.get(o); 
if (next instanceof PDFOperator)  {
PDFOperator op = (PDFOperator) next;
if (op.getOperation().equals("Tj"))
{
// The token in front of Tj is cast to COSString and its text is read.
COSString previous = (COSString) tokens.get(o - 1);  
String string = previous.getString();
// Overwrite "(string) Tj" with "3 Tr", then insert "(string) Tj 0 Tr () Tj" after it.
// (The first listing describes rendering mode 3 as making the string invisible.)
tokens.set(o-1, COSInteger.get(3));
tokens.set(o, PDFOperator.getOperator("Tr"));
tokens.add(++o, new COSString(string));
tokens.add(++o, PDFOperator.getOperator("Tj"));
tokens.add(++o, COSInteger.get(0));
tokens.add(++o, PDFOperator.getOperator("Tr"));
tokens.add(++o, new COSString(""));
tokens.add(++o, PDFOperator.getOperator("Tj"));

}else if(op.getOperation().equals("TJ")){
// The token in front of TJ is cast to COSArray; only its COSString elements are visited.
COSArray previous = (COSArray) tokens.get(o - 1); 
for (int k = 0; k < previous.size(); k++)  
{
Object arrElement = previous.getObject(k);
if (arrElement instanceof COSString)  
{
COSString cosString = (COSString) arrElement; 
String string = cosString.getString(); 
// i get errors in the instructions below 
// NOTE(review): this sequence runs once per string element. o has grown by 6 after
// the first pass, so later passes overwrite positions o-1 / o that were just inserted.
// NOTE(review): a COSString (not a COSArray) is placed in front of each TJ operator here.
tokens.set(o-1, COSInteger.get(3));
tokens.set(o, PDFOperator.getOperator("Tr"));
tokens.add(++o, new COSString(string));
tokens.add(++o, PDFOperator.getOperator("TJ"));
tokens.add(++o, COSInteger.get(0));
tokens.add(++o, PDFOperator.getOperator("Tr"));
tokens.add(++o, new COSString(""));
tokens.add(++o, PDFOperator.getOperator("TJ"));
}
}
}                        
}
// Write the token list into a new stream and install it as the page contents.
// NOTE(review): the for loop is only closed by the brace after setContents, so as
// posted this runs on every iteration - confirm intended nesting.
PDStream updatedStream = new PDStream(doc);  
OutputStream out = updatedStream.createOutputStream();  
ContentStreamWriter tokenWriter = new ContentStreamWriter(out);  
tokenWriter.writeTokens(tokens);  
page.setContents(updatedStream);
}

//how to write this object for both Tj and TJ ?
// Opens a content stream on the page, sets Helvetica 12, and emits raw "Td" and "TJ" commands.
    PDPageContentStream content = new PDPageContentStream(doc, page, true, false);
    PDFont font= PDType1Font.HELVETICA;
    content.setFont(font, 12);
    content.beginText();
    content.appendRawCommands("15 31 Td");
    // NOTE(review): TJ is written here with a plain string operand "(...)", not an array "[...]".
    content.appendRawCommands("(My name is Liszt)TJ\n");
    // NOTE(review): close() is issued before endText(), exactly as posted.
    content.close();
    content.endText();
    doc.save("modified_doc.pdf");          
    }
    }  

最好的祝福,

李斯特。

4

1 回答 1

1

您的代码中有很多问题。

在您的顶级代码部分中,您有

// Quoted from the question as posted: close() comes first, endText() second.
content.close();
content.endText();

你应该先调用 endText,再调用 close。

稍后,您的TJ特定代码如下所示(格式化后):

// The asker's TJ branch, reformatted; the code itself is unchanged.
else if (op.getOperation().equals("TJ"))
{
    // The operand in front of TJ is cast to COSArray.
    COSArray previous = (COSArray) tokens.get(o - 1); 
    for (int k = 0; k < previous.size(); k++)  
    {
        Object arrElement = previous.getObject(k);
        // Only COSString elements are handled; other array elements are skipped.
        if (arrElement instanceof COSString)  
        {
            COSString cosString = (COSString) arrElement; 
            String string = cosString.getString(); 
            // i get errors in the instructions below 
            // These two set() calls run on every pass of the k loop, although o has
            // already been advanced by the add(++o, ...) calls of the previous pass.
            tokens.set(o-1, COSInteger.get(3));
            tokens.set(o, PDFOperator.getOperator("Tr"));
            // A COSString, not a COSArray, is placed in front of the TJ operator.
            tokens.add(++o, new COSString(string));
            tokens.add(++o, PDFOperator.getOperator("TJ"));
            tokens.add(++o, COSInteger.get(0));
            tokens.add(++o, PDFOperator.getOperator("Tr"));
            tokens.add(++o, new COSString(""));
            tokens.add(++o, PDFOperator.getOperator("TJ"));
        }
    }
}                        

在你针对 k 的循环中:

  1. 您覆盖了 tokens 列表中 o-1 和 o 这两个位置。虽然第一次这样做是有道理的(删除原来的 TJ 操作),但之后就不行了。

    我建议在读取数组参数之后显式地移除这两个元素,之后只使用 add;

  2. 您添加的 TJ 操作前面只有一个 COSString,而不是 TJ 所期望的 COSArray。

    我建议对 COSString 参数使用 Tj;或者,如果必须使用 TJ,则将字符串包装到一个数组中;

  3. 您忽略原始previous数组的数字内容。

顺便说一句,你究竟为什么要写那个循环?为什么不直接这样做:

else if (op.getOperation().equals("TJ"))
{
    COSArray previous = (COSArray) tokens.get(o - 1);

    tokens.set(o-1, COSInteger.get(3));
    tokens.set(o, PDFOperator.getOperator("Tr"));
    tokens.add(++o, previous);
    tokens.add(++o, PDFOperator.getOperator("TJ"));
    tokens.add(++o, COSInteger.get(0));
    tokens.add(++o, PDFOperator.getOperator("Tr"));
}                        

如果这样做不能满足您的需求,您就需要说明您到底想要实现什么。

于 2013-09-16T14:55:30.150 回答