当我有坐标时,pdfbox 是否提供一些实用程序来突出显示文本?
文本的边界是已知的。
我知道还有其他库(例如 PDF Clown)提供了同样的功能。但是 PDFBox 是否也提供类似的功能?
好吧,我发现了这一点。很简单。
// Load the source PDF. getAllPages() below is the PDFBox 1.x page accessor.
PDDocument doc = PDDocument.load(/*path to the file*/);
// Fetch page i (zero-based index into the page list).
PDPage page = (PDPage) doc.getDocumentCatalog().getAllPages().get(i);
// The annotation list of the page; the new annotation is appended to it below.
List annots = page.getAnnotations();
// A text-markup annotation of the "highlight" subtype.
PDAnnotationTextMarkup markup = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);
// Bounding box of the annotation.
markup.setRectangle(/*your PDRectangle*/);
// Exact area to highlight, given as four (x, y) corner points.
markup.setQuadPoints(/*float array of size eight with all the vertices of the PDRectangle in anticlockwise order*/);
annots.add(markup);
doc.save(/*path to the output file*/);
// Release the resources held by the document once it has been written.
doc.close();
这是对此处第 1 个回答的扩展,代码与上面的代码基本相同。
改进之处:坐标点改为相对于当前文档的页面大小来计算;另外,原来的黄色非常浅,当单词又短又小时很难看清,因此换成了更醒目的黄色。
此外,它会根据从左上角到右上角的 X、Y 坐标突出显示整个单词,坐标取自字符串中的第一个字符和最后一个字符。
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.List;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;
public class MainSource extends PDFTextStripper {
public MainSource() throws IOException {
super();
}
public static void main(String[] args) throws IOException {
PDDocument document = null;
String fileName = "C:/AnyPDFFile.pdf";
try {
document = PDDocument.load( new File(fileName) );
PDFTextStripper stripper = new MainSource();
stripper.setSortByPosition( true );
stripper.setStartPage( 0 );
stripper.setEndPage( document.getNumberOfPages() );
Writer dummy = new OutputStreamWriter(new ByteArrayOutputStream());
stripper.writeText(document, dummy);
File file1 = new File("C:/AnyPDFFile-New.pdf");
document.save(file1);
}
finally {
if( document != null ) {
document.close();
}
}
}
/**
* Override the default functionality of PDFTextStripper.writeString()
*/
@Override
protected void writeString(String string, List<TextPosition> textPositions) throws IOException {
boolean isFound = false;
float posXInit = 0,
posXEnd = 0,
posYInit = 0,
posYEnd = 0,
width = 0,
height = 0,
fontHeight = 0;
String[] criteria = {"Word1", "Word2", "Word3", ....};
for (int i = 0; i < criteria.length; i++) {
if (string.contains(criteria[i])) {
isFound = true;
}
}
if (isFound) {
posXInit = textPositions.get(0).getXDirAdj();
posXEnd = textPositions.get(textPositions.size() - 1).getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidth();
posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj();
posYEnd = textPositions.get(0).getPageHeight() - textPositions.get(textPositions.size() - 1).getYDirAdj();
width = textPositions.get(0).getWidthDirAdj();
height = textPositions.get(0).getHeightDir();
System.out.println(string + "X-Init = " + posXInit + "; Y-Init = " + posYInit + "; X-End = " + posXEnd + "; Y-End = " + posYEnd + "; Font-Height = " + fontHeight);
/* numeration is index-based. Starts from 0 */
float quadPoints[] = {posXInit, posYEnd + height + 2, posXEnd, posYEnd + height + 2, posXInit, posYInit - 2, posXEnd, posYEnd - 2};
List<PDAnnotation> annotations = document.getPage(this.getCurrentPageNo() - 1).getAnnotations();
PDAnnotationTextMarkup highlight = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);
PDRectangle position = new PDRectangle();
position.setLowerLeftX(posXInit);
position.setLowerLeftY(posYEnd);
position.setUpperRightX(posXEnd);
position.setUpperRightY(posYEnd + height);
highlight.setRectangle(position);
// quadPoints is array of x,y coordinates in Z-like order (top-left, top-right, bottom-left,bottom-right)
// of the area to be highlighted
highlight.setQuadPoints(quadPoints);
PDColor yellow = new PDColor(new float[]{1, 1, 1 / 255F}, PDDeviceRGB.INSTANCE);
highlight.setColor(yellow);
annotations.add(highlight);
}
}
}
以下代码适用于 PDFBox 2.0.7:
PDDocument document = /* get doc */
// Pages are addressed by zero-based index, hence the "- 1" on the
// human-readable page number.
List<PDAnnotation> pageAnnotations = document.getPage(yourPageNumber - 1).getAnnotations();

// Build a text-markup annotation of the highlight subtype.
PDAnnotationTextMarkup marker = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);

// Light yellow, expressed as RGB components.
PDColor highlightColor = new PDColor(new float[]{1, 1, 204 / 255F}, PDDeviceRGB.INSTANCE);
marker.setColor(highlightColor);

// The annotation rectangle is set to the A4 page constant.
marker.setRectangle(PDRectangle.A4);

// quadPoints holds the x,y coordinates of the area to highlight, in Z-like
// order: top-left, top-right, bottom-left, bottom-right.
marker.setQuadPoints(quadPoints);

pageAnnotations.add(marker);
注意:如果您将 doc 保存到文件中,这类注释会显示出来,但它不会出现在由页面生成的图像中,因为没有为该注释创建 AppearanceStream。我用 PDFBOX-3353 中的代码草稿解决了这个问题。
最简单的方法:在所需位置绘制一个矩形,将高度设置为 1,并将填充颜色设置为黑色。或者……
使用 PDFBox:
//create the document and the page
PDDocument doc = new PDDocument();
PDPage page1 = new PDPage();
doc.addPage(page1);
//create the stream
PDPageContentStream stream1 = new PDPageContentStream(doc, page1);
//to simply draw an underscore with the coordinates
//where the first is x start, second y start, third x end, fourth y end
stream1.drawLine(20, 740, 590, 740);
//to draw an underscore thicker than a plain line
//first x begin second y begin third length fourth thickness
//the fill colour is chosen before the path is started, so that nothing
//but the paint operator follows the path construction
stream1.setNonStrokingColor(Color.BLACK);
stream1.addRect(345, 568, 70, 2);
stream1.fill();
//close the stream so the drawing operations are completed on the page
stream1.close();