Pdf 正在生成,但它是空白的,我想在不丢失格式的情况下获取 pdf 中的 html 内容数据,所以我在这个唯一的空白 pdf 中尝试了这个代码正在生成
package config;
import com.lowagie.text.DocumentException;
import org.apache.commons.io.FileUtils;
import org.docx4j.org.xhtmlrenderer.pdf.ITextRenderer;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
public class removeHtmlTag {
public static void main(String [] args) throws DocumentException, IOException {
FileUtils.writeByteArrayToFile(new File("removeHtmlTag.pdf"), toPdf("<b>YouAAA gotta walk and don't look back</b>"));
}
/**
* Generate a PDF document
* @param html HTML as a string
* @return bytes of PDF document
*/
private static byte[] toPdf(String html) throws DocumentException, IOException {
final ITextRenderer renderer = new ITextRenderer();
renderer.setDocumentFromString(html);
renderer.layout();
try (ByteArrayOutputStream fos = new ByteArrayOutputStream(html.length())) {
renderer.createPDF(fos);
return fos.toByteArray();
}
}
}