我一直在调试一段代码,其目的是把保存在字符串变量中的网页内容转换为 PDF 文档。它使用 iTextSharp 的 C# 工具 (xmlworker),是在 sourceforge 源代码中 html2pdf.csproj 附带的示例代码基础上修改而来的。该示例代码将现有的 html 文件转换为 PDF 文件,并将其保存在与被转换文件相同的目录中。而我有一个包含 html 格式文本的字符串变量,希望将生成的 PDF 以字节数组的形式返回,再传递给 Web 环境中的客户端进行打印。问题是我收到一条“IOException 未被用户代码处理”的消息,指出“文档没有页面”。我不太确定这意味着什么,也不知道该如何诊断这个问题。基于文件的示例代码可以正常工作,我也已经成功地将该 html 字符串的静态版本转换为 PDF。下面是修改后的代码:
/// <summary>
/// Renders an HTML string to a PDF document using the iTextSharp XMLWorker
/// pipeline and returns the PDF as a byte array.
/// </summary>
/// <param name="html">The (X)HTML markup to convert.</param>
/// <param name="filename">Passed to <c>ImageProvider</c>; used by it to locate images referenced from the markup.</param>
/// <returns>The bytes of the generated PDF document.</returns>
/// <exception cref="ArgumentNullException"><paramref name="html"/> is null.</exception>
private byte[] createPDF(string html, string filename) {
    if (html == null) {
        throw new ArgumentNullException("html");
    }

    string printPDFCSS = Server.MapPath("/content/printPDF.css");

    using (MemoryStream msOutput = new MemoryStream()) {
        Document doc = new Document(PageSize.LETTER);
        doc.SetMargins(doc.LeftMargin, doc.RightMargin, 35, 0);
        PdfWriter pdfWriter = PdfWriter.GetInstance(doc, msOutput);
        doc.Open();

        Dictionary<String, String> substFonts = new Dictionary<String, String>();
        substFonts["Arial Unicode MS"] = "Helvetica";

        // Load the print stylesheet. The stream is opened read-only and disposed
        // as soon as the CSS has been read, so the file is not left locked.
        CssFilesImpl cssFiles = new CssFilesImpl();
        using (FileStream cssStream = new FileStream(printPDFCSS, FileMode.Open, FileAccess.Read, FileShare.Read)) {
            cssFiles.Add(XMLWorkerHelper.GetCSS(cssStream));
        }
        StyleAttrCSSResolver cssResolver = new StyleAttrCSSResolver(cssFiles);

        HtmlPipelineContext hpc = new HtmlPipelineContext(new CssAppliersImpl(new UnembedFontProvider(XMLWorkerFontProvider.DONTLOOKFORFONTS, substFonts)));
        hpc.SetImageProvider(new ImageProvider(filename));
        hpc.SetAcceptUnknown(true).AutoBookmark(true).SetTagFactory(Tags.GetHtmlTagProcessorFactory());

        // Pipeline order: CSS resolution -> HTML tag processing -> PDF writer.
        HtmlPipeline htmlPipeline = new HtmlPipeline(hpc, new PdfWriterPipeline(doc, pdfWriter));
        IPipeline pipeline = new CssResolverPipeline(cssResolver, htmlPipeline);
        XMLWorker worker = new XMLWorker(pipeline, true);
        XMLParser xmlParse = new XMLParser(true, worker);

        // Parse the HTML *input*. The previous code parsed msOutput (the still-empty
        // PDF output stream), so nothing was ever added to the document and
        // doc.Close() failed with "The document has no pages".
        using (StringReader htmlReader = new StringReader(html)) {
            xmlParse.Parse(htmlReader);
        }

        // Closing the document flushes the PDF into msOutput; ToArray() remains
        // valid even if the writer closes the underlying MemoryStream.
        doc.Close();
        return msOutput.ToArray();
    }
}