0

当我尝试用 iTextSharp 解析一个较大的 HTML 文件时,内存占用会增加 200 MB 以上。

我尝试了以下方法:

/// <summary>
/// Renders an HTML fragment into a PDF document and returns the PDF bytes.
/// </summary>
/// <param name="foundText">HTML markup to render; must not be null.</param>
/// <param name="styles">Optional style sheet forwarded to <c>HTMLWorker.ParseToList</c>; may be null.</param>
/// <param name="landscape">When true, the page size is set to rotated A4 before the document is opened.</param>
/// <returns>The full contents of the in-memory stream the PDF writer wrote to.</returns>
/// <exception cref="ArgumentNullException"><paramref name="foundText"/> is null.</exception>
public byte[] CreatePdfToBytArray(string foundText, StyleSheet styles = null, bool landscape = false)
    {
        if (foundText == null)
        {
            throw new ArgumentNullException("foundText");
        }

        using (var d = new Document())
        {
            if (landscape)
            {
                d.SetPageSize(PageSize.A4.Rotate());
            }
            using (var stream = new MemoryStream())
            {
                // NOTE(review): this inserts the two characters '/' and 'n', not a
                // line break. It looks like a typo for "\n" -- confirm the intent
                // before changing it, since it alters the rendered text.
                foundText = foundText.Replace("<br>", "/n");

                PdfWriter.GetInstance(d, stream);
                d.Open();

                // ParseToList is static, so no HTMLWorker instance is needed here.
                // The reader is scoped so it is released even when parsing throws.
                using (var stringReader = new StringReader(foundText))
                {
                    List<IElement> parsedList = HTMLWorker.ParseToList(stringReader, styles);

                    foreach (IElement item in parsedList)
                    {
                        d.Add(item);
                    }
                }

                // Close the document before reading the stream so the PDF is complete.
                d.Close();

                return stream.ToArray();
            }
        }
    }

/// <summary>
/// Renders HTML markup into an A4 PDF document and returns the PDF bytes.
/// </summary>
/// <remarks>
/// A fixed introductory paragraph is always written first. If the parser throws,
/// the exception is not rethrown; its message is appended to the document as an
/// "Error!" paragraph whose first chunk is coloured red.
/// </remarks>
/// <param name="HTMLCode">HTML markup handed to the parser through a <c>StringReader</c>.</param>
/// <returns>The full contents of the in-memory stream the PDF writer wrote to.</returns>
protected byte[] ConvertHTMLToPDF(string HTMLCode)
    {
        // Every disposable is scoped with `using` so it is released even when
        // opening the document or adding the first paragraph throws.
        using (var stream = new MemoryStream())
        using (var reader = new StringReader(HTMLCode))
        using (var doc = new Document(PageSize.A4))
        using (var parser = new HTMLWorker(doc))
        {
            PdfWriter.GetInstance(doc, stream);
            doc.Open();
            doc.Add(new Paragraph("I hope this works for you."));

            try
            {
                parser.Parse(reader);
            }
            catch (Exception ex)
            {
                // Best effort: surface parser errors inside the PDF instead of failing.
                var paragraph = new Paragraph("Error!" + ex.Message);
                Chunk text = paragraph.Chunks[0];
                if (text != null)
                {
                    text.Font.Color = BaseColor.RED;
                }
                doc.Add(paragraph);
            }
            finally
            {
                // Close the document before reading the stream so the PDF is complete.
                doc.Close();
            }

            return stream.ToArray();
        }
    }

这两种方法都会使内存占用激增。我知道问题出在 HTMLWorker.ParseToList 方法或 Parse 方法中,有什么办法可以释放内存吗?

谢谢

4

0 回答 0