1

我有一个 Paragraph 对象,希望将其包含的内部文本转换为 HTML 片段。

我使用 Microsoft 的 Open XML SDK 2.0。

4

1 回答 1

0
    /// <summary>
    /// Converts every paragraph that is a direct child of the source document's
    /// body into an HTML fragment and writes each fragment to the console.
    /// </summary>
    /// <remarks>
    /// Each paragraph is cloned into its own temporary in-memory document so that
    /// it can be passed on its own to <c>GetHTMLOfDoc</c>. The source document's
    /// style definitions part, when present, is added to the temporary document.
    /// </remarks>
    [Test]
    public void GetHTMLOutOfParagraphsWithoutHeadingInformation()
    {
        // Open the file read-only since we don't need to change it.
        using (var wordprocessingDocument = WordprocessingDocument.Open(documentFileName, false))
        {
            var sourceMainPart = wordprocessingDocument.MainDocumentPart;
            var paragraphs = sourceMainPart.Document.Body
                .OfType<Paragraph>().ToList();
            StyleDefinitionsPart styles = sourceMainPart.StyleDefinitionsPart;

            foreach (var p in paragraphs)
            {
                // Dispose the temporary package as well as its backing stream;
                // the conversion must happen while both are still open.
                using (var memoryStream = new MemoryStream())
                using (var doc = WordprocessingDocument.Create(memoryStream, WordprocessingDocumentType.Document))
                {
                    var targetMainPart = doc.AddMainDocumentPart();

                    // The styles part is optional in a package; skip it when
                    // the source has none instead of passing null to AddPart.
                    if (styles != null)
                    {
                        targetMainPart.AddPart(styles);
                    }

                    targetMainPart.Document = new Document();
                    targetMainPart.Document.Body = new Body();

                    // Clone deeply: the paragraph still belongs to the source tree.
                    targetMainPart.Document.Body.Append(p.CloneNode(true));
                    targetMainPart.Document.Save();

                    Console.WriteLine(GetHTMLOfDoc(doc));
                }
            }
        }
    }

    /// <summary>
    /// Converts the given document to HTML and returns the markup of its
    /// <c>body</c> element.
    /// </summary>
    /// <param name="doc">Document handed to <c>HtmlConverter.ConvertToHtml</c>.</param>
    /// <returns>
    /// The <c>body</c> element serialized with <c>ToStringNewLineOnAttributes</c>.
    /// </returns>
    string GetHTMLOfDoc(WordprocessingDocument doc)
    {
        var conversionSettings = new HtmlConverterSettings();
        conversionSettings.PageTitle = "Test Title";
        conversionSettings.CssClassPrefix = "Pt";
        conversionSettings.Css = "";
        conversionSettings.ConvertFormatting = false;

        // Match on local-name() so the element is found regardless of the
        // XML namespace it is declared in.
        XElement bodyElement = HtmlConverter
            .ConvertToHtml(doc, conversionSettings)
            .XPathSelectElement("//*[local-name() = 'body']");

        return bodyElement.ToStringNewLineOnAttributes();
    }
}
于 2012-08-14T01:05:48.327 回答