我有一个 Paragraph 对象,并希望将它包含的内部文本转换为 HTML 片段。
我使用 Microsoft 的 Open XML SDK 2.0。
/// <summary>
/// Converts every paragraph that is a direct child of the document body into its own
/// HTML fragment and writes each fragment to the console.
/// </summary>
/// <remarks>
/// Each paragraph is cloned into a temporary in-memory document, which is then handed to
/// <c>GetHTMLOfDoc</c>. The source file named by <c>documentFileName</c> is only read.
/// </remarks>
[Test]
public void GetHTMLOutOfParagraphsWithoutHeadingInformation()
{
    // Open the file read-only since we don't need to change it.
    using (var wordprocessingDocument = WordprocessingDocument.Open(documentFileName, false))
    {
        var sourceMainPart = wordprocessingDocument.MainDocumentPart;
        List<Paragraph> paragraphs = sourceMainPart.Document.Body
            .OfType<Paragraph>().ToList();
        StyleDefinitionsPart styles = sourceMainPart.StyleDefinitionsPart;

        foreach (var p in paragraphs)
        {
            using (var memoryStream = new MemoryStream())
            // Dispose the temporary package as well as its backing stream on every iteration.
            using (var doc = WordprocessingDocument.Create(memoryStream, WordprocessingDocumentType.Document))
            {
                var targetMainPart = doc.AddMainDocumentPart();

                // AddPart rejects a null argument, so only attach styles when the source has them.
                if (styles != null)
                {
                    targetMainPart.AddPart(styles);
                }

                targetMainPart.Document = new Document();
                targetMainPart.Document.Body = new Body();

                // Clone deeply: the paragraph still belongs to the source document's tree.
                targetMainPart.Document.Body.Append(p.CloneNode(true));
                targetMainPart.Document.Save();

                Console.WriteLine(GetHTMLOfDoc(doc));
            }
        }
    }
}
/// <summary>
/// Converts the given document to HTML and returns the string form of the
/// first element whose local name is <c>body</c>.
/// </summary>
/// <param name="doc">The word-processing document to convert.</param>
/// <returns>
/// The selected <c>body</c> element rendered through <c>ToStringNewLineOnAttributes</c>.
/// </returns>
string GetHTMLOfDoc(WordprocessingDocument doc)
{
    // Formatting conversion is switched off and no CSS is supplied.
    var converterSettings = new HtmlConverterSettings
    {
        PageTitle = "Test Title",
        CssClassPrefix = "Pt",
        Css = "",
        ConvertFormatting = false
    };

    XElement convertedHtml = HtmlConverter.ConvertToHtml(doc, converterSettings);

    // Match on local-name() so the lookup does not depend on the element's namespace.
    XElement bodyElement = convertedHtml.XPathSelectElement("//*[local-name() = 'body']");

    return bodyElement.ToStringNewLineOnAttributes();
}
}