所以,我按照承诺回答我自己的问题。
我开发了一种方法,可以返回 Word 文档段落中某个特定 run 的“有效”run 属性。它按照标准 ISO/IEC 29500-1,综合考虑了文档默认属性、所应用的样式(包括相关的样式层次结构)以及直接设置在 run 上的属性。
有趣的是,Word 在以下两个方面似乎并没有完全遵循标准: 1 - 如果一个段落没有应用任何样式,Word 会应用默认的段落样式。按我对标准的理解,此时不应该应用任何样式。run 不会发生这种情况:当 run 没有指定 run 样式时,不会应用默认的 run 样式。2 - 为了获得有效的 run 属性,有必要“汇总样式”。段落样式和 run 样式都遵循样式层次结构。为了获得特定的属性值,需要先在所应用的样式中查找,如果不存在则在父样式中查找,依此类推。如果某个属性在某个样式中已经定义了特定的值,而子样式中该属性的值相同,则不应在子样式中重复定义该属性。Word 对字符样式并不遵循此规则。事实上,run 样式所应用的全部 run 属性都可以直接从该 run 样式本身获得,而无需沿样式层次结构查找。这不符合标准。
现在,让我详细介绍一下我的解决方案:
首先,我的代码使用了 Open XML PowerTools:
http://powertools.codeplex.com/
接下来,为了按照样式继承关系汇总样式,我改编并实现了 Eric White 在以下位置提供的解决方案:
http://blogs.msdn.com/b/ericwhite/archive/2009/12/13/implementing-inheritance-in-xml.aspx
和
http://blogs.msdn.com/b/ericwhite/archive/2009/10/29/open-xml-wordprocessingml-style-inheritance.aspx
获取 run 属性的完整算法可以在标准中找到,Eric White 也在以下文章中给出了该算法:
http://blogs.msdn.com/b/ericwhite/archive/2009/11/12/assemble-paragraph-and-run-properties-for-cells-in-a-table.aspx
在这种情况下,它涉及从表格内的单元格中提取属性。我的方法不适用于表格内的段落(我只是不需要它:-))但它可以扩展到处理这些情况(所有信息都在 Eric 的文章中)
请注意,我正确处理了切换属性(toggle properties),并且遵循的是 Word 的实际行为(即上文提到的 Word 与标准之间的那两点差异)。
最后,代码:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml.Linq;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Wordprocessing;
using OpenXmlPowerTools;
namespace MyNameSpace
{
class OpenXmlPowerToolsUtilities
{
/// <summary>
/// Computes the effective run properties (<c>w:rPr</c>) of a run by layering, in order:
/// the document default run properties, the run properties of the paragraph style (or of
/// the default paragraph style when the paragraph names none), the run properties of the
/// run style, the resolved toggle properties, and finally the run's direct formatting.
/// Later layers take precedence over earlier ones.
/// </summary>
/// <param name="wordDoc">Document whose style definitions part supplies defaults and styles.</param>
/// <param name="run">The run element; it must be a direct child of a <c>w:p</c> element.</param>
/// <returns>
/// A new <c>w:rPr</c> element with the merged properties, or <c>null</c> when the run is not
/// a direct child of a paragraph or the document has no main part / style definitions part.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="wordDoc"/> or <paramref name="run"/> is <c>null</c>.
/// </exception>
public static XElement GetEffectiveRunProperties(WordprocessingDocument wordDoc, XElement run)
{
    if (wordDoc == null)
        throw new ArgumentNullException("wordDoc");
    if (run == null)
        throw new ArgumentNullException("run");

    // A detached run has no parent; runs nested in anything but a paragraph are not supported.
    XElement paragraph = run.Parent;
    if (paragraph == null || paragraph.Name != W.p)
        return null;

    MainDocumentPart mainPart = wordDoc.MainDocumentPart;
    StyleDefinitionsPart styleDefinitionsPart = mainPart == null ? null : mainPart.StyleDefinitionsPart;
    if (styleDefinitionsPart == null)
        return null;
    XElement styles = styleDefinitionsPart.GetXDocument().Root;

    List<XElement> runPropertiesList = new List<XElement>();

    // 1 - document default run properties
    XElement runDefault = styles.Elements(W.docDefaults)
        .Elements(W.rPrDefault)
        .Elements(W.rPr)
        .FirstOrDefault();
    if (runDefault != null)
        runPropertiesList.Add(runDefault);

    // 2 - run properties contributed by the paragraph style
    XElement pStyleRunProperties = null;
    string pStyle = (string)paragraph.Elements(W.pPr)
        .Elements(W.pStyle)
        .Attributes(W.val)
        .FirstOrDefault();
    if (pStyle != null)
    {
        pStyleRunProperties = AssembleStyleInformation(styles, pStyle)
            .Elements(W.rPr)
            .FirstOrDefault();
    }
    else
    {
        // No explicit paragraph style: fall back to the default paragraph style, as Word does.
        // w:default is an on/off attribute, so accept every "on" spelling, not only "1".
        XElement defaultParagraphStyle = styles
            .Elements(W.style)
            .FirstOrDefault(e =>
            {
                string isDefault = (string)e.Attribute(W._default);
                return (string)e.Attribute(W.type) == "paragraph" &&
                    (isDefault == "1" || isDefault == "true" || isDefault == "on");
            });
        // The styles part is not required to declare a default paragraph style.
        if (defaultParagraphStyle != null)
            pStyleRunProperties = defaultParagraphStyle.Elements(W.rPr).FirstOrDefault();
    }
    if (pStyleRunProperties != null)
        runPropertiesList.Add(pStyleRunProperties);

    // 3 - run properties contributed by the run (character) style
    XElement directRunProperties = run.Elements(W.rPr).FirstOrDefault();
    string rStyle = (string)run.Elements(W.rPr)
        .Elements(W.rStyle)
        .Attributes(W.val)
        .FirstOrDefault();
    XElement rStyleRunProperties = null;
    if (rStyle != null)
    {
        rStyleRunProperties = AssembleStyleInformation(styles, rStyle)
            .Elements(W.rPr)
            .FirstOrDefault();
    }
    if (rStyleRunProperties != null)
        runPropertiesList.Add(rStyleRunProperties);

    // Toggle properties are resolved across defaults and styles and layered on top of them.
    // Absent document defaults are treated as an empty property set.
    XElement toggleProperties = AssembleToggleProperties(
        runDefault ?? new XElement(W.rPr),
        pStyleRunProperties,
        rStyleRunProperties);
    if (toggleProperties != null)
        runPropertiesList.Add(toggleProperties);

    // 4 - direct run formatting has the highest precedence
    if (directRunProperties != null)
        runPropertiesList.Add(directRunProperties);

    return AssembleRunProperties(runPropertiesList);
}
/// <summary>
/// Folds a list of run-property elements into a single <c>w:rPr</c> element by merging
/// them one after another, starting from an empty <c>w:rPr</c>.
/// </summary>
/// <param name="runPropertiesList">Property sets in merge order (first merged first).</param>
/// <returns>The accumulated <c>w:rPr</c> element.</returns>
private static XElement AssembleRunProperties(List<XElement> runPropertiesList)
{
    XElement accumulated = new XElement(W.rPr,
        new XAttribute(XNamespace.Xmlns + "w", W.w));

    foreach (XElement layer in runPropertiesList)
    {
        accumulated = MergeChildElements(accumulated, layer);
    }

    return accumulated;
}
/// <summary>
/// Resolves every toggle property listed in <c>toggleProperties</c> across the document
/// defaults, the paragraph style and the run style, and returns the results in a new
/// <c>w:rPr</c> element.
/// </summary>
/// <remarks>
/// For each toggle property: if the document defaults switch it on, that definition wins.
/// Otherwise, if only one of the two styles defines it, that definition is used; if both
/// define it, an "off" on one side yields the other side's definition, and "on" on both
/// sides cancels out to an explicit off (<c>w:val="0"</c>).
/// </remarks>
/// <param name="runDefault">Document default run properties; may be <c>null</c>.</param>
/// <param name="pStyleRunProperties">Run properties of the paragraph style; may be <c>null</c>.</param>
/// <param name="rStyleRunProperties">Run properties of the run style; may be <c>null</c>.</param>
/// <returns>A <c>w:rPr</c> element containing only the resolved toggle properties.</returns>
static XElement AssembleToggleProperties(XElement runDefault, XElement pStyleRunProperties, XElement rStyleRunProperties)
{
    XElement runToggleProperties = new XElement(W.rPr,
        new XAttribute(XNamespace.Xmlns + "w", W.w));

    foreach (XName toggleProperty in toggleProperties)
    {
        // The document may carry no default run properties at all.
        XElement runDefaultToggleProperty = runDefault == null
            ? null
            : runDefault.Elements(toggleProperty).FirstOrDefault();
        if (runDefaultToggleProperty != null && !IsToggleOff(runDefaultToggleProperty))
        {
            runToggleProperties.Add(runDefaultToggleProperty);
            continue;
        }

        XElement pStyleToggleProperty = pStyleRunProperties == null
            ? null
            : pStyleRunProperties.Elements(toggleProperty).FirstOrDefault();
        XElement rStyleToggleProperty = rStyleRunProperties == null
            ? null
            : rStyleRunProperties.Elements(toggleProperty).FirstOrDefault();

        if (pStyleToggleProperty == null && rStyleToggleProperty == null)
            continue;

        if (pStyleToggleProperty == null)
            runToggleProperties.Add(rStyleToggleProperty);
        else if (rStyleToggleProperty == null)
            runToggleProperties.Add(pStyleToggleProperty);
        else if (IsToggleOff(rStyleToggleProperty))
            runToggleProperties.Add(pStyleToggleProperty);
        else if (IsToggleOff(pStyleToggleProperty))
            runToggleProperties.Add(rStyleToggleProperty);
        else
            // Both styles switch the property on: the two toggles cancel each other.
            runToggleProperties.Add(new XElement(toggleProperty, new XAttribute(W.val, "0")));
    }

    return runToggleProperties;
}

/// <summary>
/// Tells whether a toggle element is explicitly switched off. The on/off attribute may be
/// spelled "0", "false" or "off"; a missing <c>w:val</c> means the property is on.
/// </summary>
private static bool IsToggleOff(XElement toggleElement)
{
    string val = (string)toggleElement.Attribute(W.val);
    return val == "0" || val == "false" || val == "off";
}
/// <summary>
/// Enumerates a style and its <c>w:basedOn</c> ancestors, starting with the style
/// identified by <paramref name="styleId"/> and walking towards the root of the chain.
/// </summary>
/// <param name="styles">The <c>w:styles</c> root element to search.</param>
/// <param name="styleId">Identifier of the style to start from.</param>
/// <returns>
/// The style followed by each ancestor. The sequence ends when a style has no
/// <c>w:basedOn</c>, when a referenced style id is not defined, or when the chain
/// would revisit a style id (a cyclic <c>w:basedOn</c> reference).
/// </returns>
public static IEnumerable<XElement> StyleChainReverseOrder(XElement styles, string styleId)
{
    // Remember visited ids so a malformed, cyclic basedOn chain cannot loop forever.
    HashSet<string> visited = new HashSet<string>();
    string current = styleId;

    while (current != null && visited.Add(current))
    {
        XElement style = styles.Elements(W.style)
            .FirstOrDefault(s => (string)s.Attribute(W.styleId) == current);

        // A dangling style reference ends the chain instead of dereferencing null.
        if (style == null)
            yield break;

        yield return style;
        current = (string)style.Elements(W.basedOn).Attributes(W.val).FirstOrDefault();
    }
}
/// <summary>
/// Returns the same styles as <c>StyleChainReverseOrder</c> in the opposite order, so the
/// style identified by <paramref name="styleId"/> comes last.
/// </summary>
/// <param name="styles">The <c>w:styles</c> root element to search.</param>
/// <param name="styleId">Identifier of the style whose chain is requested.</param>
public static IEnumerable<XElement> StyleChain(XElement styles, string styleId)
{
    IEnumerable<XElement> leafFirst = StyleChainReverseOrder(styles, styleId);
    return Enumerable.Reverse(leafFirst);
}
/// <summary>
/// Rolls up a style with its ancestors: starting from an empty <c>w:style</c> element, each
/// style returned by <c>StyleChain</c> is merged in turn on top of the accumulated result.
/// </summary>
/// <param name="styles">The <c>w:styles</c> root element to search.</param>
/// <param name="styleId">Identifier of the style to roll up.</param>
/// <returns>The accumulated <c>w:style</c> element.</returns>
private static XElement AssembleStyleInformation(XElement styles, string styleId)
{
    XElement rolledUp = new XElement(W.style, new XAttribute(XNamespace.Xmlns + "w", W.w));

    foreach (XElement link in StyleChain(styles, styleId))
    {
        rolledUp = MergeChildElements(rolledUp, link);
    }

    return rolledUp;
}
// Style-reference element names (paragraph style and run style).
// This array is not referenced by any other code visible in this file, and it is not
// consulted by IsValidMergeElement, so these elements are dropped when merging.
public static XName[] Others =
{
W.pStyle,
W.rStyle
};
// Container elements that are merged child by child rather than replaced as a whole.
// Consulted by HasMergeElementsSemantics and IsValidMergeElement; MergeChildElements
// itself recurses into W.rPr and W.pPr through explicit name checks.
public static XName[] ElementsWithMergeElementsSemantics =
{
W.style,
W.rPr,
W.pPr
};
// Elements merged attribute by attribute: MergeChildElements keeps every attribute of the
// newer element and adds those attributes of the already merged element that the newer one
// does not define. Note that W.spacing directly inside W.rPr is special-cased there and
// replaced as a whole instead.
public static XName[] ElementsWithMergeAttributesSemantics =
{
W.ind,
W.spacing,
W.lang
};
// Elements with replace semantics: when MergeChildElements finds one of these in the newer
// element, that element is taken as-is and the same-named element of the already merged
// result is discarded.
// W.shd and W.snapToGrid are listed twice; this is harmless for the Contains lookups
// performed by HasReplaceSemantics and IsValidMergeElement.
public static XName[] ElementsWithReplaceElementsSemantics =
{
W.name, // The style Name element
W.adjustRightInd,
W.autoSpaceDE,
W.autoSpaceDN,
W.bidi,
W.cnfStyle, // within a table
W.contextualSpacing,
W.divId,
W.framePr,
W.jc,
W.keepLines,
W.keepNext,
W.kinsoku,
W.mirrorIndents,
W.numPr,
W.outlineLvl,
W.overflowPunct,
W.pageBreakBefore,
W.pBdr,
W.shd,
W.snapToGrid,
W.suppressAutoHyphens,
W.suppressLineNumbers,
W.suppressOverlap,
W.tabs,
W.textAlignment,
W.textboxTightWrap, // within a textbox
W.textDirection,
W.topLinePunct,
W.widowControl,
W.wordWrap,
W.b,
W.bCs,
W.bdr,
W.caps,
W.color,
W.cs,
W.dstrike,
W.eastAsianLayout,
W.effect,
W.em,
W.emboss,
W.fitText,
W.highlight,
W.i,
W.iCs,
W.imprint,
W.kern,
W.noProof,
W.oMath,
W.outline,
W.position,
W.rFonts,
W.rtl,
W.shadow,
W.shd,
W.smallCaps,
W.snapToGrid,
// W.spacing is deliberately absent: it is in the merge-attributes list, and run-level
// spacing is handled by a special case in MergeChildElements.
//W.spacing, // different from paragraph spacing
W.specVanish,
W.strike,
W.sz,
W.szCs,
W.u,
W.vanish,
W.vertAlign,
W._w,
W.webHidden
};
// Run properties treated as toggle properties. AssembleToggleProperties iterates this list
// to resolve each one across document defaults, paragraph style and run style;
// IsToggleProperty tests membership in it.
public static XName[] toggleProperties =
{
W.b,
W.bCs,
W.caps,
W.emboss,
W.i,
W.iCs,
W.imprint,
W.outline,
W.shadow,
W.smallCaps,
W.strike,
W.vanish
};
/// <summary>
/// Tells whether <paramref name="name"/> appears in any of the three merge tables
/// (merge-attributes, merge-elements or replace-elements).
/// </summary>
public static bool IsValidMergeElement(XName name)
{
    return ElementsWithMergeAttributesSemantics.Contains(name)
        || ElementsWithMergeElementsSemantics.Contains(name)
        || ElementsWithReplaceElementsSemantics.Contains(name);
}
/// <summary>
/// Tells whether <paramref name="name"/> is listed in <c>toggleProperties</c>.
/// </summary>
public static bool IsToggleProperty(XName name)
{
    return toggleProperties.Contains(name);
}
/// <summary>
/// Tells whether <paramref name="name"/> is listed in
/// <c>ElementsWithReplaceElementsSemantics</c>.
/// </summary>
public static bool HasReplaceSemantics(XName name)
{
    return ElementsWithReplaceElementsSemantics.Contains(name);
}
/// <summary>
/// Tells whether <paramref name="name"/> is listed in
/// <c>ElementsWithMergeElementsSemantics</c>.
/// </summary>
public static bool HasMergeElementsSemantics(XName name)
{
    return ElementsWithMergeElementsSemantics.Contains(name);
}
/// <summary>
/// Tells whether <paramref name="name"/> is listed in
/// <c>ElementsWithMergeAttributesSemantics</c>.
/// </summary>
public static bool HasMergeAttributesSemantics(XName name)
{
    return ElementsWithMergeAttributesSemantics.Contains(name);
}
/// <summary>
/// Merges <paramref name="element"/> on top of <paramref name="mergedElement"/> and returns
/// the result as a new element; neither input is modified.
/// </summary>
/// <remarks>
/// When only one side is present, a filtered copy of it is returned: namespace declarations
/// are dropped, a <c>w:style</c> keeps only its <c>w:type</c> and <c>w:styleId</c>
/// attributes, nested <c>w:rPr</c>/<c>w:pPr</c> are filtered recursively and children that
/// are not in any merge table are removed.
/// When both sides are present, each child of <paramref name="element"/> is handled by its
/// semantics (replace, merge attributes, or recursive merge for <c>w:rPr</c>/<c>w:pPr</c>;
/// anything else is dropped), and children of <paramref name="mergedElement"/> whose name
/// does not occur in <paramref name="element"/> are carried over unchanged.
/// </remarks>
/// <param name="mergedElement">The result accumulated so far; may be <c>null</c>.</param>
/// <param name="element">The element taking precedence; may be <c>null</c>.</param>
/// <returns>The merged element, or <c>null</c> when both arguments are <c>null</c>.</returns>
public static XElement MergeChildElements(XElement mergedElement, XElement element)
{
    // Nothing on either side: there is nothing to merge (and nothing to dereference).
    if (mergedElement == null && element == null)
        return null;

    if (mergedElement == null || element == null)
    {
        XElement source = element ?? mergedElement;
        return new XElement(source.Name,
            new XAttribute(XNamespace.Xmlns + "w", W.w),
            CopyableAttributes(source),
            source.Elements().Select(e =>
            {
                if (e.Name == W.rPr || e.Name == W.pPr)
                    return MergeChildElements(null, e);
                return IsValidMergeElement(e.Name) ? e : null;
            }));
    }

    return new XElement(element.Name,
        new XAttribute(XNamespace.Xmlns + "w", W.w),
        CopyableAttributes(element),
        element.Elements().Select(e =>
        {
            // Replace semantics: the newer definition wins outright. Spacing directly inside
            // run properties is also replaced rather than merged attribute by attribute.
            if (HasReplaceSemantics(e.Name) || (element.Name == W.rPr && e.Name == W.spacing))
                return e;

            if (HasMergeAttributesSemantics(e.Name))
            {
                // Newer attributes first, then the older ones the newer element lacks.
                return new XElement(e.Name,
                    e.Attributes(),
                    mergedElement.Elements(e.Name).Attributes()
                        .Where(a => !e.Attributes().Any(z => z.Name == a.Name)));
            }

            if (e.Name == W.rPr || e.Name == W.pPr)
                return MergeChildElements(mergedElement.Element(e.Name), e);

            // Children outside the merge tables are dropped.
            return null;
        }),
        // Keep what only the older side defines.
        mergedElement.Elements()
            .Where(m => !element.Elements(m.Name).Any()));
}

/// <summary>
/// Selects the attributes of <paramref name="element"/> that are copied into a merged
/// element: namespace declarations are skipped, and for <c>w:style</c> only
/// <c>w:type</c> and <c>w:styleId</c> are kept.
/// </summary>
private static IEnumerable<XAttribute> CopyableAttributes(XElement element)
{
    return element.Attributes().Where(a =>
        !a.IsNamespaceDeclaration &&
        (element.Name != W.style || a.Name == W.type || a.Name == W.styleId));
}
}
}