一种可能的(工作)解决方案:
Private Sub ReadXMLAttributes(ByVal oXML As String)
ReadXMLAttributes(oXML, "mso-infoPathSolution")
End Sub
Private Sub ReadXMLAttributes(ByVal oXML As String, ByVal oTagName As String)
Try
Dim XmlDoc As New Xml.XmlDocument
XmlDoc.LoadXml(oXML)
oFileInfo = New InfoPathDocument
Dim XmlNodes As Xml.XmlNodeList = XmlDoc.GetElementsByTagName(oTagName)
For Each xNode As Xml.XmlNode In XmlNodes
With xNode
oFileInfo.SolutionVersion = .Attributes(InfoPathSolution.solutionVersion).Value
oFileInfo.ProductVersion = .Attributes(InfoPathSolution.productVersion).Value
oFileInfo.PIVersion = .Attributes(InfoPathSolution.PIVersion).Value
oFileInfo.href = .Attributes(InfoPathSolution.href).Value
oFileInfo.name = .Attributes(InfoPathSolution.name).Value
End With
Next
Catch ex As Exception
MsgBox(ex.Message, MsgBoxStyle.OkOnly, "ReadXMLAttributes")
End Try
End Sub
这可行,但如果重新排序属性,它仍然会遇到以下问题。我能想到的避免这个问题的唯一方法是将属性名称硬编码到我的程序中,并让它通过遍历解析的标签并搜索指定的标签来处理条目。
注意:InfoPathDocument 是我制作的自定义类,并不复杂:
Public Class InfoPathDocument
Private _sVersion As String
Private _pVersion As String
Private _piVersion As String
Private _href As String
Private _name As String
Public Property SolutionVersion() As String
Get
Return _sVersion
End Get
Set(ByVal value As String)
_sVersion = value
End Set
End Property
Public Property ProductVersion() As String
Get
Return _pVersion
End Get
Set(ByVal value As String)
_pVersion = value
End Set
End Property
Public Property PIVersion() As String
Get
Return _piVersion
End Get
Set(ByVal value As String)
_piVersion = value
End Set
End Property
Public Property href() As String
Get
Return _href
End Get
Set(ByVal value As String)
If value.ToLower.StartsWith("file:///") Then
value = value.Substring(8)
End If
_href = Form1.PathToUNC(URLDecode(value))
End Set
End Property
Public Property name() As String
Get
Return _name
End Get
Set(ByVal value As String)
_name = value
End Set
End Property
Sub New()
End Sub
Sub New(ByVal oSolutionVersion As String, ByVal oProductVersion As String, ByVal oPIVersion As String, ByVal oHref As String, ByVal oName As String)
SolutionVersion = oSolutionVersion
ProductVersion = oProductVersion
PIVersion = oPIVersion
href = oHref
name = oName
End Sub
Public Function URLDecode(ByVal StringToDecode As String) As String
Dim TempAns As String = String.Empty
Dim CurChr As Integer = 1
Dim oRet As String = String.Empty
Try
Do Until CurChr - 1 = Len(StringToDecode)
Select Case Mid(StringToDecode, CurChr, 1)
Case "+"
oRet &= " "
Case "%"
oRet &= Chr(Val("&h" & Mid(StringToDecode, CurChr + 1, 2)))
CurChr = CurChr + 2
Case Else
oRet &= Mid(StringToDecode, CurChr, 1)
End Select
CurChr += 1
Loop
Catch ex As Exception
MsgBox(ex.Message, MsgBoxStyle.OkOnly, "URLDecode")
End Try
Return oRet
End Function
End Class
原始问题
我正在处理一个需要读取 XML 文档的项目,尤其是来自 Microsoft InfoPath 的已保存表单。
以下是我将使用的一个简单示例以及一些可能有用的背景信息:
<?xml version="1.0" encoding="UTF-8"?>
<?mso-infoPathSolution solutionVersion="1.0.0.2" productVersion="12.0.0" PIVersion="1.0.0.0" href="file:///C:\Users\darren\Desktop\simple_form.xsn" name="urn:schemas-microsoft-com:office:infopath:simple-form:-myXSD-2009-05-15T14-16-37" ?>
<?mso-application progid="InfoPath.Document" versionProgid="InfoPath.Document.2"?>
<my:myFields xmlns:my="http://schemas.microsoft.com/office/infopath/2003/myXSD/2009-05-15T14:16:37" xml:lang="en-us">
<my:first_name>John</my:first_name>
<my:last_name>Doe</my:last_name>
</my:myFields>
我现在的目标是提取表单的版本 ID 和位置。使用正则表达式很容易:
Dim _doc As New XmlDocument
_doc.Load(_thefile)
Dim oRegex As String = "^solutionVersion=""(?<sVersion>[0-9.]*)"" productVersion=""(?<pVersion>[0-9.]*)"" PIVersion=""(?<piVersion>[0-9.]*)"" href=""(?<href>.*)"" name=""(?<name>.*)""$"
Dim rx As New Regex(oRegex), m As Match = Nothing
For Each section As XmlNode In _doc.ChildNodes
m = rx.Match(section.InnerText.Trim)
If m.Success Then
Dim temp As String = m.Groups("name").Value.Substring(m.Groups("name").Value.ToLower.IndexOf("infopath") + ("infopath").Length + 1)
fileName = temp.Substring(0, temp.LastIndexOf(":"))
fileVersion = m.Groups("sVersion").Value
End If
Next
这个工作解决方案带来的唯一问题是,如果 InfoPath 文档标题中的架构发生变化……例如解决方案版本和产品版本属性交换位置(微软似乎喜欢这样做)。
所以我选择尝试使用VB.NET的XML解析能力来帮助我实现上述结果,无正则表达式。
ChildNode
来自包含我需要的信息的对象_doc
,但是它没有任何子节点:
_doc.ChildNode(1).HasChildNodes = False
谁能帮我解决这个问题?