这里的一些解决方案需要引用 MS Word 对象库。我自己摸索了一番,找到了一个不依赖它的解决方案。它用 VBA 去除 RTF 标记以及字体表、样式表等其他多余内容,可能对你有帮助。我用你的数据运行了它,除了空白字符之外,得到的输出与你预期的相同。
' Reports whether Character is one of the characters that RemoveTags treats as
' part of a tag name: an ASCII letter, a decimal digit, or a hyphen.
'
' Parameters:
'   Character - the string to classify; expected to be exactly one character.
'
' Returns:
'   True when Character is a single letter, digit or "-"; False otherwise,
'   including when Character is empty or longer than one character.
Public Function Alphanumeric(Character As String) As Boolean
    Const TagCharacters As String = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-"

    ' InStr returns the start position (1) for a zero-length search string, and
    ' would match multi-character runs such as "AB"; neither is a character.
    If Len(Character) <> 1 Then
        Alphanumeric = False
        Exit Function
    End If

    ' Binary comparison keeps the result independent of any Option Compare
    ' setting in the hosting module.
    If InStr(1, TagCharacters, Character, vbBinaryCompare) > 0 Then
        Alphanumeric = True
    Else
        Alphanumeric = False
    End If
End Function
' Removes the first RTF group whose opening text is "{\" followed by GroupName
' (for example "{\fonttbl ... }"), including any groups nested inside it.
'
' Parameters:
'   RTFString - the RTF text to process.
'   GroupName - the control word that opens the group, without the backslash.
'
' Returns:
'   RTFString with the located group cut out. RTFString is returned unchanged
'   when the group is not present, or when its closing brace cannot be found
'   (unbalanced input).
Public Function RemoveGroup(RTFString As String, GroupName As String) As String
    ' Long rather than Integer: positions in strings longer than 32,767
    ' characters would overflow an Integer.
    Dim GroupStart As Long
    Dim Position As Long
    Dim Depth As Long
    Dim TotalLength As Long

    GroupStart = InStr(RTFString, "{\" & GroupName)

    ' If the group was not found in the RTF string, return that string unchanged.
    If GroupStart = 0 Then
        RemoveGroup = RTFString
        Exit Function
    End If

    ' Scan from the start of the group until its matching close brace.
    ' Groups are delimited by { and } and may be nested, so count up on { and
    ' down on }. When the count returns to zero the end has been found.
    TotalLength = Len(RTFString)
    Position = GroupStart
    Do
        Select Case Mid$(RTFString, Position, 1)
            Case "{"
                Depth = Depth + 1
            Case "}"
                Depth = Depth - 1
            Case "\"
                ' A backslash introduces a control word or an escaped
                ' character (\{, \}, \\). Skip the character after it so an
                ' escaped brace is not counted as a group delimiter.
                Position = Position + 1
        End Select
        Position = Position + 1
    Loop While Depth > 0 And Position <= TotalLength

    ' Ran off the end of the string without closing the group: the input is
    ' unbalanced, so leave it alone rather than loop forever or cut blindly.
    If Depth > 0 Then
        RemoveGroup = RTFString
        Exit Function
    End If

    ' Splice out exactly the span that was scanned. (A text Replace of the
    ' span would also delete any identical text elsewhere in the string.)
    RemoveGroup = Left$(RTFString, GroupStart - 1) & Mid$(RTFString, Position)
End Function
' Strips backslash-introduced tags from RTFString and returns the remaining text.
'
' A tag is a backslash followed by the run of characters for which
' Alphanumeric returns True. Everything else is copied to the result as-is;
' this includes the character that ends a tag (for example the space after
' "\par"), so delimiter whitespace is retained in the output.
'
' Parameters:
'   RTFString - the text to process.
'
' Returns:
'   RTFString with every tag removed.
Public Function RemoveTags(RTFString As String) As String
    Dim TotalLength As Long
    Dim L As Long
    Dim R As Long
    Dim Current As String
    Dim Result As String

    TotalLength = Len(RTFString)
    L = 1

    ' Search to the end of the string. The bound is inclusive so that the
    ' final character is processed as well.
    Do While L <= TotalLength
        Current = Mid$(RTFString, L, 1)
        If Current <> "\" Then
            ' Append anything that's not a tag to the return value.
            Result = Result & Current
            L = L + 1
        Else
            ' Search to the end of the tag. The bound is checked before
            ' Alphanumeric is called, so the scan never reads past the end.
            R = L + 1
            Do While R <= TotalLength
                If Not Alphanumeric(Mid$(RTFString, R, 1)) Then Exit Do
                R = R + 1
            Loop
            ' Resume at the first character after the tag.
            L = R
        End If
    Loop

    RemoveTags = Result
End Function
' Deletes every "{" and "}" character from RTFString.
'
' Parameters:
'   RTFString - the text to process.
'
' Returns:
'   A copy of RTFString with all opening and closing braces removed.
Public Function RemoveBraces(RTFString As String) As String
    Dim Stripped As String
    Dim Delimiter As Variant

    Stripped = RTFString
    ' Drop each brace character in turn: opening braces first, then closing.
    For Each Delimiter In Array("{", "}")
        Stripped = Replace(Stripped, CStr(Delimiter), "")
    Next Delimiter

    RemoveBraces = Stripped
End Function
' Reduces an RTF document to its text by applying the clean-up steps in order:
'   1. remove line breaks,
'   2. remove the fonttbl, colortbl and stylesheet groups (RemoveGroup),
'   3. remove all braces (RemoveBraces),
'   4. remove all remaining tags (RemoveTags).
'
' Parameters:
'   RTFString - the RTF text to clean.
'
' Returns:
'   The text left after all of the steps above have been applied.
Public Function RemoveTheFluff(RTFString As String) As String
    Dim Cleaned As String

    ' Strip CR and LF individually so that bare LF or bare CR line endings
    ' are removed as well as CR+LF pairs.
    Cleaned = Replace(RTFString, vbCr, "")
    Cleaned = Replace(Cleaned, vbLf, "")

    ' The table groups must go before the braces do: RemoveGroup relies on
    ' the braces to find where each group ends.
    Cleaned = RemoveGroup(Cleaned, "fonttbl")
    Cleaned = RemoveGroup(Cleaned, "colortbl")
    Cleaned = RemoveGroup(Cleaned, "stylesheet")

    RemoveTheFluff = RemoveTags(RemoveBraces(Cleaned))
End Function