@Matth3w 代码很棒,但不兼容 (VB6 - Visual Basic 6)
我已将他的代码降级为 vb6 并添加了一些有用的额外代码
1) 如果您的 HTML 文本包含 Unicode (UTF-8) 字符,添加 (Microsoft Forms 2 Object Library) 并将其 (Textbox) 用于 (输入和输出)
2) 添加 2 个文本框和 1 个命令按钮
3)设置文本框属性:(MultiLine = true)(将字体更改为Tahoma或不是:Ms Sans Serif)(滚动条:3)
4) 将以下代码粘贴到代码区:
Private Sub Command1_Click()
TextBox2.Text = RemoveHTML(TextBox1.Text)
End Sub
Public Function RemoveHTML(HTMLstring As String) As String
Dim DoRec As Boolean
Dim textOut As String
Dim SkipMe As Boolean
Dim SkipMeTag As String
Dim tmp As String
HTMLstring = Replace(LCase(HTMLstring), "</p>", vbCrLf)
HTMLstring = Replace(LCase(HTMLstring), "<br>", vbCrLf)
HTMLstring = Replace(LCase(HTMLstring), "<br/>", vbCrLf)
HTMLstring = Replace(LCase(HTMLstring), "‌", " ")
HTMLstring = Replace(LCase(HTMLstring), " ", " ")
HTMLstring = Replace(LCase(HTMLstring), "§", "-")
HTMLstring = Replace(LCase(HTMLstring), "–", "-")
HTMLstring = Replace(LCase(HTMLstring), "—", "-")
HTMLstring = Replace(LCase(HTMLstring), "‏", "")
HTMLstring = Replace(LCase(HTMLstring), "“", ChrW(34))
HTMLstring = Replace(LCase(HTMLstring), "”", ChrW(34))
HTMLstring = Replace(LCase(HTMLstring), "‘", ChrW(34))
HTMLstring = Replace(LCase(HTMLstring), "’", ChrW(34))
HTMLstring = Replace(LCase(HTMLstring), "«", ChrW(34))
HTMLstring = Replace(LCase(HTMLstring), "»", ChrW(34))
For l = 1 To Len(HTMLstring)
tmp = Mid(HTMLstring, l, 1)
' Enable skip-me mode (for large blocks of non-readable code)
If tmp = "<" And Mid(HTMLstring, l + 1, 6) = "script" Then SkipMe = True: SkipMeTag = "script": DoRec = False
If tmp = "<" And Mid(HTMLstring, l + 1, 5) = "style" Then SkipMe = True: SkipMeTag = "style": DoRec = False
' If we're already in skip-me mode, then figure out iff it's time to exit it.
If SkipMe = True Then
If tmp = "<" And Mid(HTMLstring, l + 1, Len(SkipMeTag) + 1) = "/" + SkipMeTag Then
SkipMe = False
tmp = ""
l = l + Len(SkipMeTag) + 1
DoRec = False
End If
End If
' If we arent in skip-me mode, move on to handle parsing of the HTML content (pulling text out from in between tags)
If SkipMe = False Then
If tmp = ">" Then DoRec = True: textOut = textOut & " ": tmp = ""
If tmp = "<" Then DoRec = False: tmp = ""
If DoRec = True Then
textOut = textOut & tmp
End If
End If
Next
RemoveHTML = textOut
End Function
(支持波斯语)将旧波斯语 ي 更改为新波斯语 ی 您可以添加以下行:
HTMLstring = Replace(LCase(HTMLstring), ChrW(1610), ChrW(1740))
更新:这个函数有一个重要的错误。如果它在你的变量中没有找到任何 html 标签,它会返回空值!为了安全起见,请使用以下条件:
if len(RemoveHTML(variable))>0 then variable=RemoveHTML(variable)