0

我难住了。我不想在我的应用程序中使用 WebBrowser,我想通过 id 获取特定元素。我的代码是:

' Download the page and show its raw HTML in TextBox1.
' The Using blocks release the response and the reader (and the stream it wraps)
' even if reading fails part-way through.
Dim request As System.Net.HttpWebRequest = DirectCast(System.Net.HttpWebRequest.Create("http://www.google.com/finance?q=NASDAQ:GOOG"), System.Net.HttpWebRequest)
Dim sourcecode As String
Using response As System.Net.HttpWebResponse = DirectCast(request.GetResponse(), System.Net.HttpWebResponse)
    Using sr As New System.IO.StreamReader(response.GetResponseStream())
        sourcecode = sr.ReadToEnd()
    End Using
End Using
TextBox1.Text = sourcecode

这让我得到了源代码。但是我如何获得一个特定的元素?我认为应该有一种简单的方法可以做到这一点……顺便说一句,我不想使用正则表达式,也不想下载 HTML Agility Pack。

4

2 回答 2

0

您可以制作一个解析表来识别 html 标签,并在标签内搜索id=elementname(加上可能的空白字符)。这似乎不是不可能完成的任务,因为您可以忽略大多数标签并且不必验证 html。只考虑<>,忽略引号、脚本等的内容。还有很多细节,需要一点工作,但编程很有趣。

另一种方法是下载 HTML Agility Pack 之类的库、使用浏览器控件,或者使用您希望避免的正则表达式。

于 2013-01-15T05:30:43.233 回答
0

这是一个非常粗略的想法:它不适用于需要单独结束标签(如 <div>…</div>)的块级元素,但适用于自闭合元素,如 <input />。

我还注意到,有些标签的 id 值用引号括起来,有些则没有,所以你可能需要对它做些调整......

我只是粗略地拼凑了这段代码,并复制粘贴了同一段例程来检测未加引号的 id 属性;它仍需进一步完善,也可以再精简。

<script runat="server">
Dim sourcecode As String
Dim bodycode As String
Dim RetVal As String

' Page load handler.
' Re-downloads the remote page on every request, then, when the form was posted
' through the "Submit" button, looks up the element whose id was typed into Text1
' and stores the result in RetVal for the markup to render.
Protected Sub Page_Load(sender As Object, e As System.EventArgs)
    LoadHttpStuff()

    Dim submitted As Boolean = (Request.Form("Button1") = "Submit")
    If Not submitted Then
        Exit Sub
    End If

    RetVal = MyGetElementById(Request("Text1"))
End Sub

' Downloads the Google Finance page and caches its markup in the page-level fields:
'   sourcecode - the full response text, unchanged
'   bodycode   - a lower-cased copy of the <body>...</body> section, used for searching
' If no opening body tag is found the whole document is used; if no closing tag is
' found the section runs to the end of the document.
Private Sub LoadHttpStuff()

    Dim request As System.Net.HttpWebRequest
    Dim startat As Integer
    Dim finishat As Integer

    request = DirectCast(System.Net.HttpWebRequest.Create("http://www.google.com/finance?q=NASDAQ:GOOG"), System.Net.HttpWebRequest)

    ' Using guarantees the response and reader are released even if the read throws.
    Using response As System.Net.WebResponse = request.GetResponse()
        Using sr As New System.IO.StreamReader(response.GetResponseStream())
            sourcecode = sr.ReadToEnd()
        End Using
    End Using

    If sourcecode = "" Then
        bodycode = ""
        Exit Sub
    End If

    ' Match "<body" rather than "<body>" so tags carrying attributes are found, and
    ' compare as text so <BODY> matches too. Mid() throws when given a start of 0,
    ' so fall back to the beginning of the document when there is no body tag.
    startat = InStr(sourcecode, "<body", CompareMethod.Text)
    If startat = 0 Then
        startat = 1
    End If

    finishat = InStr(startat, sourcecode, "</body>", CompareMethod.Text)
    If finishat = 0 Then
        ' No closing tag: take everything from the start position onwards.
        bodycode = Mid(sourcecode, startat)
    Else
        ' + 7 keeps the closing "</body>" tag itself in the extracted section.
        bodycode = Mid(sourcecode, startat, finishat + 7 - startat)
    End If

    ' Lower-cased once here so id lookups can be case-insensitive.
    bodycode = LCase(bodycode)

End Sub

' Returns the opening tag of the first element in bodycode whose id attribute
' matches Id, or an empty string when Id is blank or nothing matches.
'
' Id       - the element id to look for; compared case-insensitively because
'            bodycode is searched in lower case.
' Returns  - the text from the tag's "<" up to and including its ">", taken from
'            bodycode (so it is lower-cased). Only the opening tag is returned,
'            not the element's content or closing tag.
'
' The quoted form id="value" is tried first, then the unquoted form id=value.
' NOTE(review): the unquoted form is a prefix match, so id=foo also matches
' id=foobar; single-quoted ids are not recognised.
Private Function MyGetElementById(Id As String) As String
    If Trim(Id) = "" Then
        Return ""
    End If

    Dim wanted As String = LCase(Id)

    ' Locate the id attribute, preferring the quoted spelling.
    Dim attrPos As Integer = InStr(bodycode, "id=" & Chr(34) & wanted & Chr(34))
    If attrPos = 0 Then
        attrPos = InStr(bodycode, "id=" & wanted)
    End If
    If attrPos = 0 Then
        Return ""
    End If

    ' Walk back to the "<" that opens the tag. InStrRev returns 0 when there is
    ' none, which avoids reading position 0 of the string.
    Dim tagstart As Integer = InStrRev(bodycode, "<", attrPos)
    If tagstart = 0 Then
        Return ""
    End If

    ' Stop at the FIRST ">" after the attribute; scanning on to the last one
    ' would return the rest of the document instead of just this tag.
    Dim tagend As Integer = InStr(attrPos, bodycode, ">")
    If tagend = 0 Then
        Return ""
    End If

    Return Mid(bodycode, tagstart, tagend - tagstart + 1)
End Function
</script>

<%-- Page layout: the top-left textarea shows the downloaded page source, the
     right-hand column holds the element-id input and Submit button, and the bottom
     textarea shows the tag returned by MyGetElementById.
     Both values come from a remote site, so they are HTML-encoded before being
     written; otherwise a "</textarea>" or script in the fetched page would break
     out of the textarea and be rendered by the browser. --%>
<html xmlns="http://www.w3.org/1999/xhtml">
<head runat="server">
  <title></title>
</head>
<body>
  <form id="form1" runat="server">
    <table style="width: 100%;">
        <tr>
            <td style="text-align:left; vertical-align: top; width: 75%;"><textarea rows="20" cols="80" style="width: 90%;" disabled="disabled"><%=Server.HtmlEncode(sourcecode)%></textarea></td>
            <td style="width: 25%; text-align: left; vertical-align: top;">
                <table style="width:100%;">
                    <tr>
                        <td>Element Id&nbsp;<input id="Text1" name="Text1" type="text" /></td>
                    </tr><tr>
                        <td>&nbsp;</td>
                    </tr><tr>
                        <td>&nbsp;</td>
                    </tr><tr>
                        <td><input id="Button1" type="Submit" value="Submit" name="Button1" /></td>
                    </tr><tr>
                        <td>&nbsp;</td>
                    </tr><tr>
                        <td>&nbsp;</td>
                    </tr>
                </table>
            </td>
        </tr><tr>
            <td style="width: 75%;">&nbsp;</td>
            <td style="width: 25%;">&nbsp;</td>
        </tr><tr>
            <%-- This cell spans both columns, so the row needs no second cell. --%>
            <td style="width: 100%;" colspan="2"><textarea rows="20" cols="80" style="width: 75%;" disabled="disabled"><%=Server.HtmlEncode(RetVal)%></textarea></td>
        </tr>
    </table>
  </form>
</body>
</html>

希望它有一点帮助

于 2013-01-15T06:16:32.813 回答