
// Downloads the login-protected page and rewrites the returned HTML so that, when the
// markup is rendered, the login form is pre-filled and submitted automatically.
HttpWebRequest req = (HttpWebRequest)WebRequest.Create("https://abcd.com.au/categories/A_dfn/sdf");

StringBuilder sb = new StringBuilder();
byte[] buf = new byte[10000];

// Dispose the response and its stream so the underlying connection is released.
using (HttpWebResponse res = (HttpWebResponse)req.GetResponse())
using (Stream resStream = res.GetResponseStream())
{
    int c;
    do
    {
        c = resStream.Read(buf, 0, buf.Length);
        if (c != 0)
        {
            // Accumulate every chunk; without this the replacements below run on an empty
            // builder. ASCII decoding is kept as in the original: non-ASCII bytes become '?'.
            sb.Append(Encoding.ASCII.GetString(buf, 0, c));
        }
    } while (c > 0);
}

// Inject a script right after the login button markup that clicks the submit button.
string oldhead = "class=\"login_button\">";
string newhead = "class=\"login_button\">   <script type=\"text/javascript\">document.getElementById('btn').click()</script>";
sb.Replace(oldhead, newhead);

// Give the submit button the id the injected script looks up.
string oldbtn = "value=\"Submit\"";
string newbtn = "value=\"Submit\" id=\"btn\" ";
sb.Replace(oldbtn, newbtn);

// Make the form post to the absolute login URL, carrying the originally requested path.
string oldAction = "<form action=\"/login\" method=\"post\">";
string newAction = "<form action=\"https://abcd.com.au/login?orig_req_url=%2Fcategories/A_dfn/sdf\" method=\"post\">";
sb.Replace(oldAction, newAction);

// NOTE(review): the credentials below are hard-coded; consider reading them from configuration.
string oldUsername = "<input id=\"login_email\" type=\"text\" name=\"user[email_address]\" class=\"textBox\" value=\"\">";
string newUserName = "<input id=\"login_email\" type=\"text\" name=\"user[email_address]\" class=\"textBox\" value=\"abc@xyz.com.au\">";
sb.Replace(oldUsername, newUserName);

string oldPass = "<input id=\"login_password\" type=\"password\" name=\"user[password]\" class=\"textBox\" value=\"\">";
string newPass = "<input id=\"login_password\" type=\"password\" name=\"user[password]\" class=\"textBox\" value=\"abc\">";
// The password replacement was declared but never applied in the original.
sb.Replace(oldPass, newPass);

通过渲染页面(Response.Write(sb)),上面的代码可以给出我想要的预期输出。但是,现在我想在不重定向到“https://abcd.com.au/login?orig_req_url=%2Fcategories/A_dfn/sdf”的情况下做同样的事情,并且还想做进一步的处理。我希望把 Response.Write(sb) 的输出保存到某个缓冲区中。这可能吗?

下面的示例准确地说明了我想要做的事情。我要查询某个产品的数量,例如名称为“螺丝 15mm”的产品,它位于页面 https://abcd.com.au/%2Fcategories/A_dfn/sdf 中。因此,我首先请求这个 URL;但由于访问该页面需要登录,它会把我重定向到登录页面。我需要在登录页面填写用户名和密码,用 JavaScript 按下登录按钮,然后被重定向回最初请求的页面。在这个页面上,我想找到该产品,并把信息返回给我的 Web 应用程序。




您的方案的关键是持久(存储)登录页面设置的会话和cookie;然后在您下一次请求产品信息之前,将凭证注入请求的 webRequest。

  1. 使用 WebRequest 对象加载登录页面。
  2. 存储登录页面响应标头发送的任何信息(cookie)。
  3. 使用提供的响应标头创建一个新的 WebRequest 对象,注入用户 ID/密码。
  4. 存储响应返回的任何凭据。
  5. 继续请求报价信息。


此外,您需要HTMLAgilityPack来解析 HTML 节点。这是正确的方法。

编辑:添加了我的代码。碰巧我之前写过这个类,所以你很幸运。但是,您需要安装并引用 HTMLAgilityPack 才能使用它。您可以在以下网址下载 HAP:http://htmlagilitypack.codeplex.com/ 。如果您想认真地做屏幕抓取,HAP 是事实上的标准。

Public Class clsBrowserSession
    'This is a special Browser Post class
    ' Instead of just POST to a URL as per the clsWeb.fnsPostResponse()
    ' clsBrowserSession allows us to LOAD a page first, persist all the cookies and variables, and then only POST to the target URL.
    ' The reason is that some program will drop (lets say) a SessionID as an input when you first load the page.
    ' and when you post, without the SessionID (variable), it will reject the POST. Thus clsBrowserSession can solve this problem.
    ' USAGE:
    '   Dim voBrowserSession As New clsBrowserSession
    '   voBrowserSession.sbLoadPage("https://xxx.yyy.net.my/publicncdenq/index.htm")
    '   voBrowserSession.proFormElements("UserID") = "myID"
    '   voBrowserSession.proFormElements("Password") = "myPassword"
    '   Dim vsResponseHTML As String = voBrowserSession.Post("https://xxx.yyy.net.my/publicncdenq/index.htm")

    ' True while Post() is running, so the pre-request hook knows to attach the form payload.
    Private vbIsPostingInProgress As Boolean
    ' Cookies collected from every response so far; re-sent with each subsequent request.
    Public voCookies As System.Net.CookieCollection
    ' The most recently loaded document (set by the pre-handle-document hook).
    Public proHTMLDoc As HtmlAgilityPack.HtmlDocument
    ' INPUT name/value pairs of the most recently loaded document; edit before calling Post().
    Public proFormElements As clsFormElementCollection

    ' Loads pvsURL with a GET request. The hooks wired up in fnoCreateWebRequestObject
    ' store the returned cookies, the document and its form elements on this instance.
    Public Sub sbLoadPage(pvsURL As String)
        vbIsPostingInProgress = False
        fnoCreateWebRequestObject().Load(pvsURL)
    End Sub

    ' POSTs the current proFormElements to pvsURL (together with the stored cookies)
    ' and returns the inner HTML of the document that comes back.
    Public Function Post(pvsURL As String) As String
        vbIsPostingInProgress = True
        fnoCreateWebRequestObject().Load(pvsURL, "POST")

        Return proHTMLDoc.DocumentNode.InnerHtml
    End Function

    ' Creates an HtmlWeb whose request/response/document hooks point at this session.
    Private Function fnoCreateWebRequestObject() As HtmlAgilityPack.HtmlWeb
        Dim voWeb As New HtmlAgilityPack.HtmlWeb
        voWeb.UseCookies = True
        voWeb.PreRequest = New HtmlAgilityPack.HtmlWeb.PreRequestHandler(AddressOf event_OnPreRequest)
        voWeb.PostResponse = New HtmlAgilityPack.HtmlWeb.PostResponseHandler(AddressOf event_OnAfterResponse)
        voWeb.PreHandleDocument = New HtmlAgilityPack.HtmlWeb.PreHandleDocumentHandler(AddressOf event_OnPreHandleDocument)
        Return voWeb
    End Function

    ' Writes the url-encoded form payload into the request body.
    Private Sub sbAddPostDataTo(pvoRequest As Net.HttpWebRequest)
        ' No page loaded yet means there are no form elements: send an empty body.
        Dim vsPayload As String = String.Empty
        If proFormElements IsNot Nothing Then vsPayload = proFormElements.fnsAssemblePostPayload()

        Dim vabyteBuffer As Byte() = Text.Encoding.UTF8.GetBytes(vsPayload)
        pvoRequest.ContentLength = vabyteBuffer.Length
        pvoRequest.ContentType = "application/x-www-form-urlencoded"
        pvoRequest.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.97 Safari/537.11"
        ' Close the request stream once the body is written so it is not leaked.
        Using voRequestStream As IO.Stream = pvoRequest.GetRequestStream()
            voRequestStream.Write(vabyteBuffer, 0, vabyteBuffer.Length)
        End Using
    End Sub

    ' Attaches the cookies saved from earlier responses to the outgoing request.
    Private Sub sbAddvoCookiesTo(pvoRequest As Net.HttpWebRequest)
        If (Not IsNothing(voCookies)) Then
            If voCookies.Count > 0 Then
                ' Make sure there is a container to add to before using it.
                If pvoRequest.CookieContainer Is Nothing Then pvoRequest.CookieContainer = New Net.CookieContainer
                pvoRequest.CookieContainer.Add(voCookies)
            End If
        End If
    End Sub

    ' Merges the cookies of a response into the session's cookie collection.
    Private Sub sbSaveCookiesFrom(pvoResponse As Net.HttpWebResponse)
        If pvoResponse.Cookies.Count > 0 Then
            If IsNothing(voCookies) Then voCookies = New Net.CookieCollection
            voCookies.Add(pvoResponse.Cookies)
        End If
    End Sub

    ' Stores the document and rebuilds the form-element collection from it.
    Private Sub sbSaveHtmlDocument(pvoHTMLDocument As HtmlAgilityPack.HtmlDocument)
        proHTMLDoc = pvoHTMLDocument
        proFormElements = New clsFormElementCollection(proHTMLDoc)
    End Sub

    ' Pre-request hook: always re-send the stored cookies; add the form body when posting.
    ' Always returns True.
    Protected Function event_OnPreRequest(pvoRequest As Net.HttpWebRequest) As Boolean
        sbAddvoCookiesTo(pvoRequest)
        If vbIsPostingInProgress Then sbAddPostDataTo(pvoRequest)
        Return True
    End Function

    ' Post-response hook: remember any cookies the server set.
    Protected Sub event_OnAfterResponse(pvoRequest As System.Net.HttpWebRequest, pvoResponse As Net.HttpWebResponse)
        sbSaveCookiesFrom(pvoResponse)
    End Sub

    ' Pre-handle-document hook: keep the document so Post() and callers can read it.
    Protected Sub event_OnPreHandleDocument(pvoHTMLDocument As HtmlAgilityPack.HtmlDocument)
        sbSaveHtmlDocument(pvoHTMLDocument)
    End Sub

    'Form Elements class
    '  Note: This element class will only capture (any) INPUT elements only, which should be enough
    '  for most cases. It can be easily modified to add other SELECT, TEXTAREA, etc voInputs
    Public Class clsFormElementCollection
        Inherits Dictionary(Of String, String)

        ' Collects the name/value attributes of every INPUT element in htmlDoc.
        Public Sub New(htmlDoc As HtmlAgilityPack.HtmlDocument)
            Dim voInputs As Collections.Generic.IEnumerable(Of HtmlAgilityPack.HtmlNode) = htmlDoc.DocumentNode.Descendants("input")
            For Each voInput As HtmlAgilityPack.HtmlNode In voInputs
                ' "undefined" is the sentinel for an INPUT without a name attribute; those are skipped.
                Dim vsName = voInput.GetAttributeValue("name", "undefined")
                Dim vsValue = voInput.GetAttributeValue("value", "")
                ' Indexer assignment instead of Add: repeated names (e.g. radio groups)
                ' overwrite the earlier value rather than throwing.
                If vsName <> "undefined" Then Me(vsName) = vsValue
            Next
        End Sub

        ' Builds the application/x-www-form-urlencoded body ("k1=v1&k2=v2...").
        ' Returns an empty string when the collection has no entries.
        Public Function fnsAssemblePostPayload() As String
            Dim sb As New Text.StringBuilder
            For Each voKeyValuePair In Me
                ' Encode the key as well as the value so "&" or "=" in a field name cannot corrupt the payload.
                Dim vsKey = System.Web.HttpUtility.UrlEncode(voKeyValuePair.Key)
                Dim vsValue = System.Web.HttpUtility.UrlEncode(voKeyValuePair.Value)
                If sb.Length > 0 Then sb.Append("&")
                sb.Append(vsKey & "=" & vsValue)
            Next
            Return sb.ToString()
        End Function
    End Class
End Class


