2

在下面的应用程序中,我使用 HtmlAgilityPack 从给定的 URL 加载 HTML 文档。现在我想知道如何通过 HTML 文档中的元素 ID 找到输入字段并填入文本,最后点击 SUBMIT 按钮提交表单。

/// <summary>
/// Click handler that loads the login page into an HtmlAgilityPack document.
/// The parsed document is not used any further here.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
protected void Button1_Click(object sender, EventArgs e)
{
    const string loginUrl = "https://something.com/login.asp";

    // HtmlWeb performs the HTTP request and parses the response in one call.
    var loader = new HtmlWeb();
    HtmlAgilityPack.HtmlDocument loginPage = loader.Load(loginUrl);
}

这个应用程序的目的是实现 Web 数据录入的自动化。

4

3 回答 3

0
/// <summary>
/// Click handler that drives Internet Explorer through Selenium WebDriver:
/// opens the login page, types the username and password, and submits the form.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
protected void Button1_Click(object sender, EventArgs e)
{
    IWebDriver driver =
        new InternetExplorerDriver(@"C:\.....\IEDriverServer_Win32_2.25.2");

    try
    {
        driver.Navigate().GoToUrl("https://website.com/login.asp");

        // Locate the inputs by their "name" attribute and type the credentials.
        IWebElement userNameField = driver.FindElement(By.Name("name_ID"));
        userNameField.SendKeys("xyzw");

        IWebElement passwordField = driver.FindElement(By.Name("pwd_PW"));
        passwordField.SendKeys("fasdfasfdasdf");

        // Submit the login form. The original called Submit() on an undeclared
        // variable ("submit"); it must be called on the element that was found.
        IWebElement submitButton = driver.FindElement(By.Name("submit"));
        submitButton.Submit();

        // Give the post-login page a few seconds before the browser is closed.
        System.Threading.Thread.Sleep(5000);
    }
    finally
    {
        // Always shut the browser and driver process down, even if a lookup above throws.
        driver.Quit();
    }
}
于 2012-10-12T18:01:42.570 回答
0

HtmlAgilityPack 不能那样工作。您只是持有一个表示 HTML 文档中节点的数据结构,而不是托管 Web 浏览器或客户端的实例。

解决方法是遍历 HtmlDocument 实例,找到正确的 <form> 元素,然后提取其中相应的 <input /> 子元素。之后您需要创建自己的 HttpWebRequest 对象,并用经过适当编码的键/值对手动填充它的 RequestStream(假设它是 POST 表单而不是 GET 表单)。

如果您要提交的表单是静态的并且不会更改,那么您根本不需要使用 HtmlAgilityPack,只需将表单名称和值硬编码到您的 HttpWebRequest 中。

于 2012-10-08T17:29:02.827 回答
0

我正在做类似的事情,但我正在手动获取页面 html。

// Issue the initial GET and keep the raw HTML of the page.
string html = DemoDoHttpGet(url, cookieContainer);

// The target is an ASP.NET page, so a set of fields has to be echoed back to the server.
// Each entry is the (URL-encoded) "name" attribute of an element to look up in the HTML;
// the list was collected by hand from a normal exchange captured with Fiddler.
string[] encodedFieldNames =
{
    "__LASTFOCUS",
    "__EVENTTARGET",
    "__EVENTARGUMENT",
    "__VIEWSTATE",
    "__EVENTVALIDATION",
    "ctl00%24ContentPlaceHolder1%24Login1%24LoginButton",
};

// Start every field with an empty value, keyed by the decoded element name.
var fields = new Dictionary<string, string>();
foreach (string encodedName in encodedFieldNames)
{
    fields.Add(System.Web.HttpUtility.UrlDecode(encodedName), string.Empty);
}

// Search the HTML for elements with the given names and update each entry
// in the field collection with the value sent from the server.
Scraper.GetFieldValues(fields, html);

/* looks kind of like this

        var doc = new HtmlAgilityPack.HtmlDocument();
        doc.LoadHtml(html);

        var names = new List<string>();
        foreach (var ditem in fields)
        {
            names.Add(ditem.Key);
        }

        foreach (var nitem in names)
        {
            // find items, read value

            string xpath = string.Format("//*[@name=\"{0}\"]", (nitem));
            var nodes = doc.DocumentNode.SelectNodes(xpath);

            // if node found read whatever attribute is appropriate,
            // write value back to fields collection

*/

// Supply the login username/password manually; the keys are the decoded
// "name" attributes of the corresponding inputs.
fields.Add(System.Web.HttpUtility.UrlDecode(
    "ctl00%24ContentPlaceHolder1%24Login1%24UserName"), "my@email.aaa");
fields.Add(System.Web.HttpUtility.UrlDecode(
    "ctl00%24ContentPlaceHolder1%24Login1%24Password"), "mypassword");

// Second request: post the collected fields back to the same URL, reusing the
// cookie container that was passed to the initial GET.
var request2 = (HttpWebRequest)WebRequest.Create(url);
request2.CookieContainer = cookieContainer;
request2.Method = "POST";
request2.ContentType = "application/x-www-form-urlencoded";

// Build the form body as URL-encoded key=value pairs separated by '&'.
var args = new StringBuilder();
foreach (var item in fields)
{
    args.Append(System.Web.HttpUtility.UrlEncode(item.Key));
    args.Append("=");
    args.Append(System.Web.HttpUtility.UrlEncode(item.Value));
    args.Append("&");
}

using (System.IO.StreamWriter writer =
    new System.IO.StreamWriter(request2.GetRequestStream()))
{
    // Drop the trailing separator left by the loop above.
    writer.Write(args.ToString().TrimEnd('&'));
}

// Store the response in the existing `html` variable. Declaring a second
// `string html` in the same scope, as the original did, is a compile error.
using (var response2 = (System.Net.HttpWebResponse)request2.GetResponse())
using (var rdr2 = new System.IO.StreamReader(response2.GetResponseStream()))
{
    html = rdr2.ReadToEnd();
}
于 2012-10-08T17:45:27.707 回答