在 .NET 中有一些使用 IE 组件的类。通过它们我们可以读取 HTML DOM,并进行登录和数据抓取。有谁能告诉我这些类/程序集的名称吗?很久以前我自己曾用它们做过登录和数据抓取。
问问题
195 次
2 回答
1
如果您想从现有网页中抓取数据,请考虑使用HtmlAgilityPack
于 2012-11-07T11:20:31.950 回答
1
我已经做到了。只剩下与 HTMLTable 的 HTMLTableRowCollection 相关的一点小事还没处理。我的代码如下:
// Drives an Internet Explorer instance through COM automation:
//   1. opens the login page and submits the credentials read from AppSettings,
//   2. opens the search page and searches for the configured incident number,
//   3. optionally copies the resulting page's HTML into txtFinalTextBox.
//
// The document is re-read from ie.Document after every navigation/postback instead of
// reusing the reference taken from the login page, so each lookup runs against the page
// that is currently loaded.

// Delay between two checks of wb.Busy. The loops below are do/while on purpose: the
// first sleep gives the browser time to start the navigation before Busy is polled.
const int pageLoadPollMs = 10000;

object o = null; // placeholder passed by ref for the optional Navigate arguments
InternetExplorer ie = new InternetExplorerClass();
IWebBrowserApp wb = ie;
wb.Visible = chkShowBrowser.Checked;

// Step 1: load the login page.
wb.Navigate("http://LoginPage.aspx", ref o, ref o, ref o, ref o);
do
{
    Thread.Sleep(pageLoadPollMs);
} while (wb.Busy);

var loginDoc = ie.Document as HTMLDocument;
if (loginDoc != null)
{
    var oUserName =
        loginDoc.getElementById("ctl00_MainBodyPlaceholder_PublicPortalLogin_UserName") as HTMLInputTextElement;
    var oPassword =
        loginDoc.getElementById("ctl00_MainBodyPlaceholder_PublicPortalLogin_Password") as HTMLInputTextElement;
    var btnSubmitLogin =
        loginDoc.getElementById("ctl00_MainBodyPlaceholder_PublicPortalLogin_Login") as HTMLInputElement;

    // Fail with a descriptive error instead of a NullReferenceException on the first use.
    if (oUserName == null || oPassword == null || btnSubmitLogin == null)
    {
        throw new System.InvalidOperationException(
            "The login page does not contain the expected user name, password and login controls.");
    }

    oUserName.value = ConfigurationManager.AppSettings.Get("userName");
    oPassword.value = ConfigurationManager.AppSettings.Get("password");
    btnSubmitLogin.click();
    do
    {
        Thread.Sleep(pageLoadPollMs);
    } while (wb.Busy);

    if (ie.Document != null)
    {
        // Step 2: load the search page.
        wb.Navigate("http://SearchPage.aspx", ref o, ref o, ref o, ref o);
        do
        {
            Thread.Sleep(pageLoadPollMs);
        } while (wb.Busy);

        // Re-read the document: the login page's document object does not describe this page.
        var searchDoc = ie.Document as HTMLDocument;
        if (searchDoc != null)
        {
            var oIncidentNumber =
                searchDoc.getElementById("ctl00_MainBodyPlaceholder_txtIncidentNumber") as HTMLInputTextElement;
            var btnTicketNumberSearch =
                searchDoc.getElementById("ctl00_MainBodyPlaceholder_btnSearch") as HTMLInputElement;

            if (oIncidentNumber == null || btnTicketNumberSearch == null)
            {
                throw new System.InvalidOperationException(
                    "The search page does not contain the expected incident number and search controls.");
            }

            oIncidentNumber.value = ConfigurationManager.AppSettings.Get("incidentNumber");
            btnTicketNumberSearch.click();
            do
            {
                Thread.Sleep(pageLoadPollMs);
            } while (wb.Busy);

            // Step 3: read the results from whatever document is loaded after the search.
            var resultDoc = ie.Document as HTMLDocument;
            if (resultDoc != null)
            {
                HTMLTable searchResultTable =
                    resultDoc.getElementById("ctl00_MainBodyPlaceholder_gdView_DXMainTable") as HTMLTable;
                if (searchResultTable != null)
                {
                    // TODO: extracting the rows of the result table is not implemented yet.
                }

                if (chkRenderBody.Checked)
                {
                    txtFinalTextBox.Text = resultDoc.body.outerHTML;
                }
            }
        }
    }
}
于 2012-11-07T14:47:14.190 回答