在页面上的 JavaScript 修改页面后,我试图访问页面的 HTML。根据我在网上找到的内容,这是我目前一直在尝试的。
using System;
using System.Windows.Forms;
using System.IO;
namespace WebBrowserDemo
{
class Program
{
public const string TestUrl = @"http://www.theverge.com/2012/7/2/3126604/android-jelly-bean-updates-htc-samsung-google-pdk";
[STAThread]
static void Main(string[] args)
{
WebBrowser wb = new WebBrowser();
wb.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(wb_DocumentCompleted);
wb.Navigate(TestUrl);
while (wb.ReadyState != WebBrowserReadyState.Complete)
{
Application.DoEvents();
}
Console.WriteLine("\nPress any key to continue...");
Console.ReadKey(true);
}
static void wb_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
WebBrowser wb = (WebBrowser)sender;
HtmlElement document = wb.Document.GetElementsByTagName("html")[0];
using (StreamWriter sw = new StreamWriter("OuterHTML.txt"))
{
sw.WriteLine(document.OuterHtml);
}
var abc = wb.Document.InvokeScript("eval", new object[] { "window.scrollTo(0, document.body.scrollHeight);" });
Console.WriteLine();
document = wb.Document.GetElementsByTagName("html")[0];
using (StreamWriter sw = new StreamWriter("OuterHTML2.txt"))
{
sw.WriteLine(document.OuterHtml);
}
}
}
}
最终目标是滚动到页面底部激活任何 JS 以加载文章的评论。虽然目前我从脚本运行前后返回的 html 是相同的。
有什么建议么?
谢谢