我是 C# 新手，但碰巧需要以编程方式登录到特定网站，以便在 C# 中进行屏幕抓取。我已经在网上做过研究（这个站点特别有用），了解到我需要使用以下对象/类之一才能登录：WebRequest/WebResponse、HttpWebRequest/HttpWebResponse 或 WebClient，而且我需要将从网站收到的 cookie 传递给后续（屏幕抓取）请求。但是，我一直无法成功登录，现在已经没有思路了。我想先登录主页 ------，然后屏幕抓取一些这样的页面：-----。该网站的工作原理是这样的：它允许访问与我引用的页面类似的页面，但除非用户已登录，否则会在某些字段中返回星号。我认为这意味着内容是动态生成的，我怀疑这可能是我登录问题的根本原因。下面是我用来登录网站的代码：
/// <summary>
/// Console demo that logs in to a web site with a form POST and then downloads
/// a page that depends on the login session. The same flow is run twice: once
/// through <see cref="HttpWebRequest"/> with a cookie container, and once
/// through the <see cref="WebRequest"/> base class with hand-built headers.
/// </summary>
class Program
{
    // URL the login form body is POSTed to.
    private static string link_main_page = "-----------";

    // URL of the page downloaded after the login request.
    private static string link_target_page = "------------";

    // Form body sent with the login POST (application/x-www-form-urlencoded).
    private static string authorization_param = "----------";

    // User-Agent sent by the HttpWebRequest variant for both of its requests.
    private const string BrowserUserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; InfoPath.1; .NET4.0C; .NET CLR 2.0.50727; .NET4.0E; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)";

    /// <summary>
    /// Encodes <paramref name="formBody"/> as UTF-8, sets the request's
    /// Content-Length and writes the bytes to the request stream.
    /// </summary>
    /// <param name="request">Request that will carry the body.</param>
    /// <param name="formBody">Already URL-encoded form data.</param>
    private static void WriteFormBody(WebRequest request, string formBody)
    {
        byte[] payload = Encoding.UTF8.GetBytes(formBody);
        request.ContentLength = payload.Length;

        // Dispose the request stream so the body is complete before the response is requested.
        using (Stream body = request.GetRequestStream())
        {
            body.Write(payload, 0, payload.Length);
        }
    }

    /// <summary>
    /// Reads the whole response body as text and writes it to <paramref name="path"/>.
    /// </summary>
    /// <param name="response">Response whose body is saved.</param>
    /// <param name="path">File that is created or overwritten.</param>
    private static void SaveResponseBody(WebResponse response, string path)
    {
        string html;
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream))
        {
            html = reader.ReadToEnd();
        }

        // The file is only created once the download has succeeded.
        using (StreamWriter writer = File.CreateText(path))
        {
            writer.Write(html);
        }
    }

    /// <summary>
    /// Turns a raw Set-Cookie response header into a Cookie request header
    /// for <paramref name="target"/>, dropping attributes such as "path=/".
    /// </summary>
    /// <param name="setCookieHeader">Raw Set-Cookie header value; may be null or empty.</param>
    /// <param name="origin">URI of the response that sent the header.</param>
    /// <param name="target">URI of the request that will send the cookies.</param>
    /// <returns>The Cookie header value, or null when no Set-Cookie header was supplied.</returns>
    private static string BuildCookieHeader(string setCookieHeader, Uri origin, Uri target)
    {
        if (string.IsNullOrEmpty(setCookieHeader))
        {
            return null;
        }

        // Let the framework parse the header instead of echoing it back verbatim.
        CookieContainer jar = new CookieContainer();
        jar.SetCookies(origin, setCookieHeader);
        return jar.GetCookieHeader(target);
    }

    /// <summary>
    /// Logs in and downloads the target page using <see cref="HttpWebRequest"/>.
    /// Prints the cookies reported by the login response and saves the target
    /// page to "TheFile.htm".
    /// </summary>
    private static void LoginUsingTheHttpWebRequestClass()
    {
        // One container for both requests: every cookie stored while logging in
        // is sent again with the follow-up request.
        CookieContainer session = new CookieContainer();

        HttpWebRequest loginRequest = (HttpWebRequest)WebRequest.Create(link_main_page);
        loginRequest.Method = "POST";
        loginRequest.UserAgent = BrowserUserAgent;
        loginRequest.ContentType = "application/x-www-form-urlencoded";
        loginRequest.CookieContainer = session;
        WriteFormBody(loginRequest, authorization_param);

        using (HttpWebResponse loginResponse = (HttpWebResponse)loginRequest.GetResponse())
        {
            foreach (Cookie cookie in loginResponse.Cookies)
            {
                Console.WriteLine("Cookie:");
                Console.WriteLine("{0} = {1}", cookie.Name, cookie.Value);
            }
        }

        HttpWebRequest pageRequest = (HttpWebRequest)WebRequest.Create(link_target_page);
        pageRequest.Method = "GET";
        pageRequest.UserAgent = BrowserUserAgent;
        pageRequest.CookieContainer = session;

        using (WebResponse pageResponse = pageRequest.GetResponse())
        {
            SaveResponseBody(pageResponse, "TheFile.htm");
        }
    }

    /// <summary>
    /// Logs in and downloads the target page using only the <see cref="WebRequest"/>
    /// base class. Prints the raw Set-Cookie header of the login response and
    /// saves the target page to "TheFile2.htm".
    /// </summary>
    private static void LoginUsingTheWebRequestClass()
    {
        WebRequest loginRequest = WebRequest.Create(link_main_page);
        loginRequest.Method = "POST";
        loginRequest.ContentType = "application/x-www-form-urlencoded";
        WriteFormBody(loginRequest, authorization_param);

        string setCookieHeader;
        Uri loginUri;
        using (WebResponse loginResponse = loginRequest.GetResponse())
        {
            setCookieHeader = loginResponse.Headers["Set-cookie"];
            loginUri = loginResponse.ResponseUri;
        }

        Console.WriteLine("Cookie:");
        Console.WriteLine(setCookieHeader);

        WebRequest pageRequest = WebRequest.Create(link_target_page);
        pageRequest.Method = "GET";

        // Only name=value pairs belong in a Cookie request header; skip the
        // header entirely when the login response set no cookie.
        string cookieHeader = BuildCookieHeader(setCookieHeader, loginUri, pageRequest.RequestUri);
        if (!string.IsNullOrEmpty(cookieHeader))
        {
            pageRequest.Headers.Add("Cookie", cookieHeader);
        }

        using (WebResponse pageResponse = pageRequest.GetResponse())
        {
            SaveResponseBody(pageResponse, "TheFile2.htm");
        }
    }

    /// <summary>
    /// Runs both login variants in turn and waits for a key press before exiting.
    /// </summary>
    static void Main(string[] args)
    {
        Console.WriteLine("Login Using the HttpWebRequest Class");
        LoginUsingTheHttpWebRequestClass();
        Console.WriteLine("Login Using the WebRequest Class");
        LoginUsingTheWebRequestClass();
        Console.WriteLine("Done! Press any key to continue");
        Console.ReadKey();
    }
}
无论是使用 HttpWebRequest/HttpWebResponse 登录，还是使用 WebRequest/WebResponse 登录，均未成功。第一种方式返回的 cookie 类似这样：PHPSESSID=hncrr0...；第二种方式返回的 cookie 类似这样：PHPSESSID=88dn1n9...; path=/。这些 cookie 在我看来很可疑：一方面，它们与 IE 中的 cookie 看起来不同；但另一方面，我也不知道应该期待什么样的 cookie。（我还尝试过将通过 (Http)WebRequest/(Http)WebResponse 收到的 cookie 传递给 WebClient，但同样无济于事；为节省篇幅，这部分代码没有包含在此处。）
我将非常感谢任何意见。如果有人想运行代码,我实际上可以提供实际的登录名/密码信息(无论如何,在该网站上注册是免费的)。