0

我正在使用以下代码登录到asp网站,但它不起作用。

<?php
/************************************************
* ASP.NET web site scraping script;
* Developed by MishaInTheCloud.com
* Copyright 2009 MishaInTheCloud.com. All rights reserved.
* The use of this script is governed by the CodeProject Open License
* See the following link for full details on use and restrictions.
*   http://www.codeproject.com/info/cpol10.aspx
*
* The above copyright notice must be included in any reproductions of
this script.
************************************************/
error_reporting(E_ALL);
/************************************************
* values used throughout the script
************************************************/
// urls to call - the login page and the secured page
$urlLogin = "http://www.website.com/Default.aspx";
$urlSecuredPage = "http://www.website.com/Default.aspx";

// POST names and values to support login
$nameUsername=rawurlencode('ctl00$txtLoginName');       // the name of the username textbox on the login form
$namePassword=rawurlencode('ctl00$txtPassword');       // the name of the password textbox on the login form
$nameLoginBtn=rawurlencode('ctl00$btnLogin');          // the name of the login button (submit) on the login form
$valUsername ='myusername';        // the value to submit for the username
$valPassword ='mypass';        // the value to submit for the password
$valLoginBtn ='Login';             // the text value of the login button itself

// the path to a file we can read/write; this will
// store cookies we need for accessing secured pages
$cookieFile = 'cookie.txt';

// regular expressions to parse out the special ASP.NET
// values for __VIEWSTATE and __EVENTVALIDATION
$regexViewstate = '/__VIEWSTATE\" value=\"(.*)\"/i';
$regexEventVal  = '/__EVENTVALIDATION\" value=\"(.*)\"/i';


/************************************************
* utility function: regexExtract
*    use the given regular expression to extract
*    a value from the given text;  $regs will
*    be set to an array of all group values
*    (assuming a match) and the nthValue item
*    from the array is returned as a string
************************************************/
function regexExtract($text, $regex, $regs, $nthValue)
{

if (preg_match($regex, $text, $regs)) {
 $result = $regs[$nthValue];
}
else {
 $result = "";
}
return $result;
}



/************************************************
* initialize a curl handle; we'll use this
*   handle throughout the script
************************************************/
$ch = curl_init();


/************************************************
* first, issue a GET call to the ASP.NET login
*   page.  This is necessary to retrieve the
*   __VIEWSTATE and __EVENTVALIDATION values
*   that the server issues
************************************************/
curl_setopt($ch, CURLOPT_URL, $urlLogin);
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:11.0) Gecko/20100101 Firefox/11.0' );
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
$data=curl_exec($ch);
echo curl_error($ch);
// from the returned html, parse out the __VIEWSTATE and
// __EVENTVALIDATION values
$viewstate = regexExtract($data,$regexViewstate,$regs,1);
$eventval = regexExtract($data,$regexEventVal,$regs,1);
/************************************************
* now issue a second call to the Login page;
*   this time, it will be a POST; we'll send back
*   as post data the __VIEWSTATE and __EVENTVALIDATION
*   values the server previously sent us, as well as the
*   username/password.  We'll also set up a cookie
*   jar to retrieve the authentication cookie that
*   the server will generate and send us upon login.
************************************************/
$postData = '__VIEWSTATE='.rawurlencode($viewstate)
          .'&__EVENTVALIDATION='.rawurlencode($eventval)
          .'&'.$nameUsername.'='.$valUsername
          .'&'.$namePassword.'='.$valPassword
          .'&'.$nameLoginBtn.'='.$valLoginBtn
          ;

curl_setOpt($ch, CURLOPT_POST, TRUE);
curl_setopt($ch, CURLOPT_POSTFIELDS, $postData);
curl_setopt($ch, CURLOPT_URL, $urlLogin);  
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookieFile);    

$data = curl_exec($ch);


/************************************************
* with the authentication cookie in the jar,
* we'll now issue a GET to the secured page;
* we set curl's COOKIEFILE option to the same
* file we used for the jar before to ensure the
* authentication cookie is sent back to the
* server
************************************************/
curl_setOpt($ch, CURLOPT_POST, FALSE);
curl_setopt($ch, CURLOPT_URL, $urlSecuredPage);  
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookieFile);    

$data = curl_exec($ch);

// at this point the secured page may be parsed for
// values, or additional POSTS made to submit parameters
// and retrieve data.  For this sample, we'll just
// echo the results.
echo $data;


/************************************************
* that's it! Close the curl handle
************************************************/
curl_close($ch);
?>

网站的 HTML 代码:

<div>
<input type="hidden" name="__EVENTTARGET" id="__EVENTTARGET" value="" />
<input type="hidden" name="__EVENTARGUMENT" id="__EVENTARGUMENT" value="" />
<input type="hidden" name="__LASTFOCUS" id="__LASTFOCUS" value="" />
<input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="/wEPDwUKMTIblablabla" />
</div>
<div>
<input type="hidden" name="__PREVIOUSPAGE" id="__PREVIOUSPAGE" value="W3sZnblblabla" />
<input type="hidden" name="__EVENTVALIDATION" id="__EVENTVALIDATION" value="/wEWjwEC7blblabla" />
</div>
<div id="ctl00_pnlLoginBox">
<span id="ctl00_Label1">Username:</span><br />
<input name="ctl00$txtLoginName" type="text" id="ctl00_txtLoginName" style="width:100px;" /><br />
<span id="ctl00_Label2">Password:</span><br />
<input name="ctl00$txtPassword" type="password" id="ctl00_txtPassword" style="width:100px;" /><br />
<input type="submit" name="ctl00$btnLogin" value="Login" id="ctl00_btnLogin" style="width:104px;" />
</div>
<div align="center" style="padding-top:2px">
<a href="/getpassword.aspx" id="ctl00_aForgotPassword">
<span>Forgot your Password</span>
</a>
</div>

我收到以下错误:

Notice: Undefined variable: viewstate in /home/me/test3.php on line 101
Notice: Undefined variable: eventval in /home/me/test3.php on line 102

我尝试手动设置 viewstate 和 eventval 但它没有登录。我尝试了很多在这个网站上找到的不同脚本,但都没有奏效。有人可以帮我一点。

4

1 回答 1

1

可能有很多原因,我会介绍几个(如果不对实际网址进行测试就很难说)

想法 1:不正确的数组指针: 看起来您的regexExtract()函数没有为 var 赋值$viewstate- 这可能是因为您regexExtract()使用$nthValueas调用1- 这意味着当您$regs填充数组时,preg_match()您正在提取数组中的第二项(不要忘记 php 数组指针是基于 0 的。)

想法 2:正则表达式中的逻辑问题 由于逻辑错误,您的正则表达式与任何内容都不匹配 - 再说一次,除非我们能看到返回的内容,否则我无能为力curl_exec()- 这些正则表达式是为使用 asp.net 的网站设计的本机身份验证系统 - 您是否确定登录到 asp.net 站点?

另外,请从您的问题中删除 asp-classic 标签,我认为它不适用吗?

编辑:好的,我已经做了一些测试并得到了$viewstate$eventval变量来填充并证明正则表达式工作正常,(并且检查“1”是正确的指针)。我的测试证明 curl 正确地提取了视图状态并将其发布到登录页面。

我通过创建以下 php 文件 (echopost.php) 对此进行了测试:

<?
foreach($_GET as $name => $value) {
    print "QueryString: $name : $value<br>";
}
foreach($_POST as $name => $value) {
    print "POST: $name : $value<br>";
}
$cookie=$_COOKIE;
foreach ($cookie as $name=>$value) {
    print "Cookie: $name : $value<br>";
}
?>

然后我更改了这行 99~ 以将登录信息提交到 echopost 并输出结果(以证明正则表达式已正确填充$viewstate$eventval

第99行~

curl_setOpt($ch, CURLOPT_POST, TRUE); 
curl_setopt($ch, CURLOPT_POSTFIELDS, $postData); 
curl_setopt($ch, CURLOPT_URL, "http://www.heavencore.co.uk/so/echopost.php"); //#### Note this line that changes 
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookieFile);     

$data = curl_exec($ch); 
echo "Response from echopost.php (login): <hr />" . $data; 

如您所见,echopage 正在返回以 curl 正则表达式中的正确值,这意味着 $viewstate 和 $eventval 现在已填充并正常工作。

演示:http ://www.heavencore.co.uk/so/login.php 基于虚拟登录页面http://www.heavencore.co.uk/so/login.htm

我改变了这一行:

function regexExtract($text, $regex, $regs, $nthValue) 

对此:

function regexExtract($text, $regex, $nthValue)

& 改变了这些行:

$viewstate = regexExtract($data,$regexViewstate,$regs,1); 
$eventval = regexExtract($data,$regexEventVal,$regs,1); 

对此:

$viewstate = regexExtract($data,$regexViewstate,1); 
$eventval = regexExtract($data,$regexEventVal,1); 

编辑2:

好的,这工作正常:

http://www.heavencore.co.uk/so/login.php

请注意,login.php 输出“欢迎来到 /../,HeavenCore!” page - 表示 curl 登录成功。

我的来源:

<?php 
/************************************************ 
* ASP.NET web site scraping script; 
* Developed by MishaInTheCloud.com 
* Copyright 2009 MishaInTheCloud.com. All rights reserved. 
* The use of this script is governed by the CodeProject Open License 
* See the following link for full details on use and restrictions. 
*   http://www.codeproject.com/info/cpol10.aspx 
* 
* The above copyright notice must be included in any reproductions of 
this script. 
************************************************/ 
error_reporting(E_ALL); 
/************************************************ 
* values used throughout the script 
************************************************/ 
// urls to call - the login page and the secured page 
$urlLogin = "http://www.website.com/Default.aspx"; 
$urlSecuredPage = "http://www.website.com/Default.aspx"; 

// POST names and values to support login 
$nameUsername=rawurlencode('ctl00$txtLoginName');       // the name of the username textbox on the login form 
$namePassword=rawurlencode('ctl00$txtPassword');       // the name of the password textbox on the login form 
$nameLoginBtn=rawurlencode('ctl00$btnLogin');          // the name of the login button (submit) on the login form 
$valUsername ='HeavenCore';        // the value to submit for the username 
$valPassword ='password';        // the value to submit for the password 
$valLoginBtn ='Login';             // the text value of the login button itself 

// the path to a file we can read/write; this will 
// store cookies we need for accessing secured pages 
$cookieFile = 'cookie.txt'; 

// regular expressions to parse out the special ASP.NET 
// values for __VIEWSTATE and __EVENTVALIDATION 
$regexViewstate = "/__VIEWSTATE\" value=\"(.*)\"/i"; 
$regexEventVal  = "/__EVENTVALIDATION\" value=\"(.*)\"/i"; 

/************************************************ 
* utility function: regexExtract 
*    use the given regular expression to extract 
*    a value from the given text;  $regs will 
*    be set to an array of all group values 
*    (assuming a match) and the nthValue item 
*    from the array is returned as a string 
************************************************/ 
function regexExtract($text, $regex, $nthValue) 
{ 

if (preg_match($regex, $text, $regs)) { 
 $result = $regs[$nthValue]; 
} 
else { 
 $result = ""; 
} 
return $result; 
} 


/************************************************ 
* initialize a curl handle; we'll use this 
*   handle throughout the script 
************************************************/ 
$ch = curl_init(); 


/************************************************ 
* first, issue a GET call to the ASP.NET login 
*   page.  This is necessary to retrieve the 
*   __VIEWSTATE and __EVENTVALIDATION values 
*   that the server issues 
************************************************/ 
curl_setopt($ch, CURLOPT_URL, $urlLogin); 
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:11.0) Gecko/20100101 Firefox/11.0' ); 
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE); 
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE); 
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE); 
$data=curl_exec($ch); 
echo curl_error($ch); 
// from the returned html, parse out the __VIEWSTATE and 
// __EVENTVALIDATION values 
$viewstate = regexExtract($data,$regexViewstate,1); 
$eventval = regexExtract($data,$regexEventVal,1); 
/************************************************ 
* now issue a second call to the Login page; 
*   this time, it will be a POST; we'll send back 
*   as post data the __VIEWSTATE and __EVENTVALIDATION 
*   values the server previously sent us, as well as the 
*   username/password.  We'll also set up a cookie 
*   jar to retrieve the authentication cookie that 
*   the server will generate and send us upon login. 
************************************************/ 
$postData = '__VIEWSTATE='.rawurlencode($viewstate) 
          .'&__EVENTVALIDATION='.rawurlencode($eventval) 
          .'&'.$nameUsername.'='.$valUsername 
          .'&'.$namePassword.'='.$valPassword 
          .'&'.$nameLoginBtn.'='.$valLoginBtn 
          ; 

curl_setOpt($ch, CURLOPT_POST, TRUE); 
curl_setopt($ch, CURLOPT_POSTFIELDS, $postData); 
curl_setopt($ch, CURLOPT_URL, $urlLogin);   
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookieFile);     

$data = curl_exec($ch); 
 echo "Response from echopost.php (login): <hr />" . $data; 

/************************************************ 
* with the authentication cookie in the jar, 
* we'll now issue a GET to the secured page; 
* we set curl's COOKIEFILE option to the same 
* file we used for the jar before to ensure the 
* authentication cookie is sent back to the 
* server 
************************************************/ 
curl_setOpt($ch, CURLOPT_POST, FALSE); 
curl_setopt($ch, CURLOPT_URL, $urlSecuredPage);   
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookieFile);     

$data = curl_exec($ch); 

// at this point the secured page may be parsed for 
// values, or additional POSTS made to submit parameters 
// and retrieve data.  For this sample, we'll just 
// echo the results. 



/************************************************ 
* that's it! Close the curl handle 
************************************************/ 
curl_close($ch); 
?>
于 2012-05-10T13:14:20.487 回答