你能看看这个页面吗:http://apps.humana.com/tad/tad_new/Search.aspx?sortfield=name&policyType=medical
我想弄清楚如何使用 cURL 下载该页面上每个链接背后的 PDF。
我尝试使用以下 cURL 代码提交页面的主表单:
我猜测长达 90000 个字符的 'VIEWSTATE' 变量可能会是个问题,尽管我希望不会。
// Simulate the postback that the page's JavaScript performs when a result
// link is clicked: POST the form's hidden fields back to the same URL and
// return the raw response body.
//
// Every entry must be a 'name' => 'value' pair. Writing 'name', 'value'
// (comma instead of =>) creates two numerically indexed elements, so the
// field name is never sent to the server.
//
// The hidden postback fields carry a double-underscore prefix
// (__EVENTTARGET, __EVENTARGUMENT, __VIEWSTATE), matching the prefix already
// used for __EVENTTARGET and __SCROLLPOSITIONX in this form.
// NOTE(review): confirm the exact field names (and whether the page also
// emits __EVENTVALIDATION) against the page's HTML source.
$post_vars = array(
    '__EVENTTARGET' => 'ctl00$ContentPlaceHolder1$MentorResultsGridView$ctl02$HrefLink',
    '__EVENTARGUMENT' => '',
    '__VIEWSTATE' => '**A STRING THAT IS 90000 characters long!!!**',
    'submitted' => 'submitted',
    '__SCROLLPOSITIONX' => '0',
    '__SCROLLPOSITIONY' => '0',
    'ctl00$ContentPlaceHolder1$MentorResultsGridView$ctl02$NewHiddenField' => 'Revised',
);

$form_url = 'http://apps.humana.com/tad/tad_new/Search.aspx?sortfield=name&policyType=medical';

// Temporary file in which cURL stores cookies received from the server.
$cookie = tempnam("/tmp", "CURLCOOKIE");

$ch = curl_init();
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20041001 Firefox/0.10.1");
curl_setopt($ch, CURLOPT_URL, $form_url);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_POST, true);
// Passing an array to CURLOPT_POSTFIELDS makes cURL send multipart/form-data.
// Encode the fields into a string instead so the request body is
// application/x-www-form-urlencoded, as a regular browser form submit is.
curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($post_vars));

// $output is the response body, or false if the transfer failed.
$output = curl_exec($ch);
// Transfer metadata (HTTP status, content type, ...) for debugging.
$info = curl_getinfo($ch);
curl_close($ch);

return $output;