我尝试使用 pycurl 从一个网站获取数据。我之前用 PHP(同样使用 curl)写过一个类似的脚本,它可以正常工作,但在 Python 中我得到的却是空响应。在 PHP 中我也曾遇到过类似的问题:该站点接收 POST 数据、初始化会话,然后重定向到结果页面;但如果客户端不接受 cookie,它会返回空响应而不是重定向。我无法访问该站点的代码。在 PHP 中,使用 COOKIEFILE 和 COOKIEJAR 选项解决了这个问题。以下是 PHP 代码:
<?php
// Post the lookup form to handler.php, follow the redirect the site issues
// after it has set its session cookie, and print the resulting page.
$anul = 2009;
$idnp = "2000000000000";
$seria_diploma = "AB000000000";
$url = "http://acte.edu.md/handler.php";

// tempnam() creates the file, so the same path can serve as both the cookie
// source (COOKIEFILE) and the cookie destination (COOKIEJAR).
$cookie_file = tempnam("tmp", "cookie");

// Build the body with http_build_query() so every value is URL-encoded and
// every field carries a name. The diploma series is sent as "a", matching
// the field name used by the Python version of this request.
$post_fields = http_build_query(array(
    "an" => $anul,
    "idnp" => $idnp,
    "a" => $seria_diploma,
    "Submit" => "OK",
), '', '&');

$curl_session = curl_init($url);
curl_setopt($curl_session, CURLOPT_POST, 1);
curl_setopt($curl_session, CURLOPT_POSTFIELDS, $post_fields);
curl_setopt($curl_session, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($curl_session, CURLOPT_COOKIEFILE, $cookie_file);
curl_setopt($curl_session, CURLOPT_COOKIEJAR, $cookie_file);
curl_setopt($curl_session, CURLOPT_RETURNTRANSFER, true);

$result = curl_exec($curl_session);
if ($result === false) {
    // curl_exec() returns false on failure; report the reason instead of
    // echoing an empty string.
    $result = "ERROR: " . curl_error($curl_session);
}
curl_close($curl_session);
echo $result;
在 Python 中使用相同的选项时,我得到的是一个空响应,HTTP 状态码是 200,而不是预期的 302。这是我使用的代码:
# Post the lookup form to handler.php with cookies enabled and redirects
# followed, then append diagnostic information to `response`.
#
# NOTE(review): `response`, `_APP_ROOT_PATH` and the StringIO / pycurl /
# urllib / os imports are expected to be defined earlier in the file; they are
# not visible in this snippet.
import tempfile

url = "http://acte.edu.md/handler.php"
post_data = {
    "an": 2009,
    "idnp": "2000000000000",
    "a": "AB000000000",
    "Submit": "OK",
}

# The response body is delivered to buf.write; read it with buf.getvalue()
# after perform() has returned.
buf = StringIO.StringIO()

# mkstemp() atomically creates a uniquely named file, unlike os.tempnam(),
# which only returns a name and is open to a race. The target directory must
# already exist. Only the path is needed, so the descriptor is closed at once.
cookie_fd, cookiefile = tempfile.mkstemp(
    prefix="cookie", dir=_APP_ROOT_PATH + "temp_files"
)
os.close(cookie_fd)

c = pycurl.Curl()
try:
    c.setopt(pycurl.URL, url)
    c.setopt(pycurl.WRITEFUNCTION, buf.write)
    # Read cookies from, and save them back to, the same file so the session
    # cookie set by the first response is sent again on the redirect.
    c.setopt(pycurl.COOKIEFILE, cookiefile)
    c.setopt(pycurl.COOKIEJAR, cookiefile)
    c.setopt(pycurl.FOLLOWLOCATION, 1)
    c.setopt(pycurl.POST, 1)
    c.setopt(pycurl.POSTFIELDS, urllib.urlencode(post_data))
    c.setopt(pycurl.VERBOSE, True)
    c.perform()
    response += "ERROR:" + c.errstr()
    response += str(c.getinfo(pycurl.HTTP_CODE)) + c.getinfo(pycurl.EFFECTIVE_URL)
finally:
    # Always release the handle, even when perform() raises; closing it is
    # also the point at which libcurl writes the cookie jar.
    c.close()
如果您有任何建议,请告诉我...