我正在编写一个使用 QWebPage 抓取一些网页的应用程序。当响应是 Http 重定向(例如 302、303 等)时,我遇到了一些麻烦。QWebPage 根本不遵循重定向。
为了解决这个问题,我已经连接到页面的网络管理器的完成信号以捕获响应的状态并加载任何重定向,但是,当我在 QWebPage 上第二次调用load方法时,它只是将 url 设置为空白并且不发出任何请求。
以下是一些相关的代码:
connect(page->networkAccessManager(), SIGNAL(finished(QNetworkReply*)), SLOT(gotReply(QNetworkReply*)));
connect(page, SIGNAL(loadFinished(bool)), SLOT(doneLoading(bool)));
page->mainFrame()->load(url);
我的插槽:
void Snapshot::gotReply(QNetworkReply *reply)
{
if(reply->header(QNetworkRequest::ContentTypeHeader).toString().contains(QString("text/html")))
{
qDebug() << "Got reply " + reply->url().toString() + " - " + reply->attribute(QNetworkRequest::HttpStatusCodeAttribute).toString() + " - " + reply->header(QNetworkRequest::ContentTypeHeader).toString();
}
if(!statusCode && reply->header(QNetworkRequest::ContentTypeHeader).toString().contains(QString("text/html"))) {
statusCode = reply->attribute(QNetworkRequest::HttpStatusCodeAttribute).toInt();
redirectUrl = QUrl(reply->header(QNetworkRequest::LocationHeader).toUrl());
}
}
void Snapshot::doneLoading(bool)
{
// A reasonable waiting time for any script to execute
timer->start(3000);
}
void Snapshot::doneWaiting()
{
if( statusCode != 0 &&
statusCode != 301 &&
statusCode != 302 &&
statusCode != 303
) {
qDebug() << page->mainFrame()->url().toString();
qDebug() << page->mainFrame()->toHtml();
QImage image(page->viewportSize(), QImage::Format_ARGB32);
QPainter painter(&image);
page->mainFrame()->render(&painter);
painter.end();
image.save(*outputFilename);
delete outputFilename;
QApplication::quit();
}
else if(statusCode != 0) {
statusCode = 0;
qDebug() << "Redirecting to: " + redirectUrl.toString();
if(page->mainFrame()->url().toString().isEmpty()) {
qDebug() << "about:blank";
page->mainFrame()->load(this->redirectUrl); // No network activity after this
qDebug() << "Loading";
}
}
// This should ensure that the program never hangs
if(statusCode == 0) {
if(tries > 5) {
qDebug() << "Giving up.";
QApplication::quit();
}
tries++;
}
}