3

我想从 Commons HttpClient (3.x) 迁移到 HttpComponents Client (4.x),但在处理重定向时遇到了困难。这段代码在 Commons HttpClient 下可以正常工作,迁移到 HttpComponents Client 后却出了问题:部分链接会发生我不想要的重定向,而当我把 "http.protocol.handle-redirects" 设置为 false 时,大量链接又完全无法工作。

Commons HttpClient 3.x:

/** Idle time, in milliseconds, passed to closeIdleConnections after each fetch. */
private static final long MAX_CONNECTION_IDLE_TIME = 60000;

/** Connection pool that backs the shared client below. */
private static MultiThreadedHttpConnectionManager connectionManager;

/** Shared client built on top of {@link #connectionManager}. */
private static HttpClient httpClient;

static {
    // Install a java.net default cookie handler that accepts every cookie.
    CookieManager cookieManager = new CookieManager();
    cookieManager.setCookiePolicy(CookiePolicy.ACCEPT_ALL);
    CookieHandler.setDefault(cookieManager);

    // Configure the pool completely before publishing it and the client.
    MultiThreadedHttpConnectionManager pool = new MultiThreadedHttpConnectionManager();
    pool.getParams().setDefaultMaxConnectionsPerHost(1000); // will need to set from properties file
    pool.getParams().setMaxTotalConnections(1000);

    connectionManager = pool;
    httpClient = new HttpClient(pool);
}




/**
 * Retrieves the HTML behind {@code url} with an HTTP GET.
 *
 * @param url the address to fetch
 * @return the response body with every run of whitespace collapsed to a
 *         single space, an empty string when the response has no body, or
 *         {@code null} when {@code StringUtils.isEmpty(url)} is true
 * @throws IOException if the request fails, the body cannot be read, or the
 *         server answers with a status code of 400 or above
 */
public String fetchURL(String url) throws IOException {

    if (StringUtils.isEmpty(url)) {
        return null;
    }

    GetMethod getMethod = new GetMethod(url);
    try {
        // Go through the shared static client so the request actually uses the
        // pooled connection manager that is purged in the finally block.
        int code = httpClient.executeMethod(getMethod);

        // 400 itself is a client error, so everything from 400 upwards fails.
        if (code >= 400) {
            throw new IOException("URL: " + url + " returned response code " + code);
        }

        InputStream in = getMethod.getResponseBodyAsStream();
        if (in == null) {
            // No response body available.
            return "";
        }

        try {
            // Decode with the charset reported for the response rather than
            // the platform default.
            String html = CharStreams.toString(
                    new InputStreamReader(in, getMethod.getResponseCharSet()));
            return html.replaceAll("\\s+", " ");
        } finally {
            try {
                in.close();
            } catch (IOException ignored) {
                // Best effort: the connection is released below regardless.
            }
        }
    } finally {
        getMethod.releaseConnection();
        connectionManager.closeIdleConnections(MAX_CONNECTION_IDLE_TIME);
    }
}

HttpComponents Client 4.x:

/** Idle time, in milliseconds, passed to closeIdleConnections after each fetch. */
private static final int MAX_CONNECTION_IDLE_TIME = 60000;

/** Connection pool that backs the shared client below. */
private static ThreadSafeClientConnManager connectionManager;

/** Shared client built on top of {@link #connectionManager}. */
private static HttpClient httpClient;

/** Parameters applied to each request; populated in the static initializer. */
private static HttpParams params;

static {
    // Install a java.net default cookie handler that accepts every cookie.
    CookieManager cookieManager = new CookieManager();
    cookieManager.setCookiePolicy(CookiePolicy.ACCEPT_ALL);
    CookieHandler.setDefault(cookieManager);

    // Configure the pool completely before publishing it and the client.
    ThreadSafeClientConnManager pool = new ThreadSafeClientConnManager();
    pool.setDefaultMaxPerRoute(1000); // will need to set from properties file
    pool.setMaxTotal(1000);
    connectionManager = pool;
    httpClient = new DefaultHttpClient(pool);

    // Request parameters: turn the redirect-handling flag off.
    HttpParams requestParams = new BasicHttpParams();
    requestParams.setParameter("http.protocol.handle-redirects", false);
    params = requestParams;
}




/**
 * Retrieves the HTML behind {@code url} with an HTTP GET, applying the
 * shared request {@code params}.
 *
 * @param url the address to fetch
 * @return the response body, an empty string when the response carries no
 *         entity, or {@code null} when {@code StringUtils.isEmpty(url)} is true
 * @throws IOException if the request fails, the body cannot be read, or the
 *         server answers with a status code of 400 or above
 */
public String fetchURL(String url) throws IOException {

    if (StringUtils.isEmpty(url)) {
        return null;
    }

    // Prepare a request object
    HttpGet httpget = new HttpGet(url);
    httpget.setParams(params);

    try {
        // Execute the request
        HttpResponse response = httpClient.execute(httpget);
        int code = response.getStatusLine().getStatusCode();

        // If the response does not enclose an entity there is nothing to read.
        HttpEntity entity = response.getEntity();
        String html = "";
        if (entity != null) {
            // The body is read even for error statuses so that the content
            // stream is consumed and closed before the status is checked.
            InputStream in = entity.getContent();
            try {
                // NOTE(review): this decodes with the platform default charset;
                // confirm whether the entity's declared charset should be used.
                html = CharStreams.toString(new InputStreamReader(in));
            } finally {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Best effort: a close failure must not hide the real outcome.
                }
            }
        }

        // 400 itself is a client error, so everything from 400 upwards fails.
        if (code >= 400) {
            throw new IOException("URL: " + url + " returned response code " + code);
        }
        return html;
    } finally {
        // Purge the pool after every request, with or without an entity.
        connectionManager.closeIdleConnections(MAX_CONNECTION_IDLE_TIME, TimeUnit.MILLISECONDS);
        connectionManager.closeExpiredConnections();
    }
}

我并不想要重定向。但在 HttpClient 4.x 下,如果启用重定向,就会出现我不想要的跳转,例如 http://www.walmart.com/ => http://mobile.walmart.com/;而在 HttpClient 3.x 下不会发生这类重定向。

在不破坏现有代码行为的前提下,我需要怎么做才能从 HttpClient 3.x 迁移到 HttpClient 4.x?

4

1 回答 1

2

这不是 HttpClient 4.x 的问题,而可能与目标服务器处理请求的方式有关:由于 User-Agent 是 httpclient,请求可能被当作来自移动设备来处理(目标服务器可能把 Chrome、Mozilla 等常见浏览器以外的客户端都视为移动设备)。

请使用以下代码手动设置 User-Agent:

 // Set this client's User-Agent parameter to a desktop Firefox string.
 org.apache.http.params.HttpProtocolParams.setUserAgent(
            httpclient.getParams(),
            "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.2) Gecko/20100316 Firefox/3.6.2");
于 2013-01-11T06:17:52.897 回答