1

作为我正在做的一个小项目的一部分,我将永远不会真正投入“生产”,我需要能够登录到一个非常安全的网站并检索 html。

我一直在研究使用 apache commons HTTPCLient 来做到这一点。但是我只是想确保它甚至是可能的,因为这个网站非常安全并且可能有 sso 登录方法?如果可能,最好的方法是什么?登录后,我需要能够浏览大约三个页面,因此需要以某种方式存储 cookie 或会话。

非常感谢!

4

2 回答 2

3

是的,它可以使用 apache http 组件来做到这一点,但是对于与复杂网站的交互,没有什么(我知道)能胜过HtmlUnit。要使用 httpcomponents,您需要“编写”整个 http 请求序列,如果中间的任何内容依赖于动态内容/javascript,您就会遇到问题。

另一方面,HtmlUnit 是一个几乎完整的“盒子里的鲍泽”,您可以在更高级别上编写交互脚本 - 单击它,填写这些值,提交等。

于 2013-01-13T12:28:12.977 回答
-1

您需要 jsoup.jar 文件将响应发送到特定网站通过检查元素查看 gmail 表单数据以进行元素验证

 package com.kowthal;

import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.CookieHandler;
import java.net.CookieManager;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import javax.net.ssl.HttpsURLConnection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

public class HttpUrlConnectionExample {

  private List<String> cookies;
  private HttpsURLConnection conn;

  private final String USER_AGENT = "Mozilla/5.0";

  public static void main(String[] args) throws Exception {

    String url = "https://accounts.google.com/ServiceLoginAuth";
    String gmail = "https://mail.google.com/mail/";

    HttpUrlConnectionExample http = new HttpUrlConnectionExample();

    // make sure cookies is turn on
    CookieHandler.setDefault(new CookieManager());

    // 1. Send a "GET" request, so that you can extract the form's data.
    String page = http.GetPageContent(url);
    String postParams = http.getFormParams(page, "username@gmail.com", "password");

    // 2. Construct above post's content and then send a POST request for
    // authentication
    http.sendPost(url, postParams);

    // 3. success then go to gmail.
    String result = http.GetPageContent(gmail);
    System.out.println(result);
  }

  private void sendPost(String url, String postParams) throws Exception {

    URL obj = new URL(url);
    conn = (HttpsURLConnection) obj.openConnection();

    // Acts like a browser
    conn.setUseCaches(false);
    conn.setRequestMethod("POST");
    conn.setRequestProperty("Host", "accounts.google.com");
    conn.setRequestProperty("User-Agent", USER_AGENT);
    conn.setRequestProperty("Accept",
        "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
    conn.setRequestProperty("Accept-Language", "en-US,en;q=0.5");
    for (String cookie : this.cookies) {
        conn.addRequestProperty("Cookie", cookie.split(";", 1)[0]);
    }
    conn.setRequestProperty("Connection", "keep-alive");
    conn.setRequestProperty("Referer", "https://accounts.google.com/ServiceLoginAuth");
    conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
    conn.setRequestProperty("Content-Length", Integer.toString(postParams.length()));

    conn.setDoOutput(true);
    conn.setDoInput(true);

    // Send post request
    DataOutputStream wr = new DataOutputStream(conn.getOutputStream());
    wr.writeBytes(postParams);
    wr.flush();
    wr.close();

    int responseCode = conn.getResponseCode();
    System.out.println("\nSending 'POST' request to URL : " + url);
    System.out.println("Post parameters : " + postParams);
    System.out.println("Response Code : " + responseCode);

    BufferedReader in = 
             new BufferedReader(new InputStreamReader(conn.getInputStream()));
    String inputLine;
    StringBuffer response = new StringBuffer();

    while ((inputLine = in.readLine()) != null) {
        response.append(inputLine);
    }
    in.close();
    // System.out.println(response.toString());

  }

  private String GetPageContent(String url) throws Exception {

    URL obj = new URL(url);
    conn = (HttpsURLConnection) obj.openConnection();

    // default is GET
    conn.setRequestMethod("GET");

    conn.setUseCaches(false);

    // act like a browser
    conn.setRequestProperty("User-Agent", USER_AGENT);
    conn.setRequestProperty("Accept",
        "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
    conn.setRequestProperty("Accept-Language", "en-US,en;q=0.5");
    if (cookies != null) {
        for (String cookie : this.cookies) {
            conn.addRequestProperty("Cookie", cookie.split(";", 1)[0]);
        }
    }
    int responseCode = conn.getResponseCode();
    System.out.println("\nSending 'GET' request to URL : " + url);
    System.out.println("Response Code : " + responseCode);

    BufferedReader in = 
            new BufferedReader(new InputStreamReader(conn.getInputStream()));
    String inputLine;
    StringBuffer response = new StringBuffer();

    while ((inputLine = in.readLine()) != null) {
        response.append(inputLine);
    }
    in.close();

    // Get the response cookies
    setCookies(conn.getHeaderFields().get("Set-Cookie"));

    return response.toString();

  }

  public String getFormParams(String html, String username, String password)
        throws UnsupportedEncodingException {

    System.out.println("Extracting form's data...");

    Document doc = Jsoup.parse(html);

    // Google form id
    Element loginform = doc.getElementById("gaia_loginform");
    Elements inputElements = loginform.getElementsByTag("input");
    List<String> paramList = new ArrayList<String>();
    for (Element inputElement : inputElements) {
        String key = inputElement.attr("name");
        String value = inputElement.attr("value");

        if (key.equals("Email"))
            value = username;
        else if (key.equals("Passwd"))
            value = password;
        paramList.add(key + "=" + URLEncoder.encode(value, "UTF-8"));
    }

    // build parameters list
    StringBuilder result = new StringBuilder();
    for (String param : paramList) {
        if (result.length() == 0) {
            result.append(param);
        } else {
            result.append("&" + param);
        }
    }
    return result.toString();
  }

  public List<String> getCookies() {
    return cookies;
  }

  public void setCookies(List<String> cookies) {
    this.cookies = cookies;
  }

}
于 2013-12-10T11:21:43.237 回答