作为我正在做的一个小项目的一部分,我将永远不会真正投入“生产”,我需要能够登录到一个非常安全的网站并检索 html。
我一直在研究使用 apache commons HTTPCLient 来做到这一点。但是我只是想确保它甚至是可能的,因为这个网站非常安全并且可能有 sso 登录方法?如果可能,最好的方法是什么?登录后,我需要能够浏览大约三个页面,因此需要以某种方式存储 cookie 或会话。
非常感谢!
作为我正在做的一个小项目的一部分,我将永远不会真正投入“生产”,我需要能够登录到一个非常安全的网站并检索 html。
我一直在研究使用 apache commons HTTPCLient 来做到这一点。但是我只是想确保它甚至是可能的,因为这个网站非常安全并且可能有 sso 登录方法?如果可能,最好的方法是什么?登录后,我需要能够浏览大约三个页面,因此需要以某种方式存储 cookie 或会话。
非常感谢!
是的,它可以使用 apache http 组件来做到这一点,但是对于与复杂网站的交互,没有什么(我知道)能胜过HtmlUnit。要使用 httpcomponents,您需要“编写”整个 http 请求序列,如果中间的任何内容依赖于动态内容/javascript,您就会遇到问题。
另一方面,HtmlUnit 是一个几乎完整的“盒子里的鲍泽”,您可以在更高级别上编写交互脚本 - 单击它,填写这些值,提交等。
您需要 jsoup.jar 文件将响应发送到特定网站通过检查元素查看 gmail 表单数据以进行元素验证
package com.kowthal;
import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.CookieHandler;
import java.net.CookieManager;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import javax.net.ssl.HttpsURLConnection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class HttpUrlConnectionExample {
private List<String> cookies;
private HttpsURLConnection conn;
private final String USER_AGENT = "Mozilla/5.0";
public static void main(String[] args) throws Exception {
String url = "https://accounts.google.com/ServiceLoginAuth";
String gmail = "https://mail.google.com/mail/";
HttpUrlConnectionExample http = new HttpUrlConnectionExample();
// make sure cookies is turn on
CookieHandler.setDefault(new CookieManager());
// 1. Send a "GET" request, so that you can extract the form's data.
String page = http.GetPageContent(url);
String postParams = http.getFormParams(page, "username@gmail.com", "password");
// 2. Construct above post's content and then send a POST request for
// authentication
http.sendPost(url, postParams);
// 3. success then go to gmail.
String result = http.GetPageContent(gmail);
System.out.println(result);
}
private void sendPost(String url, String postParams) throws Exception {
URL obj = new URL(url);
conn = (HttpsURLConnection) obj.openConnection();
// Acts like a browser
conn.setUseCaches(false);
conn.setRequestMethod("POST");
conn.setRequestProperty("Host", "accounts.google.com");
conn.setRequestProperty("User-Agent", USER_AGENT);
conn.setRequestProperty("Accept",
"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
conn.setRequestProperty("Accept-Language", "en-US,en;q=0.5");
for (String cookie : this.cookies) {
conn.addRequestProperty("Cookie", cookie.split(";", 1)[0]);
}
conn.setRequestProperty("Connection", "keep-alive");
conn.setRequestProperty("Referer", "https://accounts.google.com/ServiceLoginAuth");
conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
conn.setRequestProperty("Content-Length", Integer.toString(postParams.length()));
conn.setDoOutput(true);
conn.setDoInput(true);
// Send post request
DataOutputStream wr = new DataOutputStream(conn.getOutputStream());
wr.writeBytes(postParams);
wr.flush();
wr.close();
int responseCode = conn.getResponseCode();
System.out.println("\nSending 'POST' request to URL : " + url);
System.out.println("Post parameters : " + postParams);
System.out.println("Response Code : " + responseCode);
BufferedReader in =
new BufferedReader(new InputStreamReader(conn.getInputStream()));
String inputLine;
StringBuffer response = new StringBuffer();
while ((inputLine = in.readLine()) != null) {
response.append(inputLine);
}
in.close();
// System.out.println(response.toString());
}
private String GetPageContent(String url) throws Exception {
URL obj = new URL(url);
conn = (HttpsURLConnection) obj.openConnection();
// default is GET
conn.setRequestMethod("GET");
conn.setUseCaches(false);
// act like a browser
conn.setRequestProperty("User-Agent", USER_AGENT);
conn.setRequestProperty("Accept",
"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
conn.setRequestProperty("Accept-Language", "en-US,en;q=0.5");
if (cookies != null) {
for (String cookie : this.cookies) {
conn.addRequestProperty("Cookie", cookie.split(";", 1)[0]);
}
}
int responseCode = conn.getResponseCode();
System.out.println("\nSending 'GET' request to URL : " + url);
System.out.println("Response Code : " + responseCode);
BufferedReader in =
new BufferedReader(new InputStreamReader(conn.getInputStream()));
String inputLine;
StringBuffer response = new StringBuffer();
while ((inputLine = in.readLine()) != null) {
response.append(inputLine);
}
in.close();
// Get the response cookies
setCookies(conn.getHeaderFields().get("Set-Cookie"));
return response.toString();
}
public String getFormParams(String html, String username, String password)
throws UnsupportedEncodingException {
System.out.println("Extracting form's data...");
Document doc = Jsoup.parse(html);
// Google form id
Element loginform = doc.getElementById("gaia_loginform");
Elements inputElements = loginform.getElementsByTag("input");
List<String> paramList = new ArrayList<String>();
for (Element inputElement : inputElements) {
String key = inputElement.attr("name");
String value = inputElement.attr("value");
if (key.equals("Email"))
value = username;
else if (key.equals("Passwd"))
value = password;
paramList.add(key + "=" + URLEncoder.encode(value, "UTF-8"));
}
// build parameters list
StringBuilder result = new StringBuilder();
for (String param : paramList) {
if (result.length() == 0) {
result.append(param);
} else {
result.append("&" + param);
}
}
return result.toString();
}
public List<String> getCookies() {
return cookies;
}
public void setCookies(List<String> cookies) {
this.cookies = cookies;
}
}