针对使用 Jsoup 解析 HTTPS 网页的问题,网上找到的下面这段源码在执行后并不能成功访问目标页面。
import java.net.MalformedURLException;
import java.net.URL;
import java.security.SecureRandom;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.LinkedHashMap;
import java.util.Map;
import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import javax.net.ssl.X509TrustManager;
import org.jsoup.Connection;
import org.jsoup.helper.HttpConnection;
/**
 * Small helper for fetching the response headers and the page title of a URL through jsoup.
 *
 * <p><strong>Security warning:</strong> {@link #trustEveryone()} replaces the JVM-wide
 * {@link HttpsURLConnection} defaults with a trust manager and a host-name verifier that accept
 * everything. After it has run, every {@code HttpsURLConnection} in the process that relies on
 * those defaults is open to man-in-the-middle attacks. Use it for diagnostics only.
 */
public class HTTPCommonUtil {

    /**
     * Installs an accept-all {@link X509TrustManager} and an accept-all {@link HostnameVerifier}
     * as the process-wide defaults of {@link HttpsURLConnection}.
     *
     * <p>The SSL context is built completely before either default is replaced, so a failure
     * while creating it does not leave the JVM with only one of the two defaults swapped.
     * Failures are reported on standard error and otherwise ignored (best effort).
     */
    public static void trustEveryone() {
        try {
            X509TrustManager acceptAll = new X509TrustManager() {
                @Override
                public void checkClientTrusted(X509Certificate[] chain, String authType)
                        throws CertificateException {
                    // Intentionally empty: every client certificate is accepted.
                }

                @Override
                public void checkServerTrusted(X509Certificate[] chain, String authType)
                        throws CertificateException {
                    // Intentionally empty: every server certificate is accepted.
                }

                @Override
                public X509Certificate[] getAcceptedIssuers() {
                    return new X509Certificate[0];
                }
            };

            SSLContext context = SSLContext.getInstance("TLS");
            context.init(null, new X509TrustManager[] { acceptAll }, new SecureRandom());

            HttpsURLConnection.setDefaultHostnameVerifier(new HostnameVerifier() {
                @Override
                public boolean verify(String hostname, SSLSession session) {
                    return true;
                }
            });
            HttpsURLConnection.setDefaultSSLSocketFactory(context.getSocketFactory());
        } catch (Exception e) {
            // Best effort: the caller continues with whatever defaults are currently installed.
            e.printStackTrace();
        }
    }

    /**
     * Performs a GET request against {@code url} and collects the response headers together
     * with the document title.
     *
     * @param url     the address to fetch
     * @param timeout the timeout handed to {@link Connection#timeout(int)}
     * @return a {@code Map<String, String>} holding the response headers plus an additional
     *         {@code "title"} entry with the parsed page title, or {@code null} when the request
     *         or the parsing failed (the exception is printed to standard error)
     */
    public static Object getHttpHeaders(URL url, int timeout) {
        try {
            trustEveryone();
            Connection conn = HttpConnection.connect(url);
            conn.timeout(timeout);
            conn.header("Accept-Encoding", "gzip,deflate,sdch");
            conn.header("Connection", "close");

            // get() already parses the body. Reuse that document instead of calling
            // response().parse() a second time: jsoup releases that stream the body refuse to
            // parse an already consumed response.
            String title = conn.get().title();

            // Copy the headers so the extra "title" entry never ends up in the response's own
            // header map.
            Map<String, String> result =
                    new LinkedHashMap<String, String>(conn.response().headers());
            result.put("title", title);
            return result;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Demo entry point: prints the headers and title of {@code https://www.icbc-axa.com}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            URL url = new URL("https", "www.icbc-axa.com", -1, "");
            System.out.println(getHttpHeaders(url, 10000));
        } catch (MalformedURLException e) {
            e.printStackTrace();
        }
    }
}
执行结果:{Content-Length=187, Connection=close, Pragma=no-cache, Cache-Control=no-cache, title=Request Rejected}
需要进一步寻找解决方案。通过 Java 自带的 HttpsURLConnection 可以实现 HTTPS 访问;实际上,上面这段代码中的 trustEveryone() 函数也是基于 HttpsURLConnection 实现的,只是尚未找到将其与 Jsoup 有效结合的办法,因此先改用 HtmlParser 来实现。