/**
 * Helper that switches off HTTPS verification for {@link HttpsURLConnection}.
 *
 * <p><strong>Security warning:</strong> once {@link #doFiltra()} has run, the default
 * socket factory is built from a trust manager whose checks never reject a certificate,
 * and the default hostname verifier accepts every host name. Connections made through
 * those defaults are therefore open to man-in-the-middle attacks. Use only for tests or
 * throw-away tooling, never for production traffic.
 *
 * @author edwin
 */
public class FiltratHttpsUtils {

    /** Hostname verifier that prints both host names and then accepts the connection. */
    static HostnameVerifier hv = new HostnameVerifier() {
        @Override
        public boolean verify(String urlHostName, SSLSession session) {
            System.out.println("Warning: URL Host: " + urlHostName + " vs. "+ session.getPeerHost());
            return true;
        }
    };

    /**
     * Builds an SSL context backed solely by {@link miTM} and installs its socket
     * factory as the default for {@code HttpsURLConnection}.
     *
     * @throws Exception if the SSL context cannot be obtained or initialised
     */
    private static void trustAllHttpsCertificates() throws Exception {
        javax.net.ssl.TrustManager[] trustAllCerts = {new miTM()};
        // "TLS" is the current standard protocol name; "SSL" is the legacy one.
        javax.net.ssl.SSLContext sc = javax.net.ssl.SSLContext.getInstance("TLS");
        sc.init(null, trustAllCerts, null);
        javax.net.ssl.HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
    }

    /** Trust manager whose checks never throw, i.e. every certificate chain is accepted. */
    static class miTM implements javax.net.ssl.TrustManager, javax.net.ssl.X509TrustManager {

        /**
         * Returns the accepted issuers.
         *
         * @return an empty array; the interface contract requires a non-null result
         */
        @Override
        public java.security.cert.X509Certificate[] getAcceptedIssuers() {
            return new java.security.cert.X509Certificate[0];
        }

        /**
         * Legacy-style helper, not part of {@code X509TrustManager}.
         *
         * @param certs ignored
         * @return always {@code true}
         */
        public boolean isServerTrusted(java.security.cert.X509Certificate[] certs) {
            return true;
        }

        /**
         * Legacy-style helper, not part of {@code X509TrustManager}.
         *
         * @param certs ignored
         * @return always {@code true}
         */
        public boolean isClientTrusted(java.security.cert.X509Certificate[] certs) {
            return true;
        }

        /** Accepts any server certificate chain: performs no check and never throws. */
        @Override
        public void checkServerTrusted(java.security.cert.X509Certificate[] certs, String authType)
                throws java.security.cert.CertificateException {
            // Intentionally empty: no validation is performed.
        }

        /** Accepts any client certificate chain: performs no check and never throws. */
        @Override
        public void checkClientTrusted(java.security.cert.X509Certificate[] certs, String authType)
                throws java.security.cert.CertificateException {
            // Intentionally empty: no validation is performed.
        }
    }

    /**
     * Installs the trust-all socket factory and the accept-all hostname verifier as the
     * defaults for {@code HttpsURLConnection}. Call this before issuing requests.
     *
     * @throws Exception if the SSL context cannot be created or initialised
     */
    public static void doFiltra() throws Exception {
        trustAllHttpsCertificates();
        HttpsURLConnection.setDefaultHostnameVerifier(hv);
    }
}
只需要在请求前调用:FiltratHttpsUtils.doFiltra() 即可。
package com.dao;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.Connection.Method;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.dao.HttpsUrlValidator.FiltratHttpsUtils;
public class PhotoVerification {
public static void main(String[] args) throws Exception {
try {
FiltratHttpsUtils .doFiltra();
String url="https://inv-veri.chinatax.gov.cn/index.html";
//HttpsUrlValidator.retrieveResponseFromServer(url);
//doc = Jsoup.connect(url).header("User-Agent",rand_agents).timeout(10000).get();
//body = doc.getElementsByTag("body").html();
//log.info(e.getMessage());
//
//通过Jsoup的Connect方法获取document类
Document document = Jsoup.connect(url).get();
//System.out.println(document.title());//控制台打印网页标题
//根据class获取Elements类
//Element timeElement = document.getElementById("yzminfo");//文字
//Element titleElement = document.getElementById("yzm_img");//图片的base64码
//
Element timeElement = document.getElementById("yzminfo");//文字
Element titleElement = document.getElementById("imgarea");//图片的base64码
System.out.println(timeElement);
System.out.println("----------------------");
System.out.println(titleElement);
//指定文件名及路径
//File file = new File("D:\\title.txt");
//File contentFile = new File("D:\\content.txt");
//if(!file.exists()){
//file.createNewFile();
//}
//if(!contentFile.exists()){
//contentFile.createNewFile();
//}
写入本地
//PrintWriter pw = new PrintWriter("D:\\title.txt");
//PrintWriter contentPw = new PrintWriter("D:\\content.txt");
//pw.close();
//
//
String href = titleElement.get(i).attr("href");//取出新闻标题的url
//String schoolHref = "http://www.haie.edu.cn/";
//因为取出来的新闻url不规范,直接访问不了,需要将其拼接成正常的网页url
//String contentHref = schoolHref+href;
//重复第一步的内容,根据URL取Documet类
//Document contentDoc = Jsoup.connect(contentHref).get();
//继续观察网页,取出新闻详细页面的文字。
//Elements contentElement = contentDoc.getElementsByClass("contentstyle125127");
//Elements authorElement = contentDoc.getElementsByClass("authorstyle125127");
//String content = contentElement.text();
//String author = authorElement.text();
//打印出作者,新闻详细内容
//contentPw.println(author);
//contentPw.println(content);
//contentPw.println("---------------------------");
//
//contentPw.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}