WebMagic 在 Tomcat 上抓取 HTTPS 页面报错的问题

我的解决办法:在 process 方法中不再使用 page.getHtml(),而是改用自己通过 HttpsURLConnection 抓取到的 HTML。

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.security.SecureRandom;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;

import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import javax.net.ssl.SSLSocketFactory;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Html;

/**
 * Demo {@link PageProcessor} that bypasses the page WebMagic downloaded: {@link #process(Page)}
 * fetches {@code baseUrl} itself through {@link HttpsURLConnection} and extracts image URLs from
 * that HTML instead of from {@code page.getHtml()}.
 *
 * <p><strong>Security warning:</strong> {@link #getHtmlByHttps(String, String)} trusts every
 * server certificate and every host name, and installs those settings as JVM-wide defaults for
 * {@link HttpsURLConnection}. Do not use this against endpoints where authenticity matters.
 */
public class TestWebMagic implements PageProcessor{
	/** Upper bound for establishing the TCP/TLS connection, in milliseconds. */
	private static final int CONNECT_TIMEOUT_MILLIS = 10000;
	/** Upper bound for a single blocking read from the server, in milliseconds. */
	private static final int READ_TIMEOUT_MILLIS = 30000;

	/** HTTPS URL that is actually fetched and parsed; assigned in {@link #main(String[])}. */
	private static String baseUrl;

	/**
	 * Fetches {@code baseUrl} over HTTPS and stores the matching image URLs under the
	 * {@code "imgs"} field of the page. The content of {@code page} itself is not read.
	 *
	 * @param page the page handed over by WebMagic; used only as the sink for results
	 */
	@Override
	public void process(Page page) {
		// Fetch the HTML over HTTPS ourselves instead of using page.getHtml().
		String content = getHtmlByHttps(baseUrl, "UTF-8");
		if (content == null) {
			// The fetch failed (the cause was already printed); there is nothing to extract,
			// so mark the page as skipped instead of parsing a null document.
			page.setSkip(true);
			return;
		}
		// Kept local: a shared static field would be overwritten by concurrent spider threads.
		Html html = new Html(content);
		System.out.println(html);
		page.putField("imgs", html.$("img", "src-medium").regex(".*800x800.jpg.*").all());
	}

	/**
	 * @return a site configuration with 3 retries and a 1000 ms sleep time
	 */
	@Override
	public Site getSite() {
		return Site.me().setRetryTimes(3).setSleepTime(1000);
	}

	/**
	 * Downloads the document at {@code u} with an HTTPS GET and returns its body as text.
	 *
	 * <p>Certificate and host-name checks are disabled (see the class-level warning). Lines of
	 * the response are joined with {@code '\n'} so that markup split across lines by the server
	 * is not fused together.
	 *
	 * @param u        the HTTPS URL to fetch
	 * @param encoding the charset name used to decode the response body, e.g. {@code "UTF-8"}
	 * @return the response body, or {@code null} if anything went wrong (the exception is
	 *         printed to standard error)
	 */
	public static String getHtmlByHttps(String u, String encoding){
		HttpsURLConnection conn = null;
		BufferedReader br = null;
		try {
			SSLContext sc = SSLContext.getInstance("SSL", "SunJSSE");
			sc.init(null, new TrustManager[] { new X509TrustManager() {
				@Override
				public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException {
					// Intentionally empty: every client certificate is accepted.
				}

				@Override
				public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException {
					// Intentionally empty: every server certificate is accepted.
				}

				@Override
				public X509Certificate[] getAcceptedIssuers() {
					// The X509TrustManager contract requires a non-null (possibly empty) array.
					return new X509Certificate[0];
				}
			} }, new SecureRandom());
			SSLSocketFactory ssf = sc.getSocketFactory();

			// NOTE(review): these two calls change the defaults for EVERY HttpsURLConnection in
			// the JVM, not just this request. Kept because the host-name check of the connection
			// below relies on the default verifier; consider conn.setHostnameVerifier instead.
			HttpsURLConnection.setDefaultSSLSocketFactory(ssf);
			HttpsURLConnection.setDefaultHostnameVerifier(new HostnameVerifier() {
				@Override
				public boolean verify(String arg0, SSLSession arg1) {
					return true;
				}
			});

			// NOTE(review): sun.net.www.protocol.https.Handler is a JDK-internal class;
			// presumably it is passed explicitly so the cast below succeeds even when the
			// container installs its own https handler - confirm before changing.
			URL url = new URL(null, u, new sun.net.www.protocol.https.Handler());
			conn = (HttpsURLConnection) url.openConnection();
			conn.setSSLSocketFactory(ssf);
			// Without timeouts an unresponsive server would block the calling thread forever.
			conn.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
			conn.setReadTimeout(READ_TIMEOUT_MILLIS);
			conn.setDoInput(true);
			conn.setUseCaches(false);
			conn.setRequestMethod("GET");
			conn.connect();

			br = new BufferedReader(new InputStreamReader(conn.getInputStream(), encoding));
			StringBuilder sb = new StringBuilder();
			String line;
			while ((line = br.readLine()) != null) {
				// readLine() strips the terminator; put one back so tokens on adjacent
				// lines stay separated.
				sb.append(line).append('\n');
			}
			return sb.toString();
		} catch (Exception e) {
			// Best effort: report the failure and signal it to the caller with null.
			e.printStackTrace();
			return null;
		} finally {
			if (br != null) {
				try {
					br.close();
				} catch (java.io.IOException ignored) {
					// Nothing useful can be done if closing the reader fails.
				}
			}
			if (conn != null) {
				conn.disconnect();
			}
		}
	}

	/**
	 * Sets the HTTPS target and runs a spider whose only job is to trigger
	 * {@link #process(Page)} once.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		baseUrl = "https://product.suning.com/0070137013/149868717.html";
		Spider.create(new TestWebMagic())
				// Any URL that WebMagic can download over plain HTTP will do here.
				.addUrl("http://fanyi.baidu.com")
				// Start the crawler.
				.run();
	}

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值