利用 HttpClient 开源工具抓取网页源码并在控制台打印出来的工具类的编写
package com.pyc.search.crawler.node.tools;
import java.io.InputStream;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.params.BasicHttpParams;
import org.junit.Test;
/**
* @update yangluan
* 通过 地址 和cookie信息 直接把需要的网页源代码拿下来
*/
public class HttpClientContent {
public static String getHeadersByHttpClient(String link,String cookie) throws Exception {
CloseableHttpClient httpClient = HttpClients.createDefault();
// 请求链接
HttpGet request = new HttpGet(link);
/**
* 请求参数设置
*/
// String location = null;
// int code = 0;
// BasicHttpParams params = new BasicHttpParams();
// // 参数,