场景:使用HttpClient技术爬取指定网站的数据。
一、构建HttpClient连接(支持Http和Https请求)
/**
 * Builds a pooled {@link CloseableHttpClient} able to issue both HTTP and HTTPS requests.
 *
 * <p>Connect, socket and pool-lease timeouts are all 120 seconds. Redirects are handled by
 * {@link LaxRedirectStrategy}, and cookies are kept in the enclosing {@code cookieStore} so
 * that every request sent through the returned client shares the same session cookies.
 *
 * <p>SECURITY: the HTTPS socket factory is built from an SSL context whose trust strategy
 * accepts every certificate chain. This disables certificate validation and must only be
 * used against targets where that risk is acceptable.
 *
 * <p>Each call creates a new connection pool owned by the returned client; closing the
 * client releases the pool.
 *
 * @return a newly built client backed by its own connection pool
 */
public CloseableHttpClient createHttpClient() {
    SocketConfig socketConfig = SocketConfig.custom()
            .setSoKeepAlive(false)
            .setSoLinger(1)
            .setSoReuseAddress(true)
            .setSoTimeout(120000) // socket read timeout
            .setTcpNoDelay(true)
            .build();

    RequestConfig config = RequestConfig.custom()
            .setConnectTimeout(120000)            // timeout for establishing the connection
            .setSocketTimeout(120000)             // timeout while exchanging data
            .setConnectionRequestTimeout(120000)  // timeout waiting for a pooled connection
            .setCookieSpec(CookieSpecs.STANDARD)
            .build();

    // Pick the HTTPS socket factory first so the registry is assembled in exactly one place.
    SSLConnectionSocketFactory httpsSocketFactory;
    try {
        // Trust-all SSL context: every certificate chain is accepted (see SECURITY note above).
        SSLContext sslContext = SSLContexts.custom().loadTrustMaterial(null, new TrustStrategy() {
            @Override
            public boolean isTrusted(X509Certificate[] chain, String authType) throws CertificateException {
                return true;
            }
        }).build();
        httpsSocketFactory = new SSLConnectionSocketFactory(sslContext);
    } catch (Exception ignored) {
        // Deliberate best-effort: if the trust-all context cannot be built, fall back to the
        // library's default HTTPS socket factory instead of failing client creation.
        httpsSocketFactory = SSLConnectionSocketFactory.getSocketFactory();
    }
    Registry<ConnectionSocketFactory> socketFactoryRegistry = RegistryBuilder.<ConnectionSocketFactory>create()
            .register("http", PlainConnectionSocketFactory.getSocketFactory())
            .register("https", httpsSocketFactory)
            .build();

    // Pooled connection manager: reuses connections to cut latency and allow concurrent requests.
    PoolingHttpClientConnectionManager syncConnectionManager =
            new PoolingHttpClientConnectionManager(socketFactoryRegistry);
    syncConnectionManager.setMaxTotal(1000);          // upper bound on connections in the pool
    syncConnectionManager.setDefaultMaxPerRoute(50);  // upper bound on connections per route
    syncConnectionManager.setDefaultSocketConfig(socketConfig);

    // The pool is local to this method, so it must NOT be marked as shared: a shared manager is
    // never shut down by CloseableHttpClient.close(), and nothing else holds a reference that
    // could shut it down, which would leak the pool on every call.
    return HttpClientBuilder.create()
            .setKeepAliveStrategy(new DefaultConnectionKeepAliveStrategy())
            .setConnectionManager(syncConnectionManager)
            .setRedirectStrategy(new LaxRedirectStrategy())
            .setDefaultRequestConfig(config)
            .setDefaultCookieStore(cookieStore) // keeps session cookies consistent across requests
            .build();
}
二、构建Http请求
一般就是HttpGet和HttpPost请求
HttpPost请求,处理如下:
// Obtain the client. Keeping a session alive requires reusing this same httpClient instance.
final CloseableHttpClient httpClient = createHttpClient();
HttpPost httpPost = new HttpPost("RequestUrl");
// Build the form parameters of the POST body.
// NOTE(review): both parameter names are empty strings — presumably placeholders; replace them
// with the field names the target endpoint expects.
List<NameValuePair> nvps = new ArrayList<>();
nvps.add(new BasicNameValuePair("", assetMap.get("id")));
nvps.add(new BasicNameValuePair("", assetMap.get("title")));
// Attach the form body (skip this when the request has no parameters). The charset is given
// explicitly because the single-argument constructor encodes with ISO-8859-1, which corrupts
// non-Latin values.
httpPost.setEntity(new UrlEncodedFormEntity(nvps, "UTF-8"));
// Send the request.
CloseableHttpResponse response = httpClient.execute(httpPost);
// Status code returned by the server.
int statusCode = response.getStatusLine().getStatusCode();
// Response body; UTF-8 is used only when the response declares no charset of its own.
String entityStr;
try {
    entityStr = EntityUtils.toString(response.getEntity(), "UTF-8");
} finally {
    // Always release the response so its connection goes back to the pool.
    response.close();
}
HttpPost请求的另一种方式:以JSON字符串作为请求正文,处理如下:
// Assemble the request payload.
Map<String, Object> param = new HashMap<>();
//... population of param is omitted here
String parameter = JacksonUtils.objToJsonStr(param);
HttpPost httpPost = new HttpPost(loginUrl); // log in to the system
// Build the request body. The charset is given explicitly because StringEntity(String)
// encodes with ISO-8859-1, which corrupts non-Latin characters in the JSON text.
StringEntity se = new StringEntity(parameter, "UTF-8");
// Declare the body as JSON using the registered media type and the charset it was encoded with.
se.setContentType("application/json;charset=UTF-8");
// Attach the body to the request.
httpPost.setEntity(se);
// Send the request.
CloseableHttpResponse responsePost = httpClient.execute(httpPost);
// Response body; UTF-8 is used only when the response declares no charset of its own.
String entityStr;
try {
    entityStr = EntityUtils.toString(responsePost.getEntity(), "UTF-8");
} finally {
    // Always release the response so its connection goes back to the pool.
    responsePost.close();
}
HttpGet请求处理如下:
HttpGet queryTgAccountListGet = new HttpGet("RequestUrl");
//发送请求 获取响应体
CloseableHttpResponse queryTgAccountResponse = httpClient.execute(queryTgAccountListGet);
String entityStr = EntityUtils.toString(queryTgAccountResponse.getEntity());
int statusCode = queryTgAccountResponse.getStatusLine().getStatusCode();
请求头设置处理:
// Set the Connection request header to keep-alive.
httpPost.setHeader("Connection", "keep-alive");
// Declare the request body as JSON encoded in UTF-8.
httpPost.setHeader("Content-Type", "application/json;charset=UTF-8");