import com.google.common.collect.Lists;
import com.meiyunji.spider.contant.HttpConnectContant;
import com.meiyunji.spider.crawl.response.HeaderVo;
import com.meiyunji.spider.crawl.response.ResponseObj;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpStatus;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.net.ssl.*;
import java.io.*;
import java.net.*;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.security.SecureRandom;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.List;
import java.util.Map;
import java.util.zip.GZIPInputStream;
/**
 * Static helpers that issue HTTP/HTTPS GET and POST requests through {@link HttpURLConnection}
 * and wrap the outcome in a {@link ResponseObj}.
 * <p>
 * I/O failures are reported rather than thrown: a malformed/unsupported URL or an
 * {@link IOException} during the exchange produces a {@code ResponseObj} whose status code is
 * {@code -1} and whose reason phrase describes the failure.
 * <p>
 * SECURITY: HTTPS requests made through this class skip certificate-chain and host-name
 * verification (see {@link #disableCertificateChecks(HttpsURLConnection)}).
 *
 * @author ldj
 * @date 2018/6/25.
 */
public class HttpConnectUtil {

    private static final Logger logger = LoggerFactory.getLogger(HttpConnectUtil.class);

    /** Reason phrase used when the request URL cannot be parsed or handled. */
    private static final String BAD_URL_REASON = "Crawl Error : URL DealHtml error, the url is not support request.";

    /** Reason phrase used when the HTTP exchange itself fails (kept unchanged for http and https). */
    private static final String IO_ERROR_REASON = "Crawl Error : HttpsUrlConnection IOException .";

    /** Content-Encoding token for gzip-compressed response bodies. */
    private static final String GZIP = "gzip";

    /** Size of the char buffer used while draining a response body. */
    private static final int BUFFER_SIZE = 1024;

    /** Trust manager that accepts every client and server certificate without inspection. */
    private static final TrustManager[] TRUST_ALL_CERTS = new TrustManager[]{
        new X509TrustManager() {
            @Override
            public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException {
                // intentionally accepts everything
            }

            @Override
            public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException {
                // intentionally accepts everything
            }

            @Override
            public X509Certificate[] getAcceptedIssuers() {
                return new X509Certificate[]{};
            }
        }
    };

    /** Host-name verifier that accepts every host. */
    private static final HostnameVerifier TRUST_ANY_HOSTNAME = (hostname, session) -> true;

    /**
     * Socket factory backed by {@link #TRUST_ALL_CERTS}; built once instead of once per request.
     * {@code null} when the TLS context could not be created, in which case connections keep
     * their default factory. Must be declared after {@link #TRUST_ALL_CERTS} and {@link #logger}
     * because static initializers run in textual order.
     */
    private static final SSLSocketFactory TRUST_ALL_SOCKET_FACTORY = createTrustAllSocketFactory();

    static {
        // JVM-wide fallback timeouts so a stalled network cannot hang the program forever.
        // The connect-timeout key was previously misspelled ("defaultConnectionTimeOut").
        System.setProperty("sun.net.client.defaultConnectTimeout", String.valueOf(HttpConnectContant.CONNECT_TIME_OUT_TIME));
        System.setProperty("sun.net.client.defaultReadTimeout", String.valueOf(HttpConnectContant.CONNECT_TIME_OUT_TIME));
    }

    // ------------------------------------------------------------------ GET

    /**
     * Sends a GET request over a direct (non-proxied) connection.
     *
     * @param url        request URL; surrounding whitespace is trimmed
     * @param ip         recorded on the returned {@code ResponseObj}; not used for routing
     *                   because no port is supplied
     * @param headers    request headers laid over the defaults; {@code null} or empty means
     *                   defaults only
     * @param parameters query parameters appended to {@code url} as-is (no URL-encoding is
     *                   applied, callers must pass encoded keys/values); may be {@code null}
     * @return the response, or a result with status code {@code -1} on failure
     */
    public static ResponseObj urlGet(String url, String ip, Map<String, String> headers, Map<String, String> parameters) {
        return urlBaseGet(url, ip, null, headers, parameters);
    }

    /**
     * Sends a GET request through an HTTP proxy.
     *
     * @param url        request URL; surrounding whitespace is trimmed
     * @param ip         proxy host; the proxy is only used when this is non-blank and
     *                   {@code port} is positive
     * @param port       proxy port
     * @param headers    request headers laid over the defaults; {@code null} or empty means
     *                   defaults only
     * @param parameters query parameters appended to {@code url} as-is (no URL-encoding);
     *                   may be {@code null}
     * @return the response, or a result with status code {@code -1} on failure
     */
    public static ResponseObj urlPorxyGet(String url, String ip, Integer port, Map<String, String> headers, Map<String, String> parameters) {
        return urlBaseGet(url, ip, port, headers, parameters);
    }

    /** Builds the final GET URL, runs the request and stamps ip/url on the result. */
    private static ResponseObj urlBaseGet(String url, String ip, Integer port, Map<String, String> headers, Map<String, String> parameters) {
        String finalUrl = url;
        ResponseObj responseObj;
        try {
            if (url == null) {
                throw new MalformedURLException("url is null");
            }
            finalUrl = appendQuery(url.trim(), parameters);
            responseObj = execute(new URL(finalUrl), ip, port, headers, HttpConnectContant.GET, null);
        } catch (IOException e) {
            logger.warn("Unusable request url: {}", finalUrl, e);
            responseObj = failure(BAD_URL_REASON);
        }
        return complete(responseObj, ip, finalUrl);
    }

    /**
     * Appends {@code parameters} to {@code url} as a query string. The first parameter is
     * introduced with {@code ?} unless the URL already contains one, later ones with {@code &}.
     */
    private static String appendQuery(String url, Map<String, String> parameters) {
        if (parameters == null || parameters.isEmpty()) {
            return url;
        }
        StringBuilder query = new StringBuilder(url);
        for (Map.Entry<String, String> parameter : parameters.entrySet()) {
            if (query.indexOf("?") < 0) {
                query.append('?');
            } else {
                char last = query.charAt(query.length() - 1);
                // a URL already ending in '?' or '&' needs no extra separator
                if (last != '?' && last != '&') {
                    query.append('&');
                }
            }
            query.append(parameter.getKey()).append('=').append(parameter.getValue());
        }
        return query.toString();
    }

    // ------------------------------------------------------------------ POST

    /**
     * Sends a POST request over a direct connection with the body serialized as JSON.
     *
     * @param url     request URL
     * @param ip      recorded on the returned {@code ResponseObj}
     * @param headers request headers laid over the defaults; may be {@code null}
     * @param bodyMap request body entries
     * @return the response, or a result with status code {@code -1} on failure
     */
    public static ResponseObj urlPost(String url, String ip, Map<String, String> headers, Map<String, String> bodyMap) {
        return urlAdvancedPost(url, ip, headers, bodyMap, HttpConnectContant.POST_BODY_TYPE_JSON);
    }

    /**
     * Sends a POST request through an HTTP proxy with the body serialized as JSON.
     *
     * @param url     request URL
     * @param ip      proxy host
     * @param port    proxy port (previously dropped, so the proxy was never used)
     * @param headers request headers laid over the defaults; may be {@code null}
     * @param bodyMap request body entries
     * @return the response, or a result with status code {@code -1} on failure
     */
    public static ResponseObj urlProxyPost(String url, String ip, Integer port, Map<String, String> headers, Map<String, String> bodyMap) {
        return urlBasePost(url, ip, port, headers, bodyMap, HttpConnectContant.POST_BODY_TYPE_JSON);
    }

    /**
     * Sends a POST request over a direct connection.
     *
     * @param url       request URL
     * @param ip        recorded on the returned {@code ResponseObj}
     * @param headers   request headers laid over the defaults; may be {@code null}
     * @param bodyMap   request body entries
     * @param submiType body format: form ({@code key=value&...}) or JSON
     * @return the response, or a result with status code {@code -1} on failure
     */
    public static ResponseObj urlAdvancedPost(String url, String ip, Map<String, String> headers, Map<String, String> bodyMap, String submiType) {
        return urlBasePost(url, ip, null, headers, bodyMap, submiType);
    }

    /**
     * Sends a POST request through an HTTP proxy.
     *
     * @param url        request URL
     * @param ip         proxy host
     * @param port       proxy port
     * @param headers    request headers laid over the defaults; may be {@code null}
     * @param bodyMap    request body entries
     * @param submitType body format: form ({@code key=value&...}) or JSON
     * @return the response, or a result with status code {@code -1} on failure
     */
    public static ResponseObj urlAdvancedProxyPost(String url, String ip, Integer port, Map<String, String> headers, Map<String, String> bodyMap, String submitType) {
        return urlBasePost(url, ip, port, headers, bodyMap, submitType);
    }

    /** Serializes the body, runs the POST and stamps ip/url on the result. */
    private static ResponseObj urlBasePost(String url, String ip, Integer port, Map<String, String> headers, Map<String, String> bodyMap, String submitType) {
        ResponseObj responseObj;
        try {
            if (url == null) {
                throw new MalformedURLException("url is null");
            }
            URL requestUrl = new URL(url);
            // explicit charset: the bytes on the wire must not depend on the JVM default
            byte[] body = getPostBody(bodyMap, submitType).getBytes(StandardCharsets.UTF_8);
            responseObj = execute(requestUrl, ip, port, headers, HttpConnectContant.POST, body);
        } catch (IOException e) {
            logger.warn("Unusable request url: {}", url, e);
            responseObj = failure(BAD_URL_REASON);
        }
        return complete(responseObj, ip, url);
    }

    /**
     * Builds the POST body. A non-empty map with the form submit type becomes
     * {@code k1=v1&k2=v2} (values are written as-is, without URL-encoding); every other
     * combination is handed to {@code JsonUtil.objectToJson}. Never returns {@code null}.
     */
    private static String getPostBody(Map<String, String> bodyMap, String submitType) {
        String body;
        if (bodyMap != null && !bodyMap.isEmpty() && HttpConnectContant.POST_BODY_TYPE_FORMAT.equals(submitType)) {
            // TODO: array-typed values are not representable in this form encoding
            StringBuilder form = new StringBuilder();
            for (Map.Entry<String, String> field : bodyMap.entrySet()) {
                if (form.length() > 0) {
                    form.append('&');
                }
                form.append(field.getKey()).append('=').append(field.getValue());
            }
            body = form.toString();
        } else {
            body = JsonUtil.objectToJson(bodyMap);
        }
        return StringUtils.isBlank(body) ? "" : body;
    }

    // ------------------------------------------------------------------ shared request execution

    /**
     * Performs one HTTP or HTTPS exchange and converts it into a {@code ResponseObj}.
     *
     * @param requestUrl target URL
     * @param ip         proxy host (see {@link #getUrlConnection(URL, String, Integer)})
     * @param port       proxy port, or {@code null} for a direct connection
     * @param headers    caller headers; {@code null} or empty means defaults only
     * @param method     HTTP method name
     * @param body       bytes to send as the request body, or {@code null} for none
     * @return the populated response; status code {@code -1} when an {@link IOException} occurs
     */
    private static ResponseObj execute(URL requestUrl, String ip, Integer port, Map<String, String> headers, String method, byte[] body) {
        ResponseObj responseObj = new ResponseObj();
        HttpURLConnection connection = null;
        try {
            URLConnection opened = getUrlConnection(requestUrl, ip, port);
            if (!(opened instanceof HttpURLConnection)) {
                // e.g. ftp:// or file:// - report as a failed request instead of a ClassCastException
                throw new IOException("Unsupported protocol: " + requestUrl.getProtocol());
            }
            connection = (HttpURLConnection) opened;
            connection.setRequestMethod(method);
            setDefaultProperties(connection);
            if (connection instanceof HttpsURLConnection) {
                disableCertificateChecks((HttpsURLConnection) connection);
            }
            if (headers == null || headers.isEmpty()) {
                setDefaultHeader(connection);
            } else {
                setCustomizedHeader(connection, headers);
            }
            // Read before connecting: URLConnection.getRequestProperty is documented to throw
            // IllegalStateException once the connection is open.
            String userAgent = connection.getRequestProperty("user-agent");

            if (body != null) {
                connection.setDoOutput(true);
                connection.setUseCaches(false);
                connection.setRequestProperty("Content-Length", String.valueOf(body.length));
                try (OutputStream out = connection.getOutputStream()) {
                    out.write(body);
                    out.flush();
                }
            } else {
                connection.connect();
            }

            int statusCode = connection.getResponseCode();
            // getErrorStream() only carries a body for error statuses and may still be null;
            // every non-error status is read from the regular input stream.
            InputStream raw = statusCode < HttpStatus.SC_BAD_REQUEST
                ? connection.getInputStream()
                : connection.getErrorStream();
            String content = readBody(raw, connection.getContentEncoding(), responseCharset(connection.getContentType()));

            List<HeaderVo> headerList = Lists.newArrayList();
            Map<String, List<String>> responseHeaderFields = connection.getHeaderFields();
            if (responseHeaderFields != null && !responseHeaderFields.isEmpty()) {
                responseHeaderFields.forEach((key, value) -> headerList.add(new HeaderVo(key, JsonUtil.objectToJson(value))));
            }

            responseObj.setStatusCode(statusCode);
            responseObj.setContent(content);
            responseObj.setResponseHeader(headerList);
            responseObj.setUserAgent(userAgent);
        } catch (IOException e) {
            logger.error("Request to {} failed", requestUrl, e);
            responseObj.setStatusCode(-1);
            responseObj.setReasonPhrase(IO_ERROR_REASON);
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
        return responseObj;
    }

    /**
     * Drains a response body into a string, transparently un-gzipping it when the
     * Content-Encoding says so. A {@code null} stream (no body available) yields {@code ""}.
     */
    private static String readBody(InputStream raw, String contentEncoding, Charset charset) throws IOException {
        if (raw == null) {
            return "";
        }
        InputStream decoded = raw;
        try {
            if (isGzip(contentEncoding)) {
                decoded = new GZIPInputStream(raw);
            }
            Reader reader = new InputStreamReader(decoded, charset);
            StringBuilder content = new StringBuilder();
            char[] buffer = new char[BUFFER_SIZE];
            int read;
            while ((read = reader.read(buffer)) != -1) {
                content.append(buffer, 0, read);
            }
            return content.toString();
        } finally {
            // closing the outermost stream also closes the wrapped raw stream
            try {
                decoded.close();
            } catch (IOException e) {
                logger.warn("Failed to close response stream", e);
            }
        }
    }

    /** Tells whether the Content-Encoding header value denotes a gzip-compressed body. */
    private static boolean isGzip(String contentEncoding) {
        if (contentEncoding == null) {
            return false;
        }
        String encoding = contentEncoding.trim();
        return GZIP.equalsIgnoreCase(encoding)
            || HttpConnectContant.RESPONSE_CODE_GZIP.equals(encoding.toLowerCase());
    }

    /**
     * Picks the charset for decoding the response: the {@code charset=} parameter of the
     * Content-Type header when present and supported, UTF-8 otherwise.
     */
    private static Charset responseCharset(String contentType) {
        final String marker = "charset=";
        if (contentType != null) {
            for (String part : contentType.split(";")) {
                String token = part.trim();
                if (token.regionMatches(true, 0, marker, 0, marker.length())) {
                    String name = token.substring(marker.length()).replace("\"", "").trim();
                    try {
                        return Charset.forName(name);
                    } catch (IllegalArgumentException e) {
                        // illegal or unsupported charset name: fall back to UTF-8
                        logger.debug("Unusable response charset '{}'", name);
                    }
                }
            }
        }
        return StandardCharsets.UTF_8;
    }

    /** Creates the result returned when a request could not be made at all. */
    private static ResponseObj failure(String reason) {
        ResponseObj responseObj = new ResponseObj();
        responseObj.setStatusCode(-1);
        responseObj.setReasonPhrase(reason);
        return responseObj;
    }

    /** Records ip and url on the result and logs the outcome. */
    private static ResponseObj complete(ResponseObj responseObj, String ip, String url) {
        responseObj.setIp(ip);
        responseObj.setUrl(url);
        logger.info("url: {}", url);
        logger.info("statusCode: {}", responseObj.getStatusCode());
        logger.info("message: {}", responseObj.getReasonPhrase());
        return responseObj;
    }

    // ------------------------------------------------------------------ headers and connection setup

    /** Applies the default headers first, then lets the caller-supplied ones override them. */
    private static void setCustomizedHeader(URLConnection urlConnection, Map<String, String> headerMap) {
        setDefaultHeader(urlConnection);
        for (Map.Entry<String, String> header : headerMap.entrySet()) {
            urlConnection.setRequestProperty(header.getKey(), header.getValue());
        }
    }

    /** Applies the browser-like default request headers. */
    private static void setDefaultHeader(URLConnection urlConnection) {
        urlConnection.setRequestProperty("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
        // NOTE(review): "deflate" and "br" are advertised but only gzip bodies are decoded by
        // this class; a server choosing either would produce unreadable content.
        urlConnection.setRequestProperty("accept-encoding", "gzip, deflate, br");
        urlConnection.setRequestProperty("accept-language", "en-US,en;q=0.9,fr-FR;q=0.8,fr-CA;q=0.7,fr;q=0.6,de;q=0.5,zh-CN;q=0.4,zh;q=0.3,en-AU;q=0.2");
        urlConnection.setRequestProperty("cache-control", "max-age=0");
        urlConnection.setRequestProperty("user-agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.79 Safari/537.36");
        urlConnection.setRequestProperty("connection", "keep-alive");
    }

    /**
     * Applies the per-connection defaults: input enabled plus connect and read timeouts, so a
     * stalled peer cannot block the calling thread indefinitely.
     */
    private static void setDefaultProperties(URLConnection urlConnection) {
        urlConnection.setDoInput(true);
        urlConnection.setConnectTimeout(HttpConnectContant.CONNECT_TIME_OUT_TIME);
        urlConnection.setReadTimeout(HttpConnectContant.CONNECT_TIME_OUT_TIME);
    }

    /**
     * Opens the connection object for {@code requestUrl}, routed through an HTTP proxy when
     * {@code ip} is non-blank and {@code port} is positive, direct otherwise.
     */
    private static URLConnection getUrlConnection(URL requestUrl, String ip, Integer port) throws IOException {
        boolean useProxy = StringUtils.isNotBlank(ip) && port != null && port > 0;
        if (!useProxy) {
            return requestUrl.openConnection();
        }
        Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(ip, port));
        return requestUrl.openConnection(proxy);
    }

    // ------------------------------------------------------------------ HTTPS verification bypass

    /**
     * Turns off certificate and host-name verification for one connection.
     * <p>
     * SECURITY: this accepts any certificate from any host, so the connection offers no
     * protection against man-in-the-middle interception.
     */
    private static void disableCertificateChecks(HttpsURLConnection connection) {
        if (TRUST_ALL_SOCKET_FACTORY != null) {
            connection.setSSLSocketFactory(TRUST_ALL_SOCKET_FACTORY);
        }
        connection.setHostnameVerifier(TRUST_ANY_HOSTNAME);
    }

    /**
     * Builds the socket factory that trusts every certificate.
     *
     * @return the factory, or {@code null} if the TLS context cannot be initialised
     */
    private static SSLSocketFactory createTrustAllSocketFactory() {
        try {
            SSLContext tls = SSLContext.getInstance("TLS");
            tls.init(null, TRUST_ALL_CERTS, new SecureRandom());
            return tls.getSocketFactory();
        } catch (NoSuchAlgorithmException | KeyManagementException e) {
            logger.error("Unable to create trust-all SSL socket factory; default validation stays active", e);
            return null;
        }
    }
}
HttpURLConnection使用
最新推荐文章于 2023-03-11 01:07:36 发布