HTTP的GET和POST请求本身是无状态的,所以我们需要把会话信息(Cookie)保存下来,之后每次请求都带着这些Cookie去访问网站,就好像用浏览器访问一样,这样就可以爬取登录后的数据
HttpClient工具类所用到的jar包
其实这些jar包真是很蛋疼:HttpClient比较重要,而且其中有些类和Java自带的java.net包中的类有些重复,容易搞混,所以我就把用到的jar都贴出来了
commons-codec-1.9.jar
commons-logging-1.2.jar
fluent-hc-4.5.3.jar
httpclient-cache-4.5.3.jar
httpclient-win-4.5.3.jar
httpcore-4.4.6.jar
httpmime-4.5.3.jar
jna-4.1.0.jar
jna-platform-4.1.0.jar
httpclient-4.5.3.jar
log4j-1.2.17.jar
jsoup-1.7.3.jar
项目地址
https://gitee.com/yellowcong/utils
maven依赖
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven build descriptor for the utils-httpclient artifact, packaged as a jar. -->
<modelVersion>4.0.0</modelVersion>
<groupId>yellowcong.com</groupId>
<artifactId>utils-httpclient</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>
<name>utils-httpclient</name>
<url>http://maven.apache.org</url>
<!-- Source encoding and the dependency versions referenced by the dependencies below. -->
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<httpclient.version>4.5.3</httpclient.version>
<log4j.version>1.2.17</log4j.version>
<commons-logging.version>1.2</commons-logging.version>
<jsoup.version>1.7.3</jsoup.version>
</properties>
<dependencies>
<!-- Apache HttpClient: the HTTP client library itself -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>${httpclient.version}</version>
</dependency>
<!-- Logging: log4j plus commons-logging -->
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>${log4j.version}</version>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>${commons-logging.version}</version>
</dependency>
<!-- HTML parsing: jsoup -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>${jsoup.version}</version>
</dependency>
<!-- Testing: JUnit, test scope only -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>
工具类
发送请求的时候,我们需要设定并配置编码,不然获取到的就是乱码,那就蛋疼了。说实话,我们也可以先获取第一个页面,从html里读取它声明的编码,然后自动设置编码。
package com.yellowcong.http.test;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.http.HeaderIterator;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.NameValuePair;
import org.apache.http.ParseException;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.CookieStore;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.cookie.Cookie;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultConnectionKeepAliveStrategy;
import org.apache.http.impl.client.DefaultRedirectStrategy;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.cookie.BasicClientCookie;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
 * Static convenience wrapper around one shared Apache {@link CloseableHttpClient}.
 *
 * <p>All requests go through the same client, {@link HttpClientContext} and
 * {@link CookieStore}, so cookies set by one response (for example a login)
 * are sent with every later request.
 *
 * <p>The request helpers are best-effort: they return {@code null} instead of
 * throwing when the request fails, when the final status is a redirect, or
 * when the response has no body.
 *
 * <p>NOTE(review): the context and cookie store are shared mutable statics and
 * nothing here synchronizes access to them; confirm the intended threading
 * model before calling this class from several threads.
 */
public class HttpClient {
    private static final Logger LOG = LogManager.getLogger(HttpClient.class);

    /**
     * Fallback charset, used to encode form parameters and to decode response
     * bodies whose Content-Type header does not declare a charset.
     */
    private static final String DEFALUT_ENCODE = "GB2312";

    /** Shared client used for every request. */
    public static CloseableHttpClient httpClient = null;
    /** Shared execution context passed to every request. */
    public static HttpClientContext context = null;
    /** Cookie store backing the shared client and context. */
    public static CookieStore cookieStore = null;
    /** Default timeouts applied to every request. */
    public static RequestConfig requestConfig = null;

    static {
        init();
    }

    /**
     * Builds the shared context, cookie store, request configuration and client.
     */
    private static void init() {
        context = HttpClientContext.create();
        cookieStore = new BasicCookieStore();
        // Bind the store to the context right away. Otherwise the context has no
        // cookie store until the first request has been executed, and the cookie
        // helpers below would see null.
        context.setCookieStore(cookieStore);
        // Timeouts in milliseconds: 120 s to connect, 60 s waiting for data on the
        // socket, 60 s waiting for a connection from the connection manager.
        requestConfig = RequestConfig.custom().setConnectTimeout(120000).setSocketTimeout(60000)
                .setConnectionRequestTimeout(60000).build();
        // Default keep-alive and redirect handling, with cookies kept in cookieStore.
        httpClient = HttpClientBuilder.create().setKeepAliveStrategy(new DefaultConnectionKeepAliveStrategy())
                .setRedirectStrategy(new DefaultRedirectStrategy()).setDefaultRequestConfig(requestConfig)
                .setDefaultCookieStore(cookieStore).build();
    }

    /**
     * Sends a GET request.
     *
     * @param url the address to request
     * @return the response body as text, or {@code null} if the request failed,
     *         the final status was a redirect, or there was no body
     */
    public static String get(String url) {
        HttpGet httpget = new HttpGet(url);
        CloseableHttpResponse response = null;
        try {
            response = httpClient.execute(httpget, context);
            return copyResponse2Str(response);
        } catch (Exception e) {
            // Best-effort API: report the cause and fall through to return null.
            LOG.warn("请求失败\t" + url, e);
        } finally {
            closeQuietly(response);
        }
        return null;
    }

    /**
     * Converts a response into its body text.
     *
     * <p>The body is decoded with the charset declared by the response, falling
     * back to {@link #DEFALUT_ENCODE} when none is declared. The text is returned
     * as received, including line breaks.
     *
     * @param response the response to read
     * @return the body text, or {@code null} for redirect statuses (301, 302,
     *         303, 307), a missing entity, or a read failure
     */
    private static String copyResponse2Str(CloseableHttpResponse response) {
        try {
            int code = response.getStatusLine().getStatusCode();
            // A redirect status that was not followed carries no useful body.
            if (isRedirect(code)) {
                return null;
            }
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                // For example 204 No Content.
                return null;
            }
            return EntityUtils.toString(entity, DEFALUT_ENCODE);
        } catch (Exception e) {
            LOG.warn("获取字符串失败", e);
        }
        return null;
    }

    /**
     * Tells whether a status code is one of the redirect codes this class
     * treats as "no content".
     *
     * @param code the HTTP status code
     * @return {@code true} for 301, 302, 303 and 307
     */
    private static boolean isRedirect(int code) {
        return code == HttpStatus.SC_MOVED_TEMPORARILY
                || code == HttpStatus.SC_MOVED_PERMANENTLY
                || code == HttpStatus.SC_SEE_OTHER
                || code == HttpStatus.SC_TEMPORARY_REDIRECT;
    }

    /**
     * Closes a response, logging instead of propagating any failure.
     *
     * @param response the response to close, may be {@code null}
     */
    private static void closeQuietly(CloseableHttpResponse response) {
        if (response == null) {
            return;
        }
        try {
            response.close();
        } catch (IOException e) {
            LOG.debug("Failed to close response", e);
        }
    }

    /**
     * Sends a POST request without parameters.
     *
     * @param url the address to request
     * @return the response body as text, or {@code null} on failure
     */
    public static String post(String url) {
        return post(url, null);
    }

    /**
     * Sends a POST request with form parameters.
     *
     * @param url the address to request
     * @param parameters form fields to send, may be {@code null} or empty
     * @return the response body as text, or {@code null} if the request failed,
     *         the final status was a redirect, or there was no body
     */
    public static String post(String url, Map<String, Object> parameters) {
        HttpPost httpPost = new HttpPost(url);
        CloseableHttpResponse response = null;
        try {
            setRequestParamter(parameters, httpPost);
            response = httpClient.execute(httpPost, context);
            return copyResponse2Str(response);
        } catch (Exception e) {
            // Best-effort API: report the cause and fall through to return null.
            LOG.warn("请求失败\t" + url, e);
        } finally {
            closeQuietly(response);
        }
        return null;
    }

    /**
     * Attaches the given parameters to a POST request as a URL-encoded form
     * body, encoded with {@link #DEFALUT_ENCODE}. Does nothing when the map is
     * {@code null} or empty.
     *
     * @param parameters form fields; a {@code null} value is sent as a name
     *        without a value
     * @param httpPost the request that receives the form body
     * @throws UnsupportedEncodingException if the encoding is not supported
     */
    private static void setRequestParamter(Map<String, Object> parameters, HttpPost httpPost)
            throws UnsupportedEncodingException {
        if (parameters == null || parameters.isEmpty()) {
            return;
        }
        List<NameValuePair> nvps = new ArrayList<NameValuePair>();
        for (Map.Entry<String, Object> entry : parameters.entrySet()) {
            Object value = entry.getValue();
            // Guard against null values instead of failing the whole request.
            nvps.add(new BasicNameValuePair(entry.getKey(), value == null ? null : value.toString()));
        }
        httpPost.setEntity(new UrlEncodedFormEntity(nvps, DEFALUT_ENCODE));
    }

    /**
     * Splits a query string such as {@code age=7&name=8} into name/value pairs.
     *
     * <p>Each segment is split at its first {@code '='}. A segment without
     * {@code '='} becomes a pair with an empty value, and empty segments are
     * skipped. Names and values are not URL-decoded.
     *
     * @param parameters the query string without the leading {@code '?'}, may
     *        be {@code null}
     * @return the parsed pairs, never {@code null}
     */
    @SuppressWarnings("unused")
    private static List<NameValuePair> toNameValuePairList(String parameters) {
        List<NameValuePair> nvps = new ArrayList<NameValuePair>();
        if (parameters == null || parameters.length() == 0) {
            return nvps;
        }
        for (String parm : parameters.split("&")) {
            if (parm.length() == 0) {
                continue;
            }
            int index = parm.indexOf('=');
            String key;
            String value;
            if (index < 0) {
                key = parm;
                value = "";
            } else {
                key = parm.substring(0, index);
                value = parm.substring(index + 1);
            }
            nvps.add(new BasicNameValuePair(key, value));
        }
        LOG.debug(nvps.toString());
        return nvps;
    }

    /**
     * Adds a cookie by hand to the shared cookie store.
     *
     * <p>This is an instance method although it only touches static state; it
     * is kept that way so existing callers keep working.
     *
     * @param name the cookie name
     * @param value the cookie value
     * @param domain the domain the cookie applies to
     * @param path the path the cookie applies to
     */
    public void addCookie(String name, String value, String domain, String path) {
        BasicClientCookie cookie = new BasicClientCookie(name, value);
        cookie.setDomain(domain);
        cookie.setPath(path);
        currentCookieStore().addCookie(cookie);
    }

    /**
     * Prints the status line and headers of a response to standard output.
     *
     * @param httpResponse the response to print
     * @throws ParseException declared for backward compatibility
     * @throws IOException declared for backward compatibility
     */
    public static void printResponse(HttpResponse httpResponse) throws ParseException, IOException {
        System.out.println("status:" + httpResponse.getStatusLine());
        System.out.println("headers:");
        HeaderIterator iterator = httpResponse.headerIterator();
        while (iterator.hasNext()) {
            System.out.println("\t" + iterator.next());
        }
    }

    /**
     * Prints every cookie currently held to standard output.
     */
    public static void printCookies() {
        List<Cookie> cookies = currentCookieStore().getCookies();
        for (Cookie cookie : cookies) {
            System.out.println("key:" + cookie.getName() + "  value:" + cookie.getValue());
        }
    }

    /**
     * Checks whether a cookie with the given name is currently held.
     *
     * @param key the cookie name to look for
     * @return {@code true} if a cookie with that name exists
     */
    public static boolean checkCookie(String key) {
        for (Cookie cookie : currentCookieStore().getCookies()) {
            if (cookie.getName().equals(key)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the cookie store in effect. The store attached to the context is
     * preferred, and the public {@link #cookieStore} field is synced to it; if
     * the context has none, the field is returned unchanged.
     *
     * @return the cookie store to read from or write to
     */
    private static CookieStore currentCookieStore() {
        CookieStore store = context.getCookieStore();
        if (store != null) {
            cookieStore = store;
        }
        return cookieStore;
    }

    /**
     * Converts the entity of a response directly into text, decoded with the
     * charset declared by the response or {@link #DEFALUT_ENCODE} when none is
     * declared.
     *
     * @param httpResponse the response to read
     * @return the body text, or {@code null} if the response has no entity
     * @throws ParseException if the response headers cannot be parsed
     * @throws IOException if reading the body fails
     */
    public static String toString(CloseableHttpResponse httpResponse) throws ParseException, IOException {
        HttpEntity entity = httpResponse.getEntity();
        if (entity == null) {
            return null;
        }
        return EntityUtils.toString(entity, DEFALUT_ENCODE);
    }
}