HttpClient
1.pom.xml依赖
<!-- 自动爬取 -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.12</version>
</dependency>
<!-- 日志 -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>2.0.0-alpha1</version>
<scope>test</scope>
</dependency>
2.日志
#log4j.properties
log4j.rootLogger=DEBUG,A1
log4j.logger.cn.itcast=DEBUG
log4j.appender.A1=org.apache.log4j.ConsoleAppender
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-d{yyyy-MM-dd HH:mm:ss,SSS} [%c]-[%p] %m%n
3.入门程序-Get
3.1无参
/**
 * Minimal HttpClient example: issues a parameterless GET request and prints
 * the response body when the server answers with status 200.
 */
public class CrawlerFirst {

    /**
     * Fetches {@code http://www.itcast.cn} and prints the body to standard output.
     *
     * @param args unused
     * @throws Exception if the request fails or the body cannot be read
     */
    public static void main(String[] args) throws Exception {
        // The address to visit, wrapped in a GET request object.
        String uri = "http://www.itcast.cn";
        HttpGet httpGet = new HttpGet(uri);

        // try-with-resources closes the response first and then the client,
        // even when reading the body throws.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            // Only parse the body when the status code is 200.
            if (response.getStatusLine().getStatusCode() == 200) {
                HttpEntity httpEntity = response.getEntity();
                // Decode the response body as a UTF-8 string.
                String content = EntityUtils.toString(httpEntity, "utf8");
                System.out.println(content);
            }
        }
    }
}
3.2带参数
/**
 * GET example with a query parameter: requests
 * {@code http://yun.itheima.com/search?keys=java} and prints the body and its length.
 */
public class HttpGetParamTest {

    /**
     * Builds the request URI, executes the GET request and prints the result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Base address: http://yun.itheima.com/search
        String uri = "http://yun.itheima.com/search";

        // Build the request up front. If the URI is invalid there is nothing to
        // send, so stop here instead of continuing with a null request.
        HttpGet httpGet;
        try {
            httpGet = new HttpGet(new URIBuilder(uri)
                    .setParameter("keys", "java")
                    .build());
        } catch (URISyntaxException e) {
            e.printStackTrace();
            return;
        }

        // try-with-resources closes the response and the client in every case,
        // including when execute() itself throws and no response exists.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            // Parse the response only on status 200.
            if (response.getStatusLine().getStatusCode() == 200) {
                String content = EntityUtils.toString(response.getEntity(), "utf8");
                System.out.println(content);
                System.out.println(content.length());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
4.入门程序-Post
4.1无参
/**
 * Minimal POST example: sends a POST request without a body and prints the
 * response body and its length when the status is 200.
 */
public class HttpPostTest {

    /**
     * Posts to {@code http://www.itcast.cn} and prints the result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String url = "http://www.itcast.cn";
        // Create the HttpPost object for the target address.
        HttpPost httpPost = new HttpPost(url);

        // try-with-resources closes the response and the client in every case,
        // including when execute() throws before a response is available.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpPost)) {
            // Parse the response only on status 200.
            if (response.getStatusLine().getStatusCode() == 200) {
                String content = EntityUtils.toString(response.getEntity(), "utf8");
                System.out.println(content);
                System.out.println(content.length());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
4.2带参数
/**
 * POST example with form parameters: submits {@code keys=java} as a
 * URL-encoded form to {@code http://yun.itheima.com/search} and prints the
 * response body and its length when the status is 200.
 */
public class HttpPostParamTest {

    /**
     * Builds the form request, executes it and prints the result.
     *
     * @param args unused
     * @throws UnsupportedEncodingException if the form charset name is not supported
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        String url = "http://yun.itheima.com/search";
        // Create the HttpPost object for the target address.
        HttpPost httpPost = new HttpPost(url);

        // Collect the form fields to submit.
        ArrayList<NameValuePair> params = new ArrayList<>();
        params.add(new BasicNameValuePair("keys", "java"));

        // Encode the fields as a form entity and attach it to the request.
        // This happens before the client is created, so a failure here
        // cannot leave an open client behind.
        UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(params, "utf8");
        httpPost.setEntity(formEntity);

        // try-with-resources closes the response and the client in every case,
        // including when execute() throws before a response is available.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpPost)) {
            // Parse the response only on status 200.
            if (response.getStatusLine().getStatusCode() == 200) {
                String content = EntityUtils.toString(response.getEntity(), "utf8");
                System.out.println(content);
                System.out.println(content.length());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
5.结果
6.连接池
如果每次请求都创建新的HttpClient,会有频繁创建和销毁的问题;这里可以参照数据库连接池,使用连接池来解决这个问题。
代码
/**
 * Connection-pool example: two GET requests share one
 * {@code PoolingHttpClientConnectionManager} instead of each using a
 * separately created default client.
 */
public class HttpClientPoolTest {

    /**
     * Configures the pool, runs two requests through it and shuts it down.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Create the connection pool manager.
        PoolingHttpClientConnectionManager poolingHttpClientConnectionManager = new PoolingHttpClientConnectionManager();
        try {
            // Maximum number of connections in the whole pool.
            poolingHttpClientConnectionManager.setMaxTotal(100);
            // Maximum number of simultaneous connections per host, so that the
            // pool is not used up entirely by requests to a single site.
            poolingHttpClientConnectionManager.setDefaultMaxPerRoute(10);

            // Issue the requests through the shared pool.
            doGet(poolingHttpClientConnectionManager);
            doGet(poolingHttpClientConnectionManager);
        } finally {
            // All requests are done: release the pooled connections.
            poolingHttpClientConnectionManager.shutdown();
        }
    }

    /**
     * Sends a GET request to {@code http://www.itcast.cn} using a client backed
     * by the given pool and prints the body and its length on status 200.
     *
     * @param poolingHttpClientConnectionManager the shared connection pool
     */
    private static void doGet(PoolingHttpClientConnectionManager poolingHttpClientConnectionManager) {
        // Build a client on top of the shared pool rather than a standalone one.
        // The client is deliberately not closed here: closing it would also shut
        // down the connection manager it was built with (see
        // HttpClientBuilder#setConnectionManagerShared), breaking later requests.
        CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(poolingHttpClientConnectionManager).build();
        HttpGet httpGet = new HttpGet("http://www.itcast.cn");

        // Closing the response hands the connection back to the pool.
        try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                String content = EntityUtils.toString(response.getEntity(), "utf8");
                System.out.println(content);
                System.out.println(content.length());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
7.设置请求参数
设置连接的超时时间等参数
/**
 * Request-configuration example: sets connection-related timeouts on a GET
 * request before executing it, then prints the body and its length on status 200.
 */
public class HttpConfigTest {

    /**
     * Configures timeouts, requests {@code http://www.itcast.cn} and prints the result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String url = "http://www.itcast.cn";
        // Create the HttpGet object for the target address.
        HttpGet httpGet = new HttpGet(url);

        // Per-request settings; all values are in milliseconds.
        RequestConfig requestConfig = RequestConfig.custom()
                .setConnectTimeout(1000)          // max time to establish the connection
                .setConnectionRequestTimeout(500) // max time to obtain a connection from the connection manager
                .setSocketTimeout(10 * 1000)      // socket timeout while waiting for data
                .build();
        // Attach the settings to the request.
        httpGet.setConfig(requestConfig);

        // try-with-resources closes the response and the client in every case,
        // including when execute() throws (e.g. on a timeout) and no response exists.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            // Parse the response only on status 200.
            if (response.getStatusLine().getStatusCode() == 200) {
                String content = EntityUtils.toString(response.getEntity(), "utf8");
                System.out.println(content);
                System.out.println(content.length());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}