HttpClient
HttpClient 是 Apache Jakarta Common 下的子项目,用来提供高效的、最新的、功能丰富的支持 HTTP 协议的客户端编程工具包,并且它支持 HTTP 协议最新的版本和建议。
主要提供以下功能:
(1)实现了所有 HTTP 的方法(GET,POST,PUT,HEAD 等)
(2)支持自动转向
(3)支持 HTTPS 协议
(4)支持代理服务器等
接下来我们就使用Java的HTTP协议客户端 HttpClient这个技术,来实现抓取网页数据。
首先,要想使用 HttpClient,就需要导入它相应的 jar 包。
我们先创建一个Maven项目,然后在pom.xml中加入如下配置:
<!-- httpClient -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.6</version>
</dependency>
实现http的Get方法
1.get请求不带参数
package com.yjj.httpClient.get;
import java.io.IOException;
import java.net.URISyntaxException;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
/**
 * Example: send a parameterless HTTP GET with Apache HttpClient and print the
 * response body.
 */
public class HttpClientGet2 {

    /**
     * Fetches {@code https://www.csdn.net} and, on a 200 response, prints the
     * body decoded as UTF-8 followed by its length in chars. Any other status
     * is reported on standard error. I/O failures are printed as stack traces.
     *
     * @param args unused
     * @throws URISyntaxException declared for interface compatibility; this
     *         method does not build a URI that can fail to parse
     */
    public static void main(String[] args) throws URISyntaxException {
        // The GET request; the argument is the URL of the page whose HTML is printed.
        HttpGet httpGet = new HttpGet("https://www.csdn.net");

        // try-with-resources closes the response first and then the client,
        // even when executing the request or reading the body throws.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            int statusCode = response.getStatusLine().getStatusCode();
            if (statusCode == 200) {
                // Read the whole response entity and decode it into a String.
                String context = EntityUtils.toString(response.getEntity(), "utf8");
                System.out.println(context);
                // String.length() is the number of UTF-16 chars, not the number of bytes.
                System.out.println(context.length());
            } else {
                // Do not fail silently: tell the user why nothing was printed.
                System.err.println("Unexpected HTTP status: " + response.getStatusLine());
            }
        } catch (IOException e) {
            // ClientProtocolException is an IOException, so this single handler covers both.
            e.printStackTrace();
        }
    }
}
2.get请求带参数,主要是先创建URIBuilder,因为发送带参数的get请求时需要用它来构建请求的URI,然后调用它的setParameter方法设置要携带的参数。
package com.yjj.httpClient.get;
import java.io.IOException;
import java.net.URISyntaxException;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
/**
 * Example: send an HTTP GET with query parameters, built through
 * {@link URIBuilder}, and print the response body.
 */
public class HttpClientGet {

    /**
     * Builds {@code https://passport.baidu.com/v6/ucenter} with the query
     * parameters {@code _t}, {@code lang} and {@code gid}, sends a GET request
     * and, on a 200 response, prints the body decoded as UTF-8. Any other
     * status is reported on standard error. I/O failures are printed as stack
     * traces.
     *
     * @param args unused
     * @throws URISyntaxException if the base URL or the built URI is not a valid URI
     */
    public static void main(String[] args) throws URISyntaxException {
        // Start from the base address; parameters are appended as the query string.
        URIBuilder url = new URIBuilder("https://passport.baidu.com/v6/ucenter");
        // setParameter returns the builder, so calls can be chained. A single call
        // would yield ...?_t=1584344841; the three calls below yield
        // ...?_t=1584344841&lang=zh-cn&gid=C0C1417-E022-470D-A550-DDF468E75EE8
        url.setParameter("_t", "1584344841")
                .setParameter("lang", "zh-cn")
                .setParameter("gid", "C0C1417-E022-470D-A550-DDF468E75EE8");

        // The GET request targeting the fully built URI.
        HttpGet httpGet = new HttpGet(url.build());

        // try-with-resources closes the response first and then the client,
        // even when executing the request or reading the body throws.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            int statusCode = response.getStatusLine().getStatusCode();
            if (statusCode == 200) {
                String context = EntityUtils.toString(response.getEntity(), "utf8");
                System.out.println(context);
            } else {
                // Do not fail silently: tell the user why nothing was printed.
                System.err.println("Unexpected HTTP status: " + response.getStatusLine());
            }
        } catch (IOException e) {
            // ClientProtocolException is an IOException, so this single handler covers both.
            e.printStackTrace();
        }
    }
}
实现http的Post方法
1.post不带参数
package com.yjj.httpClient.post;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
/**
 * Example: send an HTTP POST without a request body and print the response body.
 */
public class HttpClientPost2 {

    /**
     * Posts to {@code https://www.csdn.net} and, on a 200 response, prints the
     * body decoded as UTF-8. Any other status is reported on standard error.
     * I/O failures are printed as stack traces.
     *
     * @param args unused
     * @throws URISyntaxException declared for interface compatibility; not
     *         thrown by this method
     * @throws UnsupportedEncodingException declared for interface
     *         compatibility; not thrown by this method
     */
    public static void main(String[] args) throws URISyntaxException, UnsupportedEncodingException {
        // The POST request; no entity is attached, so it carries no form data.
        HttpPost post = new HttpPost("https://www.csdn.net");

        // try-with-resources closes the response first and then the client,
        // even when executing the request or reading the body throws.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(post)) {
            int statusCode = response.getStatusLine().getStatusCode();
            if (statusCode == 200) {
                // Read the whole response entity and decode it into a String.
                String context = EntityUtils.toString(response.getEntity(), "utf-8");
                System.out.println(context);
            } else {
                // Do not fail silently: tell the user why nothing was printed.
                System.err.println("Unexpected HTTP status: " + response.getStatusLine());
            }
        } catch (IOException e) {
            // ClientProtocolException is an IOException, so this single handler covers both.
            e.printStackTrace();
        }
    }
}
2.post请求带参数,主要用到NameValuePair的List集合。NameValuePair是一个接口,在设置参数时需要实例化它的实现类BasicNameValuePair,这个类恰好提供了一个接收键和值(即键值对)的构造方法。
package com.yjj.httpClient.post;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
/**
 * Example: send an HTTP POST carrying URL-encoded form parameters and print
 * the response body.
 */
public class HttpClientPost {

    /**
     * Posts the form parameters {@code c=be} and {@code is_easy=4} to
     * {@code https://www.imooc.com/course/list} and, on a 200 response, prints
     * the body decoded as UTF-8. Any other status is reported on standard
     * error. I/O failures are printed as stack traces.
     *
     * @param args unused
     * @throws URISyntaxException declared for interface compatibility; not
     *         thrown by this method
     * @throws UnsupportedEncodingException if the charset name given to the
     *         form entity is not supported
     */
    public static void main(String[] args) throws URISyntaxException, UnsupportedEncodingException {
        // Target address of the request.
        HttpPost post = new HttpPost("https://www.imooc.com/course/list");

        // Form parameters as name/value pairs. One pair is enough for a single
        // parameter (c=be); add more pairs to send several (c=be&is_easy=4).
        List<NameValuePair> params = new ArrayList<NameValuePair>();
        params.add(new BasicNameValuePair("c", "be"));
        params.add(new BasicNameValuePair("is_easy", "4"));

        // Wrap the pairs in a form entity: first argument is the parameter list,
        // second is the charset used to encode them. Then attach it to the request.
        UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(params, "utf-8");
        post.setEntity(formEntity);

        // try-with-resources closes the response first and then the client,
        // even when executing the request or reading the body throws.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(post)) {
            int statusCode = response.getStatusLine().getStatusCode();
            if (statusCode == 200) {
                String context = EntityUtils.toString(response.getEntity(), "utf-8");
                System.out.println(context);
            } else {
                // Do not fail silently: tell the user why nothing was printed.
                System.err.println("Unexpected HTTP status: " + response.getStatusLine());
            }
        } catch (IOException e) {
            // ClientProtocolException is an IOException, so this single handler covers both.
            e.printStackTrace();
        }
    }
}
总结:
希望我的这篇文章能让你初步了解HttpClient功能里面的第一条:实现了HTTP协议里面的两个重要请求方法,一个是Get请求,一个是Post请求。