一、构造HttpClient对象
// One-time setup of the shared connection parameters and the thread-safe
// pooled connection manager used by every client from getHttpClient().
static {
    httpParams = new BasicHttpParams();
    // Connection (TCP connect) timeout: 500 ms
    HttpConnectionParams.setConnectionTimeout(httpParams, 500);
    // Socket (read) timeout: 2000 ms
    HttpConnectionParams.setSoTimeout(httpParams, 2000);
    SchemeRegistry registry = new SchemeRegistry();
    registry.register(new Scheme("http", PlainSocketFactory.getSocketFactory(), 80));
    registry.register(new Scheme("https", SSLSocketFactory.getSocketFactory(), 443));
    connectionManager = new ThreadSafeClientConnManager(httpParams, registry);
    // Maximum total connections in the pool
    connectionManager.setMaxTotal(800);
    // Default maximum connections per route
    connectionManager.setDefaultMaxPerRoute(100);
    // Maximum connections for one specific host's route.
    // Fixed: the original line was missing both closing parentheses and the
    // max-connections argument of setMaxForRoute(HttpRoute, int).
    connectionManager.setMaxForRoute(new HttpRoute(new HttpHost("localhost")), 5);
}
/**
 * Returns a new {@code HttpClient} backed by the shared, pooled
 * connection manager and the shared connection parameters.
 */
public static HttpClient getHttpClient() {
    final HttpClient client = new DefaultHttpClient(connectionManager, httpParams);
    return client;
}
什么是route:可以理解为运行环境机器到目标机器的一条线路。举例来说,我们使用HttpClient的实现来分别请求 www.baidu.com 的资源和 www.bing.com 的资源,那么它就会产生两个route。
这里为什么要特别提到route最大连接数这个参数呢,因为这个参数的默认值为2,如果不设置这个参数值默认情况下对于同一个目标机器的最大并发连接只有2个!这意味着如果你正在执行一个针对某一台目标机器的抓取任务的时候,哪怕你设置连接池的最大连接数为200,但是实际上还是只有2个连接在工作,其他剩余的198个连接都在等待,都是为别的目标机器服务的。
@ThreadSafe
public final class ConnPerRouteBean implements ConnPerRoute {
/** The default maximum number of connections allowed per host */
public static final int DEFAULT_MAX_CONNECTIONS_PER_ROUTE = 2; // Per RFC 2616 sec 8.1.4
// Per-route overrides of the default maximum.
private final ConcurrentHashMap<HttpRoute, Integer> maxPerHostMap;
// volatile so updates to the default are visible across threads.
private volatile int defaultMax;
/** Creates a bean with the given default maximum connections per route. */
public ConnPerRouteBean(int defaultMax) {
super();
this.maxPerHostMap = new ConcurrentHashMap<HttpRoute, Integer>();
setDefaultMaxPerRoute(defaultMax);
}
/** Creates a bean with the default of {@link #DEFAULT_MAX_CONNECTIONS_PER_ROUTE}. */
public ConnPerRouteBean() {
this(DEFAULT_MAX_CONNECTIONS_PER_ROUTE);
}
二、GET请求
1、准备参数及Encode
// Collect the query-string parameters as name/value pairs.
List<BasicNameValuePair> data = new ArrayList<BasicNameValuePair>();
data.add(new BasicNameValuePair("name","jack"));
data.add(new BasicNameValuePair("age","14"));
data.add(new BasicNameValuePair("country","中国"));
// Encode with UTF-8 to avoid garbled non-ASCII (e.g. Chinese) characters
String dataStr = URLEncodedUtils.format(data, "UTF-8");
2、构造GET请求
// Build the URI: scheme, host, port (-1 = scheme default), path, query, fragment
URI uri = URIUtils.createURI("http", "www.taobao.com", -1, "/", dataStr, null);
HttpUriRequest request = new HttpGet(uri);
3、发起GET请求
// Execute the GET request with a client from the shared pool
HttpResponse response = getHttpClient().execute(request);
4、处理请求结果
HttpEntity entity = response.getEntity();
// Read the body as a string; specify UTF-8 to avoid garbled characters
String content = EntityUtils.toString(entity, "UTF-8");
5、关闭数据流
// Release the entity's underlying content stream
EntityUtils.consume(entity);
------
/**
 * Closes the content stream of the given entity, if it has one.
 * A {@code null} entity or a non-streaming entity is a no-op.
 *
 * @param entity the entity to consume; may be {@code null}
 * @throws IOException if closing the content stream fails
 */
public static void consume(final HttpEntity entity) throws IOException {
    if (entity != null && entity.isStreaming()) {
        final InputStream content = entity.getContent();
        if (content != null) {
            content.close();
        }
    }
}
三、POST请求
1、准备参数及Encode
// Collect the form parameters as name/value pairs.
List<BasicNameValuePair> data = new ArrayList<BasicNameValuePair>();
data.add(new BasicNameValuePair("name","jack"));
data.add(new BasicNameValuePair("age","14"));
data.add(new BasicNameValuePair("country","中国"));
// Form-encode the parameters as the request body, using UTF-8
HttpEntity entity = new UrlEncodedFormEntity(data, "UTF-8");
2、构造POST请求
HttpPost httpPost = new HttpPost("http://www.taobao.com");
// Attach the form-encoded entity as the POST body
httpPost.setEntity(entity);
3、发起POST请求:同GET
4、处理请求结果:同GET
5、关闭数据流:同GET
四、Header设置
HttpGet get = new HttpGet(url);
// Use the HttpHeaders constants rather than raw header-name strings
get.addHeader(HttpHeaders.ACCEPT, "text/html");
get.addHeader(HttpHeaders.ACCEPT_CHARSET, "utf-8");
// Advertise gzip support so the server may compress the response
get.addHeader(HttpHeaders.ACCEPT_ENCODING, "gzip");
get.addHeader(HttpHeaders.ACCEPT_LANGUAGE, "zh-CN");
get.addHeader(HttpHeaders.USER_AGENT, "Mozilla/5.0 (X11; Linux x86_64)");
HttpHeaders常量枚举
/**
 * Constants enumerating the HTTP headers. All headers defined in RFC1945 (HTTP/1.0), RFC2616 (HTTP/1.1), and RFC2518
 * (WebDAV) are listed.
 *
 * @since 4.1
 */
public final class HttpHeaders {
// Private constructor: constants-only utility class, never instantiated.
private HttpHeaders() {
}
/** RFC 2616 (HTTP/1.1) Section 14.1 */
public static final String ACCEPT = "Accept";
// (remaining header constants omitted in this excerpt)
......
使用gzip访问,可以压缩服务端返回的数据,节省带宽占用,集群也会因此而提高响应速度
// Client that transparently requests and decompresses gzip-encoded content.
DefaultHttpClient httpclient = new DefaultHttpClient();
// Request interceptor: ask for gzip-compressed responses unless the
// caller already set an Accept-Encoding header explicitly.
httpclient.addRequestInterceptor(new HttpRequestInterceptor() {
    public void process(
            final HttpRequest request,
            final HttpContext context) throws HttpException, IOException {
        if (!request.containsHeader("Accept-Encoding")) {
            request.addHeader("Accept-Encoding", "gzip");
        }
    }
});
// Response interceptor: if the server answered with gzip content,
// wrap the entity so callers read the decompressed stream.
httpclient.addResponseInterceptor(new HttpResponseInterceptor() {
    public void process(
            final HttpResponse response,
            final HttpContext context) throws HttpException, IOException {
        HttpEntity entity = response.getEntity();
        // Fixed: guard against responses with no body (e.g. 204/304 or HEAD),
        // where getEntity() returns null and the original code threw an NPE.
        if (entity != null) {
            Header ceheader = entity.getContentEncoding();
            if (ceheader != null) {
                for (HeaderElement codec : ceheader.getElements()) {
                    if (codec.getName().equalsIgnoreCase("gzip")) {
                        response.setEntity(
                                new GzipDecompressingEntity(response.getEntity()));
                        return;
                    }
                }
            }
        }
    }
});
HttpGet httpget = new HttpGet("http://www.apache.org/");
// Execute HTTP request
System.out.println("executing request " + httpget.getURI());
HttpResponse response = httpclient.execute(httpget);
System.out.println("----------------------------------------");
System.out.println(response.getStatusLine());
// These headers show whether the server actually compressed the response
System.out.println(response.getLastHeader("Content-Encoding"));
System.out.println(response.getLastHeader("Content-Length"));
System.out.println("----------------------------------------");
HttpEntity entity = response.getEntity();
if (entity != null) {
// The interceptor above replaced the entity, so this reads decompressed data
String content = EntityUtils.toString(entity);
System.out.println(content);
System.out.println("----------------------------------------");
System.out.println("Uncompressed size: "+content.length());
}
五、Cookie设置