了解HttpClient、httpclient获取指定的网页

最新推荐文章于 2024-10-06 20:16:20 发布

揽峰moc

最新推荐文章于 2024-10-06 20:16:20 发布

阅读量129

点赞数

分类专栏： Http - Https - HttpClient - httpCore-SSL-TLS 文章标签： java

本文链接：https://blog.csdn.net/big1989wmf/article/details/84279444

版权

Http - Https - HttpClient - httpCore-SSL-TLS 专栏收录该内容

9 篇文章 0 订阅

订阅专栏

引用参考：
-- HttpClient 超时设置详解
http://blog.csdn.net/u011191463/article/details/78664896
-- HttpClient 4.5 版本设置连接超时时间
https://my.oschina.net/wallechen/blog/526642
-- HttpClient PoolingClientConnectionManager 参数含义
http://geniuszhe.blog.163.com/blog/static/11934682014102521241984/

package com.tender.news.crawler;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
/**
 * Downloads the body of a web page with Apache Commons HttpClient and
 * returns it as a single string.
 */
public class HttpClientGet{

    /**
     * Charset used to decode the body when the response reports ISO-8859-1.
     * NOTE(review): presumably ISO-8859-1 is what HttpClient reports when the
     * server names no charset, and the crawled sites are GBK-encoded -- confirm.
     */
    private static final String FALLBACK_CHARSET = "gbk";

    /**
     * Issues an HTTP GET for {@code url} and returns the response body as text.
     *
     * <p>The body is read line by line and the lines are concatenated without
     * any separator, so the returned text contains no line terminators. A
     * non-200 status is reported on {@code System.err}, but the body is still
     * read and returned.
     *
     * @param url address of the page to fetch
     * @return the decoded response body, or an empty string when the request
     *         fails with an {@code HttpException}/{@code IOException} or the
     *         response carries no body
     */
    public String getHtml(String url){
        String result = "";
        HttpClient httpClient = new HttpClient();
        GetMethod getMethod = new GetMethod(url);
        // Use the library's default retry handler.
        getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
                new DefaultHttpMethodRetryHandler());

        InputStream ins = null;
        BufferedReader br = null;
        try {
            int statusCode = httpClient.executeMethod(getMethod);
            if (statusCode != HttpStatus.SC_OK) {
                System.err.println("方法失败: "+ getMethod.getStatusLine());
            }

            // Stream the body instead of buffering it all as a byte array.
            ins = getMethod.getResponseBodyAsStream();
            String serverCharset = getMethod.getResponseCharSet();
            System.out.println("编码是？"+serverCharset);
            if (ins == null) {
                // No response body: nothing to decode.
                return result;
            }

            // equalsIgnoreCase is locale-independent, unlike toUpperCase(),
            // which breaks this comparison under e.g. a Turkish default locale.
            String charset = serverCharset;
            if ("ISO-8859-1".equalsIgnoreCase(serverCharset)) {
                charset = FALLBACK_CHARSET;
            }

            // Decode once with the effective charset rather than decoding,
            // re-encoding and decoding again.
            br = new BufferedReader(new InputStreamReader(ins, charset));
            StringBuilder content = new StringBuilder();
            String line;
            while ((line = br.readLine()) != null) {
                content.append(line);
            }
            result = content.toString();

            System.out.println("服务器编码是："+serverCharset);
        } catch (HttpException e) {
            // Fatal protocol-level failure (bad protocol or malformed response).
            System.out.println("请检查您所提供的HTTP地址！");
            e.printStackTrace();
        } catch (IOException e) {
            // Network/transport failure, or an unsupported charset name.
            e.printStackTrace();
        } finally {
            // Close whatever was opened, then always hand the connection back,
            // even if the request failed before any stream existed.
            closeQuietly(br);
            closeQuietly(ins);
            getMethod.releaseConnection();
        }
        return result;
    }

    /**
     * Closes {@code resource} if it is non-null, printing (not propagating)
     * any {@code IOException} so that later cleanup steps still run.
     */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}