package creeper.part1.capturepage;
import java.io.IOException;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
//爬虫技术(1)--抓取网页
@SuppressWarnings("unused")
// NOTE(review): class name should be UpperCamelCase (CapturePage), but renaming
// would change the public class / file-name contract, so it is kept as-is.
public class capturePage {
    /**
     * Entry point: fetches {@code http://www.baidu.com} with Apache HttpClient 4.x
     * and prints the response body to stdout.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the request fails, the status is non-2xx, or the
     *                   client cannot be closed — errors now propagate instead of
     *                   being silently swallowed by an empty catch block
     */
    public static void main(String[] args) throws Exception {
        // try-with-resources closes the client even when the request throws,
        // replacing the original try / empty-catch / finally, whose empty
        // catch (Exception e) {} silently discarded every failure.
        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
            // A GET request — conceptually, "opening a page" in a browser.
            String url = "http://www.baidu.com";
            HttpGet get = new HttpGet(url);
            System.out.println("---------URI----------");
            System.out.println(get.getURI());

            // Response handler: return the body for 2xx statuses, fail otherwise.
            ResponseHandler<String> handler = new ResponseHandler<String>() {
                @Override
                public String handleResponse(HttpResponse response)
                        throws ClientProtocolException, IOException {
                    int status = response.getStatusLine().getStatusCode();
                    if (status >= 200 && status < 300) {
                        HttpEntity entity = response.getEntity();
                        // A 2xx response may legally carry no body (e.g. 204).
                        return entity == null ? null : EntityUtils.toString(entity);
                    } else {
                        throw new ClientProtocolException("status:" + status);
                    }
                }
            };

            // Execute the request; the handler converts the response to a String.
            String responseBody = httpClient.execute(get, handler);
            System.out.println("----------------responseBody-----------------");
            System.out.println(responseBody);
            System.out.println("----------------responseBody-----------------");
        }
    }
}
// Trailing blog text accidentally pasted after the class (commented out so the
// file compiles): 爬虫技术(2)--抓取网页java代码实现
// 最新推荐文章于 2018-09-24 14:37:00 发布