我们在从网页抓取数据时,经常会遇到IP被封禁的情况。IP被封禁后,可以改用代理,让代理服务器替我们获取想要的数据。下面是使用HttpClient通过代理发送GET请求、获取网页源码的代码示例:
/**
 * Example of sending an HTTP GET request through a proxy with Apache HttpClient
 * and printing the returned page source.
 */
public class HttpClientProxy {

    /** Timeout, in milliseconds, applied to both connecting and socket reads. */
    private static final int TIMEOUT_MILLIS = 30000;

    /**
     * Requests a fixed URL through a fixed proxy and prints the response body
     * to standard output.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if executing the request or reading the response fails
     */
    public static void main(String[] args) throws Exception {
        // Proxy address found online; replace with a working proxy before running.
        HttpHost proxy = new HttpHost("106.44.80.17", 8118);

        // Build ONE RequestConfig holding both the timeouts and the proxy.
        // HttpGet.setConfig replaces the previous config rather than merging, so
        // setting a timeout-only config and then a proxy-only config would
        // silently discard the timeouts.
        RequestConfig requestConfig = RequestConfig.custom()
                .setSocketTimeout(TIMEOUT_MILLIS)
                .setConnectTimeout(TIMEOUT_MILLIS)
                .setProxy(proxy)
                .build();

        HttpGet httpGet = new HttpGet("https://www.amazon.cn/");
        httpGet.setConfig(requestConfig);
        // Send a browser-like User-Agent header with the request.
        httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:50.0) Gecko/20100101 Firefox/50.0");

        // try-with-resources closes the response first, then the client, even
        // when executing the request or reading the body throws.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            HttpEntity entity = response.getEntity();
            System.out.println("网页内容:" + EntityUtils.toString(entity, "utf-8"));
        }
    }
}