使用 HttpClient 模拟浏览器请求
maven 依赖
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
<version>4.4.10</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpclient -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.6</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.5</version>
</dependency>
测试代码
这是请求某个网站,获取数据
package com.httpclient.demo.httpClient;
import java.io.File;
import java.io.InputStream;
import org.apache.commons.io.FileUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.client.HttpClient;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
/**
 * Fetches a web page with Apache HttpClient while presenting a browser-like
 * User-Agent header, and prints the response body to standard output.
 */
public class App {

    /**
     * Browser-like User-Agent value. It must not contain CR/LF: a line break
     * inside a header value ends the header section early and produces a
     * malformed request.
     */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                    + "(KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36";

    /**
     * Sends a GET request and prints the page decoded as UTF-8.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Request method and target URL.
        HttpGet get = new HttpGet("http://cj.sdust.edu.cn/");
        // Add a request header so the call looks like it comes from a browser.
        get.setHeader("User-Agent", USER_AGENT);

        // try-with-resources closes the response first and then the client,
        // releasing the underlying connection even when an exception is thrown.
        try (CloseableHttpClient client = HttpClients.createDefault();
             CloseableHttpResponse response = client.execute(get)) {
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                // EntityUtils.toString rejects a null entity, so report it explicitly.
                System.err.println("Response contained no body: " + response.getStatusLine());
                return;
            }
            // Decode the body as UTF-8 and print it.
            System.out.println(EntityUtils.toString(entity, "utf-8"));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
测试代码
这是获取网站图片,写入本地
package com.httpclient.demo.httpClient;
import java.io.File;
import java.io.InputStream;
import org.apache.commons.io.FileUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.client.HttpClient;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
/**
 * Downloads an image with Apache HttpClient while presenting a browser-like
 * User-Agent header, and writes the bytes to a local file.
 */
public class App {

    /**
     * Browser-like User-Agent value. It must not contain CR/LF: a line break
     * inside a header value ends the header section early and produces a
     * malformed request.
     */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                    + "(KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36";

    /**
     * Sends a GET request for the image and saves the response body to disk.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // The URL must point directly at the image resource, not at a page embedding it.
        HttpGet get = new HttpGet("http://cj.sdust.edu.cn/_mediafile/cj/_festival/_pic/9803f445-5965-42db-9695-28857a49b4b1.jpg");
        // Add a request header so the call looks like it comes from a browser.
        get.setHeader("User-Agent", USER_AGENT);

        // try-with-resources closes the response first and then the client,
        // releasing the underlying connection even when an exception is thrown.
        try (CloseableHttpClient client = HttpClients.createDefault();
             CloseableHttpResponse response = client.execute(get)) {
            int status = response.getStatusLine().getStatusCode();
            if (status < 200 || status >= 300) {
                // Do not save an error page under an image file name.
                System.err.println("Download failed: " + response.getStatusLine());
                return;
            }
            HttpEntity entity = response.getEntity();
            if (entity != null) {
                // FileUtils.copyToFile leaves the source stream open, so close it here.
                try (InputStream body = entity.getContent()) {
                    FileUtils.copyToFile(body, new File("C:\\Users\\雷霆世纪\\Desktop\\aa.jpg"));
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
ip代理
代理 IP 分为四种:透明代理、匿名代理、混淆代理和高匿代理。
1、透明代理(Transparent Proxy)
透明代理虽然可以直接“隐藏”你的IP地址,但是还是可以从HTTP_X_FORWARDED_FOR来查到你是谁。
2、匿名代理(Anonymous Proxy)
匿名代理比透明代理进步了一点:别人只能知道你用了代理,无法知道你是谁
3、混淆代理(Distorting Proxies)
如上,与匿名代理相同,如果使用了混淆代理,别人还是能知道你在用代理,但是会得到一个假的IP地址,伪装的更逼真:-)
4、高匿代理(Elite proxy或High Anonymity Proxy)
可以看出来,高匿代理让别人根本无法发现你是在用代理,所以是最好的选择。
一般我们搞爬虫 用的都是 高匿的代理IP
测试代码
使用高匿代理
package com.httpclient.demo.httpClient;
import java.io.File;
import java.io.InputStream;
import org.apache.commons.io.FileUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.client.HttpClient;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
/**
 * Fetches a web page through an HTTP proxy with Apache HttpClient and prints
 * the response body to standard output.
 */
public class App {

    /**
     * Browser-like User-Agent value. It must not contain CR/LF: a line break
     * inside a header value ends the header section early and produces a
     * malformed request.
     */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                    + "(KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36";

    /**
     * Sends a GET request via the configured proxy and prints the page decoded as UTF-8.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        HttpGet get = new HttpGet("http://cj.sdust.edu.cn/");

        // Route the request through a proxy. Free proxy addresses go stale
        // quickly, so this host/port needs to be replaced regularly.
        HttpHost proxy = new HttpHost("218.91.94.133", 9999);
        RequestConfig config = RequestConfig.custom().setProxy(proxy).build();
        get.setConfig(config);
        get.setHeader("User-Agent", USER_AGENT);

        // try-with-resources closes the response first and then the client,
        // releasing the underlying connection even when an exception is thrown.
        try (CloseableHttpClient client = HttpClients.createDefault();
             CloseableHttpResponse response = client.execute(get)) {
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                // EntityUtils.toString rejects a null entity, so report it explicitly.
                System.err.println("Response contained no body: " + response.getStatusLine());
                return;
            }
            System.out.println(EntityUtils.toString(entity, "utf-8"));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
为 HttpClient 设置连接服务器的超时时间和读取数据的超时时间
测试代码
package com.httpclient.demo.httpClient;
import java.io.File;
import java.io.InputStream;
import org.apache.commons.io.FileUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.client.HttpClient;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
/**
 * Fetches a web page with Apache HttpClient using explicit connect and read
 * timeouts, and prints the response body to standard output.
 */
public class App {

    /**
     * Browser-like User-Agent value. It must not contain CR/LF: a line break
     * inside a header value ends the header section early and produces a
     * malformed request.
     */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                    + "(KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36";

    /**
     * Sends a GET request with a 10-second connect timeout and a 10-second
     * socket (read) timeout, then prints the page decoded as UTF-8.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        HttpGet get = new HttpGet("https://www.cnblogs.com/zhou-test/p/10001270.html");

        // Connect timeout and socket read timeout, both in milliseconds.
        // This declaration must be active: the setConfig call below uses it.
        RequestConfig config = RequestConfig.custom()
                .setConnectTimeout(10000)
                .setSocketTimeout(10000)
                .build();
        get.setConfig(config);
        get.setHeader("User-Agent", USER_AGENT);

        // try-with-resources closes the response first and then the client,
        // releasing the underlying connection even when an exception is thrown.
        try (CloseableHttpClient client = HttpClients.createDefault();
             CloseableHttpResponse response = client.execute(get)) {
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                // EntityUtils.toString rejects a null entity, so report it explicitly.
                System.err.println("Response contained no body: " + response.getStatusLine());
                return;
            }
            System.out.println(EntityUtils.toString(entity, "utf-8"));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}