准备步骤
- 打开谷歌浏览器,按F12键,访问链接
- 查看Network,找到访问后台的链接请求,点开
- 重点查看请求方式、请求头中的Content-Type和Cookie,以及请求参数部分;然后根据这些内容用httpclient构造相同的请求,爬取我们需要的数据。如果你有postman工具,可以先用它验证请求是否可用
httpclient的使用
环境搭建
<!-- Apache HttpClient: the HTTP client library used by the HttpUtil class below -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.6</version>
</dependency>
<!-- Apache HttpCore: the low-level HTTP components that HttpClient builds on -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
<version>4.4.11</version>
</dependency>
<!-- Commons IO: supplies org.apache.commons.io.Charsets, which the utility class imports -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.4</version>
</dependency>
<!-- Commons Logging: logging API that HttpClient 4.x relies on at runtime -->
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.1.1</version>
</dependency>
HttpUtil 工具类
// NOTE(review): getSaveImg also needs HttpServletResponse from the servlet API
// (javax.servlet.http or jakarta.servlet.http, depending on the container) —
// add the matching import and dependency for your environment.
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.io.OutputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.UnknownHostException;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.Map.Entry;
import javax.net.ssl.SSLException;
import org.apache.commons.io.Charsets;
import org.apache.http.HttpEntityEnclosingRequest;
import org.apache.http.HttpRequest;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.HttpRequestRetryHandler;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpRequestBase;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.config.SocketConfig;
import org.apache.http.conn.ConnectTimeoutException;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.protocol.HttpContext;
import org.apache.http.util.EntityUtils;
/**
 * Static helpers for issuing GET/POST requests through one shared Apache
 * {@link HttpClient} backed by a {@link PoolingHttpClientConnectionManager}.
 *
 * <p>The client, its connection pool and the per-request timeouts are built
 * once in the static initializer and reused by every call.
 */
public class HttpUtil {
    /** Upper bound on connections held by the pool across all routes. */
    private static final int MAX_CONNECTION = 100;
    /** Upper bound on connections per route (per target host). */
    private static final int MAX_CONCURRENT_CONNECTIONS = 100;
    /** Connect timeout in milliseconds. */
    private static final int CONNECTION_TIME_OUT = 100000;
    /** Socket (read) timeout in milliseconds. */
    private static final int REQUEST_TIME_OUT = 100000;
    /** Execution count at which the retry handler stops retrying. */
    private static final int MAX_FAIL_RETRY_COUNT = 3;
    /** Status code that {@link #getSaveImg} treats as success. */
    private static final int HTTP_OK = 200;
    /** Fixed location where {@link #getSaveImg} stores the downloaded bytes. */
    private static final String IMAGE_SAVE_PATH = "c:/1.png";

    private static final HttpClient httpClient;
    private static final RequestConfig requestConfig;

    static {
        SocketConfig socketConfig = SocketConfig.custom()
                .setSoTimeout(REQUEST_TIME_OUT)
                .setSoKeepAlive(true)
                .setTcpNoDelay(true)
                .build();
        requestConfig = RequestConfig.custom()
                .setSocketTimeout(REQUEST_TIME_OUT)
                .setConnectTimeout(CONNECTION_TIME_OUT)
                .build();

        PoolingHttpClientConnectionManager connManager = new PoolingHttpClientConnectionManager();
        connManager.setMaxTotal(MAX_CONNECTION);
        connManager.setDefaultMaxPerRoute(MAX_CONCURRENT_CONNECTIONS);
        connManager.setDefaultSocketConfig(socketConfig);

        httpClient = HttpClients.custom()
                .setConnectionManager(connManager)
                .setRetryHandler(new MyHttpRequestRetryHandler())
                .build();
    }

    /**
     * Sends a POST request and returns the response body as text.
     *
     * <p>Note that {@code paramMap} is appended to the URL as query parameters;
     * no request body is sent.
     *
     * @param url      target URL
     * @param paramMap query parameters to append, or {@code null} for none
     * @param headers  request headers to add, or {@code null} for none
     * @return the response body, decoded as UTF-8 unless the response declares
     *         another charset; an empty string if the response has no body
     * @throws Exception if the URL is malformed or the request fails
     */
    public static String post(String url, Map<String, String> paramMap,
            Map<String, String> headers) throws Exception {
        return execute(new HttpPost(buildUri(url, paramMap)), headers);
    }

    /**
     * Sends a POST request without extra headers.
     *
     * @see #post(String, Map, Map)
     */
    public static String post(String url, Map<String, String> paramMap)
            throws Exception {
        return post(url, paramMap, null);
    }

    /**
     * Sends a GET request and returns the response body as text.
     *
     * @param url      target URL
     * @param paramMap query parameters to append, or {@code null} for none
     * @param headers  request headers to add, or {@code null} for none
     * @return the response body, decoded as UTF-8 unless the response declares
     *         another charset; an empty string if the response has no body
     * @throws Exception if the URL is malformed or the request fails
     */
    public static String get(String url, Map<String, String> paramMap,
            Map<String, String> headers) throws Exception {
        return execute(new HttpGet(buildUri(url, paramMap)), headers);
    }

    /**
     * Sends a GET request without extra headers.
     *
     * @see #get(String, Map, Map)
     */
    public static String get(String url, Map<String, String> paramMap) throws Exception {
        return get(url, paramMap, null);
    }

    /**
     * Downloads {@code url} with a GET request and, when the status is 200,
     * writes the bytes both to a fixed local file ({@code c:/1.png}) and to the
     * servlet response's output stream, which is closed afterwards. Any other
     * status is ignored and nothing is written.
     *
     * @param url      address of the resource to download
     * @param response servlet response that receives a copy of the bytes
     * @throws Exception if the URL is malformed, the request fails, or writing fails
     */
    public static void getSaveImg(String url, HttpServletResponse response) throws Exception {
        HttpGet httpGet = new HttpGet(new URIBuilder(url).build());
        httpGet.setHeader("Content-Type", "application/json");
        httpGet.setConfig(requestConfig);

        HttpResponse resp = httpClient.execute(httpGet);
        try {
            if (resp.getStatusLine().getStatusCode() != HTTP_OK) {
                return;
            }
            byte[] data = EntityUtils.toByteArray(resp.getEntity());

            // FileOutputStream creates the file when it does not exist yet.
            try (FileOutputStream fileOut = new FileOutputStream(new File(IMAGE_SAVE_PATH))) {
                fileOut.write(data);
            }
            try (OutputStream servletOut = response.getOutputStream()) {
                servletOut.write(data);
            }
        } finally {
            // Also covers the non-200 path, where the body was never read:
            // an unconsumed entity keeps its pooled connection checked out.
            EntityUtils.consumeQuietly(resp.getEntity());
        }
    }

    /** Builds the request URI from {@code url} plus the optional query parameters. */
    private static URI buildUri(String url, Map<String, String> paramMap)
            throws URISyntaxException {
        URIBuilder uriBuilder = new URIBuilder(url);
        if (paramMap != null) {
            for (Map.Entry<String, String> param : paramMap.entrySet()) {
                uriBuilder.addParameter(param.getKey(), param.getValue());
            }
        }
        return uriBuilder.build();
    }

    /** Applies headers and the shared config, runs the request, and returns the body text. */
    private static String execute(HttpRequestBase request, Map<String, String> headers)
            throws IOException {
        if (headers != null) {
            for (Map.Entry<String, String> header : headers.entrySet()) {
                request.addHeader(header.getKey(), header.getValue());
            }
        }
        request.setConfig(requestConfig);

        HttpResponse response = httpClient.execute(request);
        try {
            if (response.getEntity() == null) {
                // Body-less response (e.g. 204): EntityUtils.toString rejects null.
                return "";
            }
            return EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8);
        } finally {
            // Make sure the connection goes back to the pool even if decoding failed.
            EntityUtils.consumeQuietly(response.getEntity());
        }
    }

    /**
     * Retry policy: gives up once the execution count reaches
     * {@link #MAX_FAIL_RETRY_COUNT}, never retries on timeouts, unknown hosts
     * or SSL failures, and otherwise retries only requests that carry no body.
     */
    private static final class MyHttpRequestRetryHandler implements HttpRequestRetryHandler {
        @Override
        public boolean retryRequest(IOException exception, int executionCount,
                HttpContext context) {
            if (executionCount >= MAX_FAIL_RETRY_COUNT) {
                return false;
            }
            if (exception instanceof InterruptedIOException) {
                return false;
            }
            if (exception instanceof UnknownHostException) {
                return false;
            }
            // In HttpClient 4.x ConnectTimeoutException extends InterruptedIOException,
            // so the check above already covers it; kept explicit for readability.
            if (exception instanceof ConnectTimeoutException) {
                return false;
            }
            if (exception instanceof SSLException) {
                return false;
            }
            HttpRequest request = HttpClientContext.adapt(context).getRequest();
            // Requests without an entity are treated as idempotent and safe to resend.
            return !(request instanceof HttpEntityEnclosingRequest);
        }
    }
}