1 首先导入相关的 Maven 依赖包(第 3 步的示例代码还用到了 jsoup 和 junit,需要另外引入这两个依赖)
<!-- Apache HttpClient: supplies the org.apache.http.* classes imported by AbstractHttpReqHandler. -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.4</version>
</dependency>
<!-- SLF4J binding for log4j 1.2; the handler logs through org.slf4j.Logger. -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.25</version>
</dependency>
<!-- Spring context: supplies org.springframework.stereotype.Component used on the handler. -->
<!-- NOTE(review): the sample subclass also imports org.jsoup and org.junit, which are not declared in this snippet; confirm they are added elsewhere. -->
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-context</artifactId>
<version>3.2.17.RELEASE</version>
</dependency>
2 编写抽象类,封装通过 HTTP(GET/POST)请求网页地址的逻辑
package com.cheng.webb1.http;
import org.apache.http.HttpStatus;
import org.apache.http.NameValuePair;
import org.apache.http.client.CookieStore;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * Base class that fetches a page over HTTP (GET or POST) and hands the response body to
 * {@link #parse(String)} so that subclasses can turn it into a {@code T}.
 *
 * <p>Every call builds its own {@link CloseableHttpClient} and closes it before returning.
 * When the request fails or the server answers with anything other than 200, {@code parse}
 * is still invoked, with an empty string.
 *
 * <p>NOTE(review): the class is abstract, so presumably Spring's component scan will not
 * instantiate it directly; confirm whether the {@code @Component} annotation is needed here.
 *
 * @param <T> type produced by {@link #parse(String)}
 */
@Component
public abstract class AbstractHttpReqHandler<T> {

    /** Logger named after the concrete subclass. */
    protected Logger logger = LoggerFactory.getLogger(getClass());

    /** Browser-like User-Agent sent with every request. */
    private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64)"
            + " AppleWebKit/537.36 (KHTML, like Gecko)" + " Chrome/58.0.3013.3 Safari/537.36";

    /** Charset used to encode POST form parameters and as the fallback when decoding bodies. */
    private String charset = "utf-8";

    /**
     * Returns the charset name used for form encoding and response decoding.
     *
     * @return the configured charset name
     */
    public String getCharset() {
        return charset;
    }

    /**
     * Sets the charset name used for form encoding and response decoding.
     *
     * @param charset charset name, e.g. {@code "utf-8"} or {@code "gbk"}
     */
    public void setCharset(String charset) {
        this.charset = charset;
    }

    /**
     * Does nothing in this base class; subclasses may override it.
     *
     * @param value ignored here
     */
    public void setValue(Object value) {
    }

    /**
     * Issues a GET request and parses the response body.
     *
     * @param url address to request
     * @return the result of {@link #parse(String)} applied to the body, or to {@code ""} when
     *         the request failed or the status was not 200
     */
    public T get(String url) {
        logger.debug(url);
        String html = "";
        CookieStore cookieStore = new BasicCookieStore();
        try (CloseableHttpClient client =
                HttpClients.custom().setDefaultCookieStore(cookieStore).build()) {
            HttpGet byGet = new HttpGet(url);
            setUserAgent(byGet);
            html = fetch(client, byGet, "Get", url);
        } catch (Exception e) {
            // Broad on purpose: a malformed URL or an unknown charset surfaces as an unchecked
            // exception, and callers expect parse("") rather than a propagated failure.
            logger.error("Error(Get): {}", url, e);
        }
        return parse(html);
    }

    /**
     * Issues a form-encoded POST request and parses the response body.
     *
     * @param url          address to request
     * @param parameterMap form fields to send; may be {@code null} for an empty body
     * @return the result of {@link #parse(String)} applied to the body, or to {@code ""} when
     *         the request failed or the status was not 200
     */
    public T post(String url, Map<String, Object> parameterMap) {
        logger.debug(url);
        String html = "";
        try (CloseableHttpClient client = HttpClients.custom().build()) {
            HttpPost byPost = new HttpPost(url);
            setUserAgent(byPost);
            setReqParams(byPost, parameterMap, charset);
            html = fetch(client, byPost, "Post", url);
        } catch (Exception e) {
            // Broad on purpose, for the same reasons as in get(String).
            logger.error("Error(Post): {}", url, e);
        }
        return parse(html);
    }

    /**
     * Executes the request and returns the body of a 200 response, or {@code ""} for any other
     * status. The response is always closed, even when reading the body fails.
     *
     * @param client  client used to execute the request
     * @param request prepared request
     * @param method  label used in the failure log line ({@code "Get"} or {@code "Post"})
     * @param url     address being requested, for logging
     * @return the decoded body, or an empty string for a non-200 status
     * @throws IOException if executing the request or reading the body fails
     */
    private String fetch(CloseableHttpClient client, HttpUriRequest request, String method,
            String url) throws IOException {
        try (CloseableHttpResponse response = client.execute(request)) {
            int status = response.getStatusLine().getStatusCode();
            if (status == HttpStatus.SC_OK) {
                return EntityUtils.toString(response.getEntity(), charset);
            }
            logger.info("Fail({}): {}[{}] -> {}", method, url, status,
                    response.getStatusLine().getReasonPhrase());
            return "";
        }
    }

    /** Sets the browser-like User-Agent header on the request. */
    private static void setUserAgent(HttpUriRequest req) {
        req.setHeader("User-Agent", USER_AGENT);
    }

    /**
     * Attaches the parameters to the POST as a URL-encoded form body.
     *
     * @param byPost  request to populate
     * @param params  form fields; {@code null} leaves the request without a body
     * @param charset charset name used to encode the form
     * @throws UnsupportedEncodingException if {@code charset} is not supported
     */
    private static void setReqParams(HttpPost byPost, Map<String, Object> params, String charset)
            throws UnsupportedEncodingException {
        if (null == params) {
            return;
        }
        List<NameValuePair> nvps = new ArrayList<>(params.size());
        for (Map.Entry<String, Object> entry : params.entrySet()) {
            // String.valueOf turns a null value into the literal text "null".
            nvps.add(new BasicNameValuePair(entry.getKey(), String.valueOf(entry.getValue())));
        }
        // Pass the charset explicitly so the form is not encoded with the library default.
        byPost.setEntity(new UrlEncodedFormEntity(nvps, charset));
    }

    /**
     * Converts the response body into the handler's result type.
     *
     * @param html response body, or {@code ""} when the request did not yield a 200 response
     * @return the parsed result
     */
    protected abstract T parse(String html);
}
3 编写具体实现类,继承上面的抽象类并实现 parse 方法
package com.cheng.webb1.http;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import org.junit.Test;
/**
 * Sample handler that fetches a page and prints the elements matching a fixed selector.
 * It also serves as its own JUnit test case.
 */
public class SpiderTest extends AbstractHttpReqHandler<Object> {

    /**
     * Parses the HTML with jsoup, prints whatever the selector {@code #123} matches to
     * standard output, and produces no result.
     *
     * @param html page source to parse
     * @return always {@code null}
     */
    @Override
    protected Object parse(String html) {
        Document page = Jsoup.parse(html);
        Elements matched = page.select("#123");
        System.out.println(matched);
        return null;
    }

    /** Runs a GET against the placeholder address using a fresh handler instance. */
    @Test
    public void test() {
        new SpiderTest().get("http://www.xxxx.com/");
    }
}