首先在 Maven 的 pom.xml 中引入 jsoup(以及 commons-httpclient)依赖:
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.8.1</version>
</dependency>
<dependency>
<groupId>commons-httpclient</groupId>
<artifactId>commons-httpclient</artifactId>
<version>3.0.1</version>
</dependency>
废话不多说了,直接上代码:
/**
 * Demo crawler: logs in once with a form POST, then reuses the cookies returned
 * by that login to fetch a range of target pages concurrently with jsoup.
 */
public class Zhizhu {
    public final static String LOGIN_URL = "";// login page
    public final static String TARGET_URL = "";// page visited after login; the page index is appended to it
    public final static String USERNAME="ABC";// user name
    public final static String PASSWORD="123";// password

    public static void main(String[] args) throws Exception {
        Zhizhu loginDemo = new Zhizhu();
        loginDemo.cookieLogin();
    }

    /**
     * Logs in, then fetches {@code TARGET_URL + index} for every index in
     * [20, 170) on a fixed pool of 5 threads, sending the login cookies with
     * each request. The first task that fails stops the whole pool. When all
     * tasks have finished (or the pool was stopped) the elapsed time is printed.
     *
     * @throws IOException if the initial login request fails
     */
    public void cookieLogin() throws IOException {
        // Both are captured by the anonymous Runnable below, hence final.
        final Map<String, String> cookies = httpPostGetCookies();
        final ExecutorService exec = Executors.newFixedThreadPool(5);

        long start = System.currentTimeMillis();
        for (int i = 20; i < 170; i++) {
            final int index = i;
            try {
                exec.execute(new Runnable() {
                    @Override
                    public void run() {
                        try {
                            Connection connection2 = Jsoup.connect(TARGET_URL + Integer.toString(index));
                            // Replay the session cookies obtained at login.
                            connection2.cookies(cookies);
                            Document document = connection2.get();
                            // Parse the DOM and look up the element by id;
                            // real processing of the result would go here.
                            Elements elements = document.select("#ID");
                            // Optional: this deliberate failure only exists to
                            // exercise the error path and can be removed.
                            if (index == 50) {
                                throw new RuntimeException("000001: 爬虫更新异常");
                            }
                        } catch (Exception e) {
                            e.printStackTrace();
                            // Stop the whole crawl as soon as one page fails.
                            exec.shutdownNow();
                            Thread.currentThread().interrupt();
                        }
                    }
                });
            } catch (java.util.concurrent.RejectedExecutionException rejected) {
                // A worker already failed and shut the pool down while we were
                // still submitting; there is no point in queueing more pages.
                break;
            }
        }
        exec.shutdown();

        // Block until every task has finished instead of spinning on isTerminated().
        try {
            exec.awaitTermination(Long.MAX_VALUE, java.util.concurrent.TimeUnit.NANOSECONDS);
        } catch (InterruptedException e) {
            exec.shutdownNow();
            Thread.currentThread().interrupt();
            return;
        }
        long end = System.currentTimeMillis();
        System.out.println("用时: " + (end - start) + "ms");
    }

    /**
     * Performs the initial simulated login against {@code LOGIN_URL} and
     * returns the cookies from the response.
     *
     * @return the cookies returned by the login response
     * @throws IOException if the login request fails
     */
    private Map<String, String> httpPostGetCookies() throws IOException {
        // Login form parameters (taken from analysing the login page's form).
        Map<String, String> params = new HashMap<>();
        params.put("username", USERNAME);
        params.put("password", PASSWORD);

        Connection con = Jsoup.connect(LOGIN_URL);
        con.header("Accept", "text/html, application/xhtml+xml, */*");
        con.header("Content-Type", "application/x-www-form-urlencoded");// matches the observed login HTTP request
        con.header("User-Agent", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0))");
        con.header("Cookie", "");
        con.data(params);

        // Send the request and hand back the cookies from the response.
        Response login = con.method(Method.POST).timeout(20000).execute();
        return login.cookies();
    }
}