1、添加依赖
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.9.2</version>
</dependency>
2、示例代码
package com.neo;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
/**
 * Minimal jsoup crawler example: posts login credentials to a site, keeps the
 * cookies returned by the login response, and replays them on a second POST
 * request whose response is printed and returned as a string.
 *
 * @author weixiang.wu
 */
public class CrawlerTest {

    /** User-Agent header sent with every request made by this class. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31";

    /** Value passed to {@code Connection.timeout} for every request (milliseconds per the jsoup API). */
    private static final int TIMEOUT_MILLIS = 10000;

    /** Names of the cookies copied from the login response onto later requests. */
    private static final String[] SESSION_COOKIE_NAMES = {"__cfduid", "PHPSESSID"};

    /**
     * The entry point of application: logs in, then fetches the target URL with
     * the cookies obtained from the login.
     *
     * @param args the input arguments (unused)
     * @throws IOException if either HTTP request fails
     */
    public static void main(String[] args) throws IOException {
        // URL to crawl
        String url = "https://www.?.com/at/api.html?url=admin_member_list";
        String username = "?";
        String password = "?";

        Map<String, String> sessionId = getSessionInfo(username, password);
        // httpPost prints the response itself; its return value is available
        // here for any further processing.
        httpPost(sessionId, url);
    }

    /**
     * Logs in with the given credentials and collects the cookies listed in
     * {@link #SESSION_COOKIE_NAMES} from the login response.
     *
     * @param username value sent as the {@code username} form field
     * @param password value sent as the {@code password} form field
     * @return a mutable map of cookie name to cookie value; a cookie the
     *         response did not set is omitted rather than mapped to {@code null}
     * @throws IOException if the login request fails
     */
    private static Map<String, String> getSessionInfo(String username, String password) throws IOException {
        // Log in to the site
        Connection.Response res = Jsoup.connect("https://www.?.com/at/login.html")
                .userAgent(USER_AGENT)
                .data("username", username, "password", password)
                .method(Connection.Method.POST)
                .timeout(TIMEOUT_MILLIS)
                .execute();

        // Collect the cookies. Response.cookie(name) returns null for a cookie
        // the server did not set, and Connection.cookies(Map) rejects null
        // values with IllegalArgumentException, so absent cookies are skipped.
        Map<String, String> cookie = new HashMap<>();
        for (String name : SESSION_COOKIE_NAMES) {
            String value = res.cookie(name);
            if (value != null) {
                cookie.put(name, value);
            }
        }
        return cookie;
    }

    /**
     * Sends a POST request to {@code url} carrying the given cookies and a
     * fixed set of form parameters, prints the parsed response to standard
     * output, and returns it.
     *
     * @param sessionId cookies (name to value) to attach to the request
     * @param url       the URL to post to
     * @return the string form of the response parsed as a jsoup {@link Document}
     * @throws IOException if the request fails
     */
    private static String httpPost(Map<String, String> sessionId, String url) throws IOException {
        // Build the request connection
        Connection con = Jsoup.connect(url)
                .userAgent(USER_AGENT)
                .cookies(sessionId)
                .timeout(TIMEOUT_MILLIS)
                .ignoreContentType(true);

        // Request parameters
        con.data("page", "1");
        con.data("pid", "0");
        con.data("level", "[1,2,3]");

        Document doc = con.post();
        // Render once and reuse the result for both the console output and the return value.
        String body = doc.toString();
        System.out.println(body);
        return body;
    }
}
转载自:https://my.oschina.net/wuweixiang/blog/1928967