之前写爬虫都是用的 Python,公司电脑上没有 Python 环境,没办法,只好学习了一下 Java 版的写法,于是发现了一个很好用的工具 Jsoup,个人感觉用法上和 Python 的 BeautifulSoup 有些相似。
目标是实现网站的模拟登录,在网上发现了一个挺不错的模板,特此记录学习。
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.jsoup.Connection;
import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
/**
 * Minimal simulated-login example built on Jsoup.
 *
 * <p>The program fetches a page, takes the first {@code <form>} on it, copies
 * every named field of that form into a map (overriding the {@code username}
 * and {@code password} fields with the configured credentials) and posts the
 * map to the login address together with the cookies received from the first
 * request.
 */
public class Test {

    /** Address of the page that contains the login form (placeholder). */
    private static final String LOGIN_PAGE_URL = "xxxxxxxx";

    /** Address the collected form data is posted to (placeholder). */
    private static final String LOGIN_POST_URL = "xxxxxxxxx";

    /** Value submitted for the form field named {@code username} (placeholder). */
    private static final String USERNAME = "xxxxxxxxx";

    /** Value submitted for the form field named {@code password} (placeholder). */
    private static final String PASSWORD = "xxxxxxxxxxx";

    /**
     * Runs the login flow: fetch the form page, print its cookies, build the
     * form data and submit it with a POST request.
     *
     * @param args unused
     * @throws IOException if a request fails or the fetched page contains no
     *     {@code <form>} element
     */
    public static void main(String[] args) throws IOException {
        Connection pageRequest = Jsoup.connect(LOGIN_PAGE_URL);
        Response rs = pageRequest.execute();
        System.out.println(rs.cookies());

        Document page = Jsoup.parse(rs.body());
        List<Element> forms = page.select("form");
        if (forms.isEmpty()) {
            // Fail with a clear message instead of an IndexOutOfBoundsException.
            throw new IOException("No <form> element found on the login page");
        }
        Map<String, String> datas = collectFormData(forms.get(0));

        // Reuse the cookies of the first response so both requests share them.
        Connection loginRequest = Jsoup.connect(LOGIN_POST_URL)
                .ignoreContentType(true)
                .method(Method.POST)
                .data(datas)
                .cookies(rs.cookies());
        // The returned Response exposes the post-login cookies via cookies();
        // this example does not use them further.
        loginRequest.execute();
    }

    /**
     * Collects the named fields of a form into a name-to-value map.
     *
     * <p>Only the {@code value} attribute of each element is read. Elements
     * without a {@code name} attribute are skipped, and when several elements
     * share a name the last one wins.
     *
     * @param form the form element whose descendants are scanned
     * @return map from field name to the value to submit
     */
    private static Map<String, String> collectFormData(Element form) {
        Map<String, String> datas = new HashMap<>();
        for (Element field : form.getAllElements()) {
            String name = field.attr("name");
            if (name.isEmpty()) {
                // Skip elements that carry no field name.
                continue;
            }
            String value;
            if ("username".equals(name)) {
                value = USERNAME; // set the user name
            } else if ("password".equals(name)) {
                value = PASSWORD; // set the user password
            } else {
                value = field.attr("value");
            }
            datas.put(name, value);
        }
        return datas;
    }
}