用 Jsoup 抓取网页是我的第一次爬虫实践,虽然途中有些坎坷,但总体的使用体验还算不错。下面简单做个随记。
Jsoup 的 GitHub 地址:https://github.com/jhy/jsoup
导入Jsoup包
以AndroidStudio为例,在libs目录里添加下载好的jar包
记得添加网络权限
<uses-permission android:name="android.permission.INTERNET"/>
创建JsoupUtil工具类
我习惯为网络请求单独创建一个工具类,具体内容还要根据实际情况调整。
/**
 * Helper that logs in to the course site over HTTP with jsoup and walks the
 * resulting redirect chain by hand in order to read the timetable page.
 *
 * <p>The flow is: {@link #sendRequest(String, String)} posts the login form,
 * {@code getSecond()} follows the first redirect to obtain the session cookie
 * and the {@code id} query value, and {@code getRight()} downloads the page
 * that contains the element with id {@code LB_kb} and hands every non-empty
 * table cell to {@code pareCourse}.
 *
 * <p>NOTE(review): {@code pareCourse} is not defined in the visible part of
 * this class; it is presumably declared elsewhere - confirm.
 *
 * <p>Instances are normally obtained through {@link #getInstance()}. The lazy
 * initialisation there is not synchronised.
 */
public class JsoupUtil {
    /** Most recent HTTP response; overwritten by every step of the flow. */
    private Connection.Response res;
    /** Value of the {@code Location} header returned by the login request. */
    private String second;
    /** Value taken from the {@code Location} header of the second request (text after the first '='). */
    private String id;
    /** Value of the {@code ASP.NET_SessionId} cookie returned by the second request. */
    private String cookie;
    private static JsoupUtil instance = null;
    private int timeX;
    private int timeY;
    /**
     * Counter advanced once per table cell in {@code getRight()}; starts at -1.
     * It is never reset here, so it keeps growing across repeated calls.
     */
    private int week = -1;

    /**
     * Posts the login form and, on success, continues with the follow-up
     * requests that fetch and process the timetable page.
     *
     * @param muser  value sent as the {@code muser} form field
     * @param passwd value sent as the {@code passwd} form field
     * @throws IOException if a request fails, or if a response lacks the
     *                     redirect target or page structure this flow relies on
     */
    public void sendRequest(String muser, String passwd) throws IOException {
        Map<String, String> form = new HashMap<>();
        form.put("muser", muser);
        form.put("passwd", passwd);
        // Fixed values sent with the form; presumably the click coordinates of
        // an image submit button - confirm against the login page.
        form.put("x", "20");
        form.put("y", "37");

        // Build the connection and attach request headers.
        Connection login = Jsoup.connect("http://59.77.226.32/logincheck.asp");
        login.header("Referer", "http://jwch.fzu.edu.cn/");
        login.header("Connection", "Keep-Alive");
        // Redirects are disabled so the Location header can be read directly.
        login.followRedirects(false);
        login.ignoreHttpErrors(true);
        login.ignoreContentType(true);

        res = login
                .data(form)
                .method(Connection.Method.POST)
                .execute();

        second = res.header("Location");
        if (second == null || second.isEmpty()) {
            // Without a redirect target there is nothing to follow.
            throw new IOException("Login response did not contain a Location header (HTTP "
                    + res.statusCode() + ")");
        }
        getSecond();
    }

    /**
     * Requests the URL returned by the login step, stores the session cookie
     * and the id carried in the next {@code Location} header, then continues
     * with {@code getRight()}.
     */
    private void getSecond() throws IOException {
        Connection follow = Jsoup.connect(second);
        follow.header("Connection", "Keep-Alive");
        follow.ignoreHttpErrors(true);
        follow.ignoreContentType(true);

        res = follow
                .header("Referer", "http://jwch.fzu.edu.cn/")
                .followRedirects(false)
                .method(Connection.Method.GET)
                .execute();

        Map<String, String> cookies = res.cookies();
        cookie = cookies.get("ASP.NET_SessionId");

        String location = res.header("Location");
        if (location == null) {
            throw new IOException("Second response did not contain a Location header (HTTP "
                    + res.statusCode() + ")");
        }
        // The id is the text between the first and second '=' of the header.
        String[] parts = location.split("=");
        if (parts.length < 2) {
            throw new IOException("Location header does not carry an id value: " + location);
        }
        id = parts[1];
        getRight();
    }

    /**
     * Downloads {@code right.aspx} for the current id, locates the first table
     * inside the element with id {@code LB_kb}, and passes the text of every
     * non-empty {@code td} cell to {@code pareCourse}. {@code week} is
     * incremented once per cell, empty or not.
     */
    private void getRight() throws IOException {
        Connection page = Jsoup.connect("http://59.77.226.35/right.aspx?id=" + id);
        page.ignoreHttpErrors(true);
        page.ignoreContentType(true);
        // Send the session cookie explicitly to avoid a "session expired" response.
        page.header("Cookie", "ASP.NET_SessionId=" + cookie);

        res = page
                .header("Referer", "http://59.77.226.35/default.aspx?id=" + id)
                .method(Connection.Method.GET)
                .maxBodySize(0) // 0 = no limit on the downloaded body size
                .execute();

        Document document = Jsoup.parseBodyFragment(res.body());

        // Locate the container by its element id.
        Element courses = document.getElementById("LB_kb");
        if (courses == null) {
            throw new IOException("Element with id LB_kb not found in the response");
        }

        // Then narrow down to the cells of its first table.
        Elements tables = courses.select("table");
        if (tables.isEmpty()) {
            throw new IOException("No table found inside element LB_kb");
        }
        Elements cells = tables.get(0).select("td");

        for (Element cell : cells) {
            String text = cell.text();
            // Compare by content: the previous reference comparison against ""
            // did not reliably filter out empty cells.
            if (!text.isEmpty()) {
                Log.d("132", text + week);
                pareCourse(text);
            }
            week++;
        }
    }

    /**
     * Returns the shared instance, creating it on first use.
     *
     * @return the single lazily created {@code JsoupUtil}
     */
    public static JsoupUtil getInstance() {
        if (instance == null) {
            instance = new JsoupUtil();
        }
        return instance;
    }
}