// 偷懒用了jsoup。把数据保存在了csv文件中。灵感来自eoj上的大佬。
package just4test5;
import java.io.IOException;
import java.nio.charset.Charset;

import com.csvreader.CsvWriter;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
/**
 * Crawls problem pages one by one and stores one CSV row per successfully parsed problem.
 *
 * <p>Pages are requested sequentially as {@code REQURL + n} for {@code n = 1..Max}, parsed with
 * jsoup, and written to {@code ans.csv} (relative to the working directory) in GBK encoding.
 */
public class Eoj {
    /** Base URL; the problem number is appended to it to build each request URL. */
    public static String REQURL = "https://acm.ecnu.edu.cn/problem/";

    /** Highest problem number that is requested (inclusive). */
    public static int Max = 3509;

    /** Timeout handed to jsoup for every request, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 2000;

    /** Matches a dot that directly follows a digit (look-behind), i.e. the "number." prefix. */
    private static final String NUMBER_TITLE_SEPARATOR = "(?<=[0-9])\\.";

    /**
     * Runs the crawl and writes the result file.
     *
     * <p>A request that fails (timeout, HTTP error status, ...) only skips that problem number;
     * it no longer aborts the remaining crawl. A failure while writing the CSV still ends the
     * run, and the writer is closed in every case.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String filePath = "ans.csv";
        CsvWriter csvWriter = new CsvWriter(filePath, ',', Charset.forName("GBK"));
        // Header row.
        String[] headers = {"序号","题名","报酬","解决人数","尝试人数","提交次数","通过次数"};
        try {
            csvWriter.writeRecord(headers);
            for (int n = 1; n <= Max; ++n) {
                System.out.println(n); // progress indicator
                Document doc;
                try {
                    doc = Jsoup.connect(REQURL + n).timeout(TIMEOUT_MILLIS).get();
                } catch (IOException e) {
                    // One unreachable or missing page must not cost us all the other problems.
                    System.err.println("Skipping problem " + n + ": " + e);
                    continue;
                }
                Problem p = crawl(doc);
                if (p == null) {
                    continue;
                }
                // Column order has to match the header row above.
                String[] content = {p.no, p.name, p.emb, p.us, p.ut, p.sa, p.st};
                csvWriter.writeRecord(content);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            csvWriter.close();
        }
    }

    /**
     * Extracts one problem from a fetched page.
     *
     * <p>The combined {@code h1} text is expected to look like {@code "<number>.<title>"}. The
     * remaining values are read from the first three children of the first element with class
     * {@code description}. NOTE(review): the exact page layout is assumed from these selectors
     * and token positions; confirm against the live site.
     *
     * @param doc parsed page
     * @return the extracted problem, or {@code null} when the page is the log-in page, has no
     *     heading, or does not have the expected structure
     */
    static Problem crawl(Document doc) {
        // Limit 2: split only at the first "digit." so a title such as "v2.0" stays intact.
        String[] heading = doc.select("h1").text().split(NUMBER_TITLE_SEPARATOR, 2);
        String no = heading[0];
        if (no.equals("Log-in to your account") || no.equals("")) {
            return null;
        }
        if (heading.length < 2) {
            // Heading is not of the form "<number>.<title>".
            return null;
        }
        String name = heading[1];

        Element stats = doc.select(".description").first();
        if (stats == null || stats.children().size() < 3) {
            return null;
        }
        Element firstRow = stats.child(0);
        Element secondRow = stats.child(1);
        Element thirdRow = stats.child(2);
        if (firstRow.children().isEmpty()
                || secondRow.children().isEmpty()
                || thirdRow.children().isEmpty()) {
            return null;
        }

        // Each of the first two rows yields two values: the first token of its first child
        // and the fifth space-separated token of the row's whole text.
        String us = tokenAt(firstRow.child(0).text(), 0);
        String ut = tokenAt(firstRow.text(), 4);
        String sa = tokenAt(secondRow.child(0).text(), 0);
        String st = tokenAt(secondRow.text(), 4);
        if (us == null || ut == null || sa == null || st == null) {
            return null;
        }
        String emb = thirdRow.child(0).text();

        return new Problem(no, emb, us, ut, sa, st, name);
    }

    /** Returns the space-separated token at {@code index}, or {@code null} if there is none. */
    private static String tokenAt(String text, int index) {
        String[] tokens = text.split(" ");
        return index < tokens.length ? tokens[index] : null;
    }
}
/**
 * Plain value holder for one crawled problem. All values are kept as the raw strings
 * passed to the constructor.
 */
class Problem {
    /** Problem number. */
    String no;
    /** Value passed as {@code emb} to the constructor. */
    String emb;
    /** Not assigned by the constructor; stays {@code null} unless set elsewhere. */
    String hard;
    /** Not assigned by the constructor; stays {@code null} unless set elsewhere. */
    String eff;
    /** Value passed as {@code us} to the constructor. */
    String us;
    /** Value passed as {@code ut} to the constructor. */
    String ut;
    /** Value passed as {@code sa} to the constructor. */
    String sa;
    /** Value passed as {@code st} to the constructor. */
    String st;
    /** Problem title. */
    String name;

    /**
     * Stores every argument in the field of the same name.
     *
     * @param no problem number
     * @param emb stored in {@link #emb}
     * @param us stored in {@link #us}
     * @param ut stored in {@link #ut}
     * @param sa stored in {@link #sa}
     * @param st stored in {@link #st}
     * @param name problem title
     */
    public Problem(String no, String emb, String us, String ut, String sa, String st, String name) {
        this.name = name;
        this.no = no;
        this.emb = emb;
        this.us = us;
        this.ut = ut;
        this.sa = sa;
        this.st = st;
    }
}