【又是爬虫】Java爬取eoj题目信息

偷懒用了jsoup,把数据保存在了csv文件中。灵感来自eoj上的大佬。

package just4test5;
import java.io.IOException;
import java.nio.charset.Charset;

import com.csvreader.CsvWriter;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
/**
 * Crawls the problem pages {@code REQURL + 1} .. {@code REQURL + Max} and writes one CSV row
 * per successfully parsed problem to {@code ans.csv} (GBK-encoded, comma-separated).
 *
 * <p>A page that cannot be fetched or parsed is reported on stderr and skipped, so a single
 * missing problem id or timeout does not end the whole run.
 */
public class Eoj {
	/** Base URL; the problem number is appended to it. */
	public static String REQURL = "https://acm.ecnu.edu.cn/problem/";
	/** Highest problem number to request (inclusive). */
	public static int Max = 3509;

	/**
	 * Entry point: writes the header row, then one row per problem number from 1 to {@link #Max}.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String filePath = "ans.csv";
		CsvWriter csvWriter = new CsvWriter(filePath, ',', Charset.forName("GBK"));
		// Header row; column order matches the record written for each problem below.
		String[] headers = {"序号","题名","报酬","解决人数","尝试人数","提交次数","通过次数"};
		try {
			csvWriter.writeRecord(headers);
			for (int n = 1; n <= Max; ++n) {
				System.out.println(n); // progress indicator
				Problem p = fetch(n);
				if (p == null) {
					continue;
				}
				String[] content = {p.no, p.name, p.emb, p.us, p.ut, p.sa, p.st};
				csvWriter.writeRecord(content);
			}
		} catch (Exception e) {
			// Failures writing the CSV itself are not recoverable per page; report and stop.
			e.printStackTrace();
		} finally {
			csvWriter.close();
		}
	}

	/**
	 * Downloads and parses a single problem page.
	 *
	 * @param n problem number appended to {@link #REQURL}
	 * @return the parsed problem, or {@code null} when the page could not be fetched or parsed
	 */
	private static Problem fetch(int n) {
		String newUrl = REQURL + n;
		try {
			Connection con = Jsoup.connect(newUrl).timeout(2000);
			return crawl(con.get());
		} catch (IOException e) {
			// Connection.get() reports HTTP error statuses and timeouts as IOException;
			// skip this page instead of aborting the remaining ones.
			System.err.println("skip " + newUrl + ": " + e);
		} catch (RuntimeException e) {
			// The page did not have the element/token layout crawl() indexes into.
			System.err.println("skip " + newUrl + " (unexpected page layout): " + e);
		}
		return null;
	}

	/**
	 * Extracts the problem fields from a fetched page.
	 *
	 * <p>The first {@code h1} text is split into number and title at the first "." that
	 * directly follows a digit. The remaining fields are read from the children of the first
	 * {@code .description} element by position.
	 *
	 * @param doc parsed problem page
	 * @return the problem, or {@code null} when the heading is empty, is the log-in heading,
	 *         has no "number." prefix, or the page has no {@code .description} element
	 * @throws IndexOutOfBoundsException if the {@code .description} element has fewer children
	 *         or space-separated tokens than the positions read here
	 */
	static Problem crawl(Document doc) {
		// Lookbehind keeps the digit in the first part; limit 2 keeps any later
		// "digit." inside the title from cutting the title short.
		String[] s = doc.select("h1").text().split("(?<=[0-9])\\.", 2);
		String no = s[0];
		if (no.equals("Log-in to your account") || no.equals("")) {
			return null;
		}
		if (s.length < 2) {
			// Heading without a "number." prefix: not a problem page.
			return null;
		}
		Element ele = doc.select(".description").first();
		if (ele == null) {
			return null;
		}

		// Positional scraping: first token of the nested element, fifth token of the whole line.
		// NOTE(review): the exact wording of these lines is not visible here - confirm against the site.
		String us = ele.child(0).child(0).text().split(" ")[0];
		String ut = ele.child(0).text().split(" ")[4];
		String sa = ele.child(1).child(0).text().split(" ")[0];
		String st = ele.child(1).text().split(" ")[4];
		String emb = ele.child(2).child(0).text();
		String name = s[1];
		return new Problem(no, emb, us, ut, sa, st, name);
	}

}
/**
 * Plain holder for the text fields of one problem; every value is kept as the raw string
 * handed to the constructor.
 */
class Problem {
	/** Problem number. */
	String no;
	/** Problem title. */
	String name;
	String emb;
	/** Never assigned by the constructor; stays {@code null}. */
	String hard;
	/** Never assigned by the constructor; stays {@code null}. */
	String eff;
	String us;
	String ut;
	String sa;
	String st;

	/**
	 * Stores each argument in the field of the same name.
	 */
	public Problem(String no, String emb, String us, String ut, String sa, String st, String name) {
		this.no = no;
		this.name = name;
		this.emb = emb;
		this.us = us;
		this.ut = ut;
		this.sa = sa;
		this.st = st;
	}
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值