// WebMagic crawler learning code (webmagic 爬虫学习代码).
import java.util.List;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.pipeline.FilePipeline;
import us.codecraft.webmagic.processor.PageProcessor;
/**
 * WebMagic {@link PageProcessor} that collects the links found inside
 * {@code div.album_detail_wrap} on each downloaded page.
 *
 * <p>For every page it queues all links under {@code div.album_detail_wrap a}
 * as further crawl targets and stores two result fields: {@code "html"} (links
 * selected by {@code div.album_detail_wrap dl dt a}) and {@code "url"} (links
 * selected by {@code div.album_detail_wrap a}).
 */
public class JobProcessor implements PageProcessor {

    /** Start URL used when none is supplied on the command line. */
    private static final String DEFAULT_START_URL =
            "https://download.csdn.net/download/weixin_45026040/11260253";

    /** Output directory used when none is supplied on the command line. */
    private static final String DEFAULT_OUTPUT_DIR =
            "C:\\Users\\Administrator\\Desktop\\yyycsdn";

    /** CSS selector for every anchor inside the album wrapper. */
    private static final String ALBUM_LINKS_SELECTOR = "div.album_detail_wrap a";

    /** CSS selector for the anchors inside the album wrapper's {@code dl dt} entries. */
    private static final String ALBUM_TITLE_LINKS_SELECTOR = "div.album_detail_wrap dl dt a";

    /** Crawl settings returned by {@link #getSite()}; configured once per processor. */
    private final Site site = Site.me()
            .setCharset("utf8")
            .setTimeOut(10000)
            .setRetrySleepTime(3000)
            // NOTE(review): WebMagic documents this value as milliseconds, so 3 means
            // a 3 ms pause between pages -- confirm that 3000 was not intended.
            .setSleepTime(3);

    /**
     * Extracts the album links from {@code page}, queues them for crawling and
     * records them as result fields.
     *
     * @param page the downloaded page handed in by the spider
     */
    @Override
    public void process(Page page) {
        System.out.println("=============================");

        // Evaluate the album-link selector once; the same list feeds both the
        // crawl queue and the "url" result field.
        List<String> albumLinks = page.getHtml().css(ALBUM_LINKS_SELECTOR).links().all();
        page.addTargetRequests(albumLinks);

        page.putField("html", page.getHtml().css(ALBUM_TITLE_LINKS_SELECTOR).links().all());
        page.putField("url", albumLinks);
    }

    /**
     * Returns the crawl settings for this processor.
     *
     * @return the {@link Site} configured for this processor
     */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Starts a five-thread spider that writes its results through a {@link FilePipeline}.
     *
     * @param args optional overrides: {@code args[0]} is the start URL and
     *             {@code args[1]} is the output directory; each falls back to the
     *             built-in default when absent
     */
    public static void main(String[] args) {
        String startUrl = args.length > 0 ? args[0] : DEFAULT_START_URL;
        String outputDir = args.length > 1 ? args[1] : DEFAULT_OUTPUT_DIR;

        Spider.create(new JobProcessor())
                .addUrl(startUrl)
                .addPipeline(new FilePipeline(outputDir))
                .thread(5)
                .run();
    }
}