/**
 * Minimal WebMagic {@link PageProcessor} example.
 *
 * <p>For each downloaded page it selects the element with id
 * {@code chapter-list-4}, collects the {@code href} attribute of the
 * {@code <a>} tags found under it, and prints how many values were collected.
 */
public class WebMagicUtil implements PageProcessor {

    // Crawl configuration: 3 retries, sleep time 1000, timeout 10000
    // (presumably milliseconds -- confirm against the WebMagic Site docs).
    private Site site = Site.me()
            .setRetryTimes(3)
            .setSleepTime(1000)
            .setTimeOut(10000);

    /**
     * Extracts the {@code href} values of the {@code <a>} tags under
     * {@code #chapter-list-4} and prints the number of values found.
     *
     * @param page the page handed to this processor by the spider
     */
    public void process(Page page) {
        // Collect the href attribute of the <a> tags under the element whose id is chapter-list-4.
        List<String> hrefs = page.getHtml()
                .$("#chapter-list-4")
                .$("a", "href")
                .all();
        System.out.println(hrefs.size());
    }

    /**
     * Returns the crawl configuration built when this processor was created.
     *
     * @return the {@code Site} settings held by this processor
     */
    public Site getSite() {
        return site;
    }

    /**
     * Starts a single-threaded spider on the hard-coded start URL.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String url = "https://www.codetansuozhe.com";

        Spider spider = Spider.create(new WebMagicUtil());
        spider.addUrl(url);
        spider.thread(1);
        spider.run();
    }
}
WebMagic爬虫框架获取a标签的href属性
于 2020-01-14 22:04:01 首次发布