今天在获取一个网页的具体内容时遇到了一些小麻烦。
源代码:
1 packagecom.ms.test;2
3 importus.codecraft.webmagic.Page;4 importus.codecraft.webmagic.Site;5 importus.codecraft.webmagic.Spider;6 importus.codecraft.webmagic.processor.PageProcessor;7
8 public class TestWebmagic implementsPageProcessor{9
10 Site site =Site.me();11 @Override12 publicSite getSite() {13 //TODO Auto-generated method stub
14 returnsite;15 }16
17 @Override18 public voidprocess(Page page) {19 //TODO Auto-generated method stub
20 page.putField("test", page.getHtml().xpath("//div[@class=p-2]/div[@class=o-border-bottom2]/div[@class=my-2]/strong"));21 }22
23 public static voidmain(String[] args) {24 Spider.create(newTestWebmagic())25 .addUrl("http://www.beijing.gov.cn/hudong/