参考文档地址
webmagic中文文档:http://webmagic.io/docs/zh/
一:引入依赖
在 pom.xml 中添加以下版本属性和依赖:
<!-- Single version property shared by every WebMagic artifact below.
     If the POM already has a <properties> section, merge this entry into it. -->
<properties>
    <webmagic.version>0.7.3</webmagic.version>
</properties>

<!-- The <dependency> entries below belong inside the POM's <dependencies> section. -->
<dependency>
    <groupId>us.codecraft</groupId>
    <artifactId>webmagic-core</artifactId>
    <version>${webmagic.version}</version>
    <exclusions>
        <!-- Keeps the transitive slf4j-log4j12 binding off the classpath.
             NOTE(review): presumably to avoid clashing with the host project's
             own SLF4J binding; confirm against the project's logging setup. -->
        <exclusion>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
        </exclusion>
    </exclusions>
</dependency>

<!-- Additional WebMagic modules, all pinned to the same version property. -->
<dependency>
    <groupId>us.codecraft</groupId>
    <artifactId>webmagic-extension</artifactId>
    <version>${webmagic.version}</version>
</dependency>
<dependency>
    <groupId>us.codecraft</groupId>
    <artifactId>webmagic-saxon</artifactId>
    <version>${webmagic.version}</version>
</dependency>
<dependency>
    <groupId>us.codecraft</groupId>
    <artifactId>webmagic-selenium</artifactId>
    <version>${webmagic.version}</version>
</dependency>
二:实现PageProcessor
/** * 解析器,基础处理类 * * @author</ |