创建一个Maven项目
在pom.xml中加入
<dependency>
<groupId>edu.uci.ics</groupId>
<artifactId>crawler4j</artifactId>
<version>4.3</version>
</dependency>
项目代码组织:
Controller类:
package com.yj.WebCrawlTest;
import edu.uci.ics.crawler4j.crawler.CrawlConfig;
import edu.uci.ics.crawler4j.crawler.CrawlController;
import edu.uci.ics.crawler4j.fetcher.PageFetcher;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtServer;
public class Controller {
public static void main(String[] args) throws Exception {
String crawlStorageFolder = "E:/crawler";// 定义爬虫数据存储位置
i