1.开机自启动爬虫
@CrossOrigin
@RestController
@RequestMapping("/property")
public class PropertyController implements CommandLineRunner {
@Autowired
private PropertyDaoPipeLine01 diYiPropertyDaoPipeLine;
@Override
public void run(String... args) throws Exception {
property01();
}
@GetMapping("/start01")
public void property01() {
Spider.create(new PropertyPageProcessor01())
.addUrl("http://wh.01fy.cn/sale/list_2_0_0_0-0_0_0-0_0_0_0-0_0_0-0_2_0_1_.html")
.addPipeline(diYiPropertyDaoPipeLine)
.thread(1)
.setExitWhenComplete(true)
.setDownloader(Downloader.newIpDownloader())
.runAsync();
}
2.实体类
import org.springframework.data.annotation.Id;
/**
* Data holder for a single property (housing) listing.
* NOTE(review): @Data is presumably Lombok's accessor generator; its import is not visible here, confirm.
*/
@Data
public class Property {
/**
* Record identifier, marked with Spring Data's @Id.
*/
@Id
private Long id;
/**
* Contact person (user_name)
*/
private String userName;
/**
* Contact phone number (user_mobile)
*/
private String userMobile;
/**
* Estate / housing development name (estate_name)
*/
private String estateName;
/**
* Listing title (property_title)
*/
private String propertyTitle;
/**
* District / area (area_name)
*/
private String areaName;
/**
* Layout: number of bedrooms, living rooms and bathrooms (house_type)
*/
private String houseType;
/**
* Floor area (size)
*/
private String size;
/**
* Total price (price)
*/
private String price;
/**
* Unit price (ava_price)
*/
private String avaPrice;
/**
* Link to the listing detail page (source_url)
*/
private String sourceUrl;
/**
* Status: not viewed 1, viewed 2, not synced 3, synced 4, deleted.
* NOTE(review): the original comment lists five labels but only four numbers,
* so the exact number-to-label mapping is ambiguous; confirm against the data.
*/
private Integer status;
}
3.自定义Agent
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.concurrent.locks.ReentrantReadWriteLock;
/**
 * Utility that hands out a random User-Agent string.
 *
 * @author wongH
 * @date 2019/5/7 9:51
 * @version 1.0
 */
public class Agent {
private static final String AGENT_FILE_PATH = “user-agent/User-Agents.txt”;
private static ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
private static List agents;public static String getRandom() {
String random = getRandom(null);
System.err.println(“Agent======================>” + random);
return random;
}private static String getRandom(String agent) {
try {
lock.readLock().lock();
int size = agents.size();
if (size == 0)
return “Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36”;
Random random = new Random();
if (null != agent) return agent;
else return agents.get(random.nextInt(size));
} catch (Exception e) {
e.printStackTrace();
return “Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36”;
} finally {
lock.readLock().unlock();
}
}static {
agents = new ArrayList<>();
InputStream resourceAsStream = null;
InputStreamReader inputStreamReader = null;
BufferedReader bufferedReader = null;
try {
resourceAsStream = Agent.class.getClassLoader().getResourceAsStream(AGENT_FILE_PATH);
inputStreamReader = new InputStreamReader(resourceAsStream);
bufferedReader = new BufferedReader(inputStreamReader);