webmagic教程
入门案例
package com.hikktn.webmagic;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.model.ConsolePageModelPipeline;
import us.codecraft.webmagic.model.OOSpider;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.scheduler.BloomFilterDuplicateRemover;
import us.codecraft.webmagic.scheduler.QueueScheduler;
import us.codecraft.webmagic.scheduler.Scheduler;
/**
* @ClassName JobProcessor
* @Description TODO
* @Author lisonglin
* @Date 2021/5/4 12:43
* @Version 1.0
*/
public class JobProcessor implements PageProcessor {
public void process(Page page) {
System.out.println(page.getHtml());
// CSS选择器
page.putField("author", page.getHtml().css("div.b-wrap>a").all());
// XPath选择器
page.putField("div", page.getHtml().xpath("//div[@id=app]/div/div/div[@class=b-wrap]/div/div[@id" +
"=primaryPageTab]/ul/li/a/span/text()"));
// 正则表达式
page.putField("div3", page.getHtml().css("div.b-wrap>a").regex(".*年轻人.*").all());
// 处理结果API 返回一条数据
page.putField("div4", page.getHtml().css("div.b-wrap>a").regex(".*年轻人.*").get());
page.putField("div5", page.getHtml().css("div.b-wrap>a").regex(".*年轻人.*").toString());
// 获取链接 全部
page.addTargetRequests(page.getHtml().css("div.b-wrap").links().all());
// 获取上方查询出的url里面的元素
page.putField("url",page.getHtml().css("div.nav-search from input").all());
// 抓取链接
page.addTargetRequest("https://jobs.51job.com/chongqing-jlpq/123700142.html?s=sou_sou_soulb&t=0");
}
private Site site = Site.me()
.setCharset("utf8") // 设置编码
.setTimeOut(10000) // 设置超时时间,单位是ms毫秒
.setRetrySleepTime(3000) // 设置重试的间隔时间
.setSleepTime(3); // 设置重试次数
public Site getSite() {
return site;
}
public static void main(String[] args) {
// Spider.create(new JobProcessor())
// //初始访问url地址
// .addUrl("https://www.bilibili.com/").run();
Spider spider = Spider.create(new JobProcessor())
.addUrl("https://www.bilibili.com/") //设置爬取数据的页面
//.addPipeline(new FilePipeline("C:\\Users\\tree\\Desktop\\result")) // 保存到文件中
.thread(5) // 开启5个线程
.setScheduler(new QueueScheduler().setDuplicateRemover(new BloomFilterDuplicateRemover(10000000)));//设置布隆去重过滤器,指定最多对1000万数据进行去重操作
// Scheduler scheduler = spider.getScheduler();
//执行爬虫
spider.run();
}
}
准备
打开前程无忧网站,根据关键词搜索,选择你想要的招聘信息。
我们需要的是这些招聘信息
我们选择抓取的关键信息
按照抽取的页面信息,创建数据库
/*
Navicat Premium Data Transfer
Source Server : localhost
Source Server Type : MySQL
Source Server Version : 50723
Source Host : localhost:3306
Source Schema : test
Target Server Type : MySQL
Target Server Version : 50723
File Encoding : 65001
Date: 06/05/2021 14:03:34
*/
-- Use utf8mb4 for this session and switch foreign-key checks off while the table is rebuilt.
SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;
-- ----------------------------
-- Table structure for job_info
-- ----------------------------
-- One row per crawled job posting. Every column except the auto-increment primary key is nullable.
-- WARNING: the DROP below discards any existing job_info table together with its data.
DROP TABLE IF EXISTS `job_info`;
CREATE TABLE `job_info` (
`id` bigint(20) NOT NULL AUTO_INCREMENT COMMENT '主键id',
`company_name` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '公司名称',
`company_addr` varchar(200) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '公司联系方式',
`company_info` text CHARACTER SET utf8 COLLATE utf8_general_ci NULL COMMENT '公司信息',
`job_name` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '职位名称',
`job_addr` varchar(50) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '工作地点',
`job_info` text CHARACTER SET utf8 COLLATE utf8_general_ci NULL COMMENT '职位信息',
-- salary range bounds (minimum / maximum)
`salary_min` int(10) NULL DEFAULT NULL COMMENT '薪资范围,最小',
`salary_max` int(10) NULL DEFAULT NULL COMMENT '薪资范围,最大',
`technology` varchar(200) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '关键技术点',
-- URL of the posting's detail page
`url` varchar(150) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '招聘信息详情页',
-- most recent publication time of the posting, stored as text
`time` varchar(10) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '职位最近发布时间',
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 532 CHARACTER SET = utf8 COLLATE = utf8_general_ci COMMENT = '招聘信息' ROW_FORMAT = Dynamic;
-- Re-enable foreign-key checks now that the table exists.
SET FOREIGN_KEY_CHECKS = 1;
看起来一切都非常顺利,可惜的是,这个页面的数据是通过JavaScript渲染到HTML中的,导致我们抓取数据的时候比较麻烦。
转换一下格式
上面框起来的就是我们需要解析的数据
你看下面的链接,就是下一页的请求链接。
本来打算把JSON转换为bean对象,但是太麻烦了,最后还是直接转换为JSONObject对象。没想到JSONObject内置了根据key直接获取value的方法,那就很好办了。
前面走的弯路,不需要再走了。
我的分析结束了,现在我们开始吧。
开始
pom
<!-- Dependencies of the crawler project. The Spring Boot starters and the MySQL driver carry no
     version here; presumably a Spring Boot parent/BOM outside this fragment manages them (confirm). -->
<dependencies>
<!-- Spring MVC -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- Spring Data JPA -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-jpa</artifactId>
</dependency>
<!-- MySQL JDBC driver -->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
</dependency>
<!-- WebMagic core; its slf4j-log4j12 binding is excluded here -->
<dependency>
<groupId>us.codecraft</groupId>
<artifactId>webmagic-core</artifactId>
<version>0.7.4</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- WebMagic extension module -->
<dependency>
<groupId>us.codecraft</groupId>
<artifactId>webmagic-extension</artifactId>
<version>0.7.4</version>
</dependency>
<!-- Guava: required for WebMagic's Bloom filter support -->
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>16.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpclient -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-log4j12 -->
<!-- NOTE(review): this re-adds the binding excluded from webmagic-core above; it may conflict
     with the logging binding brought in by the Spring Boot starters. Confirm it is intended. -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.24</version>
</dependency>
<!-- NOTE(review): junit is declared without a test scope. Confirm whether that is intended. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.10</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
<!--<dependency>-->
<!--<groupId>org.jsoup</groupId>-->
<!--<artifactId>jsoup</artifactId>-->
<!--<version>1.13.1</version>-->
<!--</dependency>-->
<!-- https://mvnrepository.com/artifact/commons-io/commons-io -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.3</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.12.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.alibaba/fastjson -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.75</version>
</dependency>
</dependencies>
数据库相关
package com.hikktn.pojo;
import javax.persistence.*;
import java.util.Objects;
/**
 * JPA entity mapped to the {@code job_info} table of the {@code test} schema; one instance
 * represents one crawled job posting.
 *
 * <p>All mapping annotations are placed on the getters (property access). The original put
 * {@code @GeneratedValue} on the {@code id} field while {@code @Id} was on the getter; with
 * property access the field annotation is not picked up, so it now sits next to {@code @Id}.
 *
 * @author lisonglin
 */
@Entity
@Table(name = "job_info", schema = "test")
public class JobInfoEntity {
    /** Primary key. */
    private long id;
    /** Company name. */
    private String companyName;
    /** Company contact information. */
    private String companyAddr;
    /** Company description. */
    private String companyInfo;
    /** Job title. */
    private String jobName;
    /** Work location. */
    private String jobAddr;
    /** Job description. */
    private String jobInfo;
    /** Lower bound of the salary range. */
    private Integer salaryMin;
    /** Upper bound of the salary range. */
    private Integer salaryMax;
    /** Key technologies mentioned by the posting. */
    private String technology;
    /** URL of the posting's detail page. */
    private String url;
    /** Most recent publication time of the posting. */
    private String time;

    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    @Column(name = "id", nullable = false)
    public long getId() {
        return id;
    }

    public void setId(long id) {
        this.id = id;
    }

    @Basic
    @Column(name = "company_name", nullable = true, length = 100)
    public String getCompanyName() {
        return companyName;
    }

    public void setCompanyName(String companyName) {
        this.companyName = companyName;
    }

    @Basic
    @Column(name = "company_addr", nullable = true, length = 200)
    public String getCompanyAddr() {
        return companyAddr;
    }

    public void setCompanyAddr(String companyAddr) {
        this.companyAddr = companyAddr;
    }

    @Basic
    @Column(name = "company_info", nullable = true, length = -1)
    public String getCompanyInfo() {
        return companyInfo;
    }

    public void setCompanyInfo(String companyInfo) {
        this.companyInfo = companyInfo;
    }

    @Basic
    @Column(name = "job_name", nullable = true, length = 100)
    public String getJobName() {
        return jobName;
    }

    public void setJobName(String jobName) {
        this.jobName = jobName;
    }

    @Basic
    @Column(name = "job_addr", nullable = true, length = 50)
    public String getJobAddr() {
        return jobAddr;
    }

    public void setJobAddr(String jobAddr) {
        this.jobAddr = jobAddr;
    }

    @Basic
    @Column(name = "job_info", nullable = true, length = -1)
    public String getJobInfo() {
        return jobInfo;
    }

    public void setJobInfo(String jobInfo) {
        this.jobInfo = jobInfo;
    }

    @Basic
    @Column(name = "salary_min", nullable = true)
    public Integer getSalaryMin() {
        return salaryMin;
    }

    public void setSalaryMin(Integer salaryMin) {
        this.salaryMin = salaryMin;
    }

    @Basic
    @Column(name = "salary_max", nullable = true)
    public Integer getSalaryMax() {
        return salaryMax;
    }

    public void setSalaryMax(Integer salaryMax) {
        this.salaryMax = salaryMax;
    }

    @Basic
    @Column(name = "technology", nullable = true, length = 200)
    public String getTechnology() {
        return technology;
    }

    public void setTechnology(String technology) {
        this.technology = technology;
    }

    @Basic
    @Column(name = "url", nullable = true, length = 150)
    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    @Basic
    @Column(name = "time", nullable = true, length = 10)
    public String getTime() {
        return time;
    }

    public void setTime(String time) {
        this.time = time;
    }

    /**
     * Two entities are equal when they are of the same class and every field, including the
     * id, is equal.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        JobInfoEntity that = (JobInfoEntity) o;
        return id == that.id
                && Objects.equals(companyName, that.companyName)
                && Objects.equals(companyAddr, that.companyAddr)
                && Objects.equals(companyInfo, that.companyInfo)
                && Objects.equals(jobName, that.jobName)
                && Objects.equals(jobAddr, that.jobAddr)
                && Objects.equals(jobInfo, that.jobInfo)
                && Objects.equals(salaryMin, that.salaryMin)
                && Objects.equals(salaryMax, that.salaryMax)
                && Objects.equals(technology, that.technology)
                && Objects.equals(url, that.url)
                && Objects.equals(time, that.time);
    }

    /** Hash over the same fields that {@link #equals(Object)} compares. */
    @Override
    public int hashCode() {
        return Objects.hash(id, companyName, companyAddr, companyInfo, jobName, jobAddr, jobInfo, salaryMin,
                salaryMax, technology, url, time);
    }
}
package com.hikktn.dao;
import com.hikktn.pojo.JobInfoEntity;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.Query;
import java.util.List;
/**
 * Spring Data JPA repository for {@link JobInfoEntity}.
 *
 * @author lisonglin
 */
public interface JobInfoDao extends JpaRepository<JobInfoEntity, Long> {

    /**
     * Loads the stored job postings so that callers can read their {@code technology} values.
     *
     * <p>The original native query selected only the {@code technology} column. Because the
     * result is mapped to {@link JobInfoEntity}, every mapped column must be present in the
     * result set, so the query now selects all columns.
     *
     * @return all rows of {@code job_info} mapped to entities
     */
    @Query(value = "SELECT * FROM job_info", nativeQuery = true)
    List<JobInfoEntity> findJobTechnology();
}
package com.hikktn.service;
import com.hikktn.pojo.JobInfoEntity;
import java.util.List;
/**
 * Service-layer contract for storing and querying crawled job postings.
 *
 * @author lisonglin
 */
public interface JobInfoService {

    /**
     * Saves a job posting.
     *
     * @param jobInfoEntity the posting to save
     */
    void save(JobInfoEntity jobInfoEntity);

    /**
     * Queries job postings by the given criteria.
     *
     * @param jobInfoEntity entity carrying the criteria to match
     * @return the matching postings
     */
    List<JobInfoEntity> findJobInfo(JobInfoEntity jobInfoEntity);

    /**
     * Queries the technology information of the stored postings.
     *
     * @return the postings returned by the technology query
     */
    List<JobInfoEntity> findJobTechnology();
}
package com.hikktn.service.impl;
import com.hikktn.dao.JobInfoDao;
import com.hikktn.pojo.JobInfoEntity;
import com.hikktn.service.JobInfoService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.domain.Example;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.util.List;
/**
* @ClassName JobInfoServiceImpl
* @Description TODO
* @Author lisonglin
* @Date 2021/5/5 20:43
* @Version 1.0
*/
@Service
public class JobInfoServiceImpl implements JobInfoService {
@Autowired
private JobInfoDao jobInfoDao;
@Override
@Transactional
public void save(JobInfoEntity jobInfoEntity) {
JobInfoEntity param =new JobInfoEntity();
param.setUrl(jobInfoEntity.getUrl());
param.setTime(jobInfoEntity.getTime());
List<JobInfoEntity> jobInfoList = this.findJobInfo(param);
if (jobInfoList.size() == 0){
this.jobInfoDao.saveAndFlush(jobInfoEntity);
}
}
@Override
public List<JobInfoEntity> findJobInfo(JobInfoEntity jobInfoEntity) {
Example<JobInfoEntity> jobInfoEntityExample = Example.of(jobInfoEntity);
List<JobInfoEntity> jobInfoDaoAll = this.jobInfoDao.findAll(jobInfoEntityExample);
return jobInfoDaoAll;
}
public List<JobInfoEntity> findJobTechnology(){
return this.jobInfoDao.findJobTechnology();
}
}
爬虫
package com.hikktn.task;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.hikktn.pojo.JobInfoEntity;
import com.hikktn.utils.MathSalary;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.scheduler.BloomFilterDuplicateRemover;
import us.codecraft.webmagic.scheduler.QueueScheduler;
import us.codecraft.webmagic.selector.Html;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * WebMagic page processor for the 51job search results.
 *
 * <p>A search-result (list) page is recognised by a {@code <script>} whose content contains a
 * JSON object starting with <code>{"top_ads"</code>. From that JSON the processor queues every
 * {@code job_href} detail URL and the next list page. Any other page is treated as a job
 * detail page and is turned into a {@link JobInfoEntity} stored under the result key
 * {@code "jobInfo"}.
 *
 * <p>Fixes compared to the original:
 * <ul>
 *   <li>the query string contained the mangled parameter {@code °reefrom} (restored to
 *       {@code &degreefrom});</li>
 *   <li>the next page was always computed from the constant first page, so only page 2 was
 *       ever queued; it is now derived from the URL of the page being processed;</li>
 *   <li>the JSON script is located by content instead of by the fixed index 8;</li>
 *   <li>the minimum and maximum salary were stored in each other's field;</li>
 *   <li>missing page elements no longer cause null or index errors.</li>
 * </ul>
 *
 * @author lisonglin
 */
@Component
public class JobProcessorTask implements PageProcessor {
    /** Number of the first list page. */
    private static final int PAGE = 1;
    /** Part of the list URL in front of the page number. */
    private static final String SHANG_URL = "https://search.51job.com/list/060000,000000,0000,32%252c01,9,99,java,2,";
    /** Part of the list URL behind the page number. */
    private static final String XIA_URL =
            ".html?lang=c&postchannel=0000&workyear=99&cotype=99&degreefrom=03&jobterm=99&companysize=99"
                    + "&ord_field=0&dibiaoid=0&line=&welfare=";
    /** Start of the JSON object embedded in a list page's script. */
    private static final String SEARCH_RESULT_MARKER = "{\"top_ads\"";
    /** Captures the page number of a list URL, i.e. the digits between the last ',' and ".html". */
    private static final Pattern LIST_PAGE_NUMBER = Pattern.compile(",(\\d{1,9})\\.html");
    /** A run of ASCII letters; used to pick technology keywords out of the job description. */
    private static final Pattern LATIN_WORD = Pattern.compile("[a-zA-Z]+");

    /** Download settings: GBK pages, 10 s timeout, 3 retries spaced 3 s apart. */
    private final Site site = Site.me().setCharset("gbk")
            .setTimeOut(10 * 1000)
            .setRetrySleepTime(3000)
            .setRetryTimes(3);

    @Autowired
    private SpringDataPipeline springDataPipeline;

    /**
     * Processes one downloaded page: list pages produce new requests, detail pages produce a
     * {@code "jobInfo"} result.
     *
     * @param page the downloaded page
     */
    @Override
    public void process(Page page) {
        JSONObject searchResult = findSearchResult(page);
        if (searchResult == null) {
            // No embedded search result: this is a detail page.
            saveJobInfo(page);
            return;
        }
        enqueueDetailPages(page, searchResult);
        enqueueNextListPage(page, searchResult);
    }

    /** Returns the search-result JSON embedded in one of the page's scripts, or null if absent. */
    private JSONObject findSearchResult(Page page) {
        Document document = page.getHtml().getDocument();
        for (Element script : document.getElementsByTag("script")) {
            String data = script.data();
            int start = data.indexOf(SEARCH_RESULT_MARKER);
            if (start != -1) {
                return JSONObject.parseObject(data.substring(start));
            }
        }
        return null;
    }

    /** Queues the detail-page URL ({@code job_href}) of every entry in the search result. */
    private void enqueueDetailPages(Page page, JSONObject searchResult) {
        JSONArray results = searchResult.getJSONArray("engine_search_result");
        if (results == null) {
            return;
        }
        for (int i = 0; i < results.size(); i++) {
            JSONObject entry = results.getJSONObject(i);
            String jobHref = entry == null ? null : entry.getString("job_href");
            if (jobHref != null && !jobHref.isEmpty()) {
                page.addTargetRequest(jobHref);
            }
        }
    }

    /** Queues the list page after the current one while the current one is not the last. */
    private void enqueueNextListPage(Page page, JSONObject searchResult) {
        int totalPages = searchResult.getIntValue("total_page");
        Matcher matcher = LIST_PAGE_NUMBER.matcher(page.getUrl().toString());
        if (!matcher.find()) {
            // Not a URL of the expected list form; the current page number is unknown.
            return;
        }
        int currentPage = Integer.parseInt(matcher.group(1));
        if (currentPage < totalPages) {
            page.addTargetRequest(SHANG_URL + (currentPage + 1) + XIA_URL);
        }
    }

    /**
     * Parses a job detail page into a {@link JobInfoEntity} and stores it under the result key
     * {@code "jobInfo"}. Pages without a job title are skipped so that no empty row is stored.
     */
    private void saveJobInfo(Page page) {
        Html html = page.getHtml();

        String jobName = html.css("div.cn h1", "text").toString();
        if (jobName == null || jobName.trim().isEmpty()) {
            page.setSkip(true);
            return;
        }

        JobInfoEntity jobInfoEntity = new JobInfoEntity();
        jobInfoEntity.setJobName(jobName);
        // company name
        jobInfoEntity.setCompanyName(html.css("div.cn p.cname a", "text").toString());

        // company contact: text of the second div.bmsg block without its last two characters
        List<String> messageBlocks = html.css("div.bmsg").all();
        if (messageBlocks.size() > 1) {
            String contact = Jsoup.parse(messageBlocks.get(1)).text();
            if (contact.length() >= 2) {
                jobInfoEntity.setCompanyAddr(contact.substring(0, contact.length() - 2));
            }
        }

        // company description
        jobInfoEntity.setCompanyInfo(textOf(html.css("div.tmsg").toString()));
        // work location
        jobInfoEntity.setJobAddr(html.css("div.cn p.ltype", "text").regex(".*区").toString());

        // job description
        String jobDescription = textOf(html.css("div.job_msg").toString());
        jobInfoEntity.setJobInfo(jobDescription);

        // key technologies: distinct ASCII words of the description in order of first appearance
        // NOTE(review): the technology column is mapped with length 200; a very long keyword
        // list may not fit. Confirm whether truncation is wanted.
        List<String> keywords = new ArrayList<>();
        HashSet<String> seen = new HashSet<>();
        Matcher wordMatcher = LATIN_WORD.matcher(jobDescription);
        while (wordMatcher.find()) {
            String word = wordMatcher.group();
            if (seen.add(word)) {
                keywords.add(word);
            }
        }
        jobInfoEntity.setTechnology(keywords.toString());

        // salary: index 0 is the minimum, index 1 the maximum
        String salaryText = html.css("div.cn strong", "text").toString();
        if (salaryText != null && !salaryText.trim().isEmpty()) {
            Integer[] salary = MathSalary.getSalary(salaryText);
            jobInfoEntity.setSalaryMin(salary[0]);
            jobInfoEntity.setSalaryMax(salary[1]);
        }

        // publication time: the text between the last '|' and the trailing two characters
        String time = textOf(html.css("div.cn p.ltype").regex(".*发布").toString());
        if (time.length() >= 2) {
            int begin = time.lastIndexOf("|") + 1;
            int end = time.length() - 2;
            if (begin <= end) {
                jobInfoEntity.setTime(time.substring(begin, end));
            }
        }

        // detail page URL
        jobInfoEntity.setUrl(page.getUrl().toString());
        page.putField("jobInfo", jobInfoEntity);
    }

    /** Returns the plain text of an HTML fragment, or an empty string when the fragment is null. */
    private static String textOf(String htmlFragment) {
        return htmlFragment == null ? "" : Jsoup.parse(htmlFragment).text();
    }

    /**
     * @return the download settings used by the spider
     */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Scheduled entry point: 1 s after start-up, and again 100 s after each run finishes,
     * crawls from the first list page with five threads, a Bloom-filter duplicate remover and
     * the database pipeline.
     */
    @Scheduled(initialDelay = 1000, fixedDelay = 1000 * 100)
    public void process() {
        String startUrl = SHANG_URL + PAGE + XIA_URL;
        Spider.create(this)
                .addUrl(startUrl)
                .setScheduler(new QueueScheduler()
                        .setDuplicateRemover(new BloomFilterDuplicateRemover(10000000)))
                .thread(5)
                .addPipeline(this.springDataPipeline)
                .run();
    }
}
package com.hikktn.utils;
/**
 * Converts the salary text of a job posting into a yearly salary range.
 *
 * <p>Supported shapes include {@code "0.8-1.2万/月"}, {@code "5-8千/月"}, {@code "5-6万/年"},
 * {@code "500/天"}, {@code "500元/天"} and single-value forms such as {@code "2千以下/月"}.
 */
public class MathSalary {

    /** Working days per year used to annualise a daily rate. */
    private static final int WORK_DAYS_PER_YEAR = 240;
    /** Months per year used to annualise a monthly salary. */
    private static final int MONTHS_PER_YEAR = 12;

    /**
     * Parses a salary text into a yearly range.
     *
     * <p>The last character selects the period: {@code 月} (month) and {@code 年} (year) are
     * ranges with a unit; anything else is treated as a daily rate and multiplied by 240.
     * A text with a single value (no {@code '-'}) yields the same number for both bounds.
     *
     * @param salaryStr the salary text; may be null
     * @return a two-element array {@code [min, max]} in yearly amounts; unparseable parts,
     *         including a null or too-short input, yield 0
     */
    public static Integer[] getSalary(String salaryStr) {
        Integer[] salary = {0, 0};
        if (salaryStr == null) {
            return salary;
        }
        String text = salaryStr.trim();
        if (text.length() < 2) {
            return salary;
        }

        String date = text.substring(text.length() - 1);
        // Everything in front of the two trailing characters (separator + period), e.g. "5-8千".
        String body = text.substring(0, text.length() - 2);

        // Not monthly or yearly: treat as a daily rate.
        if (!"月".equals(date) && !"年".equals(date)) {
            salary[0] = salary[1] = str2Num(stripSuffix(body, "元"), WORK_DAYS_PER_YEAR);
            return salary;
        }

        // "以下" (at most) / "以上" (at least) mark single-value salaries; drop the marker.
        body = stripSuffix(stripSuffix(body, "以下"), "以上");
        if (body.isEmpty()) {
            return salary;
        }

        // The unit is the last character unless the amount ends in a digit (no unit given).
        String unit = "";
        String range = body;
        if (!Character.isDigit(body.charAt(body.length() - 1))) {
            unit = body.substring(body.length() - 1);
            range = body.substring(0, body.length() - 1);
        }

        String[] bounds = range.split("-");
        String low = bounds.length > 0 ? bounds[0] : "";
        String high = bounds.length > 1 ? bounds[1] : low;
        salary[0] = mathSalary(date, unit, low);
        salary[1] = mathSalary(date, unit, high);
        return salary;
    }

    /** Converts one bound to a yearly amount using the unit multiplier and the period. */
    private static Integer mathSalary(String date, String unit, String salaryStr) {
        int multiplier;
        if ("万".equals(unit)) {
            multiplier = 10000;
        } else if ("元".equals(unit) || unit.isEmpty()) {
            multiplier = 1;
        } else {
            // "千" and, as in the original implementation, any other unit
            multiplier = 1000;
        }
        int salary = str2Num(salaryStr, multiplier);
        if ("月".equals(date)) {
            salary *= MONTHS_PER_YEAR;
        }
        return salary;
    }

    /** Returns {@code text} without {@code suffix} when it ends with it, otherwise unchanged. */
    private static String stripSuffix(String text, String suffix) {
        return text.endsWith(suffix) ? text.substring(0, text.length() - suffix.length()) : text;
    }

    /**
     * Multiplies a decimal text by {@code num} using exact decimal arithmetic and truncates
     * the result to an int.
     *
     * @return the product, or 0 when the text is null or not a number
     */
    private static int str2Num(String salaryStr, int num) {
        if (salaryStr == null) {
            return 0;
        }
        try {
            return new java.math.BigDecimal(salaryStr.trim())
                    .multiply(java.math.BigDecimal.valueOf(num))
                    .intValue();
        } catch (NumberFormatException ignored) {
            // Best effort by design: text that is not a number counts as 0.
            return 0;
        }
    }
}
package com.hikktn.task;
import com.hikktn.pojo.JobInfoEntity;
import com.hikktn.service.JobInfoService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.ResultItems;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.pipeline.Pipeline;
/**
 * WebMagic pipeline that hands the crawled job posting to {@link JobInfoService}.
 *
 * @author lisonglin
 */
@Component
public class SpringDataPipeline implements Pipeline {

    @Autowired
    private JobInfoService jobInfoService;

    /**
     * Reads the entity stored under the result key {@code "jobInfo"} and saves it; results
     * without that entry are ignored.
     *
     * @param resultItems the fields collected while processing a page
     * @param task        the running crawl task (unused)
     */
    @Override
    public void process(ResultItems resultItems, Task task) {
        JobInfoEntity posting = resultItems.get("jobInfo");
        if (posting == null) {
            // Nothing was extracted for this page.
            return;
        }
        this.jobInfoService.save(posting);
    }
}
最后启动Spring Boot服务(启动类上需要添加@EnableScheduling注解,@Scheduled才会生效),定时任务就会自动执行。
搞定!