七.爬取机构信息以及上传七牛图床
1.service中的slider.js,防止一样的数据重复插入
const SliderModel = require('../do/models/slider');
const Slider = require('../do/models/slider');
/**
 * Persistence helpers for slider records.
 */
class SliderService {
  /**
   * Inserts a slider record, or updates the stored record that has the same
   * `cid`, so that crawling the same data again does not create duplicates.
   *
   * The method does not use `this`, so it can be called unbound.
   *
   * @param {Object} data - Slider fields; `data.cid` identifies the record.
   * @returns {Promise<*>} The result of `SliderModel.update` when a record
   *   with this `cid` already exists, otherwise of `SliderModel.create`.
   */
  async addSliderData(data) {
    const { cid } = data;
    const existing = await SliderModel.findOne({ where: { cid } });

    if (existing) {
      return SliderModel.update(data, { where: { cid } });
    }

    // Use the same model binding as the lookup and update above instead of
    // the duplicate `Slider` alias, so all three queries go through one name.
    return SliderModel.create(data);
  }
}
module.exports = new SliderService();
2.config.js中更改配置,增加三个页面的路由地址
module.exports={
qiniu:{
keys:{
ak:'qMK7okFyL1xX2o8gjFzy1PrI_jXR_yf58naIcIs8',
sk:'j6Ngxxy6xSzXi9czKXun0Uju_5hDahAla9THml9C'
},
bucket:{
tximg:{
bucket_name:'crawler-txclass',
domain:'http://qexgb3yqx.hn-bkt.clouddn.com'
}
}
},
crawler:{
url:{
main:'https://msiwei.ke.qq.com/#tab=0&category=-1',
course:'https://msiwei.ke.qq.com/#tab=1&category=-1',
teacher:'https://msiwei.ke.qq.com/#tab=2&category=-1'
}
}
}
3.在crawler文件夹下建立agencyinfo.js,爬取机构信息
先修改slider.js中url的方式为引入方式
const Crawler = require('../lib/crawler'),
{crawler} = require('../config/config');
// Crawl the main page and collect the fields of the `.agency-head` section.
Crawler({
  url: crawler.url.main,
  // Everything the callback needs is declared inside it; it only relies on
  // the page's global `window.$` (presumably it is evaluated in the page
  // context by the crawler - confirm in ../lib/crawler).
  callback() {
    const jq = window.$;
    const header = jq('.agency-head');
    const within = (selector) => header.find(selector);

    const logoUrl = within('.agency-head-logo').prop('src');
    const name = within('.ag-title-main').text();
    // Keep only the digits of the first `.ag-info` entry.
    const feedbackRate = within('.ag-info').eq(0).text().replace(/[^0-9]/ig, '');
    const studentCount = within('.js-item-num').attr('data-num');
    const description = within('.ag-info-des').text();
    const qqLink = within('.ag-info-btn').prop('href');

    return {
      logoUrl,
      name,
      feedbackRate,
      studentCount,
      description,
      qqLink,
      // Left empty here; the controller fills it in after uploading the logo.
      logoKey: '',
    };
  },
});
4.controller中的crawler.js中新增函数
crawlAgencyInfo(){
startProcess({
path:'../crawler/agencyinfo',
async message(data){
if(data.logoUrl && !data.logoKey){
const qiniu = config.qiniu;
try {
const logoData = await qiniuUpload({
url:data.logoUrl,
bucket:qiniu.bucket.tximg.bucket_name,
ext:'.jpg'
});
if(logoData.key){
data.logoKey = logoData.key;
}
} catch (error) {
console.log(error);
}
}
},
async exit(data){
console.log(data);
},
async error(data){
console.log(data);
}
})
}
5.utils中更改,将ak,sk固定,且将之前控制器中传入的aksk删除
{qiniu} = require('../config/config');
const mac = new Qiniu.auth.digest.Mac(qiniu.keys.ak,qiniu.keys.sk),
6.routes中配置对应路由
// Routes that trigger the crawlers; every path is registered under /crawler.
const router = require('koa-router')();
const crawlerController = require('../controller/crawler');

router.prefix('/crawler');

router.get('/crawl_slider_data', crawlerController.crawlSliderData);
// '/crawl_agencyr_info' is a misspelling. It is kept so existing links keep
// working, and '/crawl_agency_info' is registered as the correctly spelled
// alias for the same handler.
router.get('/crawl_agencyr_info', crawlerController.crawlAgencyInfo);
router.get('/crawl_agency_info', crawlerController.crawlAgencyInfo);

module.exports = router;
访问对应路由进行测试
八.创建机构信息表模型以及信息入表操作
1.在models中建立agencyinfo.js,创建数据表
const seq = require('../connection/mysql_connect'),
{ STRING,INT } = require('../../config/db_type_config');
// Every column of this table is mandatory; build each definition the same way.
const requiredColumn = (type, comment) => ({ comment, type, allowNull: false });

/**
 * Model for the `agency_info` table, defined on the shared `seq` connection.
 * The fields mirror the object produced by the agency-info crawler.
 */
const AgencyInfo = seq.define('agency_info', {
  logoUrl: requiredColumn(STRING, 'Logo image url'),
  name: requiredColumn(STRING, 'Agency name'),
  feedbackRate: requiredColumn(INT, 'Feedback rate'),
  studentCount: requiredColumn(INT, 'Student total count'),
  description: requiredColumn(STRING, 'Agency slogan'),
  qqLink: requiredColumn(STRING, 'QQ information link'),
  logoKey: requiredColumn(STRING, 'Qiniu logo image name'),
});
module.exports = AgencyInfo;
2.在导出文件中导出模型
const Slider = require('./slider');
AgencyInfo = require('./agencyinfo')
module.exports={
Slider,AgencyInfo
}
node do/sync.js创建表结构
3.在service中新建agencyinfo.js
const AgencyInfoModel = require('../do/models/agencyinfo');
/**
 * Persistence helpers for the single agency-info row.
 */
class AgencyInfoService {
  /**
   * Stores the agency information in the row with id 1: updates that row
   * when it exists, otherwise creates it.
   *
   * The method does not use `this`, so it can be destructured from the
   * service instance and called unbound.
   *
   * @param {Object} data - Agency fields to store. The object is not mutated.
   * @returns {Promise<*>} The result of `AgencyInfoModel.update` when the row
   *   exists, otherwise of `AgencyInfoModel.create`.
   */
  async addAgencyInfo(data) {
    const id = 1;
    const existing = await AgencyInfoModel.findOne({ where: { id } });

    if (existing) {
      return AgencyInfoModel.update(data, { where: { id } });
    }

    // Create the row with the same id that the lookup uses. Without it the
    // database assigns the id, and if that is ever not 1 the lookup above
    // never finds the row and every later call inserts another one.
    return AgencyInfoModel.create(Object.assign({}, data, { id }));
  }
}
module.exports=new AgencyInfoService();
4.controller中修改
const {addAgencyInfo} = require('../service/agencyinfo'),
// Excerpt of the controller's agency-info handling (presumably the body of
// the `try` block inside the async `message` handler - confirm when pasting).
// Upload the crawled logo to the `tximg` Qiniu bucket.
const logoData = await qiniuUpload({
url:data.logoUrl,
bucket:qiniu.bucket.tximg.bucket_name,
ext:'.jpg'
});
// Record the Qiniu key only when the upload result provides one.
if(logoData.key){
data.logoKey = logoData.key;
}
// Persist the agency info through the service (it inserts or updates).
const result = await addAgencyInfo(data);
// Report the outcome based on whether the service returned a truthy value.
if(result){
console.log('Data create Ok')
}else{
console.log('Data create failed')
}
九.爬取推荐课程数据以及上传七牛图床
1.crawler文件夹下建立recomCourse.js文件
const Crawler = require('../lib/crawler'),
{crawler} = require('../config/config');
// Crawl the main page and collect one record per recommended course
// (each `li` under `.spread-course-ul`).
Crawler({
  url: crawler.url.main,
  // Everything the callback needs is declared inside it; it only relies on
  // the page's global `window.$` (presumably it is evaluated in the page
  // context by the crawler - confirm in ../lib/crawler).
  callback() {
    const $ = window.$;
    const $items = $('.spread-course-ul li');
    const mainTitle = $('.agency-spread-wrap h4').text();
    const data = [];

    $items.each((index, item) => {
      const $el = $(item);
      const $itemLk = $el.find('a');
      const $faceSpans = $el.find('.spread-course-face span');

      data.push({
        // The course id is the value of the `key=value` pair that sits
        // between the first two `&` of the `report-tdw` attribute.
        cid: parseInt($el.attr('report-tdw').match(/\&(.+?)\&/)[1].split('=')[1], 10),
        href: $itemLk.prop('href'),
        mainTitle,
        title: $itemLk.prop('title'),
        posterUrl: $itemLk.find('.spread-course-cover').prop('src'),
        description: $el.find('.spread-course-des').text(),
        teacherImg: $el.find('.spread-course-face img').prop('src'),
        teacherName: $faceSpans.eq(0).text(),
        // Fixed: this was `.reqlace(...)`, which is not a string method and
        // threw a TypeError on the first item. Keep only the digits.
        studentCount: parseInt($faceSpans.eq(1).text().replace(/[^0-9]/g, ''), 10),
        // Drop the first character of the price text before parsing.
        price: parseInt($el.find('.spread-course-price').text().slice(1), 10),
        // Left empty here; the controller fills these in after uploading.
        posterKey: '',
        teacherImgKey: '',
      });
    });

    return data;
  },
});
2.增加路由
// Routes that trigger the crawlers; every path is registered under /crawler.
const router = require('koa-router')();
const crawlerController = require('../controller/crawler');

router.prefix('/crawler');

router.get('/crawl_slider_data', crawlerController.crawlSliderData);
// '/crawl_agencyr_info' is a misspelling. It is kept so existing links keep
// working, and '/crawl_agency_info' is registered as the correctly spelled
// alias for the same handler.
router.get('/crawl_agencyr_info', crawlerController.crawlAgencyInfo);
router.get('/crawl_agency_info', crawlerController.crawlAgencyInfo);
router.get('/crawl_recom_course', crawlerController.crawlRecomCourse);

module.exports = router;
3.controller中增加方法
crawlRecomCourse(){
startProcess({
path:'../crawler/recomCourse',
async message(data){
data.map(async item=>{
try {
const qiniu = config.qiniu;
if(item.posterUrl && !item.posterKey){
const posterData = await qiniuUpload({
url:item.posterUrl,
bucket:qiniu.bucket.tximg.bucket_name,
ext:'.jpg'
})
if(posterData.key){
item.posterKey = posterData.key
}
}
if(item.teacherImg&&!item.teacherImgKey){
const teacherImgData = await qiniuUpload({
url:item.teacherImg,
bucket:qiniu.bucket.tximg.bucket_name,
ext:'.jpg'
})
if(teacherImgData.key){
item.teacherImgKey = teacherImgData.key
}
}
} catch (error) {
console.log(error)
}
})
},
async exit(data){
console.log(data);
},
async error(data){
console.log(data);
}
})
}