Koa2 + Puppeteer打造『爬虫系统』3

七.爬取机构信息以及上传七牛图床

1.service中的slider.js,防止一样的数据重复插入

const SliderModel = require('../do/models/slider');
const Slider = require('../do/models/slider');
/**
 * Persistence helpers for slider records.
 *
 * All queries go through `SliderModel`, the single model binding this
 * service needs.
 */
class SliderService{
    /**
     * Insert a slider record, or update the existing row that has the same
     * `cid`, so storing the same slide twice does not create a duplicate.
     *
     * @param {object} data - Slider fields; `data.cid` selects the row.
     * @returns {Promise<*>} The result of `SliderModel.update` when a row with
     *   this `cid` already exists, otherwise the result of `SliderModel.create`.
     */
    async addSliderData(data){
        const { cid } = data;
        const existing = await SliderModel.findOne({
            where:{ cid }
        });
        if(existing){
            return SliderModel.update(data,{
                where:{ cid }
            });
        }
        return SliderModel.create(data);
    }
}
module.exports = new SliderService();

2.config.js中更改配置,增加三个页面的路由地址

module.exports={
    qiniu:{
        keys:{
            ak:'qMK7okFyL1xX2o8gjFzy1PrI_jXR_yf58naIcIs8',
            sk:'j6Ngxxy6xSzXi9czKXun0Uju_5hDahAla9THml9C'
        },
        bucket:{
            tximg:{
                bucket_name:'crawler-txclass',
                domain:'http://qexgb3yqx.hn-bkt.clouddn.com'
            }
        }
    },
    crawler:{
        url:{
            main:'https://msiwei.ke.qq.com/#tab=0&category=-1',
            course:'https://msiwei.ke.qq.com/#tab=1&category=-1',
            teacher:'https://msiwei.ke.qq.com/#tab=2&category=-1'
        }
    }
}

3.crawler下文件夹建立agencyinfo.js爬取机构信息

先把slider.js中写死的url改为从config中引入的方式

const Crawler = require('../lib/crawler'),
      {crawler} = require('../config/config');
// Crawl the agency header of the main page and hand the extracted fields to Crawler.
Crawler({
    url:crawler.url.main,
    /**
     * Extracts the agency information from the `.agency-head` section.
     * Reads jQuery from `window.$`, so it presumably runs inside the crawled
     * page (confirm against lib/crawler).
     *
     * @returns {object} Agency fields; `logoKey` starts empty and is expected
     *   to be filled in by a later step.
     */
    callback(){
        const $ = window.$;
        const $head = $('.agency-head');
        const pick = (selector) => $head.find(selector);

        const logoUrl = pick('.agency-head-logo').prop('src');
        const name = pick('.ag-title-main').text();
        // Keep only the digits of the first `.ag-info` text.
        const feedbackRate = pick('.ag-info').eq(0).text().replace(/[^0-9]/ig,'');
        const studentCount = pick('.js-item-num').attr('data-num');
        const description = pick('.ag-info-des').text();
        const qqLink = pick('.ag-info-btn').prop('href');

        return {
            logoUrl,
            name,
            feedbackRate,
            studentCount,
            description,
            qqLink,
            logoKey:''
        };
    }
})

4.controller中的Crawler.js中新增函数

crawlAgencyInfo(){
        startProcess({
            path:'../crawler/agencyinfo',
            async message(data){
                if(data.logoUrl && !data.logoKey){
                    const qiniu  = config.qiniu;
                    try {
                        const logoData = await qiniuUpload({
                            url:data.logoUrl,
                            bucket:qiniu.bucket.tximg.bucket_name,
                            ext:'.jpg'
                        });
                        if(logoData.key){
                            data.logoKey = logoData.key;
                        }
                    } catch (error) {
                        console.log(error);
                    }
                }
            },
            async exit(data){
                console.log(data);
            },
            async error(data){
                console.log(data);
            }
        })
    }

5.utils中更改:ak、sk改为直接从config中读取,并删除之前在控制器中传入的ak、sk参数

{qiniu} = require('../config/config');
const mac = new Qiniu.auth.digest.Mac(qiniu.keys.ak,qiniu.keys.sk),

6.routes中配置对应路由

// Crawler routes: every path below is registered under the /crawler prefix.
const router = require('koa-router')();
const crawlerController = require('../controller/crawler');

router.prefix('/crawler');

// [path, handler] pairs, registered as GET routes in this order.
const getRoutes = [
    ['/crawl_slider_data', crawlerController.crawlSliderData],
    ['/crawl_agencyr_info', crawlerController.crawlAgencyInfo]
];
for (const [routePath, handler] of getRoutes) {
    router.get(routePath, handler);
}

module.exports = router;

访问该路由(http://localhost:3000/crawler/crawl_agencyr_info)即可触发爬取

八.创建机构信息表模型以及信息入表操作

1.在models中建立agencyinfo.js(文件名大小写需与后面require('./agencyinfo')保持一致),创建数据表

const seq = require('../connection/mysql_connect'),
   { STRING,INT } = require('../../config/db_type_config');
/**
 * Builds a NOT NULL column definition.
 *
 * @param {*} type - Column type taken from db_type_config.
 * @param {string} comment - Column comment.
 * @returns {{comment: string, type: *, allowNull: boolean}} Column options.
 */
function requiredColumn(type, comment){
    return {
        comment,
        type,
        allowNull:false
    };
}

// `agency_info` model: one required column per crawled agency field.
const AgencyInfo = seq.define('agency_info',{
    logoUrl:requiredColumn(STRING,'Logo image url'),
    name:requiredColumn(STRING,'Agency name'),
    feedbackRate:requiredColumn(INT,'Feedback rate'),
    studentCount:requiredColumn(INT,'Student total count'),
    description:requiredColumn(STRING,'Agency slogan'),
    qqLink:requiredColumn(STRING,'QQ information link'),
    logoKey:requiredColumn(STRING,'Qiniu logo image name')
})
module.exports = AgencyInfo;

2.在models的统一导出文件中导出AgencyInfo

const Slider = require('./slider');
      AgencyInfo = require('./agencyinfo')
module.exports={
    Slider,AgencyInfo
}

执行 node do/sync.js 创建表结构

3.在service中新建agencyinfo.js

const AgencyInfoModel = require('../do/models/agencyinfo');
/**
 * Persistence helper for the agency information row.
 */
class AgencyInfoService {
    /**
     * Stores the agency information in the row with id 1: updates that row
     * when it exists, otherwise creates a new record from `data`.
     *
     * @param {object} data - Agency fields to persist.
     * @returns {Promise<*>} The result of `AgencyInfoModel.update` or
     *   `AgencyInfoModel.create`.
     */
    async addAgencyInfo(data){
        const AGENCY_ROW_ID = 1;
        const existing = await AgencyInfoModel.findOne({
            where:{ id:AGENCY_ROW_ID }
        });
        if(!existing){
            return await AgencyInfoModel.create(data);
        }
        return await AgencyInfoModel.update(data,{
            where:{ id:AGENCY_ROW_ID }
        });
    }
}
module.exports=new AgencyInfoService();

4.controller中修改

const {addAgencyInfo} = require('../service/agencyinfo'),
                     const logoData = await qiniuUpload({
                            url:data.logoUrl,
                            bucket:qiniu.bucket.tximg.bucket_name,
                            ext:'.jpg'
                        });
                        if(logoData.key){
                            data.logoKey = logoData.key;
                        }
                        const result = await addAgencyInfo(data);
                        if(result){
                            console.log('Data create Ok')
                        }else{
                            console.log('Data create failed')
                        }

5.访问路由 http://localhost:3000/crawler/crawl_agencyr_info ,将机构信息写入数据库

九.爬取推荐课程数据以及上传七牛图床

1.crawlers文件夹下建立recomCourse文件

const Crawler = require('../lib/crawler'),
      {crawler} = require('../config/config');
 // Crawl the recommended-course list of the main page.
 Crawler({
     url:crawler.url.main,
     /**
      * Builds one record per `.spread-course-ul li` element.
      * Reads jQuery from `window.$`, so it presumably runs inside the crawled
      * page (confirm against lib/crawler).
      *
      * @returns {object[]} Course records; `posterKey` and `teacherImgKey`
      *   start empty and are expected to be filled in by a later step.
      */
     callback(){
         const $ = window.$,
               $item = $('.spread-course-ul li'),
               mainTitle = $('.agency-spread-wrap h4').text();
         const data = [];
         $item.each((index,item)=>{
             const $el = $(item),
                   $itemLk = $el.find('a'),
                   $faceSpans = $el.find('.spread-course-face span');
             const dataItem = {
                 // The id is the value of the first `&key=value&` pair in the report-tdw attribute.
                 cid:parseInt($el.attr('report-tdw').match(/\&(.+?)\&/)[1].split('=')[1],10),
                 href:$itemLk.prop('href'),
                 mainTitle,
                 title:$itemLk.prop('title'),
                 posterUrl:$itemLk.find('.spread-course-cover').prop('src'),
                 description:$el.find('.spread-course-des').text(),
                 teacherImg:$el.find('.spread-course-face img').prop('src'),
                 teacherName:$faceSpans.eq(0).text(),
                 // Fixed typo: `.reqlace` is not a function; strip non-digits with `.replace`.
                 studentCount:parseInt($faceSpans.eq(1).text().replace(/[^0-9]/ig,''),10),
                 // Drop the first character of the price text before parsing.
                 price:parseInt($el.find('.spread-course-price').text().slice(1),10),
                 posterKey:'',
                 teacherImgKey:''
             }
             data.push(dataItem)
         })
         return data;
     }
 })

2.增加路由

// Crawler routes: every path below is registered under the /crawler prefix.
const router = require('koa-router')();
const crawlerController = require('../controller/crawler');

router.prefix('/crawler');

// [path, handler] pairs, registered as GET routes in this order.
const getRoutes = [
    ['/crawl_slider_data', crawlerController.crawlSliderData],
    ['/crawl_agencyr_info', crawlerController.crawlAgencyInfo],
    ['/crawl_recom_course', crawlerController.crawlRecomCourse]
];
for (const [routePath, handler] of getRoutes) {
    router.get(routePath, handler);
}

module.exports = router;

3.controller中增加方法

crawlRecomCourse(){
        startProcess({
            path:'../crawler/recomCourse',
            async message(data){
                data.map(async item=>{
                   try {
                       const qiniu = config.qiniu;
                       if(item.posterUrl && !item.posterKey){
                           const posterData = await qiniuUpload({
                               url:item.posterUrl,
                               bucket:qiniu.bucket.tximg.bucket_name,
                               ext:'.jpg'
                           })
                           if(posterData.key){
                               item.posterKey = posterData.key
                           }
                       }
                       if(item.teacherImg&&!item.teacherImgKey){
                           const teacherImgData = await qiniuUpload({
                               url:item.teacherImg,
                               bucket:qiniu.bucket.tximg.bucket_name,
                               ext:'.jpg'
                           })
                           if(teacherImgData.key){
                               item.teacherImgKey = teacherImgData.key
                           }
                       }
                   } catch (error) {
                       console.log(error)
                   }
                    
                })
            },
            async exit(data){
                console.log(data);
            },
            async error(data){
                console.log(data);
            }
        })
    }

  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值