'use strict';
const Controller = require('egg').Controller;
const superagent = require('superagent')
const cheerio = require('cheerio')
/**
 * Extracts the course entries from the 3mooc.com home page markup.
 *
 * Each course anchor under `#aCoursesList` is visited exactly once and its
 * title and image are looked up *inside that anchor*. Pairing the fields
 * per anchor (rather than by position across separate page-wide queries)
 * keeps titles and images attached to the right link when an entry is
 * missing one of them, and avoids writing to a non-existent array slot.
 *
 * @param {string} html - Raw HTML of the page to parse.
 * @returns {Array<{href: (string|undefined), text: (string|undefined), src: (string|undefined)}>}
 *   One object per course anchor, in document order. `text` and `src` are
 *   only present when the anchor contains the corresponding element.
 */
function format3Mooc(html) {
  const $ = cheerio.load(html);
  const courses = [];

  $('#aCoursesList .demo4-course .clearfix li a').each((index, anchor) => {
    const $anchor = $(anchor);
    const course = { href: $anchor.attr('href') };

    const $title = $anchor.find('.txtOf').first();
    if ($title.length > 0) {
      course.text = $title.text();
    }

    // The image URL is read from the `xsrc` attribute, not `src`
    // (presumably a lazy-loading placeholder on the page; verify against the markup).
    const $image = $anchor.find('.demo4-cou-item-pic .img-responsive').first();
    if ($image.length > 0) {
      course.src = $image.attr('xsrc');
    }

    courses.push(course);
  });

  return courses;
}
/**
 * Egg controller that scrapes the 3mooc.com home page and hands the
 * extracted course list to the `mocc` service.
 */
class MoccController extends Controller {
  /**
   * Fetches https://www.3mooc.com/, parses the course list out of the
   * response body with `format3Mooc`, passes it to `ctx.service.mocc.set`,
   * and responds with whatever that service call resolves to.
   *
   * Any failure (HTTP error, parse error, service error) rejects this
   * method's promise, so the request never hangs waiting on a promise
   * that can no longer settle.
   *
   * @returns {Promise<void>}
   */
  async index() {
    const { ctx } = this;

    // superagent requests are thenable, so they can be awaited directly.
    // Parsing happens outside any callback: an exception thrown by
    // format3Mooc now rejects index() instead of escaping an `.end()`
    // callback where it would never reach `reject`.
    const res = await superagent.get('https://www.3mooc.com/');
    const arr = format3Mooc(res.text);

    // Persist the scraped list (the original comment says: store in the database).
    const result = await ctx.service.mocc.set(arr);
    ctx.body = result;
  }
}
module.exports = MoccController;
// Node.js web crawler
// (Blog page residue: "latest recommended article published 2024-07-25 17:11:44")