// Node.js 实现新闻爬虫 (a news crawler implemented in Node.js)
// Keyword to look for in link text ('费德勒' is Chinese for "Federer").
const KEYWORD = '费德勒';
// Case-insensitive pattern built from KEYWORD; spider() tests each link's text against it.
const KEYWORD_REG = new RegExp(KEYWORD, 'i');
// Matching links as { title, href } objects; spider() replaces this array on each successful fetch.
let newsArry = [];
/**
 * Fetches the Sina Sports tennis page and collects every link whose text
 * matches KEYWORD_REG into the module-level `newsArry`.
 *
 * `newsArry` is cleared and refilled only when the response status is 200;
 * otherwise it is left untouched. A rejected request (or an exception while
 * scanning the page) is caught and logged rather than propagated.
 *
 * @returns {Promise<void>} Settles once the fetch and scan have finished.
 *   Results are read from `newsArry`, not from the resolved value.
 */
function spider() {
  return axios.get('http://sports.sina.com.cn/tennis/').then(response => {
    if (response.status !== 200) {
      return;
    }

    const $ = cheerio.load(response.data, {
      // Leave HTML entities in the page undecoded.
      decodeEntities: false
    });
    const newsList = $('a[href]');

    newsArry = [];
    for (let i = 0; i < newsList.length; ++i) {
      const link = $(newsList[i]);
      const text = link.text();
      // Collect only the links whose visible text mentions the keyword.
      if (KEYWORD_REG.test(text)) {
        newsArry.push({
          title: text.trim(),
          href: link.attr('href')
        });
      }
    }
  }).catch(e => {
    console.log('爬虫失败了');
    // Log the caught error itself; the handler's parameter is `e`.
    console.log(e);
  });
}
const configData = require('./config.json');
/**
 * Builds the HTML string to send: one linked paragraph per news entry.
 *
 * The `href` and `title` values are interpolated as-is, without escaping.
 *
 * @param {Iterable<{href: string, title: string}>} arr - Entries to render.
 * @returns {string} The paragraphs concatenated in iteration order, or the
 *   empty string when `arr` yields no entries.
 */
function formStr(arr) {
  const paragraphs = [...arr].map(
    ({ href, title }) => `<p><a target="_blank" href="${href}">${title}</a></p>`
  );
  return paragraphs.join('');
}
//邮件发送函数
function sendEmail(opts) {
let transporter = nodemailer.createTransport({
service: 'QQ',
auth: configData.auth
}, {
from: configData.auth.user
})
var message = {
//收件人用逗号间隔
to: opts.to,
//信息主题
subject: opts.subject,
//内容
html: opts.html
};