python和nodejs爬虫_一款基于NodeJS的爬虫

crawler.js

var Crawler = require("crawler");

var mysql = require('mysql');

// MySQL connection shared by every query in this script.
// NOTE(review): the credentials are hard-coded here; consider reading them
// from the environment or a config file before using this outside a sandbox.
const link = mysql.createConnection({
  host: 'localhost',
  user: 'root',
  password: 'root',
  database: 'mof',
});

// Open the connection up front. This must be its own statement: the original
// text had it fused onto the closing `})` line, which does not parse.
link.connect();

// Crawler instance shared by all queued requests (maxConnections set to 10).
const c = new Crawler({
  maxConnections: 10,
});

/**
 * Queue a crawl of the cubingchina.com competition list page.
 *
 * For every table row matched by `tbody>:not(tr[class="active"])` the crawl
 * callback reads the date (column 1), name and detail link (column 2),
 * province (column 3), city (column 4) and venue (column 5), inserts one row
 * into the `competition` table, and then calls `player(insertId, url)` so the
 * competitor list is fetched and stored for that row.
 *
 * Relies on the module-level `c` (crawler), `link` (MySQL connection) and
 * `player` bindings. Takes no arguments and returns nothing.
 */
const Competition = function () {
  // Scraped text is bound through `?` placeholders instead of being spliced
  // into the statement, so quotes in a competition name can neither break the
  // query nor inject SQL.
  const insertSql =
    'INSERT INTO `competition`(`Id`, `CompetitionName`, `CompetitionTimer`, `province`, `city`, `site`, `CompetitionURL`, `player`) ' +
    'VALUES (null, ?, ?, ?, ?, ?, ?, "")';

  c.queue([{
    url: 'https://cubingchina.com/competition',
    callback: function (error, res, done) {
      if (error) {
        console.log(error);
        done();
        return;
      }

      // `res.$` is the Cheerio handle for the fetched page.
      const $ = res.$;

      // Evaluate the selector once rather than on every loop iteration.
      const rows = $('tbody>:not(tr[class="active"])');

      for (let i = 0; i < rows.length; i++) {
        const dom = rows.eq(i);
        const nameLink = dom.find('td:nth-child(2)>a');
        const CompetitionName = nameLink.text();
        const CompetitionURL = nameLink.attr('href');

        // A row without a detail link cannot be followed to its competitor
        // page, so skip it instead of storing the string "undefined".
        if (!CompetitionURL) {
          console.log('skipping row ' + i + ': no competition link found');
          continue;
        }

        const CompetitionTimer = dom.find('td:nth-child(1)').text();
        const province = dom.find('td:nth-child(3)').text();
        const city = dom.find('td:nth-child(4)').text();
        const site = dom.find('td:nth-child(5)').text();

        const values = [CompetitionName, CompetitionTimer, province, city, site, CompetitionURL];

        link.query(insertSql, values, function (insertError, results) {
          if (insertError) {
            // Without this guard `results` is undefined and the callback throws.
            console.log(insertError);
            return;
          }
          // `CompetitionURL` is block-scoped per iteration, so it can be passed
          // on directly; no SELECT round-trip is needed to read it back.
          player(results.insertId, CompetitionURL);
        });
      }

      console.log('结束');
      done();
    },
  }]);
};

/**
 * Queue a crawl of a competition's competitor list and store it as JSON.
 *
 * Fetches `url + "/competitors"`, turns every `tbody tr[class]` row into
 * `{ NumberID, Name, sex, from, project }` (columns 1-4 are read as text; each
 * non-empty cell from the fifth onwards contributes `{ icon, level }` taken
 * from the `class` and `title` attributes of its `<i>` element), and writes
 * the JSON-encoded array into the `player` column of the `competition` row
 * whose `Id` equals `id`.
 *
 * Relies on the module-level `c` (crawler) and `link` (MySQL connection).
 *
 * @param {number} id - `Id` of the `competition` row to update.
 * @param {string} url - Competition detail URL, without the `/competitors` suffix.
 */
const player = (id, url) => {
  c.queue([{
    url: url + "/competitors",
    callback: function (error, res, done) {
      if (error) {
        console.log(error);
        done();
        return;
      }

      const $ = res.$;
      const players = [];

      // Evaluate the selector once; an empty match simply yields no players.
      const rows = $('tbody tr[class]');

      for (let i = 0; i < rows.length; i++) {
        const dom = rows.eq(i);
        const NumberID = dom.find('td:nth-child(1)').text(); // registration number
        const Name = dom.find('td:nth-child(2)').text(); // competitor name
        const sex = dom.find('td:nth-child(3)').text(); // gender
        const from = dom.find('td:nth-child(4)').text(); // region

        // Events entered: cells from index 4 onwards, skipping empty ones.
        const project = [];
        const cells = dom.find('td');
        for (let x = 4; x < cells.length; x++) {
          const Dnode = cells.eq(x);
          if (Dnode.html() !== "") {
            const marker = Dnode.find('i');
            project.push({
              icon: marker.attr('class'),
              level: marker.attr('title'),
            });
          }
        }

        players.push({ NumberID, Name, sex, from, project });
      }

      // Bind the JSON and the id through placeholders: scraped names may
      // contain quotes, which would break (or inject into) a hand-quoted string.
      const sql = "UPDATE  `mof`.`competition` SET  `player` =  ? WHERE  `competition`.`Id` = ?";

      link.query(sql, [JSON.stringify(players), id], function (updateError) {
        if (updateError) {
          // Surface the failure instead of silently dropping it.
          console.log(updateError);
        } else {
          console.log('正确');
        }
      });

      done();
    },
  }]);
};

// Entry point: queue the competition-list crawl.
Competition();

// // 爬取一个URL,使用默认的callback

// c.queue('http://www.amazon.com');

// // 爬取URL列表

// c.queue(['http://www.google.com/','http://www.yahoo.com']);

// // 爬取页面,自定义callback和参数

// c.queue([{

//     uri: 'http://parishackers.org/',

//     jQuery: false,

//     // 覆盖全局的callback

//     callback: function (error, res, done) {

//         if(error){

//             console.log(error);

//         }else{

//             console.log('Grabbed', res.body.length, 'bytes');

//         }

//         done();

//     }

// }]);

// // 在队列中加入一些HTML代码,无需爬取(mostly for tests)

// c.queue([{

//     html: '<p>This is a test</p>'

// }]);

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值