// crawler.js
var Crawler = require("crawler");
var mysql = require('mysql');
// Shared MySQL connection used by the crawl callbacks in this file.
// NOTE(review): the credentials are hard-coded here; consider loading them
// from configuration or environment variables instead.
const link = mysql.createConnection({
  host: 'localhost',
  user: 'root',
  password: 'root',
  database: 'mof',
});
link.connect();

// Shared crawler instance, configured with maxConnections set to 10.
const c = new Crawler({
  maxConnections: 10,
});
/**
 * Crawl the competition list page and store every listed competition.
 *
 * Queues https://cubingchina.com/competition on the shared crawler `c`. For
 * each `tbody` child that is not a `tr` with class "active", the date, name,
 * province, city, venue and link cells are read and inserted into the
 * `competition` table through the shared connection `link`. After a
 * successful insert, `player(insertId, CompetitionURL)` is called so the
 * competitor list of that competition gets crawled as well.
 */
const Competition = function() {
  // Values are bound through placeholders so quotes in the scraped text can
  // neither break the statement nor inject SQL.
  const insertSql = 'INSERT INTO `competition`(`Id`, `CompetitionName`, `CompetitionTimer`, `province`, `city`, `site`, `CompetitionURL`, `player`) VALUES (null, ?, ?, ?, ?, ?, ?, ?)';

  c.queue([{
    url: 'https://cubingchina.com/competition',
    callback: function(error, res, done) {
      if (error) {
        console.log(error);
        done();
        return;
      }

      // `$` is Cheerio by default: a lean implementation of core jQuery
      // designed for the server.
      const $ = res.$;
      // Run the selector once instead of on every loop iteration.
      const rows = $('tbody>:not(tr[class="active"])');

      for (let i = 0; i < rows.length; i++) {
        const dom = rows.eq(i);
        const anchor = dom.find('td:nth-child(2)>a');
        const CompetitionName = anchor.text();
        const CompetitionTimer = dom.find('td:nth-child(1)').text();
        const province = dom.find('td:nth-child(3)').text();
        const city = dom.find('td:nth-child(4)').text();
        const site = dom.find('td:nth-child(5)').text();
        // A row without a link yields `undefined`; store an empty string
        // rather than the literal text "undefined".
        const CompetitionURL = anchor.attr('href') || '';
        const values = [CompetitionName, CompetitionTimer, province, city, site, CompetitionURL, ''];

        link.query(insertSql, values, function(queryError, results) {
          if (queryError) {
            // `results` is undefined on failure, so stop before reading insertId.
            console.log(queryError);
            return;
          }
          // `CompetitionURL` is block-scoped per iteration, so the value just
          // inserted is still available here and needs no SELECT round-trip.
          if (CompetitionURL !== '') {
            player(results.insertId, CompetitionURL);
          }
        });
      }
      console.log('结束');
      done();
    }
  }]);
};
/**
 * Crawl the competitor list of one competition and store it on that
 * competition's database row.
 *
 * Queues `url + "/competitors"` on the shared crawler `c`. Every
 * `tbody tr[class]` row of the page becomes
 * `{ NumberID, Name, sex, from, project }`, where `project` holds one
 * `{ icon, level }` entry per non-empty cell from the fifth column onwards.
 * The array is JSON-encoded and written to the `player` column of the
 * `competition` row whose `Id` equals `id`.
 *
 * @param {number} id - `Id` of the `competition` row to update.
 * @param {string} url - Competition page URL; "/competitors" is appended.
 */
const player = (id, url) => {
  // Placeholders keep quotes and backslashes inside the JSON payload (e.g. a
  // name such as O'Brien) from breaking or altering the statement.
  const updateSql = 'UPDATE `mof`.`competition` SET `player` = ? WHERE `competition`.`Id` = ?';

  c.queue([{
    url: url + "/competitors",
    callback: function(error, res, done) {
      if (error) {
        console.log(error);
        done();
        return;
      }

      const $ = res.$;
      // Run the row selector once instead of on every loop iteration.
      const rows = $('tbody tr[class]');
      // Competitors found on the page.
      const players = [];

      for (let i = 0; i < rows.length; i++) {
        const dom = rows.eq(i);
        const cells = dom.find('td');
        // Events this competitor entered.
        const project = [];
        // The first four cells are number, name, gender and origin; the
        // remaining cells are read as event columns.
        for (let x = 4; x < cells.length; x++) {
          const Dnode = cells.eq(x);
          if (Dnode.html() !== '') {
            const marker = Dnode.find('i');
            project.push({
              "icon": marker.attr('class'),
              "level": marker.attr('title')
            });
          }
        }
        players.push({
          NumberID: dom.find('td:nth-child(1)').text(), // registration number
          Name: dom.find('td:nth-child(2)').text(), // name
          sex: dom.find('td:nth-child(3)').text(), // gender
          from: dom.find('td:nth-child(4)').text(), // origin
          project: project
        });
      }

      link.query(updateSql, [JSON.stringify(players), id], function(queryError) {
        if (queryError) {
          // Report the failure instead of dropping it silently.
          console.log(queryError);
          return;
        }
        console.log('正确');
      });
      done();
    }
  }]);
};
// Entry point: queue the competition list page for crawling.
Competition();
// // Crawl a single URL using the default callback
// c.queue('http://www.amazon.com');
// // Crawl a list of URLs
// c.queue(['http://www.google.com/','http://www.yahoo.com']);
// // Crawl a page with a custom callback and options
// c.queue([{
// uri: 'http://parishackers.org/',
// jQuery: false,
// // Overrides the global callback
// callback: function (error, res, done) {
// if(error){
// console.log(error);
// }else{
// console.log('Grabbed', res.body.length, 'bytes');
// }
// done();
// }
// }]);
// // Queue some raw HTML directly, without fetching it (mostly for tests)
// c.queue([{
// html: '<p>This is a test</p>'
// }]);