mysql爬虫_Node、MySQL爬虫

const http = require('http');

const cheerio= require('cheerio');

const fs= require('fs');

const mysql= require('mysql');

const child_process= require('child_process');

// Module-level list of course-list page URLs queued for crawling.
const urls=[];

// Module-level array intended to hold scraped course rows before they are saved to MySQL.
const datas=[];//Iterate over the URLs

/**
 * Queue every course-list page and start parsing each one.
 *
 * Builds the listing URLs for pages 1..pageCount, appends them to the
 * module-level `urls` array, and calls `parseHtml(url, pageIndex)` for each
 * of them, where `pageIndex` is zero-based.
 *
 * @param {number} [pageCount=33] - Number of listing pages to crawl.
 */
function startGetHtml(pageCount = 33) {
  // Iterate only the URLs built by this call, so that a repeated invocation
  // does not re-crawl earlier entries or shift their page indexes.
  const pageUrls = Array.from(
    { length: pageCount },
    (_, i) => 'http://www.imooc.com/course/list?page=' + (i + 1)
  );

  urls.push(...pageUrls);

  pageUrls.forEach((url, page) => {
    parseHtml(url, page);
  });
}

// Parse HTML

/**
 * Download one course-list page, extract every course card, and persist it.
 *
 * For each `.course-card` element the cover image is downloaded via
 * `saveImg`, the video URL is written to a text file via `saveText`, and all
 * rows of the page are handed to `saveDatabase` as one batch.
 *
 * @param {string} url - Listing page URL.
 * @param {number} page - Zero-based page index, used to build the row id.
 */
function parseHtml(url, page) {
  http.get(url, (res) => {
    // Start from an empty string: appending to an uninitialized variable
    // would prefix the document with the text "undefined".
    let html = '';

    // Decode as a stream so multi-byte characters split across chunks are
    // not corrupted.
    res.setEncoding('utf8');

    res.on('data', (chunk) => {
      html += chunk;
    });

    res.on('end', () => {
      const $ = cheerio.load(html);

      // Rows are collected per page; a shared array would be overwritten by
      // the other pages that are being fetched concurrently.
      const rows = [];

      $('.course-card').each((index, ele) => {
        const card = $(ele);
        const title = card.find('.course-card-name').text();
        const imgUrl = card.find('.course-banner').attr('src');
        const videoUrl = 'http://www.imooc.com' + card.attr('href');

        rows.push([page + 1 + '-' + index, title, videoUrl]);

        // The src is prefixed with "http:", i.e. it is presumably
        // protocol-relative; skip cards that have no image at all.
        if (imgUrl) {
          saveImg('http:' + imgUrl, title);
        }

        saveText(title, videoUrl);
      });

      // An empty batch would produce an invalid INSERT statement.
      if (rows.length > 0) {
        saveDatabase(rows);
      }
    });
  }).on('error', (err) => {
    // Without a listener a failed request raises an unhandled 'error' event.
    console.log(err);
  });
}

// Save as image

/**
 * Download an image and write it to node_download/img/<title>.jpg.
 *
 * The target directory is not created here and must already exist.
 *
 * @param {string} url - Absolute image URL.
 * @param {string} title - Course title, used as the file name.
 */
function saveImg(url, title) {
  // The title comes from scraped HTML: replace path separators so it can
  // neither break the write nor point outside the target directory.
  const fileName = String(title).replace(/[\\/]/g, '_');

  http.get(url, (res) => {
    // Collect raw Buffers instead of building a binary-encoded string.
    const chunks = [];

    res.on('data', (chunk) => {
      chunks.push(chunk);
    });

    res.on('end', () => {
      fs.writeFile('node_download/img/' + fileName + '.jpg', Buffer.concat(chunks), (err) => {
        if (err) {
          console.log(err);
        } else {
          console.log(title);
        }
      });
    });
  }).on('error', (err) => {
    // Without a listener a failed download raises an unhandled 'error' event.
    console.log(err);
  });
}

// Save as text

/**
 * Write a course's video URL to node_download/txt/<title>.txt.
 *
 * The target directory is not created here and must already exist.
 *
 * @param {string} title - Course title, used as the file name.
 * @param {string} videoUrl - URL stored as the file's content.
 */
function saveText(title, videoUrl) {
  // The title comes from scraped HTML: replace path separators so it can
  // neither break the write nor point outside the target directory.
  const fileName = String(title).replace(/[\\/]/g, '_');

  fs.writeFile('node_download/txt/' + fileName + '.txt', videoUrl, 'utf8', (err) => {
    if (err) {
      console.log(err);
    } else {
      console.log(title);
    }
  });
}

// Save to MySQL

/**
 * Bulk-insert scraped rows into the `imooc` table.
 *
 * @param {Array<Array<string>>} datas - Rows of [page, title, url]; the
 *   nested array is expanded by the driver into a multi-row VALUES list.
 */
function saveDatabase(datas) {
  // "VALUES ?" with no rows expands to invalid SQL, so there is nothing to do.
  if (!Array.isArray(datas) || datas.length === 0) {
    return;
  }

  // NOTE(review): credentials are hard-coded; consider reading them from
  // configuration or the environment.
  const connection = mysql.createConnection({
    host: 'localhost',
    user: 'root',
    password: '123456',
    database: 'download'
  });

  const sql = "INSERT INTO imooc(`page`,`title`,`url`) VALUES ?";

  connection.query(sql, [datas], (err) => {
    if (err) {
      console.log('INSERT ERROR - ', err.message);
    } else {
      console.log("INSERT SUCCESS");
    }

    // One connection is opened per call; close it so connections do not
    // pile up and keep the process alive.
    connection.end();
  });
}

// Query MySQL

/**
 * Read every row of the `imooc` table and serve the result as JSON on
 * port 3000.
 *
 * The table is queried once; the HTTP server answers every request with that
 * same snapshot.
 */
function getDatabase() {
  // NOTE(review): credentials are hard-coded; consider reading them from
  // configuration or the environment.
  const connection = mysql.createConnection({
    host: 'localhost',
    user: 'root',
    password: '123456',
    port: '3306',
    database: 'download'
  });

  const sql = 'SELECT * FROM imooc';

  connection.query(sql, (err, result) => {
    // The connection is only needed for this single query.
    connection.end();

    if (err) {
      console.log('[SELECT ERROR] - ', err.message);
      return;
    }

    // Serialize once instead of on every request.
    const body = JSON.stringify(result);

    http.createServer((req, res) => {
      // The body is JSON; labelling it as HTML would let a browser interpret
      // scraped titles as markup.
      res.writeHead(200, {'Content-Type': 'application/json;charset=utf-8'});
      res.end(body);
    }).listen(3000);
  });
}

// Commented-out invocations kept from the original:
// startGetHtml();
// getDatabase();

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值