const http = require('http');
const cheerio= require('cheerio');
const fs= require('fs');
const mysql= require('mysql');
const child_process= require('child_process');
// Course-list page URLs queued for scraping; startGetHtml appends to this list.
const urls = [];

// Shared buffer of scraped course rows ([page-index label, title, video URL]);
// parseHtml fills it and the rows are handed to saveDatabase.
const datas = [];

// Iterate over the URLs
/**
 * Build the list of course-list page URLs and start scraping each of them.
 *
 * The module-level `urls` array is rebuilt on every call and `parseHtml` is
 * invoked once per URL.
 *
 * @param {number} [pageCount=33] Number of list pages to crawl, starting at page 1.
 */
function startGetHtml(pageCount = 33) {
  // Reset first: without this a second call would keep the earlier entries and
  // scrape every previously queued page again.
  urls.length = 0;

  for (let pageNumber = 1; pageNumber <= pageCount; pageNumber++) {
    urls.push(`http://www.imooc.com/course/list?page=${pageNumber}`);
  }

  // The second argument is the zero-based position of the URL in the list.
  urls.forEach((url, page) => {
    parseHtml(url, page);
  });
}

// Parse the HTML
/**
 * Download one course-list page, extract every `.course-card` element and
 * persist what was found.
 *
 * For each card the cover image is passed to `saveImg`, the video link to
 * `saveText`, and a `[label, title, videoUrl]` row is collected; the rows of
 * the page are then passed to `saveDatabase` in a single call.
 *
 * @param {string} url  Address of the list page to fetch.
 * @param {number} page Zero-based page index; stored one-based in the row label.
 */
function parseHtml(url, page) {
  http.get(url, (res) => {
    if (res.statusCode !== 200) {
      console.log('GET ERROR - ', url, res.statusCode);
      // Drain the response so the socket is released.
      res.resume();
      return;
    }

    // Must start as an empty string: appending to an uninitialised variable
    // would prefix the markup with the text "undefined".
    let html = '';
    // Decode at the stream level so a multi-byte character split across two
    // chunks is not corrupted.
    res.setEncoding('utf8');
    res.on('data', (chunk) => {
      html += chunk;
    });

    res.on('end', () => {
      const $ = cheerio.load(html);
      // Rows are gathered per page. Writing straight into the shared `datas`
      // array left stale rows behind whenever a page had fewer cards than an
      // earlier one, and those rows were inserted again.
      const rows = [];

      $('.course-card').each((index, ele) => {
        const card = $(ele);
        const title = card.find('.course-card-name').text();
        const imgUrl = card.find('.course-banner').attr('src');
        const videoUrl = 'http://www.imooc.com' + card.attr('href');

        rows.push([`${page + 1}-${index}`, title, videoUrl]);

        // A card without a banner would otherwise request "http:undefined".
        if (imgUrl) {
          saveImg('http:' + imgUrl, title);
        }
        saveText(title, videoUrl);
      });

      // Keep the module-level buffer in step with this page for any code that
      // still reads it.
      datas.length = 0;
      datas.push(...rows);

      // Nothing to insert for a page without cards.
      if (rows.length > 0) {
        saveDatabase(rows);
      }
    });
  }).on('error', (err) => {
    // Without a listener a network failure is thrown as an unhandled 'error'
    // event and takes the whole process down.
    console.log('GET ERROR - ', url, err.message);
  });
}

// Save as an image
/**
 * Download an image and write it to `node_download/img/<title>.jpg`.
 *
 * Logs the title on success and the error object on failure.
 *
 * @param {string} url   Address of the image to download.
 * @param {string} title Course title, used as the file name.
 */
function saveImg(url, title) {
  // Titles come from scraped markup. Replace path separators and characters
  // that are not allowed in file names so a title such as "C/C++" can neither
  // break the write nor point outside the target directory.
  const safeTitle = String(title).replace(/[\\/:*?"<>|]/g, '_');

  http.get(url, (res) => {
    // Collect the raw Buffers instead of concatenating "binary" strings, so
    // the image bytes are written exactly as received.
    const chunks = [];
    res.on('data', (chunk) => {
      chunks.push(chunk);
    });

    res.on('end', () => {
      fs.writeFile('node_download/img/' + safeTitle + '.jpg', Buffer.concat(chunks), (err) => {
        if (err) {
          console.log(err);
        } else {
          console.log(title);
        }
      });
    });
  }).on('error', (err) => {
    // An unhandled 'error' event on the request would crash the process.
    console.log(err);
  });
}

// Save as text
/**
 * Write a course's video URL to `node_download/txt/<title>.txt` as UTF-8.
 *
 * Logs the title on success and the error object on failure.
 *
 * @param {string} title    Course title, used as the file name.
 * @param {string} videoUrl Text written into the file.
 */
function saveText(title, videoUrl) {
  // Titles come from scraped markup. Replace path separators and characters
  // that are not allowed in file names so the write cannot fail on, or escape
  // from, the target directory.
  const safeTitle = String(title).replace(/[\\/:*?"<>|]/g, '_');

  fs.writeFile('node_download/txt/' + safeTitle + '.txt', videoUrl, 'utf8', (err) => {
    if (err) {
      console.log(err);
    } else {
      console.log(title);
    }
  });
}

// Save to MySQL
/**
 * Bulk-insert scraped rows into the `imooc` table of the `download` database.
 *
 * Logs "INSERT SUCCESS" or the error message, then closes the connection.
 *
 * @param {Array<Array<string>>} datas Rows in `[page, title, url]` column order.
 */
function saveDatabase(datas) {
  // An empty list would expand "VALUES ?" to nothing and produce invalid SQL,
  // so there is no point in opening a connection for it.
  if (!Array.isArray(datas) || datas.length === 0) {
    return;
  }

  // NOTE(review): credentials are hard-coded; consider loading them from
  // configuration or the environment.
  const connection = mysql.createConnection({
    host: 'localhost',
    user: 'root',
    password: '123456',
    database: 'download',
  });

  const sql = 'INSERT INTO imooc(`page`,`title`,`url`) VALUES ?';

  // The nested array is expanded by the driver into one value group per row.
  connection.query(sql, [datas], (err) => {
    if (err) {
      console.log('INSERT ERROR - ', err.message);
    } else {
      console.log('INSERT SUCCESS');
    }
    // Close the connection in both outcomes; otherwise every call leaks an
    // open socket that also keeps the process alive.
    connection.end();
  });
}

// Query MySQL
/**
 * Read every row of the `imooc` table and serve the result as JSON over HTTP
 * on port 3000.
 *
 * The table is queried once; the server answers every request with that
 * snapshot. On a query error the message is logged and no server is started.
 */
function getDatabase() {
  // NOTE(review): credentials are hard-coded; consider loading them from
  // configuration or the environment.
  const connection = mysql.createConnection({
    host: 'localhost',
    user: 'root',
    password: '123456',
    port: '3306',
    database: 'download',
  });

  const sql = 'SELECT * FROM imooc';

  connection.query(sql, (err, result) => {
    // The rows are in memory now (or the query failed); either way the
    // connection is no longer needed and must not be left open.
    connection.end();

    if (err) {
      console.log('[SELECT ERROR] - ', err.message);
      return;
    }

    // The snapshot never changes, so serialise it once instead of per request.
    const body = JSON.stringify(result);

    http.createServer((req, res) => {
      // The body is JSON. Labelling it text/html would let a browser interpret
      // scraped titles as markup.
      res.writeHead(200, { 'Content-Type': 'application/json;charset=utf-8' });
      res.end(body);
    }).on('error', (serverErr) => {
      // e.g. port 3000 already in use; without a listener this would throw.
      console.log('[SERVER ERROR] - ', serverErr.message);
    }).listen(3000);
  });
}

// Entry points - uncomment the one to run:
// startGetHtml();
// getDatabase();