源码:
var http = require('http');
// Page whose raw HTML is downloaded and printed.
var url = 'http://www.imooc.com/learn/637';

// Download the page, accumulate the response body, and print it once the
// response has ended.
http.get(url, function (res) {
  var html = '';

  // Decode at the stream level. Appending raw Buffer chunks to a string
  // converts each chunk on its own (as UTF-8), which garbles any multi-byte
  // character that straddles a chunk boundary.
  res.setEncoding('utf8');

  res.on('data', function (chunk) {
    html += chunk;
  });

  res.on('end', function () {
    console.log(html);
  });
}).on('error', function (err) {
  // Same message as before, but report the cause instead of discarding it.
  console.log('出现错误', err.message);
});
第二种:获取课程章节标题的爬虫代码
var http = require('http')
var cheerio = require('cheerio')
// Address of the course page that is requested with http.get and whose
// response body is handed to filechapters.
var url = 'http://www.imooc.com/learn/348'
/**
 * Parse a course page and collect its chapters together with their videos.
 *
 * Result shape:
 *   [{
 *     chaptertitle: '',
 *     videos: [{ title: '', id: '' }]
 *   }]
 *
 * @param {string} html - Page markup; passed unchanged to cheerio.load().
 * @returns {Array} One entry per element matched by `.chapter`.
 */
function filechapters (html) {
  var $ = cheerio.load(html)
  var coursedata = []

  $('.chapter').each(function () {
    var chapter = $(this)
    var chapterdata = {
      chaptertitle: chapter.find('strong').text(),
      videos: []
    }

    chapter.find('.video').children('li').each(function () {
      var video = $(this).find('.J-media-item')
      var videotitle = video.text()
      // attr() gives undefined when the <li> has no matching link or the
      // link carries no href; calling split() on that used to throw.
      var href = video.attr('href')

      chapterdata.videos.push({
        title: videotitle,
        // The id is whatever follows "video/" in the link target. It stays
        // undefined when there is no href or no "video/" segment.
        id: typeof href === 'string' ? href.split('video/')[1] : undefined
      })
    })

    coursedata.push(chapterdata)
  })

  return coursedata
}
/**
 * Write a course outline to the console.
 *
 * For every chapter the title is logged first (with a trailing newline
 * appended), then one "[id]title" line is logged per video.
 *
 * @param {Array} coursedata - Entries with `chaptertitle` and a `videos`
 *   array whose items have `id` and `title`.
 */
function printcouseinfo (coursedata) {
  // Log a single video as "[id]title".
  function logVideo (video) {
    console.log('[' + video.id + ']' + video.title)
  }

  // Log the chapter heading, then each of the chapter's videos.
  function logChapter (chapter) {
    var heading = chapter.chaptertitle
    console.log(heading + '\n')
    chapter.videos.forEach(logVideo)
  }

  coursedata.forEach(logChapter)
}
// Fetch the course page, accumulate the response body, then parse it with
// filechapters and print the result with printcouseinfo.
http.get(url, function (res) {
  var html = ''

  // Decode at the stream level. Appending raw Buffer chunks to a string
  // converts each chunk on its own (as UTF-8), which garbles any multi-byte
  // character that straddles a chunk boundary.
  res.setEncoding('utf8')

  res.on('data', function (chunk) {
    html += chunk
  })

  res.on('end', function () {
    var coursedata = filechapters(html)
    printcouseinfo(coursedata)
  })
}).on('error', function (err) {
  // Same message as before, but report the cause instead of discarding it.
  console.log('出现错误', err.message)
})