1. 爬取某个 URL 页面
var http = require('http')
// URL of the page to crawl
var url = '...'
// Fetch the page, accumulate the response body, and print it once complete.
http.get(url, function (res) {
  // A non-2xx answer is not the page we asked for: report it and drain the
  // response so the socket is released instead of printing an error body.
  if (res.statusCode < 200 || res.statusCode >= 300) {
    console.error('获取页面数据出错!', 'HTTP ' + res.statusCode)
    res.resume()
    return
  }
  // Decode at the stream level; otherwise each Buffer chunk is stringified on
  // its own and a multi-byte character split across two chunks is corrupted.
  res.setEncoding('utf8')
  var html = ''
  res.on('data', function (chunk) {
    html += chunk
  })
  res.on('end', function () {
    console.log(html)
  })
}).on('error', function (err) {
  // Keep the original message but include the cause instead of discarding it.
  console.error('获取页面数据出错!', err.message)
})
2. 通过 cheerio 解析数据
首先安装 cheerio 模块:npm install cheerio
然后在代码中引入 cheerio 模块:var cheerio = require('cheerio')
var http = require('http')
var cheerio = require('cheerio')
// URL of the page to crawl
var url = '...'
// Fetch the page, then hand the complete HTML to filterData() and print the
// extracted result with printInfo().
http.get(url, function (res) {
  // A non-2xx answer is not the page we asked for: report it and drain the
  // response so the socket is released instead of parsing an error body.
  if (res.statusCode < 200 || res.statusCode >= 300) {
    console.error('获取页面数据出错!', 'HTTP ' + res.statusCode)
    res.resume()
    return
  }
  // Decode at the stream level; otherwise each Buffer chunk is stringified on
  // its own and a multi-byte character split across two chunks is corrupted.
  res.setEncoding('utf8')
  var html = ''
  res.on('data', function (chunk) {
    html += chunk
  })
  res.on('end', function () {
    var html_obj = filterData(html)
    printInfo(html_obj)
  })
}).on('error', function (err) {
  // Keep the original message but include the cause instead of discarding it.
  console.error('获取页面数据出错!', err.message)
})
/**
 * Extract payment-method names and icon file names from a page's HTML.
 *
 * Looks inside `.payModeContent` for `.payName` elements (their text becomes
 * `payType`) and `.payIcon img` elements (the part of their `src` after the
 * first `images/` becomes `payIcon`).
 *
 * @param {string} html - Raw HTML of the page.
 * @returns {{payType: string[], payIcon: Array<string|undefined>}} Collected
 *   names and icon file names, in document order. An icon entry is
 *   `undefined` when the image has no `src` or its `src` has no `images/`
 *   segment.
 */
function filterData(html) {
  var $ = cheerio.load(html)
  var payObj = $('.payModeContent')
  var pay_type_array = []
  var pay_icon_array = []

  payObj.find('.payName').each(function (index, el) {
    pay_type_array.push($(el).text())
  })

  payObj.find('.payIcon img').each(function (index, img) {
    var src = $(img).attr('src')
    // attr() yields undefined for an <img> without src; calling split() on it
    // used to throw. Store undefined instead so this entry keeps its position.
    pay_icon_array.push(typeof src === 'string' ? src.split('images/')[1] : undefined)
  })

  return {
    payType: pay_type_array,
    payIcon: pay_icon_array
  }
}
/**
 * Write the given value to standard output via console.log.
 *
 * @param {*} obj - Value to print (in this file, the object returned by
 *   filterData).
 */
function printInfo(obj) {
  var logger = console
  logger.log(obj)
}
运行结果(控制台输出):
{ payType: [ '微信支付方式', '支付宝钱包支付', '储蓄卡支付' ],
payIcon: [ 'weichatIcon.png', 'alipayIcon.png', 'depositcardpayIcon.png' ] }