首先说明:这是我在学习 Node.js 的过程中做的一个小练习,想用学到的几个模块,简单实现一个爬取页面链接的小工具。若有不足之处,希望大家多多指教。
const superAgent = require('superagent'); // HTTP client used to download the page (GET/POST/PUT/DELETE/HEAD)
const cheerio = require('cheerio'); // server-side implementation of core jQuery, used to query the HTML
const fs = require('fs');
const path = require('path');

const testUrl = 'http://ah.10086.cn/m'; // test link

/**
 * Build an Error that keeps the underlying failure reachable as `cause`.
 *
 * @param {string} message - Human-readable description of what failed.
 * @param {*} [cause] - The original error, if any.
 * @returns {Error} Error carrying `message` and, when given, `cause`.
 */
function createError(message, cause) {
  const error = new Error(message);
  if (cause) {
    error.cause = cause;
  }
  return error;
}

/**
 * Turn a page title into a string that is safe to use as a file name.
 * The title comes from a remote page, so path separators, characters that
 * are invalid on common file systems and control characters are replaced.
 *
 * @param {*} title - Raw page title.
 * @returns {string} Sanitized name; 'untitled' when nothing usable remains.
 */
function toSafeFileName(title) {
  const raw = title === undefined || title === null ? '' : String(title);
  const cleaned = raw.replace(/[\\/:*?"<>|\u0000-\u001f]/g, '_').trim();
  return cleaned === '' ? 'untitled' : cleaned;
}

/**
 * Download a page and collect its title and every anchor found in it.
 *
 * @param {string} url - Address of the page to fetch.
 * @returns {Promise<{title: string, linkArry: Array<{name: string, href: string}>, count: number}>}
 *   Resolves with the page title, the list of links and the number of links.
 *   Rejects with an Error when the request fails.
 */
function getLinkByUrl(url) {
  return new Promise((resolve, reject) => {
    // Fetch the address passed in by the caller, not the module-level test URL.
    superAgent.get(url).end((err, res) => {
      if (err) {
        console.log('无效地址111');
        reject(createError('无效地址222', err));
        return;
      }

      console.log('=========html=============\n ', res.text);
      const $ = cheerio.load(res.text);
      const linkArry = [];

      $('a').each((_, element) => {
        const anchor = $(element);
        linkArry.push({
          name: anchor.text().trim(),
          href: anchor.attr('href') || '', // anchors without href are recorded with ''
        });
      });

      resolve({
        title: $('title').text(),
        linkArry,
        count: linkArry.length,
      });
    });
  });
}

/**
 * Persist the scraped data as `data/<title>.json` next to this script.
 * The `data` directory is created when missing; an already existing JSON
 * file is left untouched.
 *
 * @param {{title: string}} data - Scraped page data; `title` names the file.
 * @returns {Promise<string>} Resolves with a status message. Rejects with an
 *   Error when the directory or the file cannot be created.
 */
function writeJsonFile(data) {
  return new Promise((resolve, reject) => {
    // Resolve against __dirname so the directory that is created is the same
    // one that is written to, whatever the current working directory is.
    const dataDir = path.join(__dirname, 'data');

    fs.mkdir(dataDir, (mkdirErr) => {
      if (mkdirErr && mkdirErr.code !== 'EEXIST') {
        // Settle the promise; otherwise the caller would wait forever.
        reject(createError('data文件夹创建失败。。。', mkdirErr));
        return;
      }

      const dirCreated = !mkdirErr;
      if (dirCreated) {
        console.log('data文件夹不存在。。。');
        console.log('文件夹创建成功');
      } else {
        console.log('data文件夹存在!');
      }

      const file = path.join(dataDir, `${toSafeFileName(data.title)}.json`);

      // 'wx' fails with EEXIST instead of overwriting, which replaces the
      // deprecated and racy fs.exists check.
      fs.writeFile(file, JSON.stringify(data), { flag: 'wx' }, (writeErr) => {
        if (!writeErr) {
          resolve(dirCreated ? 'json文件创建成功!' : 'json文件创建成功2!');
          return;
        }
        if (writeErr.code === 'EEXIST') {
          resolve('json文件存在!');
          return;
        }
        reject(createError(dirCreated ? 'json文件创建失败。。。' : 'json文件创建失败2。。。', writeErr));
      });
    });
  });
}

// Scrape the test page and store the result. Any failure skips the remaining
// steps and is reported once by the final catch.
getLinkByUrl(testUrl)
  .then((pageData) => {
    console.log('=============resolve1=', pageData);
    return writeJsonFile(pageData);
  })
  .then((message) => {
    console.log('===========resolve2=', message);
  })
  .catch((err) => {
    console.log('=========err=', err);
  });
执行后,生成的文件内容如下:
{ "title": "安徽移动个人触屏版网厅", "linkArry": [ { "name": "", "href": "javascript:void\n\n(window.location.href='http://ah.10086.cn/mpad/pad/num/number_list.html');" }, { "name": "", "href": "javascript:void\n\n(window.location.href='http://ah.10086.cn/mpad/pad/num/number_list.html');" }, { "name": "+充话费", "href": "" }, { "name": "+充流量", "href": "" }, { "name": "业务办理", "href": "http://ah.10086.cn/m/pages/pad/operate/openBusiIndex.html" }, { "name": "手机卖场", "href": "http://ah.10086.cn/mpad/pad/index.html" }, { "name": "宽带专区", "href": "http://ah.10086.cn/m/pages/pad/kdzq/index.html" }, { "name": "选号入网", "href": "http://ah.10086.cn/mpad/pad/num/number_list.html" }, { "name": "流量专区", "href": "http://ah.10086.cn/m/pages/pad/operate/flowZQ/index.html" }, { "name": "流量红包", "href": "http://ah.10086.cn/m/pages/draw/downloadkhd/downloadkhd.html?code=4&&WT.mc_ev=GXHXZY4" }, { "name": "4G特惠", "href": "http://ah.10086.cn/dt/khd" }, { "name": "下载手厅", "href": "http://ah.10086.cn/dt/khd" }, { "name": "", "href": "http://ah.10086.cn/dt/khd" }, { "name": "", "href": "http://ah.10086.cn/mpad/pad/act/haokarwy/index2.html" }, { "name": "", "href": "http://ah.10086.cn/mpad/hhg" }, { "name": "", "href": "http://ah.10086.cn/m/pages/draw/broadpromotion/index.html" }, { "name": "", "href": "http://ah.10086.cn/zsyyt/ahmobile/download/mobileDownLoadApk.do" }, { "name": "", "href": "" }, { "name": "马上下载", "href": "http://ah.10086.cn/zsyyt/ahmobile/download/mobileDownLoadApk.do" } ], "count": 19 }