node_爬虫2
-
小说爬取案例
const request = require("request");
const cheerio = require("cheerio");
const fs = require("fs");

// Chapter index page of the novel; every chapter link is read from it.
const INDEX_URL = "http://book.zongheng.com/showchapter/907701.html";
// Directory that receives one numbered .txt file per chapter.
const OUTPUT_DIR = "./xiaoshuo/";

request.get(INDEX_URL, (err, res, body) => {
  // Report the failure instead of silently doing nothing.
  if (err) {
    console.log(err);
    return;
  }

  // Parse the index page so it can be queried with CSS selectors.
  const $ = cheerio.load(body);

  // Each matching anchor is treated as one chapter link.
  $(".col-4 a").each(function (index) {
    const href = $(this).prop("href");
    // Anchors without an href cannot be fetched.
    if (!href) {
      return;
    }

    let chapterUrl;
    try {
      // Resolve relative and protocol-relative links against the index page.
      chapterUrl = new URL(href, INDEX_URL).href;
    } catch (urlErr) {
      console.log(urlErr);
      return;
    }

    reqA(chapterUrl, index);
  });
});

/**
 * Download one chapter page and save its text as "<index + 1>.txt"
 * inside the output directory.
 *
 * @param {string} url - Absolute URL of the chapter page.
 * @param {number} index - Zero-based position of the chapter in the index page.
 * @returns {void} Work happens asynchronously; failures are logged.
 */
function reqA(url, index) {
  request.get(url, (err, res, body) => {
    if (err) {
      console.log(err);
      return;
    }

    // Every chapter page gets its own parsed document.
    const $ = cheerio.load(body);

    // Insert a carriage return before every run of two or more whitespace
    // characters. (Alternatively, all chapters could be concatenated and
    // stored in a single file.)
    const txt = $(".content")
      .text()
      .replace(/(\s{2,})/g, (match) => "\r" + match);

    try {
      // Create the output directory on demand; a no-op when it already exists.
      fs.mkdirSync(OUTPUT_DIR, { recursive: true });
      fs.writeFileSync(OUTPUT_DIR + (index + 1) + ".txt", txt);
    } catch (writeErr) {
      // A failed write must not crash the process from inside the callback.
      console.log(writeErr);
    }
  });
}
-
案例:爬取百度图片
const request = require("request");
const fs = require("fs");
const path = require("path");

// Search keyword and the number of images to save.
getImg("不相干的", 1000);

/**
 * Query the Baidu image search endpoint for `word` and download the .jpg
 * URLs found in the responses into "<script directory>/img/<word>/".
 *
 * @param {string} word - Search keyword; also used as the sub-directory name.
 * @param {number} num - Number of results to request; must be finite and > 0.
 * @returns {void} Returns immediately; invalid arguments are ignored and
 *   all I/O failures are logged.
 */
function getImg(word, num) {
  // A real plugin would need far more thorough argument validation.
  // Non-finite values are rejected because Infinity would make init() loop forever.
  if (
    typeof word !== "string" ||
    typeof num !== "number" ||
    !Number.isFinite(num) ||
    num <= 0
  ) {
    return;
  }

  // Results requested per call; the last call asks only for the remainder.
  const PAGE_SIZE = 60;
  // One location, anchored at the script directory, is used both for
  // creating the folder and for writing the files.
  const targetDir = path.join(__dirname, "img", word);

  // Create the keyword directory (and "img" itself) if missing, then start.
  fs.mkdir(targetDir, { recursive: true }, (err) => {
    if (err) {
      console.log(err);
      return;
    }
    init();
  });

  /* Issue as many search requests as needed to cover `num` results. */
  function init() {
    const pageCount = Math.ceil(num / PAGE_SIZE);
    for (let i = 0; i < pageCount; i++) {
      req(word, i * PAGE_SIZE, Math.min(PAGE_SIZE, num - i * PAGE_SIZE));
    }
  }

  /*
   * Run one search request and download every image URL found in the reply.
   * `pn` and `rn` are passed through as the query parameters of the same name.
   */
  function req(word, pn, rn) {
    // encodeURIComponent (not encodeURI) so that characters such as "&" or
    // "=" inside the keyword cannot corrupt the query string.
    const searchUrl =
      "https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&word=" +
      encodeURIComponent(word) +
      "&pn=" + pn +
      "&rn=" + rn;

    request.get(searchUrl, (err, res, body) => {
      if (err) {
        console.log(err);
        return;
      }

      // The response is not always well-formed JSON, so the image URLs are
      // extracted with a regular expression instead of JSON.parse.
      const found = typeof body === "string" ? body.match(/https.+?\.jpg/g) : null;
      const urls = [...new Set(found || [])];

      urls.forEach((url) => {
        // Pseudo-random hexadecimal file name.
        const name = (Date.now() + Math.floor(Math.random() * 999999999999)).toString(16);
        const filePath = path.join(targetDir, name + ".jpg");

        // Without "error" listeners a single failed download or write would
        // surface as an unhandled "error" event.
        request(url)
          .on("error", (downloadErr) => console.log(downloadErr))
          .pipe(fs.createWriteStream(filePath))
          .on("error", (writeErr) => console.log(writeErr));
      });
    });
  }
}