node_爬虫1
-
request包
// Install:  npm i request -S
// Basic usage of the `request` package: request.get(options, callback).
const request = require("request");

const options = { url: "https://www.baidu.com/" };

request.get(options, (err, res, body) => {
  // err:  error object (null on success)
  // res:  response metadata (status, headers, ...)
  // body: the returned payload
  if (err) return;
  console.log(body);
});
-
案例1:将爬取的图片地址存储到txt文档中
// Example 1: scrape image URLs from a page and append them to a txt file.
// Pipeline: 1. fetch the page  2. extract URLs with a regex  3. persist to disk.
const request = require("request");
const fs = require("fs");

request.get({ url: "https://www.meitulu.com/item/7667.html" }, (err, res, body) => {
  // err: error object, res: response info, body: page HTML
  if (err) return;
  // Image URLs look like: https://mtl.gzhuibei.com/images/img/7667/1.jpg
  // Dots in the hostname are escaped (unescaped `.` matches any char),
  // and (?:jpg|png) is non-capturing so match() returns full URLs only.
  const reg = /https:\/\/mtl\.gzhuibei\.com\/images\/img\/7667\/\d+?\.(?:jpg|png)/g;
  const arr = body.match(reg);
  if (arr) {
    // One single append instead of one writeFileSync per URL — the original
    // reopened the file N times; the resulting bytes are identical.
    fs.writeFileSync("./xixixi.txt", arr.join("\r") + "\r", { flag: "a" });
  }
});
-
案例2:将爬取的图片存储到文件夹中 —— 流(stream)
// Example 1: pipe a remote resource straight into a local file via streams.
const request = require("request");
const fs = require("fs");

request("https://mtl.gzhuibei.com/images/img/7667/13.jpg") // request the resource
  .pipe(fs.createWriteStream("./img/1.jpg")); // stream it into a new file

request("https://res.shiguangkey.com/res/special/tanzhouEdu/video/tz.mp4")
  .pipe(fs.createWriteStream("./img/x.mp4"));

// ------------------------------------------------------------------
// Example 2: extract every image URL from a page, then stream each
// image into the ./img folder, named by its position in the match list.
request.get({ url: "https://www.meitulu.com/item/7667.html" }, (err, res, body) => {
  if (err) return;
  const reg = /https:\/\/mtl.gzhuibei.com\/images\/img\/7667\/\d+?\.(jpg|png)/g;
  const arr = body.match(reg);
  if (arr) {
    arr.forEach((item, index) => {
      request(item).pipe(fs.createWriteStream("./img/" + index + ".jpg"));
    });
  }
});
-
案例3:多个页面的图片进行存储
// Example 3: scrape images from every page of a multi-page gallery.
const request = require("request");
const fs = require("fs");

// Pages follow the pattern: 7667.html, 7667_2.html, ..., 7667_14.html
for (let i = 1; i <= 14; i++) {
  const url =
    i === 1
      ? "https://www.meitulu.com/item/7667.html"
      : "https://www.meitulu.com/item/7667_" + i + ".html";

  // Fetch each page and stream its images to disk.
  request.get(url, (err, res, body) => {
    if (err) return;
    const reg = /https:\/\/mtl.gzhuibei.com\/images\/img\/7667\/\d+?\.(jpg|png)/g;
    const arr = body.match(reg);
    if (arr) {
      arr.forEach((item, idx) => {
        // Name files "<page>_<index>.jpg". The original used one shared
        // mutable counter (index++) across async callbacks: responses
        // arrive in arbitrary order, so names were non-deterministic
        // between runs. Page + per-page index is stable and still unique.
        request(item).pipe(
          fs.createWriteStream("./img1/" + i + "_" + idx + ".jpg")
        );
      });
    }
  });
}
-
小说爬取并存储 —— 服务端DOM解析(cheerio,类jQuery接口,并非虚拟DOM)
// Scrape a novel's chapter index, then fetch each chapter page and save its
// text to ./xiaoshuo/<n>.txt, using cheerio (server-side, jQuery-like HTML
// parser) to extract content.
// Setup: npm i request cheerio -S
// Chapter index: http://book.zongheng.com/showchapter/907701.html
// Chapter page:  http://book.zongheng.com/chapter/907701/59100296.html
const fs = require("fs");
const request = require("request");
// FIX: this require was commented out in the original even though
// cheerio.load() is called below, which threw a ReferenceError at runtime.
const cheerio = require("cheerio");

request("http://book.zongheng.com/showchapter/907701.html", (err, res, body) => {
  if (err) return;
  const $ = cheerio.load(body);
  console.log($(".col-4").text());

  // One request per chapter link found in the index.
  $(".col-4 a").each(function (index) {
    // attr("href") reads the raw attribute; it is supported across all
    // cheerio versions, unlike prop().
    reqA($(this).attr("href"), index);
  });

  // Fetch one chapter page and append its ".content" text to a numbered file.
  function reqA(url, index) {
    request.get(url, (err, res, body) => {
      if (err) return;
      const $ = cheerio.load(body);
      fs.writeFile(
        "./xiaoshuo/" + (index + 1) + ".txt",
        $(".content").text(),
        { encoding: "utf8", flag: "a" },
        (err) => {
          if (err) {
            console.log("写入出错了", err);
          } else {
            console.log("写入成功");
          }
        }
      );
    });
  }
});