首先需要安装 Node.js 环境,然后引入下面三个模块(request-promise 及其依赖 request、cheerio 需要通过 npm 安装,fs 是 Node.js 内置模块,无需安装):
const rp = require('request-promise'); //进入request-promise模块
const cheerio = require('cheerio');//引入cheerio模块
const fs = require('fs');//引入fs模块
下面就是一个爬取指定页面上所有图片并下载到本地的小爬虫:
const rp = require('request-promise'); //进入request-promise模块
const cheerio = require('cheerio');//引入cheerio模块
const fs = require('fs');//引入fs模块
const savePath = 'D:/blog/'; // Directory the downloaded images are written to
const wormPath = 'http://vip66.sushenyue.cn/sh56/04/';// URL of the page to crawl
/**
 * Fetch a page, locate every <img> element on it and download each image.
 *
 * All downloads are started together and awaited, so the returned promise
 * only settles once every image has either been saved or failed. A failed
 * download is logged and does not prevent the remaining images from being
 * fetched.
 *
 * @param {string} url - Address of the page to crawl.
 * @returns {Promise<void>} Resolves when all downloads have finished.
 *   Rejects if fetching the page itself fails.
 */
const getData = async ( url ) => {
  const html = await rp({ url }); // raw markup of the target page
  const $ = cheerio.load(html); // jQuery-like query interface over the markup
  const downloads = [];

  $('img').each((index, element) => {
    const src = element.attribs.src;
    // Skip <img> tags without a usable src instead of crashing on undefined.
    if (!src) {
      return;
    }
    // The file name is the src with its "images/" prefix removed.
    const name = src.replace('images/', '');
    downloads.push(
      downLoad(src, name).catch((err) => {
        // Best effort: report the failure and keep the other downloads going.
        console.error(`Failed to download ${src}:`, err);
      })
    );
  });

  await Promise.all(downloads);
};
/**
 * Download a single image and store it under `savePath`.
 *
 * The image address is resolved against `wormPath`, so relative, root-relative
 * and absolute `src` values are all handled. The body is requested as a Buffer
 * and written in one step, which means the returned promise only resolves
 * after the file has actually been written and rejects if the request or the
 * write fails.
 *
 * @param {string} url - Image address as found in the page (usually relative).
 * @param {string} name - File name to store the image under inside `savePath`.
 * @returns {Promise<void>} Resolves once the file has been written.
 */
const downLoad = async ( url , name ) => {
  const imageUrl = new URL(url, wormPath).href;

  // Browser-like headers to get past hotlink protection.
  // - No Host header: a fixed value would point at the wrong server, so it is
  //   left for the HTTP client to derive from the request URL.
  // - No Accept-Encoding header: the body is stored as received, so a
  //   compressed response would end up as a corrupt image on disk.
  const headers = {
    Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
    "Cache-Control": "no-cache",
    Pragma: "no-cache",
    "Proxy-Connection": "keep-alive",
    Referer: wormPath, // the page that embeds the image
    "Upgrade-Insecure-Requests": 1,
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.19 Safari/537.36"
  };

  // encoding: null asks for the raw bytes (a Buffer) instead of a decoded string.
  const body = await rp({
    url: imageUrl,
    encoding: null,
    headers
  });

  await new Promise((resolve, reject) => {
    fs.writeFile(`${savePath}/${name}`, body, (err) => {
      if (err) {
        reject(err);
        return;
      }
      resolve();
    });
  });
};
// Kick off the crawl. The promise is handled explicitly so that a failure is
// reported and reflected in the exit code instead of surfacing as an
// unhandled rejection.
getData(wormPath).catch((err) => {
  console.error('Crawl failed:', err);
  process.exitCode = 1;
});