使用Puppeteer爬取图片
- 使用的包
const puppeteer = require('puppeteer');
const request = require("request");
const fs = require('fs');
- 操作浏览器
/**
 * Open Baidu image search for a keyword in a visible browser window, scroll
 * the result page a little so more thumbnails load, and collect the image
 * URLs found under `#imgid`.
 *
 * Images whose `src` is an inline base64 data URI are skipped.
 *
 * @param {string} [word='壁纸'] - Search keyword; URL-encoded before use.
 * @returns {Promise<string[]>} The `src` of every non-base64 result image.
 */
const scrape = async (word = '壁纸') => {
  const browser = await puppeteer.launch({
    headless: false,
    defaultViewport: {
      width: 1200,
      height: 1000
    }
  });

  try {
    const page = await browser.newPage();
    // Encode the keyword so characters such as `&` or `#` cannot corrupt the query string.
    await page.goto('https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=index&fr=&hs=0&xthttps=111110&sf=1&fmq=&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word=' + encodeURIComponent(word));

    // Scroll in two steps (the second one a second later) to trigger loading of more images.
    await page.evaluate(() => {
      window.scrollTo(0, 1000);
      setTimeout(() => {
        window.scrollTo(0, 2000);
      }, 1000);
    });

    // Give the delayed scroll and the newly requested images time to settle.
    // A plain timer is used instead of page.waitForTimeout, which newer
    // Puppeteer releases no longer provide.
    await new Promise((resolve) => setTimeout(resolve, 3000));

    // Runs inside the page: gather every image src, including the last one.
    return await page.evaluate(() =>
      Array.from(document.querySelectorAll('#imgid img'), (img) => img.src)
        .filter((src) => !src.includes('base64'))
    );
  } finally {
    // Always shut the browser down, even when navigation or evaluation throws.
    await browser.close();
  }
};
- 保存图片
// Download every URL returned by scrape() into C:/temp, one file per image.
scrape()
  .then((value) => {
    const targetDir = 'C:/temp';

    // Create the directory once up front; with `recursive: true` an already
    // existing directory is not reported as an error.
    fs.mkdir(targetDir, { recursive: true }, (mkdirErr) => {
      if (mkdirErr) {
        console.log('err', mkdirErr);
        return;
      }

      // Iterate over the whole list, first and last entries included.
      value.forEach((imgUrl, i) => {
        if (!imgUrl) {
          console.log('地址为空');
          return;
        }

        // Timestamp plus index keeps names unique within and across runs.
        const filename = `${Date.now()}imgage${i}.png`;
        const target = fs.createWriteStream(`${targetDir}/${filename}`);

        // Without these handlers a single failed download or write would
        // surface as an unhandled 'error' event and terminate the process.
        target.on('error', (err) => {
          console.error(`write failed: ${filename}`, err);
        });
        request(imgUrl)
          .on('error', (err) => {
            console.error(`download failed: ${imgUrl}`, err);
            target.destroy();
          })
          .pipe(target);
      });
    });
  })
  .catch((err) => {
    // Report scraping failures instead of leaving the rejection unhandled.
    console.error('scrape failed', err);
  });