Puppeteer 是 Chrome 开发团队在 2017 年发布的一个 Node.js 包,用来以编程方式控制 Chrome 浏览器的运行(默认以无头模式启动)。
官网
中文
const puppeteer = require('puppeteer')
const fs = require('fs')
var request = require('request')
/**
 * Download an image into the `imgs` directory that sits next to this script.
 *
 * The directory is created when missing. The previous implementation issued a
 * HEAD request whose result was never used, never invoked `callback`, and let
 * stream errors go unhandled; all three are addressed here.
 *
 * @param {string} uri - URL of the image to fetch.
 * @param {string} filename - Name of the file to create inside `imgs/`.
 * @param {function(Error=): void} [callback] - Invoked exactly once: with no
 *   argument after the file has been written and closed, or with the error
 *   when the directory, the request, or the write fails.
 */
function download(uri, filename, callback) {
  const targetDir = `${__dirname}/imgs`
  let settled = false

  // Report the outcome once, no matter how many stream events fire.
  const finish = (err) => {
    if (settled) {
      return
    }
    settled = true
    if (err) {
      console.error(`Failed to download ${uri}:`, err)
    } else {
      console.log('Finish Copy Images')
    }
    if (typeof callback === 'function') {
      callback(err)
    }
  }

  try {
    // createWriteStream fails with ENOENT when the directory does not exist.
    fs.mkdirSync(targetDir, { recursive: true })
  } catch (err) {
    // Defer so the callback is always asynchronous, like the success path.
    process.nextTick(finish, err)
    return
  }

  const file = fs.createWriteStream(`${targetDir}/${filename}`)
  file.on('error', finish)
  file.on('close', () => finish())

  request(uri)
    // pipe() does not end the destination when the source fails, so tear the
    // file stream down explicitly; its 'error' event then reaches finish().
    // A partially written file may be left on disk in that case.
    .on('error', (err) => file.destroy(err))
    .pipe(file)
}
/**
 * Pause for a while.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<undefined>} Promise that resolves (with no value) once the
 *   timer fires.
 */
function wait(ms) {
  return new Promise((done) => {
    // setTimeout calls `done` without arguments, so the promise resolves to undefined.
    setTimeout(done, ms)
  })
}
// Entry point: open the article in Chrome, scroll through the whole page so
// that images are requested, then download every image whose URL matches.
;(async () => {
  const browser = await puppeteer.launch({
    // headless: false, // false launches the full (visible) browser, which is handy for seeing what the page shows; by default the browser runs headless
    // slowMo: 100 // slows every Puppeteer operation down by the given number of milliseconds
  })

  try {
    const page = await browser.newPage()

    // Debugging aid: uncomment to mirror the page's console output.
    // page.on('console', msg => console.log('PAGE LOG:', msg.text()))

    await page.goto('http://baijiahao.baidu.com/s?id=1704795850072632109', {
      waitUntil: 'domcontentloaded'
    })

    // Get the height of the rendered page. page.$ resolves to null when
    // nothing matches and boundingBox() resolves to null when the element is
    // not visible; fall back to 0 so the scroll loop is simply skipped.
    const bodyHandle = await page.$('body')
    const bodyBox = bodyHandle ? await bodyHandle.boundingBox() : null
    if (bodyHandle) {
      await bodyHandle.dispose()
    }
    const height = bodyBox ? bodyBox.height : 0

    // Scroll one viewport at a time, pausing to let content load
    const viewportHeight = page.viewport().height
    let viewportIncr = 0
    while (viewportIncr + viewportHeight < height) {
      await page.evaluate((_viewportHeight) => {
        window.scrollBy(0, _viewportHeight)
      }, viewportHeight)
      await wait(20)
      viewportIncr = viewportIncr + viewportHeight
    }

    // Scroll back to top
    await page.evaluate(() => {
      window.scrollTo(0, 0)
    })

    // Some extra delay to let images load
    await wait(1000)

    // Collect the URLs of the images whose src contains the 'jpeg?token' marker.
    const imageLink = await page.evaluate(() => {
      const images = Array.from(document.querySelectorAll('img'))
      return images
        .map((img) => img.src)
        .filter((imgText) => imgText.includes('jpeg?token'))
    })
    console.log(imageLink)

    // Downloads go through `request`, not the browser, so they are started
    // here without being awaited and may continue after the browser closes.
    imageLink.forEach((img, index) => {
      download(img, index + '.jpg', function (err) {
        if (err) {
          console.error(`Download of image ${index} failed:`, err)
          return
        }
        console.log('done')
      })
    })
  } finally {
    // Always shut Chrome down, even when navigation or evaluation throws;
    // otherwise the browser process is left running.
    await browser.close()
  }
})().catch((err) => {
  // Surface failures instead of leaving an unhandled promise rejection.
  console.error(err)
  process.exitCode = 1
})