一、依赖
npm i puppeteer
二、网页截图
- 页面加载完成后，调用以下方法对页面进行截图：
page.screenshot()
- 基本截图
const puppeteer = require('puppeteer');

/**
 * Open the CSDN blog page and save a screenshot of it as a PNG file.
 *
 * The browser is closed in a `finally` block so that a failed navigation or
 * screenshot does not leave a browser process running.
 *
 * @returns {Promise<void>} Resolves after the screenshot has been written and
 *   the browser has been closed.
 * @throws Rejects with whatever error Puppeteer raises while launching,
 *   navigating, or capturing.
 */
async function getPic() {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto('https://blog.csdn.net/weixin_46037781');
    // No `fullPage` option here: this is the basic (default-area) screenshot.
    await page.screenshot({ path: './static/my-csdn.png' });
  } finally {
    await browser.close();
  }
}

// Never leave the promise floating: report the failure and signal it via the exit code.
getPic().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
const puppeteer = require('puppeteer');

/**
 * Open the CSDN blog page and save a full-page screenshot as a PNG file.
 *
 * Differs from the basic screenshot only by passing `fullPage: true`.
 * The browser is closed in a `finally` block so that a failed navigation or
 * screenshot does not leave a browser process running.
 *
 * @returns {Promise<void>} Resolves after the screenshot has been written and
 *   the browser has been closed.
 * @throws Rejects with whatever error Puppeteer raises while launching,
 *   navigating, or capturing.
 */
async function getPic() {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto('https://blog.csdn.net/weixin_46037781');
    // `fullPage: true` asks Puppeteer for the whole scrollable page, not just the viewport.
    await page.screenshot({ path: './static/my-csdn.png', fullPage: true });
  } finally {
    await browser.close();
  }
}

// Never leave the promise floating: report the failure and signal it via the exit code.
getPic().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
三、生成PDF
const puppeteer = require('puppeteer');

// Render the CSDN blog page to an A4 PDF at ./static/csdn.pdf.
// The browser is always closed, even when navigation or PDF generation fails.
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    // `networkidle2` delays the PDF until network activity has mostly settled.
    await page.goto('https://blog.csdn.net/weixin_46037781', { waitUntil: 'networkidle2' });
    await page.pdf({ path: './static/csdn.pdf', format: 'A4' });
  } finally {
    await browser.close();
  }
})().catch((err) => {
  // Never leave the promise floating: report the failure and signal it via the exit code.
  console.error(err);
  process.exitCode = 1;
});
四、自动化操作
自动打开 CSDN 页面，在搜索框中输入关键词并回车搜索
const puppeteer = require('puppeteer');

// Open the CSDN blog page in a visible (non-headless) browser, type a keyword
// into the toolbar search box, and press Enter to run the search.
(async () => {
  const browser = await puppeteer.launch({
    headless: false,
    defaultViewport: { width: 1536, height: 722 },
    args: ['--start-maximized'],
  });

  try {
    const page = await browser.newPage();
    await page.goto('https://blog.csdn.net/weixin_46037781', { waitUntil: 'networkidle2' });
    // `delay` is the pause in milliseconds between key presses.
    await page.type('#toolbar-search-input', '自动化', { delay: 300 });
    await page.keyboard.press('Enter');
    // NOTE(review): the original never closes the browser after the search,
    // presumably so the result can be inspected; that behavior is kept on success.
  } catch (err) {
    // On failure there is nothing to inspect, so do not leave the window running.
    await browser.close();
    throw err;
  }
})().catch((err) => {
  // Never leave the promise floating: report the failure and signal it via the exit code.
  console.error(err);
  process.exitCode = 1;
});
五、通过脚本方法截取指定元素的图
const puppeteer = require('puppeteer');

/**
 * Save a screenshot of only the first element with class `navList-box`
 * on the CSDN blog page.
 *
 * The element's bounding rectangle is measured inside the page and passed to
 * `page.screenshot` as the `clip` region.
 *
 * @returns {Promise<void>} Resolves after the screenshot has been written and
 *   the browser has been closed.
 * @throws {Error} If no element with class `navList-box` exists on the page,
 *   or if Puppeteer fails while launching, navigating, or capturing.
 */
async function getPic() {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto('https://blog.csdn.net/weixin_46037781');

    const clip = await page.evaluate(() => {
      const target = document.getElementsByClassName('navList-box')[0];
      if (!target) {
        // Signal "not found" to the Node side instead of throwing an opaque TypeError here.
        return null;
      }
      const { x, y, width, height } = target.getBoundingClientRect();
      // Return a plain object: the value is passed from the page back to Node.
      return { x, y, width, height };
    });

    if (clip === null) {
      throw new Error('Element with class "navList-box" was not found on the page');
    }

    await page.screenshot({ path: './static/list-csdn.png', clip });
  } finally {
    // Always release the browser, even when measuring or capturing fails.
    await browser.close();
  }
}

// Never leave the promise floating: report the failure and signal it via the exit code.
getPic().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
六、扩展
用 Node.js 执行 Python 脚本（先给出 JS 调用代码，随后是被调用的 Python 脚本 读取网站内容.py）
// Run the Python script synchronously and echo whatever it wrote to stdout.
const { execSync } = require('child_process');

// execSync blocks until the child process exits and returns its stdout.
const scriptOutput = execSync('python D:\\.mygitee\\my-py\\读取网站内容.py');

console.log(`sync: ${scriptOutput.toString()}`);
console.log('over');
from urllib.request import urlopen

# Fetch the runoob.com home page and print the first 300 bytes of the response.
# The `with` block closes the HTTP response once the read is done, instead of
# leaving the connection open until the interpreter exits.
with urlopen("https://www.runoob.com/") as myURL:
    # read(300) returns bytes, so the output is printed in b'...' form.
    print(myURL.read(300))
Puppeteer中文API文档