参考:https://blog.csdn.net/weixin_33736048/article/details/88020821
参考:https://juejin.cn/post/6844903544919687181
参考:https://www.jianshu.com/p/5d09aefae3b2
参考:https://www.jianshu.com/u/e87383f81f38
参考:https://blog.csdn.net/qupan1993/category_8567616.html
参考:https://www.cnblogs.com/wuweiblogs/p/12913193.html
参考:https://www.lfhacks.com/tech/puppeteer-skip-download-chromium/
参考:https://segmentfault.com/blog/develop_table
参考:https://www.jianshu.com/p/5d09aefae3b2
环境 node v14.12.1
{
"name": "1",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC",
"devDependencies": {
"puppeteer": "^10.4.0"
}
}
$ sudo npm install puppeteer@10.4.0 --unsafe-perm=true --allow-root
案例一
截图
const puppeteer = require("puppeteer");
/**
 * Case 1: open https://y.qq.com in headless Chromium and save a screenshot
 * of the page to "yqq.png".
 */
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    // page.goto() only resolves once its own navigation has finished. Calling
    // page.waitForNavigation() afterwards would wait for a *second* navigation
    // and end in a timeout, so the wait condition is passed to goto() instead.
    await page.goto("https://y.qq.com", { waitUntil: "networkidle2" });
    await page.screenshot({ path: "yqq.png" });
  } finally {
    // Release the Chromium process even when a step above throws.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
案例二
// const puppeteer = require('puppeteer');
// (async () => {
// const browser = await puppeteer.launch();
// const page = await browser.newPage();
// await page.emulate(puppeteer.devices['iPhone 6']);
// await page.goto('https://www.baidu.cn');
// await page.screenshot({ path: 'full.png', fullPage: true });
// await browser.close();
// })();
const puppeteer = require("puppeteer");
const iPhone = puppeteer.devices["iPhone 6"];
/**
 * Pause for a fixed amount of time.
 *
 * @param {number} delay - Time to wait, in milliseconds.
 * @returns {Promise<number>} Resolves with 1 once the delay has elapsed.
 */
const timeout = function (delay) {
  // resolve() cannot throw, so no try/catch or reject path is needed here.
  return new Promise((resolve) => {
    setTimeout(() => resolve(1), delay);
  });
};
/**
 * Legacy album purchase walkthrough, kept for reference only.
 *
 * In the original script these steps sat after an unconditional `return`, so
 * they never executed; the main flow below does not call this function either.
 * NOTE(review): the selectors appear to target the y.qq.com digital-album page
 * (the URL that was commented out in the main flow), not processon.com.
 *
 * @param {object} page - Puppeteer page that is already showing the album page.
 * @param {object} browser - Puppeteer browser that owns the page; closed at the end.
 * @returns {Promise<void>}
 */
async function runLegacyAlbumPurchase(page, browser) {
  // page.type() requires a selector; keyboard.type() types into whichever
  // element currently has focus, which is what the selector-less calls intended.
  await page.keyboard.type("*******"); // password redacted
  await page.tap("#onekey");
  await timeout(3000);
  await page.screenshot({ path: "3.png" });
  console.log("登录");

  await page.tap("#u");
  await page.keyboard.type("521017853");
  await page.tap("#p");
  await page.keyboard.type("*********"); // password redacted
  await page.tap("#go");
  await timeout(3000);
  await page.screenshot({ path: "3.png" });
  console.log("登录成功");

  // Start the purchase.
  console.log("点击立即购买按钮");
  await page.tap(".js_sale_buyalbum");
  await page.screenshot({ path: "4.png" });

  console.log("点击支付浮层上的立即支付");
  await page.tap(".js_buyalbum_pay");
  await timeout(5000);
  console.log("进入 米大师支付浮层");
  await page.screenshot({ path: "5.png" });

  // The Midas payment layer is looked up in the first child frame of the main frame.
  const midasFrame = page.mainFrame().childFrames()[0];

  console.log("点击确定 米大师支付浮层测试环境提示 的确认按钮");
  let button = await midasFrame.$(".fusion-pm-fl-wrapper .fpm-default");
  await button.tap();
  await page.screenshot({ path: "6.png" });

  console.log("点击 米大师支付浮层 确认支付按钮");
  button = await midasFrame.$("#wrap .fpm-default");
  await button.tap();
  await timeout(5000);
  await page.screenshot({ path: "7.png" });

  console.log("点击 米大师支付浮层 支付完成");
  button = await midasFrame.$("#wrap .btn-primary");
  await button.tap();
  await timeout(2000);
  console.log("已购铭牌页");
  await page.screenshot({ path: "8.png" });

  await browser.close();
}

/**
 * Case 2: emulate an iPhone 6, open processon.com, walk through the login
 * form while taking screenshots ("1.png" to "3.png"), then replace the page
 * content with a small HTML snippet.
 *
 * The browser window is left open after a successful run (as in the original
 * demo) so the result can be inspected; it is closed only when a step fails.
 */
(async () => {
  const browser = await puppeteer.launch({
    headless: false, // show the browser window so the steps can be watched; true runs without a window
  });
  try {
    const page = await browser.newPage();
    // page.emulate() takes a device descriptor of this shape:
    //   { name, userAgent,
    //     viewport: { width, height, deviceScaleFactor, isMobile, hasTouch, isLandscape } }
    await page.emulate(iPhone);
    console.log("进入页面");
    // goto() resolves when its navigation completes. A separate
    // waitForNavigation() afterwards would wait for another navigation and
    // time out, so it is not used here.
    await page.goto("https://www.processon.com/");
    await timeout(1000);
    await page.screenshot({ path: "1.png" });

    await page.tap(".collapse-btn"); // taps the first matching element
    await page.screenshot({ path: "2.png" });

    await page.tap(".login_btn"); // taps the first matching element
    console.log("跳转");

    // waitForSelector() waits for the login form to be attached and fails with
    // a descriptive timeout, whereas page.$() returns null when the element is
    // not there yet.
    const emailInput = await page.waitForSelector("#login_email");
    await emailInput.type("132***2403", { delay: 20 });
    const passwordInput = await page.waitForSelector("#login_password");
    await passwordInput.type("******", { delay: 20 });
    await page.tap("#signin_btn"); // taps the first matching element
    await page.screenshot({ path: "3.png" });
    console.log("登录");

    // Other page-level APIs noted for reference: page.goBack(), page.goForward(),
    // page.reload(), page.pdf(), page.screenshot(), page.setUserAgent(),
    // page.setExtraHTTPHeaders(), page.close(), browser.close().
    await page.setContent("<h2>Hello World</h2>");
  } catch (err) {
    // Do not leave a Chromium window behind when the walkthrough fails.
    await browser.close();
    throw err;
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
性能
const puppeteer = require("puppeteer");
const iPhone = puppeteer.devices["iPhone 6"];
/**
 * Performance: record a Chrome trace of loading the album page under iPhone 6
 * emulation and write it to "./trace.json".
 */
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.emulate(iPhone);
    await page.tracing.start({ path: "./trace.json" });
    // The wait condition belongs in goto(): a waitForNavigation() issued after
    // goto() has already resolved waits for a second navigation and times out,
    // which would also skip tracing.stop().
    await page.goto(
      "https://y.qq.com/m/digitalbum/gold/index.html?_video=true&id=2210323&g_f=tuijiannewupload#index/fans",
      { waitUntil: "networkidle2" }
    );
    await page.tracing.stop();
  } finally {
    // Release the Chromium process even when a step above throws.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
案例三
const puppeteer = require("puppeteer");
/**
 * Pause for a fixed amount of time.
 *
 * @param {number} delay - Time to wait, in milliseconds.
 * @returns {Promise<number>} Resolves with 1 once the delay has elapsed.
 */
const timeout = function (delay) {
  // resolve() cannot throw, so no try/catch or reject path is needed here.
  return new Promise((resolve) => {
    setTimeout(() => resolve(1), delay);
  });
};
/**
 * Normalise raw search hits: remove all whitespace from each title and keep
 * only the first hit for every distinct link.
 *
 * @param {Array<{link: string, title: string, image: string}>} hits - Hits in page order.
 * @returns {Array<{link: string, title: string, image: string}>} De-duplicated hits, order preserved.
 */
function dedupeSearchHits(hits) {
  const seenLinks = new Set();
  const uniqueHits = [];
  for (const { link, title, image } of hits) {
    if (seenLinks.has(link)) {
      continue;
    }
    seenLinks.add(link);
    uniqueHits.push({ link, title: title.replace(/\s+/g, ""), image });
  }
  return uniqueHits;
}

/**
 * Case 3: search the mobile novel site for "斗罗", scrape link, title and
 * cover image of every result block, and print the de-duplicated list.
 *
 * @returns {Promise<void>} Settles after the browser has been closed.
 */
const getSearch = async () => {
  const browser = await puppeteer.launch({
    headless: false,
    // args: ['--no-sandbox'] // needed on CentOS
  });
  try {
    const page = await browser.newPage();
    await page.goto("http://m.xbiquge.la/modules/article/waps.php", {
      waitUntil: "networkidle2",
    });

    // Other techniques noted for reference (originally written in pyppeteer
    // syntax): element-level screenshots via elementHandle.screenshot(),
    // dragging a slider with page.hover() + page.mouse.down()/move()/up(), and
    // scrolling with window.scrollBy() inside page.evaluate().

    const searchInput = await page.waitForSelector("#s_key");
    await searchInput.type("斗罗");
    const searchButton = await page.waitForSelector(".go");
    // Register the navigation wait before clicking so the navigation cannot be
    // missed; this replaces a fixed one-second sleep after the click.
    // NOTE(review): assumes submitting the search performs a full page
    // navigation - confirm against the live site.
    await Promise.all([
      page.waitForNavigation({ waitUntil: "networkidle2" }),
      searchButton.click(),
    ]);

    const rawHits = await page.evaluate(() => {
      const $ = window.$; // jQuery instance provided by the page itself
      const hits = [];
      $(".read_book .block").each((index, item) => {
        const block = $(item);
        hits.push({
          link: block.find(".block_img a").attr("href"),
          title: block.find(".block_txt h2 a").text(),
          image: block.find(".block_img a img").attr("src"),
        });
      });
      return hits;
    });

    const res = dedupeSearchHits(rawHits);
    console.log(res);
  } finally {
    // Close the browser even when a step above throws.
    await browser.close();
  }
};
// Run the search demo; report a failure instead of leaving the promise unhandled.
getSearch().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
const puppeteer = require('puppeteer');
/**
 * Open the Baidu home page and print the title and URL of every link that is
 * a direct child of the "#u1" element.
 */
(async () => {
  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await browser.newPage();
    await page.goto('https://www.baidu.com/', { waitUntil: 'networkidle2' });
    // The overloaded page.waitFor() is deprecated; use the selector-specific wait.
    await page.waitForSelector('#u1');

    // Runs inside the page: collect { title, url } for each matching anchor.
    const result = await page.evaluate(() =>
      Array.from(document.querySelectorAll('#u1 > a'), (anchor) => ({
        title: anchor.innerText,
        url: anchor.href,
      }))
    );
    console.log(result);

    // Keep the window visible for three seconds before closing it.
    await new Promise((resolve) => setTimeout(resolve, 3000));
  } finally {
    // Close the browser even when a step above throws.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
案例四
const puppeteer = require('puppeteer');
// Minimal document used to demonstrate waiting for an element.
const html = `
<html>
<body>
<div id="element">element inner html</div>
</body>
</html>`;
/**
 * Case 4: load the inline document, wait up to one second for "#element",
 * and print its innerHTML; print a notice instead when the element never
 * shows up.
 */
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    // setContent() loads the markup directly. Interpolating raw HTML into a
    // data: URL is fragile because characters such as '#' or '%' in the markup
    // are interpreted as URL syntax.
    await page.setContent(html);
    try {
      await page.waitForSelector('#element', { timeout: 1000 });
    } catch (e) {
      // Only a timeout means "element missing"; anything else is a real failure.
      if (e.name !== 'TimeoutError') {
        throw e;
      }
      console.log('element probably not exists');
      return;
    }
    console.log(await page.$eval('#element', (el) => el.innerHTML));
  } finally {
    // Close the browser on every path, including the early return above.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
案例五
// Case 5: click a button and wait for the successful XHR response it triggers.
// `page` and `btn` are expected to be provided by the surrounding script.
//
// In both variants the response wait is registered *before* the click and the
// two are awaited together; waiting only after the click has resolved can miss
// a response that arrives quickly.

// Variant A: match on the full URL prefix.
const apiUrlPrefix = 'http://www.yoursite.com/api';
await Promise.all([
  page.waitForResponse(
    (res) => res.request().url().startsWith(apiUrlPrefix) && res.ok()
  ),
  btn.click(),
]);

// Variant B: match on a path fragment anywhere in the URL.
const apiPathFragment = '/api';
await Promise.all([
  page.waitForResponse(
    (res) => res.request().url().includes(apiPathFragment) && res.ok()
  ),
  btn.click(),
]);