const puppeteer = require("puppeteer");
const fs = require("fs-extra");
const jschardet = require("jschardet"); // 判断文件编码
const url = "https://www.yibaixun.com/";
// Secondary (asset/CDN) origin whose responses are mirrored next to the main site.
const OSS_URL = "https://oss.yibaixun.com/";
// Directory that receives the mirrored responses (trailing slash required).
const MIRROR_DIR = "./www/";
// Detected encodings that are worth reporting on the console.
const LOGGED_ENCODINGS = new Set(["UTF-8", "ascii", "ISO-8859-2", "windows-1252"]);

/**
 * Maps a response URL onto a file path relative to the mirror directory.
 *
 * @param {string} responseUrl - Absolute URL of the response.
 * @param {string[]} baseUrls - Origin prefixes (each ending in "/") that are mirrored.
 * @returns {string|null} Relative file path, or null when the URL is not under
 *   any of the mirrored prefixes (third-party hosts, data: URLs, ...).
 */
function toLocalName(responseUrl, baseUrls) {
  // Plain prefix comparison: building a RegExp from the URL would treat "."
  // as a wildcard and could match the host in the middle of another URL.
  const base = baseUrls.find((candidate) => responseUrl.startsWith(candidate));
  if (base === undefined) {
    return null;
  }
  // Query string and fragment are not part of the file name.
  const relative = responseUrl.slice(base.length).split(/[?#]/, 1)[0];
  // Never let a crafted URL escape the mirror directory.
  if (relative.split("/").includes("..")) {
    return null;
  }
  // The site root and directory-style URLs are stored as index.html.
  if (relative === "" || relative.endsWith("/")) {
    return `${relative}index.html`;
  }
  return relative;
}

/**
 * Writes the body of one response into the mirror directory.
 *
 * Responses without a readable body (redirects, preflight requests) and
 * responses from hosts that are not mirrored are skipped.
 *
 * @param {object} response - Puppeteer HTTPResponse passed to the "response" event.
 * @returns {Promise<void>}
 */
async function saveResponse(response) {
  const responseUrl = response.url();

  let body;
  try {
    body = await response.buffer();
  } catch (err) {
    // buffer() rejects when the response has no body; nothing to save then.
    console.warn(`skipped (no body): ${responseUrl}: ${err.message}`);
    return;
  }

  // Report the detected character encoding of the payload.
  const { encoding } = jschardet.detect(body);
  if (LOGGED_ENCODINGS.has(encoding)) {
    console.log(`${encoding}=`, responseUrl);
  }

  const localName = toLocalName(responseUrl, [url, OSS_URL]);
  if (localName === null) {
    return;
  }
  fs.outputFileSync(`${MIRROR_DIR}${localName}`, body);
}

// Entry point: load the page, mirror every response under ./www/, take a
// screenshot and store the final rendered DOM as ./index.html.
(async () => {
  const browser = await puppeteer.launch({
    headless: true,
    userDataDir: "./data",
  });
  try {
    const page = await browser.newPage();

    // Response handlers run detached from this function, so keep their
    // promises: they must settle before the browser is closed, and a failure
    // in one of them must not surface as an unhandled rejection.
    const pendingSaves = [];
    page.on("response", (response) => {
      pendingSaves.push(
        saveResponse(response).catch((err) => {
          console.error(`failed to save ${response.url()}:`, err);
        }),
      );
    });

    await page.goto(url);
    await page.screenshot({ path: "example.png" });

    // Save the rendered HTML document.
    const html = await page.content();
    fs.outputFileSync("./index.html", html);

    await Promise.all(pendingSaves);
  } finally {
    // Always release the browser process, even when a step above failed.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
// Downloading a web page with puppeteer
// First published 2022-04-29 22:39:19