使用 Chrome Headless(Puppeteer)爬取页面的 HTTP 访问记录,代码如下:
const Puppeteer = require("puppeteer");
/**
 * Script entry point: launches headless Chrome, opens a page, logs the URL of
 * every request / response / failed request / finished request that occurs
 * while loading https://www.baidu.com, then closes the browser.
 *
 * Any failure (launch, page setup, navigation) is reported on stderr and the
 * process exit code is set to 1. Once the browser has been launched it is
 * always closed, even when a later step throws.
 */
(async () => {
  // Launch outside the try block: if launch itself fails there is no browser
  // instance to close, and the error is handled by the trailing .catch().
  const browser = await Puppeteer.launch({
    headless: true,
  });

  try {
    const page = await browser.newPage();

    // Interception must be enabled for the 'request' handler to be allowed to
    // call request.continue().
    await page.setRequestInterception(true);

    page.on('request', (request) => {
      console.log(request.url());
      // continue() returns a promise; handle its rejection so it cannot
      // surface as an unhandled rejection.
      request.continue().catch((err) => {
        console.error(err);
      });
    });
    page.on('response', (response) => {
      console.log(response.url());
    });
    page.on('requestfailed', (request) => {
      console.log(request.url());
    });
    page.on('requestfinished', (request) => {
      console.log(request.url());
    });

    await page.goto('https://www.baidu.com');
  } finally {
    // Always release the Chrome process, whether or not navigation succeeded.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
以上是一个简单的例子。
基于以上思路,可以实现静态资源和 API 请求的爬取,再对 URL 做相似度分析并存储,供漏洞扫描使用。该功能仍在迭代中,敬请期待。