puppeteer 官网:https://pptr.dev
如下图(爬取掘金的文章标题):
主要是以下代码:
index.js
const puppeteer = require('puppeteer')
// Entry point: launches a headless browser, opens the Juejin home page,
// collects the inner HTML of every article title found inside a `.title-row`
// element, prints the resulting array, and always shuts the browser down.
// The leading `;` is required because the preceding statement has no
// terminating semicolon.
;(async () => {
  const browser = await puppeteer.launch({
    // Drop the `--enable-automation` default flag to get past some anti-crawler checks.
    ignoreDefaultArgs: ['--enable-automation'],
  })

  try {
    const page = await browser.newPage()
    await page.goto('https://juejin.im', {
      waitUntil: 'networkidle2',
    })

    // This callback runs inside the page, so it must be self-contained.
    // Rows without a `.title` child are skipped rather than crashing the scrape.
    const titles = await page.$$eval('.title-row', (rows) =>
      rows
        .map((row) => row.querySelector('.title'))
        .filter((titleEl) => titleEl !== null)
        .map((titleEl) => titleEl.innerHTML)
    )

    console.log(titles)
  } finally {
    // Close the browser even when navigation or evaluation fails, so no
    // browser process is left running.
    await browser.close()
  }
})().catch((err) => {
  // Report the failure and signal it to the shell through the exit code.
  console.error(err)
  process.exitCode = 1
})
package.json
{
"name": "demo_1",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "",
"license": "ISC",
"dependencies": {
"puppeteer": "^3.1.0"
}
}
puppeteer
完整代码分享:reptile-juejin