const puppeteer = require('puppeteer');
const fs = require("fs");
// Base URL of the paginated listing; the page number is appended to it.
const LIST_URL = 'https://www.91dub.com/list?page=';
// Navigation settings shared by every visit to a list page.
const NAVIGATION_OPTIONS = { timeout: 10000, waitUntil: 'networkidle2' };

/** Resolves after `ms` milliseconds (version-independent stand-in for the deprecated `page.waitFor(ms)`). */
const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/** Navigates `page` to list page number `pageNumber` and waits until a `<ul>` is present. */
const openListPage = async (page, pageNumber) => {
  await page.goto(`${LIST_URL}${pageNumber}`, NAVIGATION_OPTIONS);
  await page.waitForSelector('ul');
};

/** Returns how many elements in `page` match the CSS `selector`. */
const countMatches = (page, selector) =>
  page.evaluate((sel) => document.querySelectorAll(sel).length, selector);

/**
 * On an already opened detail page, walks the episode list and appends the
 * href of the `.btn` link to `outputFile` once per episode, clicking the next
 * episode entry between reads.
 */
const collectEpisodeLinks = async (page, outputFile) => {
  const episodeCount = await countMatches(page, '#app > div > div.episode > ul > li');
  for (let j = 1; j <= episodeCount; j++) {
    try {
      const href = await page.evaluate(() =>
        document.querySelector('#app > div > div.episode > div.btn > a').getAttribute('href'));
      try {
        // Synchronous append keeps the lines in the order they were scraped.
        fs.appendFileSync(outputFile, `${href}\n`);
      } catch (err) {
        // A failed write is reported but must not stop the crawl.
        console.error(err);
      }
      if (j < episodeCount) {
        // Select the next episode so the following iteration reads its link.
        await page.click(`#app > div > div.episode > ul > li:nth-child(${j + 1}) > span`);
      }
    } catch (e) {
      console.log('error:', e);
    }
  }
};

/**
 * Crawls the list pages 1..lastPage, opens every entry of each list page and
 * appends the link found for each of its episodes to `outputFile`, one per line.
 *
 * Failures on a single list page, entry or episode are logged and skipped.
 *
 * @param {object} [options]
 * @param {number} [options.lastPage=32] - Number of the last list page to visit.
 * @param {string} [options.outputFile='url'] - File the links are appended to.
 * @returns {Promise<string>} Resolves to 'success' when the crawl has finished.
 *   Rejects only if the browser cannot be launched, a tab cannot be opened,
 *   or the browser cannot be closed.
 */
const scrape = async ({ lastPage = 32, outputFile = 'url' } = {}) => {
  // With headless: true no browser window would pop up.
  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await browser.newPage();
    for (let p = 1; p <= lastPage; p++) {
      try {
        await openListPage(page, p);
        const itemCount = await countMatches(page, '#app > div > ul > li');
        for (let i = 1; i <= itemCount; i++) {
          try {
            await delay(500);
            await page.click(`#app > div > ul > li:nth-child(${i}) > div > a`);
            await page.waitForSelector('.btn');
            await collectEpisodeLinks(page, outputFile);
          } catch (e) {
            console.log('error:', e);
          } finally {
            // Always go back to the list, even after a failure; otherwise the
            // next nth-child click would run against the wrong page.
            try {
              await openListPage(page, p);
            } catch (e) {
              console.log('error:', e);
            }
          }
        }
      } catch (e) {
        console.log('https://www.91dub.com/list?page= ' + p + ':', e);
      }
    }
  } finally {
    // Close the browser on every exit path so no Chromium process is leaked.
    await browser.close();
  }
  return 'success';
};
// Run the scraper and print its result; if the returned promise rejects,
// report the error and make the process exit with a non-zero status instead
// of leaving an unhandled rejection.
scrape()
  .then((value) => {
    console.log(value); // Success!
  })
  .catch((err) => {
    console.error(err);
    process.exitCode = 1;
  });
// Official reference docs (Puppeteer API, Chinese translation): https://zhaoqize.github.io/puppeteer-api-zh_CN/#/class-Page?id=pagegobackoptions
// Simplified tutorial: http://blog.xcatliu.com/2018/09/18/puppeteer_tutorial/
// Node.js related (fs module): https://www.runoob.com/nodejs/nodejs-fs.html