之前用过 cheerio、selenium、superagent,貌似都没有这次用的这个厉害。这次找了一个能获取 JS 动态生成的元素的模块——puppeteer,附带一下我查 API 的网站:
这个模块还能获取 iframe 里面的内容。废话不多说,直接上我写的简单例子吧(简单地获取网易云音乐评论):
index.js
const puppeteer = require('puppeteer');
/**
 * Opens the song page with puppeteer, reads the comment entries found inside
 * the "contentFrame" iframe, and logs them as an array of
 * `{ icon, name, desc }` objects.
 *
 * The browser is always closed, even when scraping fails, so the Node process
 * can exit. Any failure is reported on stderr and sets a non-zero exit code.
 */
(async () => {
  const url = 'https://music.163.com/#/song?id=1338695683';
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(url);

    // The comment markup is queried inside the iframe named "contentFrame",
    // so look that frame up first. page.frames() is synchronous.
    const iframe = page.frames().find((f) => f.name() === 'contentFrame');
    if (!iframe) {
      throw new Error('iframe "contentFrame" was not found on ' + url);
    }

    // This callback is serialized and executed in the page context, so it
    // must be self-contained (no references to outer Node variables).
    const data = await iframe.evaluate(() => {
      // Remove line breaks and surrounding whitespace from scraped text.
      const clean = (text) => text.replace(/[\r\n]+/g, '').trim();

      const comments = [];
      // Walk each comment item and query within it, so that name, text and
      // avatar always belong to the same comment. Indexing three independent
      // document-wide lists goes out of step as soon as one comment contains
      // more than one link or lacks an avatar.
      for (const item of document.querySelectorAll('.cmmts .itm')) {
        const content = item.querySelector('.cnt');
        const author = content && content.querySelector('a');
        if (!author) {
          continue; // not a regular comment entry; nothing to extract
        }
        const avatar = item.querySelector('.head img');
        const name = author.innerText;
        comments.push({
          icon: clean((avatar && avatar.getAttribute('src')) || ''),
          name: name.trim(),
          // Drop the leading "<name>:" prefix so only the comment text remains.
          desc: clean(content.innerText.replace(name + ':', ' ')),
        });
      }
      return comments;
    });

    console.log('data', data);
  } finally {
    // Always release the browser process.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});