一、爬虫
//生成一个npm的仓库
npm init -y
//安装nodemon
npm i nodemon -S
npm i puppeteer -S
//全局安装yarn
npm i yarn -g
//yarn是facebook出的一个node.js的包管理工具,相对npm会快一些
yarn config set registry https://registry.npm.taobao.org
二、爬虫的代码
npm i koa koa-router koa2-cors -S
//package.json
{
"name": "server",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"start": "nodemon index.js",
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "",
"license": "ISC",
"dependencies": {
"nodemon": "^2.0.4",
"puppeteer": "^3.3.0"
}
}
//配置index.js
// index.js: HTTP entry point that returns the crawler's result as the response body.
const run = require('./reptile/db');
const Koa = require("koa");
const Router = require('koa-router');
const cors = require('koa2-cors');

const app = new Koa();
const router = new Router();

// GET / calls run() on every request and responds with whatever it resolves to.
router.get("/", async (ctx) => {
  ctx.body = await run();
});

// The koa2-cors middleware is registered ahead of the routes.
app.use(cors());
app.use(router.routes());
app.listen(8080);
//爬取堆糖(对应 index.js 中 require 的 ./reptile/db,即 reptile/db.js)
// Puppeteer supplies the browser that run() drives.
const puppeteer = require('puppeteer');
// Address of the page that run() opens and scrapes.
const url = 'https://www.duitang.com/';
/**
 * Opens `url` in a browser launched by Puppeteer and collects, for every
 * ".dynamic-feed-item" element on the page, the avatar `src` and the user name markup.
 *
 * The browser is always closed before this function settles, including when
 * navigation, hovering or the in-page evaluation fails.
 *
 * @returns {Promise<Array<{avatar: string, username: string}>>} One entry per
 *   feed item, in the order the page's jQuery selection yields them.
 * @throws Re-throws any error raised by Puppeteer while loading or scraping the page.
 */
async function run() {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(url, {
      waitUntil: 'networkidle2',
    });
    // NOTE(review): fixed 2s pause plus hover, presumably to let the feed finish rendering; confirm.
    await page.waitFor(2000);
    await page.hover('.dynamic-feed-item');

    const res = await page.evaluate(() => {
      // This callback executes inside the page, so it relies on the jQuery
      // instance the page itself exposes as window.jQuery.
      const $ = window.jQuery;
      const links = [];
      // .each() is a no-op on an empty selection, so no length check is needed.
      $('.dynamic-feed-item').each((index, item) => {
        const $item = $(item);
        links.push({
          avatar: $item.find('.user-info .avatar').attr('src'),
          username: $item.find('.user-info .username').html(),
        });
      });
      return links;
    });

    console.log(res);
    return res;
  } finally {
    // Awaited and placed in finally so a failed crawl does not leave a browser process behind.
    await browser.close();
  }
}
// Crawl immediately only when this file is executed directly; when it is loaded
// through require() no browser is launched at import time.
if (require.main === module) {
  run().catch((err) => {
    console.error(err);
    process.exitCode = 1;
  });
}

// index.js does require('./reptile/db') and calls the result, so run must be the export.
module.exports = run;
npm start
访问:http://localhost:8080/