使用 Node.js 抓取 https://themeforest.net 的网页模版预览,并通过 mongoose 存入 MongoDB。
以后找模版就不用那么费劲了,我是太懒了。。。
执行图片:
代码:
var request = require('request');
var cheerio = require('cheerio');
var mongoose = require('mongoose');
// Shorthand for the mongoose schema constructor.
const { Schema } = mongoose;

// Open the default mongoose connection to the local "theme" database.
mongoose.connect('mongodb://localhost/theme');

// One scraped template entry.
const Theme = new Schema({
  text: String, // text of the entry's link
  url: String, // urlPrefix + the link's href
  img: String // value of the preview element's data-preview-url attribute
});

// Model registered under the name 'Themes'.
const ThemeModel = mongoose.model('Themes', Theme);
// Site root that is prepended to every scraped href.
const urlPrefix = 'https://themeforest.net/';
// Listing URL; the page number is appended to it.
const baseUrl = 'https://themeforest.net/category/site-templates/creative?page=';
// Total number of listing pages to crawl.
const countPage = 60;

// Page currently being crawled.
let currentPage = 1;
// Number of entries found on the current page.
let itemCount = 0;
// Number of entries of the current page that have been saved so far.
let itemLoad = 0;
/**
 * Fetch one listing page and pass its HTML to analysisPage.
 *
 * When `page` is greater than countPage the crawl is finished and the
 * process exits with code 0. A page that cannot be fetched (request error or
 * a status other than 200) is logged and skipped, so one bad response no
 * longer leaves the crawl idle forever.
 *
 * @param {number} page - Page number appended to baseUrl.
 */
function getPageList(page) {
  if (page > countPage) {
    console.log('所有数据加载完毕!');
    process.exit(0);
  }
  console.log(`当前加载第 ${page} 页数据`);
  const pageUrl = baseUrl + page;
  request(pageUrl, function(err, response, body) {
    // `response` may be undefined when `err` is set, so test `err` first.
    if (!err && response.statusCode === 200) {
      analysisPage(body);
      return;
    }
    console.log('get page error url => ' + pageUrl, err);
    // Nothing was parsed for this page, so no save callback will advance the
    // crawl; move on to the next page here instead.
    getPageList(++currentPage);
  });
}
// Start the crawl at the initial value of currentPage.
getPageList(currentPage);
/**
 * Parse one listing page and queue every entry on it for saving.
 *
 * Sets itemCount to the number of entries found so that saveFile can tell
 * when the whole page has been processed. A page without any matching entry
 * would never trigger a save callback, so in that case the next page is
 * requested directly.
 *
 * @param {string} body - HTML of the listing page.
 */
function analysisPage(body) {
  const $ = cheerio.load(body);
  // Extract the list entries.
  const items = $('.js-google-analytics__list-event-container');
  itemCount = items.length;
  if (itemCount === 0) {
    console.log('no items found on page ' + currentPage);
    getPageList(++currentPage);
    return;
  }
  // Iterate for side effects only; nothing is collected from the callback.
  items.each(function(i, item) {
    const entry = $(item);
    const linkA = entry.find('.js-google-analytics__list-event-trigger.t-link');
    const img = entry.find('.landscape-image-magnifier').attr('data-preview-url');
    saveFile(urlPrefix + linkA.attr('href'), linkA.text(), img);
  });
}
/**
 * Persist one scraped entry and, once every entry of the current page has
 * been handled, request the next page.
 *
 * A failed save is logged and still counted as handled: previously a single
 * failure kept itemLoad below itemCount forever, so the crawl never advanced.
 *
 * @param {string} url - Link of the entry.
 * @param {string} text - Link text of the entry.
 * @param {string} img - Preview image URL of the entry.
 */
function saveFile(url, text, img) {
  const theme = new ThemeModel({ url, text, img });
  theme.save(function(err) {
    if (err) {
      console.log('save mongoose err! ', err);
    }
    itemLoad++;
    if (itemLoad === itemCount) {
      // Whole page handled: reset the per-page counter and move on.
      itemLoad = 0;
      getPageList(++currentPage);
    }
  });
}
package.json
{
"name": "theme",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "",
"license": "ISC",
"dependencies": {
"cheerio": "^0.22.0",
"mongoose": "^4.7.0",
"request": "^2.79.0"
}
}
更新一个前端查看页面:
var express = require('express');
var app = express();
var mongoose = require('mongoose');
// Shorthand for the mongoose schema constructor.
const { Schema } = mongoose;

// One stored template entry as rendered by the viewer page.
const Theme = new Schema({
  text: String, // label of the link
  url: String, // target of the link
  img: String // source of the preview image
});

// Model registered under the name 'Themes'.
const ThemeModel = mongoose.model('Themes', Theme);
// HTML page skeleton. `%{body}` is a plain literal marker (it is not a
// template-literal interpolation); the '/' route replaces it with the
// generated markup on every request.
var html = `
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>themes</title>
</head>
<body>
%{body}
</body>
</html>
`;
// Open the default mongoose connection to the local "theme" database.
mongoose.connect('mongodb://localhost/theme');
/**
 * Escape the characters that are significant in HTML text and in
 * double- or single-quoted attribute values.
 *
 * @param {*} value - Value to embed; it is converted with String().
 * @returns {string} The escaped text.
 */
function escapeHtml(value) {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * GET / - render every stored theme as a preview image followed by a link.
 *
 * Responds with status 500 when the query fails instead of throwing on an
 * undefined result set.
 */
app.get('/', function(req, res) {
  res.set('Content-Type', 'text/html');
  ThemeModel.find({}, function(err, items) {
    if (err) {
      console.log('find themes err! ', err);
      res.status(500).send('failed to load themes');
      return;
    }
    // The stored values were scraped from a third-party site, so they are
    // escaped before being placed into markup.
    const rows = items.map(function(item) {
      return [
        `<img src="${escapeHtml(item.img)}" />`,
        `<p><a href="${escapeHtml(item.url)}" target="_blank">${escapeHtml(item.text)}</a></p>`
      ].join('');
    });
    const body = '<div>' + rows.join('') + '</div>';
    // A replacer function inserts `body` verbatim; a string replacement would
    // interpret sequences such as `$&` or `$$` that may occur in the data.
    res.send(html.replace('%{body}', function() {
      return body;
    }));
  });
});
// Start the HTTP server on port 3000 and report the bound address once it is listening.
const server = app.listen(3000, () => {
  const bound = server.address();
  const host = bound.address;
  const port = bound.port;
  console.log('Example app listening at http://%s:%s', host, port);
});