基本图示
柱状图
统计不同日期的新闻发布数
饼状图
统计不同作者编辑新闻的数量
折线图
关键词数量随日期的变化
词云
所有新闻内容jieba分词的词云展示
数据来源
项目一存入mysql的爬虫数据
建立网站
express --view=ejs echarts_site
在项目文件夹下执行命令行,ejs为一种模板引擎.
npm install mysql --save
安装mysql模块,并将其加入依赖项.
npm install
在echarts_site文件夹内命令行运行
接下来重点讲jieba的安装
jieba
jieba需要c++,python,node-gyp,make环境支持
c++:
yum -y install gcc
yum -y install gcc-c++
检查是否安装:
gcc -v
g++ -v
测试:
touch test.cpp
vim test.cpp
建立一个cpp文件
#include <iostream>
// Toolchain smoke test: prints a single line so a successful build and run is visible.
int main(int argc, char* argv[])
{
    // The command-line arguments are not used by this program.
    static_cast<void>(argc);
    static_cast<void>(argv);

    std::cout << "running\n";
    return 0;
}
按下ESC,输入:wq保存退出
cmd执行:
g++ -Wall test.cpp -o test
-Wall显示警告信息
-o则是输出成可执行文件
chmod u+x test
./test
结果出现running,g++正常运行.
python2.7
我采用宝塔面板,安装python项目管理器(其中包含python2.7.5)
具体参考项目一
make
yum install make -y
node-gyp
或者尝试在线安装:
npm install -g node-gyp
npm install -g node-pre-gyp
我们也可以用宝塔面板pm2管理器安装
jieba
进入echarts_site文件夹
cmd运行:
npm install nodejieba --save
实际操作
mysql.js
var mysql = require("mysql");
/**
 * Read one connection setting from the environment.
 * @param {string} name - environment variable name
 * @param {string} fallback - value used when the variable is not set
 * @returns {string} the environment value, or the fallback
 */
function envOr(name, fallback) {
  var value = process.env[name];
  return value === undefined ? fallback : value;
}

// Shared connection pool used by the query helpers in this file.
// Each setting can be overridden through an environment variable so real
// credentials do not have to be committed; the defaults are unchanged.
var pool = mysql.createPool({
  host: envOr('MYSQL_HOST', '127.0.0.1'), // local server: use 127.0.0.1
  user: envOr('MYSQL_USER', 'root'), // your MySQL account name
  password: envOr('MYSQL_PASSWORD', 'root'), // your MySQL password
  database: envOr('MYSQL_DATABASE', 'crawl') // the database to connect to
});
/**
 * Run a SQL statement on a pooled connection and hand the result to a callback.
 *
 * Both `query(sql, sqlparam, callback)` and `query(sql, callback)` are accepted.
 * Previously the two-argument form passed the caller's function to the driver
 * as the query callback, so the wrapper that releases the connection never ran
 * and every such call leaked one pooled connection.
 *
 * @param {string} sql - SQL text, optionally containing `?` placeholders
 * @param {Array|Object|Function} [sqlparam] - placeholder values, or the callback
 * @param {Function} [callback] - called as `(err, rows, fields)`
 */
var query = function(sql, sqlparam, callback) {
  var params = sqlparam;
  var done = callback;
  if (typeof sqlparam === 'function' && callback === undefined) {
    done = sqlparam;
    params = undefined;
  }

  pool.getConnection(function(err, conn) {
    if (err) {
      done(err, null, null);
      return;
    }

    var onResult = function(qerr, vals, fields) {
      conn.release(); // return the connection to the pool before notifying the caller
      done(qerr, vals, fields);
    };

    if (params === undefined) {
      conn.query(sql, onResult);
    } else {
      conn.query(sql, params, onResult);
    }
  });
};
/**
 * Run a SQL statement that has no placeholders on a pooled connection.
 *
 * @param {string} sql - SQL text to execute
 * @param {Function} callback - called as `(err, rows, fields)`; when no
 *   connection can be obtained it receives `(err, null, null)`
 */
var query_noparam = function(sql, callback) {
  pool.getConnection(function(poolError, connection) {
    if (poolError) {
      callback(poolError, null, null);
      return;
    }

    connection.query(sql, function(queryError, rows, fieldInfo) {
      // Give the connection back to the pool first, then notify the caller.
      connection.release();
      callback(queryError, rows, fieldInfo);
    });
  });
};
exports.query = query;
exports.query_noparam = query_noparam;
这里的内容不做讨论
不要忘了cmd运行:
node mysql.js
main.html
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <title>Echarts图片</title>
  <!-- Third-party libraries loaded from CDNs -->
  <link rel="stylesheet" href="https://cdn.staticfile.org/twitter-bootstrap/3.3.7/css/bootstrap.min.css">
  <script src="https://cdn.staticfile.org/jquery/2.1.1/jquery.min.js"></script>
  <script type="text/javascript" src="https://cdn.jsdelivr.net/npm/echarts@4.7.0/dist/echarts.min.js"></script>
  <!-- Scripts loaded by relative path; each chart script defines the handler its tab calls -->
  <script src='javascripts/dist/echarts-wordcloud.min.js'></script>
  <script src="javascripts/histogram.js" type="text/javascript"></script>
  <script src="javascripts/pie.js" type="text/javascript"></script>
  <script src="javascripts/line.js" type="text/javascript"></script>
  <script src="javascripts/wordcloud.js" type="text/javascript"></script>
</head>
<body>
  <div class="container">
    <h4>Echarts绘图示例</h4>
    <ul class="nav nav-tabs" role="tablist">
      <li class="nav-item">
        <a class="nav-link active" data-toggle="tab" aria-selected="true" onclick="histogramFun()">柱状图</a>
      </li>
      <li class="nav-item">
        <a class="nav-link active" data-toggle="tab" aria-selected="true" onclick="pieFun()">饼状图</a>
      </li>
      <li class="nav-item">
        <a class="nav-link active" data-toggle="tab" aria-selected="true" onclick="lineFun()">折线图</a>
      </li>
      <li class="nav-item">
        <a class="nav-link active" data-toggle="tab" aria-selected="true" onclick="wordcloudFun()">词云</a>
      </li>
    </ul>
    <!-- Every chart is drawn into #main1. It is a block-level <div> because
         CSS width/height have no effect on an inline <span>. -->
    <div id="main1" style="width: 1000px;height:600px;"></div>
  </div>
</body>
</html>
<head>
  <meta charset="UTF-8">
  <!-- The line above sets the document encoding -->
  <title>Echarts图片</title>
  <link rel="stylesheet" href="https://cdn.staticfile.org/twitter-bootstrap/3.3.7/css/bootstrap.min.css">
  <script src="https://cdn.staticfile.org/jquery/2.1.1/jquery.min.js"></script>
  <script type="text/javascript" src="https://cdn.jsdelivr.net/npm/echarts@4.7.0/dist/echarts.min.js"></script>
  <script src='javascripts/dist/echarts-wordcloud.min.js'></script>
  <!-- The resources above are code from outside this server (note: echarts-wordcloud is referenced by a relative path) -->
  <script src="javascripts/histogram.js" type="text/javascript"></script>
  <script src="javascripts/pie.js" type="text/javascript"></script>
  <script src="javascripts/line.js" type="text/javascript"></script>
  <script src="javascripts/wordcloud.js" type="text/javascript"></script>
  <!-- The four scripts above are code served by this server itself -->
  <!-- See the figure below; take care to spell the file names exactly -->
</head>
我们的js代码放在javascripts文件夹中
运行后会出现:
有4个选项,点击一个会触发相应的函数,转到相应的图示.
路由(routes)
index.js
var express = require('express');
var router = express.Router();
var mysql = require('../mysql.js');
var mywordcutModule = require('./wordcut.js');
var myfreqchangeModule = require('./freqchange.js');
// Headers sent with every JSON response; they ask the browser not to cache chart data.
const JSON_NO_CACHE_HEADERS = {
  "Content-Type": "application/json",
  "Cache-Control": "no-cache, no-store, must-revalidate",
  "Pragma": "no-cache",
  "Expires": 0
};

/**
 * Write a JSON response with the shared no-cache headers.
 * @param {object} response - response object of the current request
 * @param {number} statusCode - HTTP status code to send
 * @param {*} payload - value serialised with JSON.stringify
 */
function sendJson(response, statusCode, payload) {
  response.writeHead(statusCode, JSON_NO_CACHE_HEADERS);
  response.end(JSON.stringify(payload));
}

/**
 * Build the `(err, rows)` callback for a database query.
 * A query error, or an exception thrown by `transform`, is answered with
 * HTTP 500. Previously `err` was ignored and serialising the missing result
 * threw inside the callback.
 * @param {object} response - response object of the current request
 * @param {Function} [transform] - optional post-processing applied to the rows
 * @returns {Function} callback suitable for the mysql helper functions
 */
function respondWithRows(response, transform) {
  return function (err, rows) {
    if (err) {
      console.error(err);
      sendJson(response, 500, { error: 'database query failed' });
      return;
    }
    let payload;
    try {
      payload = transform ? transform(rows) : rows;
    } catch (processingError) {
      console.error(processingError);
      sendJson(response, 500, { error: 'failed to process query result' });
      return;
    }
    sendJson(response, 200, payload);
  };
}

/* GET home page. */
router.get('/', function(req, res, next) {
  res.render('index', { title: 'Express' });
});

// Bar chart data: number of fetched rows per publish date.
router.get('/histogram', function(request, response) {
  const fetchSql = "select publish_date as x,count(publish_date) as y from fetches group by publish_date order by publish_date;";
  // query_noparam is the helper for statements without placeholders; it releases the connection.
  mysql.query_noparam(fetchSql, respondWithRows(response));
});

// Pie chart data: number of fetched rows per author.
router.get('/pie', function(request, response) {
  const fetchSql = "select author as x,count(author) as y from fetches group by author;";
  mysql.query_noparam(fetchSql, respondWithRows(response));
});

// Line chart data: occurrences of a keyword per publish date.
router.get('/line', function(request, response) {
  const keyword = '疫情'; // could be extended to accept a search term submitted by the front end
  // The keyword is bound through a placeholder instead of being concatenated into the SQL text.
  const fetchSql = "select content,publish_date from fetches where content like ? order by publish_date;";
  mysql.query(fetchSql, ['%' + keyword + '%'], respondWithRows(response, function (rows) {
    return myfreqchangeModule.freqchange(rows, keyword);
  }));
});

// Word cloud data: token frequencies over all stored content.
router.get('/wordcloud', function(request, response) {
  const fetchSql = "select content from fetches;";
  mysql.query_noparam(fetchSql, respondWithRows(response, function (rows) {
    return mywordcutModule.wordcut(rows); // return the processed data
  }));
});
module.exports = router;
users.js
var express = require('express');
var router = express.Router();
// Handler for GET / on the users router: replies with a fixed placeholder body.
function listUsers(req, res, next) {
  res.send('respond with a resource');
}

router.get('/', listUsers);
module.exports = router;
wordcut.js
var nodejieba = require('nodejieba');
// Removes whitespace, digits, ASCII word characters and the listed punctuation
// so that only the remaining (mostly Chinese) text reaches the tokenizer.
const regex = /[\t\s\r\n\d\w]|[\+\-\(\),\.。,!?《》@、【】"'::%-\/“”]/g;

/**
 * Count how often each token produced by nodejieba occurs across all rows.
 *
 * Rows whose `content` is missing or null are skipped, as are rows that are
 * empty after cleaning; a missing `vals` yields an empty result. Previously
 * a null `content` threw a TypeError.
 *
 * @param {Array<Object>} vals - rows with a `content` property
 * @returns {Object<string, number>} map from token to occurrence count
 */
var wordcut = function(vals) {
  var word_freq = {};
  (vals || []).forEach(function (row) {
    var rawContent = row ? row["content"] : null;
    if (rawContent === null || rawContent === undefined) {
      return;
    }
    var cleaned = String(rawContent).replace(regex, '');
    if (cleaned.length === 0) {
      return;
    }
    nodejieba.cut(cleaned).forEach(function (word) {
      var token = String(word);
      word_freq[token] = (word_freq[token] || 0) + 1;
    });
  });
  return word_freq;
};
exports.wordcut = wordcut;
freqchange.js
// 获取关键词 疫情 随日期变化的出现次数【折线图】
var nodejieba = require('nodejieba');
// var mysql = require('../mysql.js');
// Removes whitespace, digits, ASCII word characters and the listed punctuation.
const regex_c = /[\t\s\r\n\d\w]|[\+\-\(\),\.。,!?《》@、【】"'::%-\/“”]/g;
// Captures the text between a three-character prefix and " 2020", e.g. "Apr 29"
// from "Wed Apr 29 2020 ...". Dates outside 2020 do not match.
var regex_d = /\w{3}\s(.*?) 2020/;

/**
 * Sum the occurrences of a keyword per publish date.
 *
 * Fixes over the previous version:
 *  - the first row of each date is counted (`(undefined + n) || 0` discarded it);
 *  - the keyword is searched as literal text instead of being passed to `eval`;
 *  - rows whose date does not match `regex_d` are skipped instead of throwing;
 *  - rows without the keyword, or with null content, count as 0 instead of throwing.
 *
 * Note: the content is cleaned with `regex_c` before counting, so a keyword made
 * of characters that the pattern strips (digits, ASCII letters, ...) is never found.
 *
 * @param {Array<Object>} vals - rows with `content` and `publish_date` properties
 * @param {string} keyword - literal text to count
 * @returns {Object<string, number>} map from the date text captured by `regex_d` to the total count
 */
var freqchange = function(vals, keyword) {
  var word_freq = {};
  var needle = (keyword === null || keyword === undefined) ? '' : String(keyword);

  (vals || []).forEach(function (data) {
    var dateMatch = regex_d.exec(String(data['publish_date']));
    if (dateMatch === null) {
      return; // no usable date on this row
    }
    var publish_date = dateMatch[1];

    var rawContent = data['content'];
    var content = (rawContent === null || rawContent === undefined)
      ? ''
      : String(rawContent).replace(regex_c, '');

    // Splitting on the literal keyword yields one more piece than there are occurrences.
    var freq = needle === '' ? 0 : content.split(needle).length - 1;

    word_freq[publish_date] = (word_freq[publish_date] || 0) + freq;
  });
  return word_freq;
};
exports.freqchange = freqchange;
js动态处理
histogram.js
/**
 * Fetch the per-date counts from /histogram and draw them as a bar chart in #main1.
 *
 * Changes over the previous version:
 *  - the legend entry and the series share one name (ECharts only shows legend
 *    entries that match a series name, so the legend never appeared before);
 *  - rows whose `x` contains no `YYYY-MM-DD` date are skipped instead of throwing;
 *  - the option replaces whatever chart was drawn before instead of merging into it.
 */
function histogramFun() {
  $.get('/histogram', function (data) {
    var seriesName = '新闻发布数';
    var datePattern = /\d{4}-(\d{2}-\d{2})/;
    var xdata = [];
    var ydata = [];

    (data || []).forEach(function (element) {
      // x looks like "2020-04-28T16:00:00.000Z"; keep only the month-day part.
      // NOTE(review): that sample appears to be a UTC timestamp, so the label may be
      // one day behind the local publish date - confirm against the database values.
      var matched = datePattern.exec(element['x']);
      if (matched === null) {
        return;
      }
      xdata.push(matched[1]);
      ydata.push(element['y']);
    });

    var myChart = echarts.init(document.getElementById('main1'));

    // Chart configuration and data.
    var option = {
      title: {
        text: '新闻发布数 随时间变化'
      },
      tooltip: {},
      legend: {
        data: [seriesName]
      },
      xAxis: {
        data: xdata
      },
      yAxis: {},
      series: [{
        name: seriesName,
        type: 'bar',
        data: ydata
      }]
    };

    // notMerge = true: do not keep components left over from a previously shown chart.
    myChart.setOption(option, true);
  });
}
line.js
/**
 * Fetch the keyword counts from /line and draw them as a line chart in #main1.
 *
 * The response is used as a map: its keys become the category axis and its
 * values become the data points. `option` is now a local variable; it used to
 * be assigned without a declaration, which created a global and throws in
 * strict mode.
 */
function lineFun() {
  $.get('/line', function (data) {
    var counts = data || {};
    var myChart = echarts.init(document.getElementById("main1"));
    var option = {
      title: {
        text: '"疫情"该词在新闻中的出现次数随时间变化图'
      },
      xAxis: {
        type: 'category',
        data: Object.keys(counts)
      },
      yAxis: {
        type: 'value'
      },
      series: [{
        data: Object.values(counts),
        type: 'line',
        itemStyle : { normal: {label : {show: true}}}
      }],
    };
    // notMerge = true: replace whatever chart was shown before.
    myChart.setOption(option, true);
  });
}
pie.js
/**
 * Fetch the per-author counts from /pie, draw them as a pie chart in #main1 and
 * highlight the slices one after another, one per second.
 *
 * Changes over the previous version:
 *  - only one highlight timer runs at a time: it is cleared when the pie is drawn
 *    again and stops itself once the first series is no longer a pie (before,
 *    every click added another timer that was never cleared);
 *  - an author value without the "责任编辑:" prefix is used as-is instead of
 *    throwing; null or empty author values are skipped;
 *  - no timer is started for an empty data set (the index was `% 0`, i.e. NaN);
 *  - the series label describes the data (it was left over from the ECharts sample).
 */
var pieFun = (function () {
  // Id of the running highlight timer, or null when none is active.
  var activeTimer = null;

  // Stop the highlight carousel if it is running.
  function stopCarousel() {
    if (activeTimer !== null) {
      clearInterval(activeTimer);
      activeTimer = null;
    }
  }

  return function () {
    $.get('/pie', function (data) {
      var pattern = /责任编辑:(.+)/; // captures the name after the prefix
      var newdata = [];
      (data || []).forEach(function (element) {
        var rawAuthor = element['x'];
        if (rawAuthor === null || rawAuthor === undefined || rawAuthor === '') {
          return;
        }
        var matched = pattern.exec(rawAuthor);
        newdata.push({
          name: matched ? matched[1] : String(rawAuthor),
          value: element['y']
        });
      });

      var myChart = echarts.init(document.getElementById('main1'));

      // Chart configuration and data.
      var option = {
        title: {
          text: '作者发布新闻数量',
          x: 'center'
        },
        tooltip: {
          trigger: 'item',
          formatter: "{a} <br/>{b} : {c} ({d}%)"
        },
        legend: {
          orient: 'vertical',
          left: 'left'
        },
        series: [
          {
            name: '责任编辑',
            type: 'pie',
            radius: '55%',
            center: ['50%', '60%'],
            data: newdata,
            itemStyle: {
              emphasis: {
                shadowBlur: 10,
                shadowOffsetX: 0,
                shadowColor: 'rgba(0, 0, 0, 0.5)'
              }
            }
          }
        ]
      };

      stopCarousel();
      myChart.setOption(option, true);

      var dataLen = newdata.length;
      if (dataLen === 0) {
        return; // nothing to highlight
      }

      var currentIndex = -1;
      activeTimer = setInterval(function () {
        // Stop once something other than a pie is shown in the chart.
        var shown = myChart.getOption();
        var shownSeries = shown && shown.series && shown.series[0];
        if (!shownSeries || shownSeries.type !== 'pie') {
          stopCarousel();
          return;
        }

        // Remove the highlight from the previous slice.
        myChart.dispatchAction({
          type: 'downplay',
          seriesIndex: 0,
          dataIndex: currentIndex
        });
        currentIndex = (currentIndex + 1) % dataLen;
        // Highlight the current slice.
        myChart.dispatchAction({
          type: 'highlight',
          seriesIndex: 0,
          dataIndex: currentIndex
        });
        // Show its tooltip.
        myChart.dispatchAction({
          type: 'showTip',
          seriesIndex: 0,
          dataIndex: currentIndex
        });
      }, 1000);
    });
  };
})();
wordcloud.js
/**
 * Fetch the token frequencies from /wordcloud and draw them as a word cloud in #main1.
 *
 * The cloud is shaped by the mask image ./images/logo.png. If that image cannot
 * be loaded the cloud is now drawn without a mask; before, nothing was rendered.
 * The load handlers are attached before `src` is set.
 */
function wordcloudFun() {
  $.get('/wordcloud', function (keywords) {
    var chart = echarts.init(document.getElementById('main1'));

    // The square root of each count is used as the word's value.
    var source = keywords || {};
    var data = Object.keys(source).map(function (name) {
      return {
        name: name,
        value: Math.sqrt(source[name])
      };
    });

    var option = {
      title: {
        text: '所有新闻内容 jieba分词 的词云展示'
      },
      series: [ {
        type: 'wordCloud',
        sizeRange: [12, 60],
        rotationRange: [-90, 90],
        rotationStep: 45,
        gridSize: 2,
        shape: 'circle',
        drawOutOfBound: false,
        textStyle: {
          normal: {
            fontFamily: 'sans-serif',
            fontWeight: 'bold',
            // Each word gets a random colour with channels in the range 0-160.
            color: function () {
              return 'rgb(' + [
                Math.round(Math.random() * 160),
                Math.round(Math.random() * 160),
                Math.round(Math.random() * 160)
              ].join(',') + ')';
            }
          },
          emphasis: {
            shadowBlur: 10,
            shadowColor: '#333'
          }
        },
        data: data
      } ]
    };

    // Draw the cloud, using `mask` as the shape when one is available.
    function render(mask) {
      if (mask) {
        option.series[0].maskImage = mask;
      }
      chart.clear();
      chart.setOption(option);
    }

    var maskImage = new Image();
    maskImage.onload = function () {
      render(maskImage);
    };
    maskImage.onerror = function () {
      render(null);
    };
    maskImage.src = './images/logo.png';

    window.onresize = function () {
      chart.resize();
    };
  });
}
dist
测试部分
echarts_site文件夹下,cmd运行:
node ./bin/www
或者
npm start
打开http://127.0.0.1:3000/main.html(使用云服务器请更改ip)
我们也可以将main.html重命名为index.html,这样可少输入main.html
non-MySQL errors
找到Parser.js,437行改为
console.error(err);
return false;
(这里只是示例,其实是看你运行结果报错在哪个文件,进入这个文件照这么改,就可以了)
(不保证一定适用)
cannot read property 1 of null:有可能发布日期没有拿到,exec出来的就是null
做一个!=null的判空即可(推荐在获取数据时就进行处理)
Uncaught SyntaxError: Invalid or unexpected token:
去除页面上中文符号,改成英文符号
展示
我的服务器带宽较小,反应有点慢。
更新:
关于favicon.ico 404问题:
禁止 favicon.ico 请求
关于wordcloud的形状,我们需要一个logo图片