期末作业 爬虫数据展示网站
要求
用Angular.js+express实现。
由于 nodejieba 无法安装,改用老师提供的新方法,
改进后不再出现找不到 nodejieba 模块的问题。
正式开始
1.建立2个mysql表,保存 用户信息&操作日志
-- News table created earlier for the crawler; one row per fetched article.
-- (MySQL only treats `--` as a comment when it is followed by whitespace.)
CREATE TABLE `fetches` (
`id_fetches` int(11) NOT NULL AUTO_INCREMENT,
`url` varchar(200) DEFAULT NULL,
`source_name` varchar(200) DEFAULT NULL,
`source_encoding` varchar(45) DEFAULT NULL,
`title` varchar(200) DEFAULT NULL,
`keywords` varchar(200) DEFAULT NULL,
`author` varchar(200) DEFAULT NULL,
`publish_date` date DEFAULT NULL,
`crawltime` datetime DEFAULT NULL,
`content` longtext,
`createtime` datetime DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`id_fetches`),
-- NOTE(review): this unique key duplicates the primary key on the same column.
UNIQUE KEY `id_fetches_UNIQUE` (`id_fetches`),
-- One row per URL: inserting an already stored URL violates this key.
UNIQUE KEY `url_UNIQUE` (`url`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- User account table; `username` is unique.
-- NOTE(review): the login route compares `password` directly with the submitted
-- value, so passwords are stored in plaintext; VARCHAR(45) is also too short for
-- most password hashes. Confirm before using this outside of coursework.
CREATE TABLE `crawl`.`user` (
`id` INT UNSIGNED NOT NULL AUTO_INCREMENT,
`username` VARCHAR(45) NOT NULL,
`password` VARCHAR(45) NOT NULL,
`registertime` datetime DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`id`),
UNIQUE KEY `username_UNIQUE` (`username`))
ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- Audit log of user operations: logins and searches (including the concrete request URL).
-- NOTE(review): `request_time` is stored as text (VARCHAR), not as a DATETIME,
-- so it cannot be range-filtered or sorted chronologically without conversion.
CREATE TABLE `crawl`.`user_action` (
`id` INT UNSIGNED NOT NULL AUTO_INCREMENT,
`username` VARCHAR(45) NOT NULL,
`request_time` VARCHAR(45) NOT NULL,
`request_method` VARCHAR(20) NOT NULL,
`request_url` VARCHAR(300) NOT NULL,
`status` int(4),
`remote_addr` VARCHAR(100) NOT NULL,
PRIMARY KEY (`id`))
ENGINE=InnoDB DEFAULT CHARSET=utf8;
2.建立一个新项目,建立连接mysql配置文件
// MySQL connection settings for the `crawl` database.
// NOTE(review): the root account and its password are hard-coded here; keep this
// file out of version control or read the values from the environment.
module.exports = {
mysql: {
host: 'localhost',
user: 'root',
password: 'root',
database:'crawl',
// Maximum number of connections; the default is 10.
connectionLimit: 10
}
};
3.注册登录界面
要求:
登录页面的代码
<link rel="stylesheet" type="text/css" href="stylesheets/index.css">
<script type="text/javascript" src="javascripts/index.js"></script>
<script>
var app = angular.module('login', []);
app.controller('loginCtrl', function ($scope, $http, $timeout) {
// 登录时,检查用户输入的账户密码是否与数据库中的一致
/**
 * Submit the login form.
 *
 * Posts the username/password bound on the scope to /users/login. When the
 * server answers with msg 'ok' the browser is sent to the news page; any
 * other message (or a transport error body) is shown through $scope.msg.
 */
$scope.check_pwd = function () {
  var credentials = JSON.stringify({
    username: $scope.username,
    password: $scope.password
  });

  function onResponse(res) {
    if (res.data.msg != 'ok') {
      $scope.msg = res.data.msg;
      return;
    }
    window.location.href = '/news.html';
  }

  function onFailure(err) {
    $scope.msg = err.data;
  }

  $http.post("/users/login", credentials).then(onResponse, onFailure);
};
登陆页路由
/**
 * POST /login — check the submitted credentials against the stored user row.
 *
 * Responds with {msg: 'ok'} on success (after storing the username in the
 * session and in a `username` cookie), otherwise with a message for the form.
 *
 * NOTE(review): the password is compared as plaintext with a plain `===`;
 * hashing would need matching changes in userDAO and the `user` table.
 */
router.post('/login', function (req, res) {
  var username = req.body.username;
  var password = req.body.password;

  userDAO.getByUsername(username, function (user) {
    // The DAO is expected to hand back an array of matching rows.
    if (user.length == 0) {
      res.json({msg: '用户不存在!请检查后输入'});
      return;
    }
    if (password !== user[0].password) {
      res.json({msg: '用户名或密码错误!请检查后输入'});
      return;
    }
    // Remember who is logged in so later requests can be attributed.
    req.session['username'] = username;
    res.cookie('username', username);
    res.json({msg: 'ok'});
  });
});
注册新用户
/**
 * POST /register — create a new account.
 *
 * Expects a JSON body with `username` and `password`. Responds with a JSON
 * object whose `msg` is shown by the registration form:
 *   - a validation message when either field is missing or empty,
 *   - '用户已存在!' when the username is taken,
 *   - '成功注册!请登录' once the DAO reports back.
 *
 * NOTE(review): the value userDAO.add passes to its callback is not inspected
 * here, so a failed insert would still be reported as success — confirm the
 * DAO's callback contract.
 */
router.post('/register', function (req, res) {
  var body = req.body || {};
  // Forward only the two expected fields so that extra client-supplied
  // properties never reach the DAO.
  var add_user = {username: body.username, password: body.password};

  // Reject empty or non-string credentials before touching the database;
  // both columns are NOT NULL in the `user` table.
  if (typeof add_user.username !== 'string' || add_user.username.length === 0 ||
      typeof add_user.password !== 'string' || add_user.password.length === 0) {
    res.json({msg: '用户名和密码不能为空!'});
    return;
  }

  // Check whether the user already exists first.
  userDAO.getByUsername(add_user.username, function (user) {
    if (user.length !== 0) {
      res.json({msg: '用户已存在!'});
      return;
    }
    userDAO.add(add_user, function (success) {
      res.json({msg: '成功注册!请登录'});
    });
  });
});
需要保存 session 信息,否则记录用户操作日志时无法得知是哪位用户进行的操作,
因此需要配置 session:
// Register the session middleware so that later handlers can read
// req.session['username'] and attribute each request to a user.
// NOTE(review): the secret is hard-coded; load it from configuration for real deployments.
app.use(session({
secret: 'sessiontest',// must be the same secret as the one given to cookieParser
resave: true,
saveUninitialized: false, // whether to save sessions that were never initialized
cookie : {
maxAge : 1000 * 60 * 60, // session lifetime in milliseconds (one hour)
},
}));
3.实现注册页面
<!-- Registration form: hidden by default (display: none). The three inputs are
     bound to add_username / add_password / confirm_password and submitted
     through doAdd(). tabindex values are unique and follow the visual order. -->
<form id="register-form" method="post" role="form" style="display: none;">
  <div class="form-group">
    <input type="text" ng-model="add_username" tabindex="1" class="form-control" placeholder="Username" value=""/>
  </div>
  <div class="form-group">
    <input type="password" ng-model="add_password" tabindex="2" class="form-control" placeholder="Password">
  </div>
  <div class="form-group">
    <input type="password" ng-model="confirm_password" tabindex="3" class="form-control" placeholder="Confirm Password">
  </div>
  <div class="form-group">
    <div class="row">
      <div class="col-sm-6 col-sm-offset-3">
        <button tabindex="4" class="form-control btn btn-register" ng-click="doAdd()">Register Now</button>
      </div>
    </div>
  </div>
</form>
注册页的js代码
/**
 * Register a new user from the registration form.
 *
 * Validates the form locally, posts {username, password} to /users/register
 * and shows the server's message in $scope.msg. After a successful
 * registration the browser is redirected to the login page two seconds later.
 */
$scope.doAdd = function () {
  // An untouched form leaves every model undefined; without this guard the two
  // undefined passwords would count as "matching" and an empty user would be posted.
  if (!$scope.add_username || !$scope.add_password) {
    $scope.msg = '用户名和密码不能为空!';
    return;
  }
  // Both password inputs must agree.
  if ($scope.add_password !== $scope.confirm_password) {
    $scope.msg = '两次密码不一致!';
    return;
  }

  var data = JSON.stringify({
    username: $scope.add_username,
    password: $scope.add_password
  });

  $http.post("/users/register", data).then(
    function (res) {
      $scope.msg = res.data.msg;
      if (res.data.msg === '成功注册!请登录') {
        // Leave the success message visible briefly before redirecting.
        $timeout(function () {
          window.location.href = 'index.html';
        }, 2000);
      }
    },
    function (err) {
      $scope.msg = err.data;
    });
};
注册页路由
/**
 * POST /register — create a new account.
 *
 * Expects a JSON body with `username` and `password`. Responds with a JSON
 * object whose `msg` is shown by the registration form:
 *   - a validation message when either field is missing or empty,
 *   - '用户已存在!' when the username is taken,
 *   - '成功注册!请登录' once the DAO reports back.
 *
 * NOTE(review): the value userDAO.add passes to its callback is not inspected
 * here, so a failed insert would still be reported as success — confirm the
 * DAO's callback contract.
 */
router.post('/register', function (req, res) {
  var body = req.body || {};
  // Forward only the two expected fields so that extra client-supplied
  // properties never reach the DAO.
  var add_user = {username: body.username, password: body.password};

  // Reject empty or non-string credentials before touching the database;
  // both columns are NOT NULL in the `user` table.
  if (typeof add_user.username !== 'string' || add_user.username.length === 0 ||
      typeof add_user.password !== 'string' || add_user.password.length === 0) {
    res.json({msg: '用户名和密码不能为空!'});
    return;
  }

  // Check whether the user already exists first.
  userDAO.getByUsername(add_user.username, function (user) {
    if (user.length !== 0) {
      res.json({msg: '用户已存在!'});
      return;
    }
    userDAO.add(add_user, function (success) {
      res.json({msg: '成功注册!请登录'});
    });
  });
});
退出登录(退出时,注意清除session)
/**
 * GET /logout — end the current session.
 *
 * Destroys the server-side session, clears the `username` cookie and tells
 * the client where to navigate next. If the session cannot be destroyed a
 * plain failure message is returned instead.
 *
 * Author's note: session-file-store does not remove the cookie in its
 * destroy(); a cookie left on the client made the server look up a session
 * file that no longer exists after logout and raise an error.
 */
router.get('/logout', function (req, res, next) {
  req.session.destroy(function (destroyErr) {
    if (!destroyErr) {
      res.clearCookie('username');
      res.json({result: '/index.html'});
      return;
    }
    res.json('退出登录失败');
  });
});
module.exports = router;
登录注册页面展示
用 Google Chrome 浏览器打开登录页面:
先在 cmd 终端中执行 net start 启动所需的服务并运行项目,
然后在浏览器中访问登录页面 http://localhost:3000/
新用户注册,注册成功后显示成功注册,跳转到登陆页面
登录成功后跳转到news页面
4.实现查询词支持布尔表达式
首先在news.html引入查询页面
<div ng-include="'search.html'"></div>
查询页面的前端页面
<!-- Boolean search form (loaded through ng-include, hence the $parent.* models).
     Each row combines two keywords with AND/OR. AngularJS ignores the
     selected="selected" attribute when ng-model is present, so the operator
     models are initialised to 'AND' with ng-init; otherwise they stay
     undefined until the user touches the drop-down. -->
<form class="form-horizontal" role="form">
  <div class="row" style="margin-bottom: 10px;">
    <label class="col-lg-2 control-label">标题关键字</label>
    <div class="col-lg-3">
      <input type="text" class="form-control" placeholder="标题关键字" ng-model="$parent.title1">
    </div>
    <div class="col-lg-1">
      <select class="form-control" autocomplete="off" ng-model="$parent.selectTitle"
              ng-init="$parent.selectTitle = $parent.selectTitle || 'AND'">
        <option selected="selected">AND</option>
        <option>OR</option>
      </select>
    </div>
    <div class="col-lg-3">
      <input type="text" class="form-control" placeholder="标题关键字" ng-model="$parent.title2">
    </div>
  </div>
  <div class="row" style="margin-bottom: 10px;">
    <label class="col-lg-2 control-label">内容关键字</label>
    <div class="col-lg-3">
      <input type="text" class="form-control" placeholder="内容关键字" ng-model="$parent.content1">
    </div>
    <div class="col-lg-1">
      <select class="form-control" autocomplete="off" ng-model="$parent.selectContent"
              ng-init="$parent.selectContent = $parent.selectContent || 'AND'">
        <option selected="selected">AND</option>
        <option>OR</option>
      </select>
    </div>
    <div class="col-lg-3">
      <input type="text" class="form-control" placeholder="内容关键字" ng-model="$parent.content2">
    </div>
  </div>
  <div class="form-group">
    <div class="col-md-offset-9">
      <button type="submit" class="btn btn-default" ng-click="search()">查询</button>
    </div>
  </div>
</form>
查询页面的js代码
查询数据 排序按发表时间排
/**
 * Run the boolean keyword search and show the result table.
 *
 * Reads title1/title2/selectTitle, content1/content2/selectContent and
 * sorttime from the scope and calls GET /news/search. The wire format is
 * unchanged: an unset value is still sent as the literal text "undefined",
 * which the server treats as "not provided".
 *
 * The response either carries rows (message 'data') that are passed to
 * initPageSort, or a URL to navigate to (sent when the session has expired).
 */
$scope.search = function () {
  // Untouched inputs are undefined, cleared inputs are ''; treat both as unset
  // so that a cleared box is not sent as an (always matching) empty keyword.
  var clean = function (value) {
    return (value === null || value === '') ? undefined : value;
  };
  var title1 = clean($scope.title1);
  var title2 = clean($scope.title2);
  var content1 = clean($scope.content1);
  var content2 = clean($scope.content2);
  // Fall back to AND when the operator drop-down was never touched.
  var selectTitle = $scope.selectTitle || 'AND';
  var selectContent = $scope.selectContent || 'AND';
  var sorttime = $scope.sorttime;

  // The user may have typed only into the second box: "___ and/or keyword".
  if (typeof title1 == "undefined" && typeof title2 != "undefined") {
    title1 = title2;
  }
  if (typeof content1 == "undefined" && typeof content2 != "undefined") {
    content1 = content2;
  }

  // Encode every value so keywords containing &, #, % or + survive the trip.
  // With no keyword at all the server returns every row.
  var enc = encodeURIComponent;
  var myurl = `/news/search?t1=${enc(title1)}&ts=${enc(selectTitle)}&t2=${enc(title2)}` +
    `&c1=${enc(content1)}&cs=${enc(selectContent)}&c2=${enc(content2)}&stime=${enc(sorttime)}`;

  $http.get(myurl).then(
    function (res) {
      if (res.data.message === 'data') {
        $scope.isisshowresult = true; // reveal the result table
        $scope.initPageSort(res.data.result);
      } else {
        window.location.href = res.data.result;
      }
    },
    function (err) {
      $scope.msg = err.data;
    });
};
查询页路由
/**
 * GET /search — run a keyword search for the logged-in user.
 *
 * Responses:
 *   - {message: 'url',  result: '/index.html'} when no user is in the session,
 *     so the client can navigate to the login page;
 *   - {message: 'data', result: rows} on success;
 *   - HTTP 500 with a plain message when the DAO reports an error (previously
 *     the error was ignored and an undefined result was returned as 'data').
 */
router.get('/search', function (request, response) {
  console.log(request.session['username']);
  if (request.session['username'] === undefined) {
    response.json({message: 'url', result: '/index.html'});
    return;
  }

  newsDAO.search(request.query, function (err, result, fields) {
    if (err) {
      console.error(err);
      response.status(500).json('查询失败,请稍后重试');
      return;
    }
    response.json({message: 'data', result: result});
  });
});
使用newsDAO.search函数
search :function(searchparam, callback) {
// 组合查询条件
var sql = 'select * from fetches ';
if(searchparam["t2"]!="undefined"){
sql +=(`where title like '%${searchparam["t1"]}%' ${searchparam['ts']} title like '%${searchparam["t2"]}%' `);
}else if(searchparam["t1"]!="undefined"){
sql +=(`where title like '%${searchparam["t1"]}%' `);
};
if(searchparam["t1"]=="undefined"&&searchparam["t2"]=="undefined"&&searchparam["c1"]!="undefined"){
sql+='where ';
}else if(searchparam["t1"]!="undefined"&&searchparam["c1"]!="undefined"){
sql+='and ';
}
if(searchparam["c2"]!="undefined"){
sql +=(`content like '%${searchparam["c1"]}%' ${searchparam['cs']} content like '%${searchparam["c2"]}%' `);
}else if(searchparam["c1"]!="undefined"){
sql +=(`content like '%${searchparam["c1"]}%' `);
}
if(searchparam['stime']!="undefined"){
if(searchparam['stime']=="1"){
sql+='ORDER BY publish_date ASC ';
}else {
sql+='ORDER BY publish_date DESC ';
}
}
sql+=';';
pool.getConnection(function(err, conn) {
if (err) {
callback(err, null, null);
} else {
conn.query(sql, function(qerr, vals, fields) {
conn.release(); //释放连接
callback(qerr, vals, fields); //事件驱动回调
});
}
});
},
显示查询结果的设置
<!-- Search results: shown only once isisshowresult is set by the search handler. -->
<div ng-show="isisshowresult">
<table class="table table-striped">
<thead>
<tr>
<td>序号</td>
<td>标题</td>
<td>作者</td>
<!-- Content column disabled: <td>内容</td> -->
<td>关键词</td>
<td>链接</td>
<td>发布时间</td>
</tr>
</thead>
<tbody>
<!-- One row per entry of `items`; the row number is `index` plus the
     entry's position in `items`. -->
<tr ng-repeat="(key, item) in items">
<td>{{index+key}}</td>
<td>{{item.title}}</td>
<td>{{item.author}}</td>
<!-- Content cell disabled: <td>{{item.content}}</td> -->
<td>{{item.keywords}}</td>
<td>{{item.url}}</td>
<td>{{item.publish_date}}</td>
</tr>
</tbody>
</table>
5.布尔查询结果分页设置
使用了AngularJs分页的实现:不需要后台配合,前台一次性拿完所有数据,然后进行分页展示;这种方式只是为了界面上对用户更友好,并没有实际提升页面的效率(数据量过大时页面加载压力比较大)
查询结果展示,以期中作业爬的澎湃新闻网数据为来源
6.用Echarts实现4个数据分析图表
用词云展示数据
/**
 * Draw a word cloud of all news content into the #main1 element.
 *
 * GET /news/wordcloud is expected to return {result: {word: count, ...}};
 * a response with message 'url' means the session expired and carries the
 * page to navigate to. Word sizes use the square root of the count so that
 * very frequent words do not dwarf everything else.
 *
 * The chart is drawn once the mask image has loaded; if the image cannot be
 * loaded the cloud is drawn without a mask instead of staying blank. HTTP
 * errors are shown through $scope.msg, like the other chart handlers.
 */
$scope.wordcloud = function () {
  $scope.isShow = false;
  $http.get("/news/wordcloud").then(
    function (res) {
      if (res.data.message == 'url') {
        window.location.href = res.data.result;
        return;
      }

      var chart = echarts.init(document.getElementById('main1'));
      var counts = res.data.result || {};
      var data = Object.keys(counts).map(function (name) {
        return {name: name, value: Math.sqrt(counts[name])};
      });

      var maskImage = new Image();
      var option = {
        title: {
          text: '所有新闻内容 的词云展示'
        },
        series: [{
          type: 'wordCloud',
          sizeRange: [12, 60],
          rotationRange: [-90, 90],
          rotationStep: 45,
          gridSize: 2,
          shape: 'circle',
          maskImage: maskImage,
          drawOutOfBound: false,
          textStyle: {
            normal: {
              fontFamily: 'sans-serif',
              fontWeight: 'bold',
              // Random dark colour per word (each channel in 0..160).
              color: function () {
                return 'rgb(' + [
                  Math.round(Math.random() * 160),
                  Math.round(Math.random() * 160),
                  Math.round(Math.random() * 160)
                ].join(',') + ')';
              }
            },
            emphasis: {
              shadowBlur: 10,
              shadowColor: '#333'
            }
          },
          data: data
        }]
      };

      var render = function () {
        chart.clear();
        chart.setOption(option);
      };

      // Attach the handlers before assigning src so neither event can be missed.
      maskImage.onload = render;
      maskImage.onerror = function () {
        delete option.series[0].maskImage;
        render();
      };
      maskImage.src = './images/go.png';

      window.onresize = function () {
        chart.resize();
      };
    },
    function (err) {
      $scope.msg = err.data;
    });
};
});
展示结果
用柱状图展示结果
/**
 * Draw a bar chart of the number of published news items per day in #main1.
 *
 * GET /news/histogram is expected to return {result: [{x: <date>, y: <count>}]}
 * where x looks like "2020-04-28T16:00:00.000Z"; only the month-day part is
 * used as the axis label. A response with message 'url' means the session
 * expired and carries the page to navigate to.
 *
 * NOTE(review): the label is cut from the raw string, so for a UTC timestamp
 * such as the example above the day shown is the UTC day, which may differ
 * from the local publish date — confirm how the server serialises the date.
 */
$scope.histogram = function () {
  $scope.isShow = false;
  $http.get("/news/histogram").then(
    function (res) {
      if (res.data.message == 'url') {
        window.location.href = res.data.result;
        return;
      }

      var SERIES_NAME = '新闻发布数';
      var pattern = /\d{4}-(\d{2}-\d{2})/;
      var xdata = [];
      var ydata = [];
      res.data.result.forEach(function (element) {
        var match = pattern.exec(element["x"]);
        // Skip rows without a usable date instead of throwing on a null match.
        if (!match) {
          return;
        }
        xdata.push(match[1]);
        ydata.push(element["y"]);
      });

      var myChart = echarts.init(document.getElementById('main1'));
      var option = {
        title: {
          text: '新闻发布数 随时间变化'
        },
        tooltip: {},
        // The legend entry must equal the series name, otherwise ECharts
        // does not show it.
        legend: {
          data: [SERIES_NAME]
        },
        xAxis: {
          data: xdata
        },
        yAxis: {},
        series: [{
          name: SERIES_NAME,
          type: 'bar',
          data: ydata
        }]
      };
      myChart.setOption(option);
    },
    function (err) {
      $scope.msg = err.data;
    });
};
结果
用饼图展示结果
/**
 * Draw the ring chart in #main1 and cycle its highlight once per second.
 *
 * GET /news/pie is called first; a response with message 'url' means the
 * session expired and carries the page to navigate to.
 *
 * NOTE(review): the chart is drawn from the fixed figures below. The rows
 * returned by /news/pie were previously mapped into a local array that was
 * never used; that dead code was removed — wire the response into
 * series[0].data if the chart is meant to be data-driven.
 *
 * The highlight timer is kept in $scope.pieTimer and cleared before a new one
 * starts, so calling pie() repeatedly no longer stacks intervals.
 */
$scope.pie = function () {
  $scope.isShow = false;
  $http.get("/news/pie").then(
    function (res) {
      if (res.data.message == 'url') {
        window.location.href = res.data.result;
        return;
      }

      var myChart = echarts.init(document.getElementById('main1'));
      var option = {
        title: {
          text: '中国新冠疫情控制情况',
          y: 'top',
          itemGap: 30,
          backgroundColor: '#EEE',
          textStyle: {
            fontSize: 26,
            fontWeight: 'bolder',
            color: '#FA5882'
          },
          subtextStyle: {
            fontSize: 18,
            color: '#FFFFFF'
          }
        },
        legend: {
          y: 'center',
          itemWidth: 20,
          itemHeight: 14,
          textStyle: {
            color: '#666'
          },
          itemGap: 30,
          backgroundColor: '#eee',
          // '广东' previously used a look-alike Kangxi radical character.
          data: ['北京', '上海', '广东', '深圳', '四川', '重庆']
        },
        series: [
          {
            name: '去向',
            type: 'pie',
            radius: ['25%', '50%'],
            center: ['18%', '40%'],
            data: [
              {value: 13, name: '北京'},
              {value: 54, name: '上海'},
              {value: 158, name: '广东'},
              {value: 44, name: '深圳'},
              {value: 45, name: '四川'},
              {value: 1, name: '重庆'}
            ],
            itemStyle: {
              emphasis: {
                shadowBlur: 10,
                shadowOffsetX: 0,
                shadowColor: 'rgba(30, 144, 255,0.5)'
              }
            },
            labelLine: {
              normal: {
                show: false
              }
            },
            label: {
              normal: {
                position: 'inner',
                formatter: '{c}'
              }
            }
          }
        ],
        tooltip: {
          trigger: 'item',
          showDelay: 20,
          hideDelay: 20,
          backgroundColor: 'rgba(255,0,0,0.7)',
          textStyle: {
            fontSize: '14px',
            color: '#000'
          },
          formatter: '{a} <br/>{b} : {c}个 ({d}%)'
        },
        color: ['#0080FF', '#01DFD7', '#D0A9F5', '#E6E6E6', '#F6D8CE', '#F78181']
      };

      myChart.setOption(option, true);

      // Stop the timer left over from a previous call before starting a new one.
      if ($scope.pieTimer) {
        clearInterval($scope.pieTimer);
      }
      var currentIndex = -1;
      var dataLen = option.series[0].data.length;
      $scope.pieTimer = setInterval(function () {
        // Un-highlight the previous slice, then highlight the next one and
        // show its tooltip.
        myChart.dispatchAction({
          type: 'downplay',
          seriesIndex: 0,
          dataIndex: currentIndex
        });
        currentIndex = (currentIndex + 1) % dataLen;
        myChart.dispatchAction({
          type: 'highlight',
          seriesIndex: 0,
          dataIndex: currentIndex
        });
        myChart.dispatchAction({
          type: 'showTip',
          seriesIndex: 0,
          dataIndex: currentIndex
        });
      }, 1000);
    },
    function (err) {
      $scope.msg = err.data;
    });
};
结果
用折线图展示结果
/**
 * Draw a line chart in #main1 of how often "疫情" appears in the news over time.
 *
 * GET /news/line is expected to return {result: {<label>: <count>, ...}}; the
 * keys become the x-axis categories and the values the plotted points. A
 * response with message 'url' means the session expired and carries the page
 * to navigate to. HTTP errors are shown through $scope.msg instead of being
 * dropped silently.
 */
$scope.line = function () {
  $scope.isShow = false;
  $http.get("/news/line").then(
    function (res) {
      if (res.data.message == 'url') {
        window.location.href = res.data.result;
        return;
      }

      // An absent result yields an empty chart rather than an exception.
      var counts = res.data.result || {};
      var myChart = echarts.init(document.getElementById("main1"));
      // Declared locally: it used to be assigned without a declaration,
      // which creates an implicit global (an error in strict mode).
      var option = {
        title: {
          text: '"疫情"该词在新闻中的出现次数随时间变化图'
        },
        xAxis: {
          type: 'category',
          data: Object.keys(counts)
        },
        yAxis: {
          type: 'value'
        },
        series: [{
          data: Object.values(counts),
          type: 'line',
          itemStyle: {normal: {label: {show: true}}}
        }]
      };
      myChart.setOption(option, true);
    },
    function (err) {
      $scope.msg = err.data;
    });
};
实例中折线图不知道为什么无法显示……
7.用户注册、登录、查询等操作记入数据库中的日志
直接在app.js中引入morgan包,借助中间件保存的信息
let method = '';
app.use(logger(function (tokens, req, res) {
console.log('打印的日志信息:');
var request_time = new Date();
var request_method = tokens.method(req, res);
var request_url = tokens.url(req, res);
var status = tokens.status(req, res);
var remote_addr = tokens['remote-addr'](req, res);
if(req.session){
var username = req.session['username']||'notlogin';
}else {
var username = 'notlogin';
}
MySQL中的表结果展示
1.保存的用户信息
2.保存的操作日志
END
这次 web 作业相比期中作业难了很多,做作业的过程中真的觉得自己还有非常多的地方需要学习,真的特别感谢老师、助教和同学的帮助。