基本要求:
1. 用户可注册登录网站,非注册用户不可登录查看数据
2. 用户注册、登录、查询等操作记入数据库中的日志
3. 爬虫数据查询结果列表支持分页和排序
4. 用Echarts或者D3实现3个以上的数据分析图表展示在网站中
扩展要求:
实现对爬虫数据中文分词的查询
实现查询结果按照主题词打分的排序
所需模块:angular.js express mysql
1.代码分析
一.数据库
(1)先在数据库crawl中创建两个表user和user_action
user用于存储注册用户的用户名、密码以及注册时间
user_action用于存储用户注册、登录、查询等操作
-- Registered accounts: one row per user, with a unique username and the
-- time the row was created.
-- NOTE(review): the login route compares `password` directly with the
-- submitted value, so this column appears to hold the password as typed.
CREATE TABLE `crawl`.`user` (
  `id`           INT UNSIGNED NOT NULL AUTO_INCREMENT,
  `username`     VARCHAR(45)  NOT NULL,
  `password`     VARCHAR(45)  NOT NULL,
  `registertime` DATETIME     DEFAULT CURRENT_TIMESTAMP,
  PRIMARY KEY (`id`),
  UNIQUE KEY `username_UNIQUE` (`username`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8;
-- Audit log of user actions: logins and queries (including the requested URL).
-- MySQL only treats "--" as a comment when it is followed by whitespace,
-- so the space after the dashes is required for this script to run.
CREATE TABLE `crawl`.`user_action` (
  `id`             INT UNSIGNED NOT NULL AUTO_INCREMENT,
  `username`       VARCHAR(45)  NOT NULL,
  -- Stored as text rather than DATETIME.
  `request_time`   VARCHAR(45)  NOT NULL,
  `request_method` VARCHAR(20)  NOT NULL,
  `request_url`    VARCHAR(300) NOT NULL,
  `status`         int(4),
  `remote_addr`    VARCHAR(100) NOT NULL,
  PRIMARY KEY (`id`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8;
(2)创建连接数据库的配置文件mysqlConf.js
// Connection settings for the `crawl` MySQL database, exported under the `mysql` key.
const mysql = {
  host: 'localhost',
  user: 'root',
  password: 'root',
  database: 'crawl',
  // Maximum number of connections (10 is also the default).
  connectionLimit: 10
};

module.exports = { mysql: mysql };
二.登录操作的实现
需包含功能:提示用户不存在 输入用户名或密码错误
1.前端 index.html
登录页面
这段代码为登录页面建立了用户输入用户名与密码的位置以及登录提交的按键
<link rel="stylesheet" type="text/css" href="stylesheets/index.css">
引入自己喜欢的css样式,效果如下
angular代码
var app = angular.module('login', []);
app.controller('loginCtrl', function ($scope, $http, $timeout) {
// Submit the login form and react to the server's answer.
// POSTs {username, password} as JSON to /users/login. When the reply's msg is
// 'ok' the browser navigates to /news.html; any other msg is shown to the user
// through $scope.msg. A failed request shows the error payload instead.
$scope.check_pwd = function () {
  const payload = JSON.stringify({
    username: $scope.username,
    password: $scope.password
  });
  $http.post("/users/login", payload).then(
    function (res) {
      if (res.data.msg === 'ok') {
        window.location.href = '/news.html';
      } else {
        $scope.msg = res.data.msg;
      }
    },
    function (err) {
      $scope.msg = err.data;
    }
  );
};
定义check_pwd函数:获取用户输入的用户名与密码并提交到后端/users/login进行校验。若用户已注册且密码正确,后端返回ok,前端修改window对象的location.href,将页面跳转到news.html;若用户不存在或密码错误,则在页面上显示后端返回的提示信息
路由代码 routes/users.js
var userDAO = require('../dao/userDAO');
/**
 * POST /login — check the submitted credentials against the stored user row.
 *
 * Replies with {msg: 'ok'} after storing the username in the session and in a
 * `username` cookie, or with {msg: <explanation>} when the user is unknown or
 * the password differs.
 */
router.post('/login', function (req, res) {
  var username = req.body.username;
  var password = req.body.password;

  userDAO.getByUsername(username, function (rows) {
    // No row for this username: the account does not exist.
    if (rows.length == 0) {
      res.json({msg:'用户不存在!请检查后输入'});
      return;
    }

    // The stored password is compared directly with the submitted one.
    if (password !== rows[0].password) {
      res.json({msg:'用户名或密码错误!请检查后输入'});
      return;
    }

    req.session['username'] = username;
    res.cookie('username', username);
    res.json({msg: 'ok'});
  });
});
根据用户输入的用户名在数据库中查询对应的用户记录,如果查询结果为空(即该用户名尚未注册),则显示“用户不存在!请检查后输入”;如果用户名存在于数据库,则将数据库内对应的密码与用户输入的密码进行比较,如果一致则把用户名写入session和cookie并返回ok,不一致则显示警告“用户名或密码错误!请检查后输入”
三.注册操作的实现
1.前端页面 index.html
建立用户输入用户名、密码以及二次确认密码的表格与用户注册提交的按键,由于注册页的代码与登录页的代码在同一html文件,之前已经引入过css文件,因此不用再引入新的css文件
angular代码
// Register a new account from the sign-up form.
// Rejects the form locally when the two password fields differ; otherwise
// POSTs {username, password} to /users/register, shows the server's message,
// and returns to index.html two seconds after a successful registration.
$scope.doAdd = function () {
  if ($scope.add_password !== $scope.confirm_password) {
    $scope.msg = '两次密码不一致!';
    return;
  }

  const payload = JSON.stringify({
    username: $scope.add_username,
    password: $scope.add_password
  });

  $http.post("/users/register", payload).then(
    function (res) {
      const registered = res.data.msg == '成功注册!请登录';
      // The server's message is displayed in both outcomes.
      $scope.msg = res.data.msg;
      if (registered) {
        $timeout(function () {
          window.location.href = 'index.html';
        }, 2000);
      }
    },
    function (err) {
      $scope.msg = err.data;
    }
  );
};
});
定义doAdd函数,获取用户输入的用户名以及两次输入的密码,先对两次密码输入进行检查,若两次密码输入不同,则返回“两次密码不一致”,若两次密码一致,将用户输入的用户名和密码存入数据库并返回“成功注册!请登录”,window对象的location.href属性改变,页面跳转到index.html
路由代码 routes/users.js
/**
 * POST /register — create an account unless the username is already taken.
 *
 * Replies with {msg: '用户已存在!'} when a row for the username exists,
 * otherwise hands the request body to userDAO.add and replies with the
 * success message once its callback fires.
 */
router.post('/register', function (req, res) {
  var newUser = req.body;

  userDAO.getByUsername(newUser.username, function (existing) {
    if (existing.length != 0) {
      res.json({msg: '用户已存在!'});
      return;
    }

    // NOTE(review): the callback argument is not inspected, so success is
    // reported whatever userDAO.add passes back — confirm that is intended.
    userDAO.add(newUser, function (outcome) {
      res.json({msg: '成功注册!请登录'});
    });
  });
});
// GET /logout — destroy the session, clear the `username` cookie and tell the
// client which page to go to. If the session cannot be destroyed, a failure
// message is returned and the cookie is left untouched.
router.get('/logout', function (req, res, next) {
  req.session.destroy(function (err) {
    if (!err) {
      res.clearCookie('username');
      res.json({result:'/index.html'});
      return;
    }
    res.json('退出登录失败');
  });
});
注册路由先检查用户名是否已存在:若已存在则返回“用户已存在!”,无需再注册;若不存在,则将新用户写入数据库并提示注册成功。退出登录路由则销毁session、清除username cookie,并返回登录页地址/index.html
四.用户注册登录及查询操作存入数据库
这步操作需要在app.js中实现
// Session middleware configuration.
app.use(session({
  // Should be the same secret that is given to cookieParser.
  secret: 'sessiontest',
  resave: true,
  // Whether sessions that were never initialised are saved.
  saveUninitialized: false,
  cookie: {
    // Session lifetime in milliseconds (one hour).
    maxAge: 60 * 60 * 1000
  }
}));
设置session
// Custom format callback passed to `logger` (the morgan module required in app.js).
// It gathers the details of one request; the field names mirror the columns of the
// user_action table — presumably they are inserted there further down (not shown).
// This excerpt ends before the callback body is closed.
app.use(logger(function (tokens, req, res) {
console.log('打印的日志信息:');
// Time at which this log entry is built.
var request_time = new Date();
// HTTP method, URL, response status and client address, read through morgan's tokens.
var request_method = tokens.method(req, res);
var request_url = tokens.url(req, res);
var status = tokens.status(req, res);
var remote_addr = tokens['remote-addr'](req, res);
// Use the username stored in the session when there is one; otherwise record 'notlogin'.
if(req.session){
var username = req.session['username']||'notlogin';
}else {
var username = 'notlogin';
}
使用express的morgan将用户操作存入数据库,app.js中已引入了中间件var logger = require("morgan"),保存效果如下
这是用户注册的用户名和密码以及注册时间
这是用户各种操作的存储结果
之后进入新闻查询页面
news.html
<!-- Fixed top navigation bar: search view, chart menu and account menu. -->
<nav class="navbar navbar-inverse navbar-fixed-top">
  <div class="container">
    <div class="navbar-header">
      <a class="navbar-brand" href="#">News</a>
    </div>
    <div id="navbar" class="navbar-collapse collapse">
      <ul class="nav navbar-nav">
        <li><a ng-click="showSearch()">检索</a></li>
        <!-- Chart menu: each entry calls the matching controller function. -->
        <li class="dropdown">
          <a href="#" class="dropdown-toggle" data-toggle="dropdown">图片<span class="caret"></span></a>
          <ul class="dropdown-menu">
            <li><a ng-click="histogram()">柱状图</a></li>
            <li><a ng-click="pie()">饼状图</a></li>
            <li><a ng-click="line()">折线图</a></li>
            <li><a ng-click="wordcloud()">词云</a></li>
          </ul>
        </li>
        <!-- Account menu: marked as a dropdown like the chart menu above. -->
        <li class="dropdown">
          <a href="#" class="dropdown-toggle" data-toggle="dropdown">账号管理<span class="caret"></span></a>
          <ul class="dropdown-menu">
            <li class="dropdown-header">账号</li>
            <li><a ng-click="logout()">退出登录</a></li>
          </ul>
        </li>
      </ul>
    </div>
  </div>
</nav>
<!-- Every chart is drawn into #main1; it is hidden while isShow is true. -->
<span id="main1" ng-hide="isShow" style="width: 1000px;height:600px;position:fixed; top:70px;left:80px"></span>
<!-- The search view takes over while isShow is true. -->
<div ng-show="isShow" style="width: 1300px;position:relative; top:70px;left: 80px">
  <!-- Search page -->
  <div ng-include="'search.html'"></div>
</div>
查询页面 search.html
<!--
  Search form: two title keywords and two content keywords, each pair joined by
  AND/OR. Every field binds to the parent scope because ng-include creates a
  child scope.
  A `selected` attribute does not initialise a select that is bound with
  ng-model, so each operator is defaulted to AND through ng-init instead
  (a value already set on the parent scope is kept).
-->
<form class="form-horizontal" role="form">
  <div class="row" style="margin-bottom: 10px;">
    <label class="col-lg-2 control-label">标题关键字</label>
    <div class="col-lg-3">
      <input type="text" class="form-control" placeholder="标题关键字" ng-model="$parent.title1">
    </div>
    <div class="col-lg-1">
      <select class="form-control" autocomplete="off" ng-model="$parent.selectTitle"
              ng-init="$parent.selectTitle = $parent.selectTitle || 'AND'">
        <option>AND</option>
        <option>OR</option>
      </select>
    </div>
    <div class="col-lg-3">
      <input type="text" class="form-control" placeholder="标题关键字" ng-model="$parent.title2">
    </div>
  </div>
  <div class="row" style="margin-bottom: 10px;">
    <label class="col-lg-2 control-label">内容关键字</label>
    <div class="col-lg-3">
      <input type="text" class="form-control" placeholder="内容关键字" ng-model="$parent.content1">
    </div>
    <div class="col-lg-1">
      <select class="form-control" autocomplete="off" ng-model="$parent.selectContent"
              ng-init="$parent.selectContent = $parent.selectContent || 'AND'">
        <option>AND</option>
        <option>OR</option>
      </select>
    </div>
    <div class="col-lg-3">
      <input type="text" class="form-control" placeholder="内容关键字" ng-model="$parent.content2">
    </div>
  </div>
  <div class="form-group">
    <div class="col-md-offset-9">
      <button type="submit" class="btn btn-default" ng-click="search()">查询</button>
    </div>
  </div>
</form>
效果如下
五.爬虫查询结果实现分页和排序
// Pagination setup.
// Stores the full result list on the scope, computes how many pages it spans
// at five rows per page, lists at most the first five page numbers, and shows
// the first page of rows.
$scope.initPageSort = function (item) {
  $scope.pageSize = 5; // rows per page; may be changed freely
  $scope.selPage = 1;
  $scope.data = item;
  $scope.pages = Math.ceil(item.length / $scope.pageSize); // total number of pages
  $scope.pageList = []; // page numbers offered in the pager (five at most)
  $scope.index = 1;

  const shown = Math.min($scope.pages, 5);
  const numbers = [];
  for (let n = 1; n <= shown; n += 1) {
    numbers.push(n);
  }
  $scope.pageList = numbers;

  // Rows displayed in the table for the first page.
  $scope.items = item.slice(0, $scope.pageSize);
};
可以通过修改pageSize的值来决定每页显示的数据量,pageList保存页码栏中要显示的页码,初始化时最多列出前5页
// Switch the table to the given page.
// Ignores page numbers outside 1..$scope.pages. Otherwise it rebuilds the
// pager's page-number list, updates the selected page and the running row
// index, and slices the rows for that page out of $scope.data.
//
// The pager shows a window of up to five consecutive pages, centred on the
// selected page where possible and clamped to 1..$scope.pages, so page 1 stays
// listed when page 2 is selected and the window does not shrink near the last
// page.
$scope.selectPage = function (page) {
  if (page < 1 || page > $scope.pages) return;

  const windowSize = 5;
  const last = Math.min($scope.pages, Math.max(page + 2, windowSize));
  const first = Math.max(1, last - windowSize + 1);
  const pageList = [];
  for (let i = first; i <= last; i += 1) {
    pageList.push(i);
  }

  // 1-based number of the first row on this page.
  $scope.index = (page - 1) * $scope.pageSize + 1;
  $scope.pageList = pageList;
  $scope.selPage = page;
  // Rows displayed in the table for the selected page.
  $scope.items = $scope.data.slice($scope.pageSize * (page - 1), page * $scope.pageSize);
  console.log("选择的页:" + page);
};
selectPage函数用于切换到指定页:先校验页码是否越界,再重新计算页码栏中显示的页码(最多5个)和当前页要显示的数据,最后在控制台打印当前选中的页码
分页效果如下
六.用Echarts实现数据分析图
1.柱形图
前端代码
// Draw the "news published per day" bar chart in #main1.
// Requests /news/histogram. A reply with message 'url' is a redirect target
// (sent when no user is logged in); any other reply carries rows {x, y} where
// x is the publish date and y the number of items.
//
// The series name equals the legend entry: ECharts only shows legend items
// whose name matches a series name.
$scope.histogram = function () {
  $scope.isShow = false;
  $http.get("/news/histogram").then(
    function (res) {
      if (res.data.message === 'url') {
        window.location.href = res.data.result;
        return;
      }

      const seriesName = '新闻发布数';
      // x looks like "2020-04-28T16:00:00.000Z"; only the month-day part is kept.
      // NOTE(review): that is the date as written in the UTC timestamp — confirm
      // it is the intended calendar day for the server's time zone.
      const monthDay = /\d{4}-(\d{2}-\d{2})/;
      const xdata = [];
      const ydata = [];
      res.data.result.forEach(function (row) {
        const match = monthDay.exec(row["x"]);
        // Fall back to the raw value instead of throwing on an unexpected format.
        xdata.push(match ? match[1] : String(row["x"]));
        ydata.push(row["y"]);
      });

      const myChart = echarts.init(document.getElementById('main1'));
      // Chart configuration and data.
      const option = {
        title: {
          text: '新闻发布数 随时间变化'
        },
        tooltip: {},
        legend: {
          data: [seriesName]
        },
        xAxis: {
          data: xdata
        },
        yAxis: {},
        series: [{
          name: seriesName,
          type: 'bar',
          data: ydata
        }]
      };
      // Render the chart with the configuration above.
      myChart.setOption(option);
    },
    function (err) {
      $scope.msg = err.data;
    }
  );
};
后端代码 routes/news.js
/**
 * GET /histogram — number of news items per publish date, for the bar chart.
 *
 * Without a username in the session it replies
 * {message: 'url', result: '/index.html'} so the client can redirect.
 * Otherwise it replies {message: 'data', result: rows} with caching disabled.
 * A failed query is answered with HTTP 500 and a short text message rather
 * than a "data" reply that has no rows.
 */
router.get('/histogram', function (request, response) {
  console.log(request.session['username']);
  if (request.session['username'] === undefined) {
    response.json({message:'url',result:'/index.html'});
    return;
  }

  // SQL text; this query takes no parameters.
  const fetchSql = "select publish_date as x,count(publish_date) as y from fetches2 group by publish_date order by publish_date;";
  newsDAO.query_noparam(fetchSql, function (err, result, fields) {
    if (err) {
      console.error(err);
      response.status(500).send('数据库查询失败');
      return;
    }
    response.writeHead(200, {
      "Content-Type": "application/json",
      "Cache-Control": "no-cache, no-store, must-revalidate",
      "Pragma": "no-cache",
      "Expires": 0
    });
    response.write(JSON.stringify({message:'data',result:result}));
    response.end();
  });
});
2.饼图
前端代码
// Request per-author news counts from /news/pie and build an ECharts pie option for #main1.
// This excerpt stops after the option object: the visible lines never hand it to the chart
// and never close the callbacks.
$scope.pie = function () {
// Hide the search view so the chart container is shown.
$scope.isShow = false;
$http.get("/news/pie").then(
function (res) {
// A reply with message 'url' carries a redirect target instead of data.
if(res.data.message=='url'){
window.location.href=res.data.result;
}else {
let newdata = [];
var pattern = /责任编辑:(.+)/;// captures the name that follows the prefix
res.data.result.forEach(function (element) {
// x looks like "责任编辑:李夏君"; only the name is kept.
// NOTE(review): exec() returns null when x lacks that prefix, and [1] would then throw.
newdata.push({name: pattern.exec(element["x"])[1], value: element["y"]});
});
var myChart = echarts.init(document.getElementById('main1'));
// NOTE(review): `app` is not used anywhere in the visible lines.
var app = {};
// Assigns the hoisted `option` variable that is declared just below.
option = null;
// Chart configuration and data.
var option = {
title: {
text: '作者发布新闻数量',
x: 'center'
},
tooltip: {
trigger: 'item',
formatter: "{a} <br/>{b} : {c} ({d}%)"
},
legend: {
orient: 'vertical',
left: 'left',
// legend.data is not set here; a sample list of entries was left commented out.
},
series: [
{
// NOTE(review): this series name is about visit sources while the chart shows news per author — confirm.
name: '访问来源',
type: 'pie',
radius: '55%',
center: ['50%', '60%'],
data: newdata,
itemStyle: {
emphasis: {
shadowBlur: 10,
shadowOffsetX: 0,
shadowColor: 'rgba(0, 0, 0, 0.5)'
}
}
}
]
};
后端代码
/**
 * GET /pie — number of news items per author, for the pie chart.
 *
 * Without a username in the session it replies
 * {message: 'url', result: '/index.html'} so the client can redirect.
 * Otherwise it replies {message: 'data', result: rows} with caching disabled.
 * A failed query is answered with HTTP 500 and a short text message rather
 * than a "data" reply that has no rows.
 */
router.get('/pie', function (request, response) {
  console.log(request.session['username']);
  if (request.session['username'] === undefined) {
    response.json({message:'url',result:'/index.html'});
    return;
  }

  // SQL text; this query takes no parameters.
  const fetchSql = "select author as x,count(author) as y from fetches2 group by author;";
  newsDAO.query_noparam(fetchSql, function (err, result, fields) {
    if (err) {
      console.error(err);
      response.status(500).send('数据库查询失败');
      return;
    }
    response.writeHead(200, {
      "Content-Type": "application/json",
      "Cache-Control": "no-cache, no-store, must-revalidate",
      "Pragma": "no-cache",
      "Expires": 0
    });
    response.write(JSON.stringify({message:'data',result:result}));
    response.end();
  });
});
七.项目总结
花费了大量时间总算是完成了期末作业,当初老师布置任务说二选一的时候,我果断选择了与期中作业爬虫挂钩的选项一作业,然而我还是too young too naive,真正上手做项目的时候才发现这也不简单,在这一年的学习时间里,我第一次接触了JavaScript,学习了网页的相关知识,还是很有收获的。在这里附上我的css文件
/* Page background: full-height blue-to-purple gradient, content pushed down from the top. */
body {
  min-height: 100vh;
  padding-top: 200px;
  background-image: linear-gradient(120deg, #3498db, #8e44ad);
}
/* Login/registration panel: border and drop shadow. */
.panel-login {
  border-color: #ccc;
  -webkit-box-shadow: 0 2px 3px 0 rgba(0, 0, 0, 0.2);
  -moz-box-shadow: 0 2px 3px 0 rgba(0, 0, 0, 0.2);
  box-shadow: 0 2px 3px 0 rgba(0, 0, 0, 0.2);
}

/* Panel heading: centred text on a white background. */
.panel-login > .panel-heading {
  color: #00415d;
  background-color: #fff;
  border-color: #fff;
  text-align: center;
}

/* Links inside the heading. */
.panel-login > .panel-heading a {
  text-decoration: none;
  color: #666;
  font-weight: bold;
  font-size: 15px;
  -webkit-transition: all 0.1s linear;
  -moz-transition: all 0.1s linear;
  transition: all 0.1s linear;
}

/* The active heading link is larger and green. */
.panel-login > .panel-heading a.active {
  color: #029f5b;
  font-size: 18px;
}
/* Divider under the heading: a 1px line that fades out towards both ends. */
.panel-login>.panel-heading hr{
  margin-top: 10px;
  margin-bottom: 0px;
  clear: both;
  border: 0;
  height: 1px;
  /* Legacy prefixed forms; "left" is the starting side. */
  background-image: -webkit-linear-gradient(left,rgba(0, 0, 0, 0),rgba(0, 0, 0, 0.15),rgba(0, 0, 0, 0));
  background-image: -moz-linear-gradient(left,rgba(0,0,0,0),rgba(0,0,0,0.15),rgba(0,0,0,0));
  background-image: -ms-linear-gradient(left,rgba(0,0,0,0),rgba(0,0,0,0.15),rgba(0,0,0,0));
  background-image: -o-linear-gradient(left,rgba(0,0,0,0),rgba(0,0,0,0.15),rgba(0,0,0,0));
  /* Standard syntax, declared last so it wins where supported. */
  background-image: linear-gradient(to right,rgba(0,0,0,0),rgba(0,0,0,0.15),rgba(0,0,0,0));
}
/* Text, e-mail and password fields inside the panel. */
.panel-login input[type="text"],
.panel-login input[type="email"],
.panel-login input[type="password"] {
  height: 45px;
  border: 1px solid #ddd;
  font-size: 16px;
  -webkit-transition: all 0.1s linear;
  -moz-transition: all 0.1s linear;
  transition: all 0.1s linear;
}

/* Hovered or focused inputs: no outline or shadow, slightly darker border. */
.panel-login input:hover,
.panel-login input:focus {
  outline: none;
  -webkit-box-shadow: none;
  -moz-box-shadow: none;
  box-shadow: none;
  border-color: #ccc;
}
/* The base .btn-login rule is disabled; only its hover/focus state below is active.
.btn-login {
  background-color: #59B2E0;
  outline: none;
  color: #fff;
  font-size: 14px;
  height: auto;
  font-weight: normal;
  padding: 14px 0;
  text-transform: uppercase;
  border-color: #59B2E6;
} */

/* Login button while hovered or focused. */
.btn-login:hover,
.btn-login:focus {
  color: #fff;
  background-color: #53A3CD;
  border-color: #53A3CD;
}

/* "Forgot password" link. */
.forgot-password {
  text-decoration: underline;
  color: #888;
}

.forgot-password:hover,
.forgot-password:focus {
  text-decoration: underline;
  color: #666;
}

/* Registration button: green, upper-case label. */
.btn-register {
  background-color: #1CB94E;
  outline: none;
  color: #fff;
  font-size: 14px;
  height: auto;
  font-weight: normal;
  padding: 14px 0;
  text-transform: uppercase;
  border-color: #1CB94A;
}

.btn-register:hover,
.btn-register:focus {
  color: #fff;
  background-color: #1CA347;
  border-color: #1CA347;
}
/* Full-width gradient button; the background is twice the element's width. */
.UMR {
  display: block;
  width: 100%;
  height: 50px;
  border: none;
  background: linear-gradient(120deg, #3498db, #8e44ad, #3498db);
  background-size: 200%;
  color: #fff;
  outline: none;
  cursor: pointer;
  transition: .5s;
}

/* Fixed size for .container elements. */
.container {
  height: 300px;
  width: 800px;
}
最后,祝愿所有学这门课的同学作业顺利,学习进步!