对第一个爬虫项目的完善

实验要求

实验过程
1.在mysql中新建两个表,用来记录用户信息和操作日志

记录用户信息的表user:

CREATE TABLE `crawl`.`user` (
`id` INT UNSIGNED NOT NULL AUTO_INCREMENT,
`username` VARCHAR(45) NOT NULL,
`password` VARCHAR(45) NOT NULL,
`registertime` datetime DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`id`),
UNIQUE KEY `username_UNIQUE` (`username`))
ENGINE=InnoDB DEFAULT CHARSET=utf8;

记录操作日志的表user_action:

CREATE TABLE `crawl`.`user_action` (
`id` INT UNSIGNED NOT NULL AUTO_INCREMENT,
`username` VARCHAR(45) NOT NULL,
`request_time` VARCHAR(45) NOT NULL,
`request_method` VARCHAR(20) NOT NULL,
`request_url` VARCHAR(300) NOT NULL,
`status` int(4),
`remote_addr` VARCHAR(100) NOT NULL,
PRIMARY KEY (`id`))
ENGINE=InnoDB DEFAULT CHARSET=utf8;

建立连接mysql配置文件 mysqlConf.js

module.exports = {
    mysql: {
        host: 'localhost',
        user: 'root',
        password: 'root'
        database: 'crawl'
        connectionLimit: 10
        //最大连接数,默认为10
    }
};

2.实现用户注册和登录
引入前端框架angular

<script src="/angular/angular.min.js"></script>

引入自己的样式与js

<link rel="stylesheet" type="text/css" href="stylesheets/index.css">
    <script type="text/javascript" src="javascripts/index.js"></script>
    <script>
        var app = angular.module('login', []);
        app.controller('loginCtrl', function ($scope, $http, $timeout) {
        // 登录时,检查用户输入的账户密码是否与数据库中的一致            
        $scope.check_pwd = function () {                
        var data = JSON.stringify({                    
            username: $scope.username,                    
            password: $scope.password                
        });                
        $http.post("/users/login", data)                    
        .then(                    
        function (res) {                        
        if(res.data.msg=='ok') {                            
        window.location.href='/news.html';                        
        }
        else{                            
        $scope.msg=res.data.msg;                       
         }                    
         },                        
         function (err) {                       
          $scope.msg = err.data;                    
                }};
          //增加注册用户            
          $scope.doAdd = function () {                
          // 检查用户注册时,输入的两次密码是否一致                
               if($scope.add_password!==$scope.confirm_password){                   
                   $scope.msg = '两次密码不一致!'}                
                 else {                                   
                 var data = JSON.stringify({                        
                 username: $scope.add_username,                        
                 password: $scope.add_password                    
                 });                    
                 $http.post("/users/register", data)                        
                     .then(function (res) {                            
                        if(res.data.msg=='成功注册!请登录') {                                
                        $scope.msg=res.data.msg;                                
                        $timeout(function () {                                    
                        window.location.href='index.html';                                
                            },2000);
                        } 
                        else {                                
                           $scope.msg = res.data.msg;                            }                        
                           }, function (err) {                            
                           $scope.msg = err.data;                        
                           });                
                     }            
                };        
           });

引入angular.js后,登录时检测user数据表中是否存在该用户名与密码,如果登录成功会跳转到news.html页面,用户则可以看到主界面。

设置登录页的路由
调用userDAO,因为session信息只保存在启动网站内存中,所以要注意保存session信息,记录用户的操作日志

router.post('/login', function(req, res) {
  var username = req.body.username;
  var password = req.body.password;
  userDAO.getByUsername(username, function (user) {
  if(user.length==0){
      res.json({msg:'用户不存在!请检查后输入'});
      }else {
      if(password===user[0].password){
        req.session['username'] = username;
        res.cookie('username', username);
        res.json({msg: 'ok'});
        // res.json({msg:'ok'});
      }else{res.json({msg:'用户名或密码错误!请检查后输入'});
      }
    }
  });
});

调用的userDAO代码:

var mysql = require('mysql');
var mysqlConf = require('../conf/mysqlConf');
var userSqlMap = require('./userSqlMap');
var pool = mysql.createPool(mysqlConf.mysql);
module.exports = {
    add: function (user, callback) {
        pool.query(userSqlMap.add, [user.username, user.password], function (error, result) {
            if (error) throw error;
            callback(result.affectedRows > 0);
        });
    },
    getByUsername: function (username, callback) {
    pool.query(userSqlMap.getByUsername, [username], function (error, result) {
            if (error) throw error;
            callback(result);
            });
      },
};      

最终结果如下:

在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

3.实现查询词支持布尔表达式

在news.html引入查询页面

<div ng-include="'search.html'"></div>

search.html代码:
设置查询界面按钮

<form class="form-horizontal" role="form">
    <div class="row" style="margin-bottom: 10px;">
        <label class="col-lg-2 control-label">标题关键字</label>
        <div class="col-lg-3">
            <input type="text" class="form-control" placeholder="标题关键字" ng-model="$parent.title1">
        </div>
        <div class="col-lg-1">
            <select class="form-control" autocomplete="off" ng-model="$parent.selectTitle">
                <option selected="selected">AND</option>
                <option>OR</option>
                </select>
        </div>
        <div class="col-lg-3">
            <input type="text" class="form-control" placeholder="标题关键字" ng-model="$parent.title2">
        </div>
    </div>
<div class="row" style="margin-bottom: 10px;">
        <label class="col-lg-2 control-label">内容关键字</label>
        <div class="col-lg-3">
            <input type="text" class="form-control" placeholder="内容关键字" ng-model="$parent.content1">
        </div>
        <div class="col-lg-1">
            <select class="form-control" autocomplete="off" ng-model="$parent.selectContent">
                <option selected="selected">AND</option>
                <option>OR</option>
            </select>
        </div>
        <div class="col-lg-3">
            <input type="text" class="form-control" placeholder="内容关键字" ng-model="$parent.content2">
        </div>
    </div>
   <div class="form-group">
        <div class="col-md-offset-9">
            <button type="submit" class="btn btn-default" ng-click="search()">查询</button>
         </div>    
   </div>
</form>          

显示查询内容结果:

<div ng-show="isisshowresult">
<table class="table table-striped">
        <thead>
            <tr>
                <td>序号</td>
                <td>标题</td>
                <td>作者</td>
                <td>关键词</td>
                <td>链接</td>
                <td>发布时间</td>
            </tr>
                <td>关键词</td>
                <td>链接</td>
                <td>发布时间</td>
            </tr>           
            <td>{{item.keywords}}</td>
            <td>{{item.url}}</td>
            <td>{{item.publish_date}}</td>
        </tr>
        </tbody>
    </table>
    <div class="row">
        <div class="pull-left" style="margin-top: 12px;">
            <button type="submit" class="btn btn-primary" ng-click="searchsortASC()" >发布时间升序</button>
            <button type="submit" class="btn btn-primary" ng-click="searchsortDESC()">发布时间降序</button>
        </div>
        <div class="pull-right">
            <nav>
                <ul class="pagination">
                    <li>
                        <a ng-click="Previous()" role="button"><span role="button">上一页</span></a>
                    </li>
                    <li ng-repeat="page in pageList" ng-class="{active:isActivePage(page)}" role="button">
                        <a ng-click="selectPage(page)" >{{ page }}</a>
                    </li>
                    <li>
                        <a ng-click="Next()" role="button"><span role="button">下一页</span></a>
                    </li>
                </ul>
            </nav>
        </div>
    </div>
</div>              

后端news.js代码:
将用户输入的数据用get方式传给后端处理。对于用户的输入,若一个词都不输入,则输出全部数据。

$scope.search = function () {
        var title1 = $scope.title1;
        var title2 = $scope.title2;
        var selectTitle = $scope.selectTitle;
        var content1 = $scope.content1;
        var content2 = $scope.content2;
        var selectContent = $scope.selectContent;
        var sorttime = $scope.sorttime;
        if(typeof title1=="undefined" && typeof title2!="undefined" && title2.length>0){
            title1 = title2;
        }
        if(typeof content1=="undefined" && typeof content2!="undefined" && content2.length>0){
            content1 = content2;
        }
        var myurl = `/news/search?t1=${title1}&ts=${selectTitle}&t2=${title2}&c1=${content1}&cs=${selectContent}&c2=${content2}
$stime=${sorttime}`;
$http.get(myurl).then(
            function (res) {
                if(res.data.message=='data'){
                    $scope.isisshowresult = true; 
                    // $scope.searchdata = res.data;
                    $scope.initPageSort(res.data.result)
                }else {
                    window.location.href=res.data.result;
                }
            },function (err) {
                $scope.msg = err.data;
            });
    };

设置查询页路由:

router.get('/search', function(request, response) {
    console.log(request.session['username']);
    if (request.session['username']===undefined) {
        response.json({message:'url',result:'/index.html'});
    }else {
        var param = request.query;
        newsDAO.search(param,function (err, result, fields) {
            response.json({message:'data',result:result});
        })
    }
});

newsDAO.search用来拼起sql

search :function(searchparam, callback) {
    // 组合查询条件
    var sql = 'select * from fetches ';
     if(searchparam["t2"]!="undefined"){
        sql +=(`where title like '%${searchparam["t1"]}%' ${searchparam['ts']} title like '%${searchparam["t2"]}%' `);
    }else if(searchparam["t1"]!="undefined"){
        sql +=(`where title like '%${searchparam["t1"]}%' `);
    };
    if(searchparam["t1"]=="undefined"&&searchparam["t2"]=="undefined"&&searchparam["c1"]!="undefined"){
        sql+='where ';
    }else if(searchparam["t1"]!="undefined"&&searchparam["c1"]!="undefined"){
        sql+='and ';
    }
    if(searchparam["c2"]!="undefined"){
        sql +=(`content like '%${searchparam["c1"]}%' ${searchparam['cs']} content like '%${searchparam["c2"]}%' `);
    }else if(searchparam["c1"]!="undefined"){
        sql +=(`content like '%${searchparam["c1"]}%' `);
    }
     if(searchparam['stime']!="undefined"){
        if(searchparam['stime']=="1"){
            sql+='ORDER BY publish_date ASC ';
        }else {
            sql+='ORDER BY publish_date DESC ';
        }
    }
sql+=';';
    pool.getConnection(function(err, conn) {
        if (err) {
            callback(err, null, null);
        } else {
            conn.query(sql, function(qerr, vals, fields) {
                conn.release(); //释放连接
                callback(qerr, vals, fields); //事件驱动回调
            });
        }
    });
},
};

分页显示查询结果:

 $scope.initPageSort=function(item){
        $scope.pageSize=5;  //每页显示的数据量,可以随意更改
        $scope.selPage = 1;
        $scope.data = item;
        $scope.pages = Math.ceil($scope.data.length / $scope.pageSize); 
        $scope.pageList = [];
        $scope.index = 1;
        var len = $scope.pages> 5 ? 5:$scope.pages;
        $scope.pageList = Array.from({length: len}, (x,i) => i+1);
        $scope.items = $scope.data.slice(0, $scope.pageSize);
    };
    $scope.selectPage = function (page) {
        if (page < 1 || page > $scope.pages) return;
        var pageList = [];
        if(page>2){
            for (var i = page-2; i <= $scope.pages && i < page+3; i++) {
                pageList.push(i);
            }
        }else {
            for (var i = page; i <= $scope.pages && i < page+5; i++) {
                pageList.push(i);
            }
        }
        $scope.index =(page-1)*$scope.pageSize+1;
        $scope.pageList = pageList;
        $scope.selPage = page;
        $scope.items = $scope.data.slice(($scope.pageSize * (page - 1)), (page * $scope.pageSize));
        console.log("选择的页:" + page);
    };

最终结果:
在这里插入图片描述
在这里插入图片描述

4.用Echarts实现了3个数据分析图表
(因为没有持续爬取数据,所以最后的图表不准确)
路由代码:

router.get('/histogram', function(request, response) {
    console.log(request.session['username']);
    if (request.session['username']===undefined) {
        // response.redirect('/index.html')
        response.json({message:'url',result:'/index.html'});
    }else {
        var fetchSql = "select publish_date as x,count(publish_date) as y from fetches group by publish_date order by publish_date;";
        newsDAO.query_noparam(fetchSql, function (err, result, fields) {
            response.writeHead(200, {
                "Content-Type": "application/json",
                "Cache-Control": "no-cache, no-store, must-revalidate",
                "Pragma": "no-cache",
                "Expires": 0
            });
            response.write(JSON.stringify({message:'data',result:result}));
            response.end();
        });
    }
});

router.get('/line', function(request, response) {
    console.log(request.session['username']);
    if (request.session['username']===undefined) {
        // response.redirect('/index.html')
        response.json({message:'url',result:'/index.html'});
    }else {
        var keyword = '疫情'; 
        var fetchSql = "select content,publish_date from fetches where content like'%" + keyword + "%' order by publish_date;";
        newsDAO.query_noparam(fetchSql, function (err, result, fields) {
            response.writeHead(200, {
                "Content-Type": "application/json",
                "Cache-Control": "no-cache, no-store, must-revalidate",
                "Pragma": "no-cache",
                "Expires": 0
            });
            response.write(JSON.stringify({message:'data',result:myfreqchangeModule.freqchange(result, keyword)}));
            response.end();
        });
    }
});

router.get('/wordcloud', function(request, response) {
    console.log(request.session['username']);
    if (request.session['username']===undefined) {
        // response.redirect('/index.html')
        response.json({message:'url',result:'/index.html'});
    }else {
        var fetchSql = "select content from fetches;";
        newsDAO.query_noparam(fetchSql, function (err, result, fields) {
            response.writeHead(200, {
                "Content-Type": "application/json",
                "Cache-Control": "no-cache, no-store, must-revalidate",
                "Pragma": "no-cache",
                "Expires": 0
            });
            response.write(JSON.stringify({message:'data',result:mywordcutModule.wordcut(result)}));//返回处理过的数据
            response.end();
        });
    }
});

柱状图:

$scope.histogram = function () {
        $scope.isShow = false;
        $http.get("/news/histogram")
            .then(
                function (res) {
                 if(res.data.message=='url'){
                        window.location.href=res.data.result;
                    }else {
                    let xdata = [], ydata = [], newdata;
                     var pattern = /\d{4}-(\d{2}-\d{2})/;
                        res.data.result.forEach(function (element) {
                            // "x":"2020-04-28T16:00:00.000Z" ,对x进行处理,只取 月日
                            xdata.push(pattern.exec(element["x"])[1]);
                            ydata.push(element["y"]);
                        });
                        newdata = {"xdata": xdata, "ydata": ydata};
                        ar myChart = echarts.init(document.getElementById('main1'));
                        var option = {
                            title: {
                                text: '新闻发布数 随时间变化'
                            },
                            tooltip: {},
                            legend: {
                                data: ['新闻发布数']
                            },
                            xAxis: {
                                data: newdata["xdata"]
                            },
                            yAxis: {},
                            series: [{
                                name: '新闻数目',
                                type: 'bar',
                                data: newdata["ydata"]
                            }]
                        };
                        // 使用刚指定的配置项和数据显示图表。
                        myChart.setOption(option);
                    }
                },
                function (err) {
                    $scope.msg = err.data;
                });
 };


折线图:

$scope.line = function () {
        $scope.isShow = false;
        $http.get("/news/line").then(
            function (res) {
                if(res.data.message=='url'){
                    window.location.href=res.data.result;
                }else {
                    var myChart = echarts.init(document.getElementById("main1"));
                    option = {
                        title: {
                            text: '"疫情"该词在新闻中的出现次数随时间变化图'
                        },
                        xAxis: {
                            type: 'category',
                            data: Object.keys(res.data.result)
                        },
                        yAxis: {
                            type: 'value'
                        },
                        series: [{
                            data: Object.values(res.data.result),
                            type: 'line',
                            itemStyle: {normal: {label: {show: true}}}
                        }],
                    };
                    if (option && typeof option === "object") {
                        myChart.setOption(option, true);
                    }
                }
            });
    };

在这里插入图片描述查看不同的关键词随时间出现次数需要需要在路由代码中修改keyword的关键词

词云:

    $scope.wordcloud = function () {
        $scope.isShow = false;
        $http.get("/news/wordcloud").then(
            function (res) {
                if(res.data.message=='url'){
                    window.location.href=res.data.result;
                }else {
                    var mainContainer = document.getElementById('main1');
                    var chart = echarts.init(mainContainer);
                    var data = [];
                    for (var name in res.data.result) {
                        data.push({
                            name: name,
                            value: Math.sqrt(res.data.result[name])
                        })
                    }
                    var maskImage = new Image();
                    maskImage.src = './images/logo.png';
                    var option = {
                        title: {
                            text: '所有新闻内容 jieba分词 的词云展示'
                        },
                        series: [{
                            type: 'wordCloud',
                            sizeRange: [12, 60],
                            rotationRange: [-90, 90],
                            rotationStep: 45,
                            gridSize: 2,
                            shape: 'circle',
                            maskImage: maskImage,
                            drawOutOfBound: false,
                            textStyle: {
                                normal: {
                                    fontFamily: 'sans-serif',
                                    fontWeight: 'bold',
                                    // Color can be a callback function or a color string
                                    color: function () {
                                        // Random color
                                        return 'rgb(' + [
                                            Math.round(Math.random() * 160),
                                            Math.round(Math.random() * 160),
                                            Math.round(Math.random() * 160)
                                        ].join(',') + ')';
                                    }
                                },
                                emphasis: {
                                    shadowBlur: 10,
                                    shadowColor: '#333'
                                }
                            },
                            data: data
                        }]
                    };
                    maskImage.onload = function () {
                        // option.series[0].data = data;
                        chart.clear();
                        chart.setOption(option);
                    };
                    window.onresize = function () {
                        chart.resize();
                    };
                }
            });
    }

在这里插入图片描述
修改 maskImage.src = './images/logo.png’可以以其他图片样式显示词云

5.将用户操作记录数据库
直接引入var logger = require('morgan’);保存信息

app.use(logger(function (tokens, req, res) {
  console.log('打印的日志信息:');
  var request_time = new Date();
  var request_method = tokens.method(req, res);
  var request_url = tokens.url(req, res);
  var status = tokens.status(req, res);
  var remote_addr = tokens['remote-addr'](req, res);
  if(req.session){
    var username = req.session['username']||'notlogin';
  }else {
    var username = 'notlogin';
  }
  if(username!='notlogin'){
    logDAO.userlog([username,request_time,request_method,request_url,status,remote_addr], function (success) {
      console.log('成功保存!');
    })
  }

在这里插入图片描述
在这里插入图片描述

实验总结
这次试验原以为有了上次的积累会容易一些,没想到真正自己操作时还是无从下手,只写了一些断断续续的代码,最终还是在老师的代码示例帮助下完成了实验,这次最重要的经验在于实验前要把整体思路想好,不同功能的代码有序放在不同的子文件夹。自己的代码能力也还存在很大不足,其中Echarts实现饼状图时因为爬取的网站中作者名称的表现形式不同,想要做一个关键词的饼状图因为两边参数修改的问题也失败了。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值