山东大学舆情分析系统项目融合日志 2021.05.30_山东大学舆情监测系统-CSDN博客

本文链接：https://blog.csdn.net/Mrz_orz/article/details/117791071

项目融合日志 2021.05.30

一、后台主系统编写
1、核心系统

from . import data_processing as dp
from . import SQL
from . import WordsCount as wc
from . import emotion as emo
import os

# Directory named "Spider" located next to this module, written with
# Windows separators and a trailing backslash so relative spider paths can
# simply be appended to it.
BasicPath = "{}\\Spider\\".format(os.path.split(os.path.abspath(__file__))[0])

# CSV output file of each news spider, relative to BasicPath. Every spider
# follows the same "Spider_<site>\Spider_<site>\spiders\<site>.csv" layout.
Data_path = [
    'Spider_{0}\\Spider_{0}\\spiders\\{0}.csv'.format(site)
    for site in ('baidu', 'cctv', 'headline', 'sina', 'wangyi', 'iqilu')
]

def main():
    """Run one complete crawl-and-analysis pass of the core system.

    The pass performs, in order:

    1. Run every spider's ``main.py`` entry script under ``BasicPath``, one
       after another, waiting for each to finish. Exit codes are ignored,
       so one failing spider does not abort the pass.
    2. Load the CSV of each news spider listed in ``Data_path`` through
       ``dp.data_processing``, and the Zhihu / Weibo CSVs through their
       dedicated loaders. Those two loaders return a second value that is
       later handed to ``emo.get_emotion`` (presumably the comments).
    3. Read the already stored records with ``SQL.get_data()`` *before*
       writing the freshly crawled ones with ``SQL.set_data``.
    4. Run ``wc.InfoSet_Comput`` over fresh + stored records and persist
       ``wc.get_all()`` with ``SQL.set_InfoKW``.
    5. Combine the Zhihu / Weibo second values with the stored emotion
       entries (index 0 = zhihu, index 1 = weibo), tag each result with its
       source name and persist it with ``SQL.set_Emotion``.
    """
    # Local imports: only the spider launch below needs these modules.
    import subprocess
    import sys

    # --- Crawl ---
    # The scripts are started with an argument list instead of a shell
    # string, so an installation path containing spaces still works and no
    # ".py" file association is required.
    # NOTE(review): sys.executable is assumed to be the Python interpreter
    # running this server; confirm if the app is hosted by an embedding
    # server whose executable is not python.
    interpreter = sys.executable or 'python'
    spider_names = ('baidu', 'cctv', 'headline', 'sina',
                    'wangyi', 'zhihu', 'weibo', 'iqilu')
    for name in spider_names:
        script = BasicPath + 'Spider_{}\\main.py'.format(name)
        subprocess.run([interpreter, script])

    # --- Collect crawled data ---
    data_list = []
    for path in Data_path:
        data_list.extend(dp.data_processing(BasicPath + path))

    data1, pl1 = dp.data_processing_zhihu(
        BasicPath + 'Spider_zhihu\\Spider_zhihu\\spiders\\zhihu.csv')
    data2, pl2 = dp.data_processing_weibo(
        BasicPath + 'Spider_weibo\\Spider_weibo\\spiders\\weibo.csv')
    data_list.extend(data1)
    data_list.extend(data2)

    # Fetch what is already stored first, then store only the new records.
    stored_list = SQL.get_data()
    SQL.set_data(data_list)

    # --- Hot words ---
    # Hot words are computed over the new records plus the stored ones.
    data_list.extend(stored_list)
    wc.InfoSet_Comput(data_list)
    SQL.set_InfoKW(wc.get_all())

    # --- Comments / emotion ---
    emo_list = SQL.get_Emotion()
    zhihu_list = emo.get_emotion(pl1, emo_list[0])
    zhihu_list.append('zhihu')
    weibo_list = emo.get_emotion(pl2, emo_list[1])
    weibo_list.append('weibo')

    SQL.set_Emotion(zhihu_list)
    SQL.set_Emotion(weibo_list)

2、主系统（核心系统控制系统）

import threading
import time
from .MyNLP import main

class MainSystem:
    """Controller that decides when the core system (``main.main()``) runs.

    The constructor starts three threads:

    * ``get_cmd_input`` -- reads console commands (start / end / stop /
      exit / help).
    * ``get_work_time`` -- checks the local clock once per hour and requests
      a core pass when the hour is 4.
    * ``main_System``   -- runs the core pass whenever one is requested.

    State is kept in two plain attributes:

    * ``flag`` -- 1: enabled and idle, 0: stopped, 10000: a core pass is in
      progress, -1: exit requested.
    * ``is_work_time`` -- set to 1 to request a core pass; reset to 0 when a
      pass finishes.
    """

    # Values stored in ``flag``. Named so that the literals cannot drift
    # apart between methods.
    _STOPPED = 0
    _IDLE = 1
    _CORE_RUNNING = 10000
    _EXITING = -1

    # Seconds between two polls of ``flag`` / ``is_work_time`` in the
    # background loops.
    _POLL_SECONDS = 1
    # Seconds between two wall-clock checks in ``get_work_time``.
    _CLOCK_CHECK_SECONDS = 60 * 60

    def __init__(self):
        """Initialise the state and start the three background threads."""
        self.flag = self._IDLE
        self.is_work_time = 0
        threading.Thread(target = self.get_cmd_input).start()
        threading.Thread(target = self.get_work_time).start()
        threading.Thread(target = self.main_System).start()
        print('系统开始运行')

    def main_System(self):
        """Worker loop: run the core system whenever a pass is requested.

        A pass starts when the system is enabled (``flag`` non-zero) and
        ``is_work_time`` is set. While it runs ``flag`` is 10000; afterwards
        ``flag`` goes back to 1 and ``is_work_time`` to 0. The loop returns,
        ending its thread, once ``flag`` is -1.
        """
        while True:
            # Check for exit first: -1 is truthy, so a plain truthiness test
            # on ``flag`` would treat "exit requested" as "enabled".
            if self.flag == self._EXITING:
                return
            if self.flag and self.is_work_time:
                print('系统核心开始运行')
                self.flag = self._CORE_RUNNING
                try:
                    main.main()
                finally:
                    # Reset even if the pass raised, so the console commands
                    # are not left locked in the "core running" state.
                    self.flag = self._IDLE
                    self.is_work_time = 0
                print('系统核心结束运行')
            else:
                # Nothing to do: sleep briefly instead of spinning.
                time.sleep(self._POLL_SECONDS)

    def get_cmd_input(self):
        """Console loop: read one command per line and dispatch it.

        On end-of-input (``EOFError``) a hint is printed and ``sys_exit`` is
        called.
        """
        while True:
            try:
                cmd_str = input()
                if cmd_str == 'start':
                    self.sys_start()
                elif cmd_str == 'end' or cmd_str == 'stop':
                    self.sys_end()
                elif cmd_str == 'help' or cmd_str == 'h':
                    print('本系统支持如下命令：')
                    print('\tstart\t\t启动系统')
                    print('\tend/stop\t\t停止系统')
                    print('\texit\t\t退出系统')
                    print('\thelp/h\t\t获取帮助')
                elif cmd_str == 'exit':
                    self.sys_exit()
                else:
                    print('输入有误，请重试！')
                    print('如需帮助，请输入”help“或“h”以查看使用说明')
            except EOFError:
                print('请手动关闭服务器')
                self.sys_exit()

    def get_work_time(self):
        """Timer loop: once per hour, request a core pass if the hour is 4.

        The one-hour wait is split into short slices so that an exit request
        (``flag`` == -1) ends this thread promptly instead of after up to an
        hour.
        """
        while True:
            deadline = time.monotonic() + self._CLOCK_CHECK_SECONDS
            while True:
                if self.flag == self._EXITING:
                    return
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    break
                time.sleep(min(self._POLL_SECONDS, remaining))

            now = time.localtime(time.time())
            if not self.is_work_time and now.tm_hour == 4:
                self.is_work_time = 1

    def sys_start(self):
        """Enable the system unless a core pass is currently in progress."""
        if self.flag != self._CORE_RUNNING:
            self.flag = self._IDLE
        print('系统已启动')

    def sys_end(self):
        """Stop the system; refused while a core pass is in progress."""
        if self.flag == self._CORE_RUNNING:
            print('系统核心运行中，暂时无法停止，该状态将不会持续超过……')
        else:
            self.flag = self._STOPPED
            print('系统已停止')

    def sys_exit(self):
        """Request exit; refused while a core pass is in progress.

        Sets ``flag`` to -1 so the background loops stop, then calls
        ``exit(0)``, which raises ``SystemExit`` in the calling thread.
        """
        if self.flag == self._CORE_RUNNING:
            print('系统核心运行中，暂时无法退出系统，该状态将不会持续超过……')
        else:
            print('已退出系统')
            self.flag = self._EXITING
            exit(0)

    def is_running(self):
        """Return True when the system is enabled and idle (``flag`` == 1)."""
        return self.flag == self._IDLE

    def sys_main_is_running(self):
        """Return True while a core pass is in progress (``flag`` == 10000)."""
        return self.flag == self._CORE_RUNNING

    def sys_main_start(self):
        """Request an immediate core pass.

        Returns:
            0 if a core pass is already in progress (nothing is changed),
            1 after ``is_work_time`` has been set to request a pass.
        """
        # NOTE(review): the previous code called a bare ``is_running()``,
        # which raised NameError. "Already running" is taken to mean the
        # core pass, not the idle system -- confirm against the web view
        # that calls this.
        if self.sys_main_is_running():
            return 0
        self.is_work_time = 1
        return 1

二、系统融合

1、优化文件路径选择，使其支持脱离编译环境
2、增加主系统与Django服务器的并行运行

主系统在服务器启动时同步启动
主系统可在服务器控制台操作，也可在网页通过服务器操作
主系统将在每日4~5点自动启动核心系统进行全网爬取，也可在网页端手动启动核心系统

3、通过编写入口程序，实现核心系统对scrapy框架下的爬虫的调用

最后形成如下文件结构

│ db.sqlite3
│ manage.py
│ sdu_poas
│
├─app
│ │ admin.py
│ │ apps.py
│ │ models.py
│ │ tests.py
│ │ urls.py
│ │ views.py
│ │ __init__.py
│ ├─migrations
│ └─__pycache__
│
├─django_vue
│ │ asgi.py
│ │ settings.py
│ │ urls.py
│ │ wsgi.py
│ │ __init__.py
│ └─__pycache__
│
├─my_program
│ │ my_bean.py
│ │ my_sql.py
│ │ system.py
│ │ __init__.py
│ │
│ ├─MyNLP
│ │ │ data_processing.py
│ │ │ emotion.py
│ │ │ main.py
│ │ │ MyNLP.py
│ │ │ SQL.py
│ │ │ WordsCount.py
│ │ │ __init__.py
│ │ │
│ │ ├─pkuseg
│ │ │ │ BosonNLP_sentiment_score.txt
│ │ │ │ IrrelevantWords.txt
│ │ │ │ lexicon.txt
│ │ │ │ stopwords.txt
│ │ │ │
│ │ │ └─news
│ │ │     features.pkl
│ │ │     weights.npz
│ │ │
│ │ ├─Spider
│ │ └─__pycache__
│ │
│ └─__pycache__
│
└─xiangmu
　　└─dist
　　　　│ favicon.ico
　　　　│ index.html
　　　　├─css
　　　　├─fonts
　　　　├─img
　　　　└─js