To reuse the web pages of the original search engine for the recommender-system course project, the views file under the untitle folder is recorded below. To switch to news recommendation, simply copy this views.py file into place.
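Before the listing, a minimal sketch of how the two views might be routed; the URL paths and the app import here are assumptions, not taken from the original project:

# urls.py (hypothetical) -- wire index() to the search page and test() to the AJAX endpoint
from django.urls import path
from untitle import views

urlpatterns = [
    path('', views.index, name='index'),     # renders index.html
    path('test/', views.test, name='test'),  # POST endpoint; expects a 'num' field
]

The recorded views.py follows: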
# -*- coding: utf-8 -*-
from django.http import HttpResponse
from django.shortcuts import render
from snownlp import SnowNLP
import matplotlib.pyplot as plt
import numpy as np
import datetime
import jieba
import json
import os
import re
import time
# Create your views here.
# Internal helpers (not exposed as URLs)
def is_valid_date(str_date):
    # Return True if str_date parses as "YYYY-MM-DD" or "YYYY-MM-DD HH:MM:SS".
    try:
        if ":" in str_date:
            time.strptime(str_date, "%Y-%m-%d %H:%M:%S")
        else:
            time.strptime(str_date, "%Y-%m-%d")
        return True
    except ValueError:
        return False
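# Example: is_valid_date('2025-01-01') -> True, is_valid_date('主席') -> False.
# The test() view below relies on this to tell date queries from keyword queries.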
'''
Process the per-day JSON files and pick out the news items
with the most comments on a given day.
'''
# Find the top `num` news items with the most comments on a given day.
def findImportant(date, num):
    Maxnum = num              # keep only the most-commented items
    count = 0
    importDict = {}           # title -> comment count
    importCommentDict = {}    # title -> list of sentiment scores
    openfileName = str(date) + "count.json"
    with open(openfileName, 'r') as load_f:
        load_dict = json.load(load_f)
    # Sort titles by comment count, highest first
    sortedDict = sorted(
        load_dict.items(),
        key=lambda item: item[1],
        reverse=True)
    commentOpenFile = str(date) + ".json"
    with open(commentOpenFile, 'r') as load_g:
        comment_loaddict = json.load(load_g)
    for key, value in sortedDict:
        if count < Maxnum:
            importDict[key] = value
            importCommentDict[key] = comment_loaddict[key]
            count = count + 1
        else:
            break
    print('import count', importDict)
    print('import comment', importCommentDict)
    return importDict, importCommentDict
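# The reads above assume two JSON files per day (layout inferred from this
# code, not documented elsewhere), e.g. for the date '250101':
#   250101count.json : {"news title A": 128, "news title B": 57, ...}
#   250101.json      : {"news title A": [0.91, 0.23, ...], ...}
# where each list holds one sentiment score per comment.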
# Summarise one news item's sentiment scores: plot their histogram and
# return the share of positive and negative comments.
def handleMood(sentimentslist, key):
    plt.hist(sentimentslist, bins=np.arange(0, 1, 0.01), facecolor="b")
    plt.xlabel("probability")
    plt.ylabel("number")
    titlename = str(key) + '.png'
    plt.title('the react from the crowd')
    # plt.savefig(titlename)  # alternatively persist the figure to disk
    plt.show()                # note: opens a GUI window and may block a server process
    posCount = 0      # number of positive comments (score > 0.7)
    nevCount = 0.0    # number of negative comments (score < 0.4)
    Count = 0.0       # total number of comments (float keeps the division exact on Python 2)
    for i in sentimentslist:
        if i > 0.7:
            posCount += 1
        elif i < 0.4:
            nevCount += 1
        Count += 1
    print('pos', posCount)
    print('nev', nevCount)
    if Count == 0:
        return '0.00%', '0.00%'   # guard against an empty score list
    posPro = ('%.2f%%' % ((posCount / Count) * 100))
    nevPro = ('%.2f%%' % ((nevCount / Count) * 100))
    return posPro, nevPro
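# Example with illustrative scores (not project data):
#   handleMood([0.95, 0.88, 0.10, 0.55], 'some-title')
# plots the histogram and returns ('50.00%', '25.00%'):
# two of the four scores exceed 0.7 and one falls below 0.4.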
# Collect the full path of every file under a folder, recursively.
def get_all_file(folder_path):
    file_list = []
    if folder_path is None:
        raise Exception("folder_path is None")
    for dirpath, dirnames, filenames in os.walk(folder_path):
        for name in filenames:
            file_list.append(os.path.join(dirpath, name))  # portable join instead of '\\'
    return file_list
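# Example (hypothetical folder): get_all_file('./data')
# -> ['./data/250101.json', './data/250101count.json', ...]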
# Locate '<filename>.json' inside any 'comments' directory under path.
def walk_dir(path, filename):
    filter_file_name = str(filename) + '.json'
    print(filter_file_name)
    for root, dirs, files in os.walk(path):
        for dir_item in dirs:
            if dir_item == 'comments':
                value_dir = os.path.join(root, dir_item, filter_file_name)
                print('found', value_dir)  # the file could be opened here
# Get today's date as a 'yymmdd' string (the JSON file naming scheme).
def getdate():
    today = datetime.date.today()
    formatted_today = today.strftime('%y%m%d')
    return formatted_today
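# Example: on 2025-01-01 getdate() returns '250101', matching file names
# such as '250101.json' and '250101count.json'.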
# Walk the files of the last ten days and gather, for every news item whose
# title relates to the keyword, its comment count and sentiment scores.
def findnews(keyword):
    num = 10
    # Related title -> the 'yymmdd' date of the file it was found in.
    contextDict = {}
    today = datetime.date.today()
    for i in range(num, -1, -1):
        # Use real date arithmetic so month boundaries are handled correctly.
        date = (today - datetime.timedelta(days=i)).strftime('%y%m%d')
        filename = date + '.json'
        try:
            with open(filename, 'r') as load_g:
                comment_dict = json.load(load_g)  # title -> sentiment scores
            for key in comment_dict:
                print('key', key)
                numcos = cos_simlarity(keyword, key)
                if numcos > 0.1:  # keep titles similar enough to the query
                    print(key)
                    contextDict[key] = date
        except Exception:
            continue  # no readable file for that day
    # With the related titles in hand, look up each one's comments and
    # comment count by key and return everything as three dicts.
    commentdict = {}  # title -> list of sentiment scores
    countdict = {}    # title -> comment count
    for key, value in contextDict.items():
        commentsFile = str(value) + '.json'
        countFile = str(value) + 'count.json'
        with open(commentsFile, 'r') as load_g:
            comment_dict = json.load(load_g)
            commentdict[key] = comment_dict[key]
        with open(countFile, 'r') as load_f:
            count_dict = json.load(load_f)
            countdict[key] = count_dict[key]
    return contextDict, commentdict, countdict
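# Example return shapes (illustrative values):
#   contextDict : {'某新闻标题': '250101'}           # title -> date found
#   commentdict : {'某新闻标题': [0.91, 0.23, ...]}  # title -> sentiment scores
#   countdict   : {'某新闻标题': 128}                # title -> comment count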
# Build bag-of-words frequency vectors for two sentences over the union
# of their vocabularies.
def get_word_vector(s1, s2):
    # Tokenise with jieba
    list_word1 = list(jieba.cut(s1))
    list_word2 = list(jieba.cut(s2))
    # The vocabulary is the union of both token lists
    key_word = list(set(list_word1 + list_word2))
    # Zero-filled vectors, one slot per vocabulary word
    word_vector1 = np.zeros(len(key_word))
    word_vector2 = np.zeros(len(key_word))
    # Term frequency: count each vocabulary word's occurrences in each sentence
    for i in range(len(key_word)):
        for j in range(len(list_word1)):
            if key_word[i] == list_word1[j]:
                word_vector1[i] += 1
        for k in range(len(list_word2)):
            if key_word[i] == list_word2[k]:
                word_vector2[i] += 1
    return word_vector1, word_vector2
# Cosine similarity of the two term-frequency vectors:
# cos(v1, v2) = (v1 . v2) / (|v1| * |v2|), in [0, 1] for non-negative counts.
def cos_simlarity(s1, s2):
    vec1, vec2 = get_word_vector(s1, s2)
    dist1 = float(np.dot(vec1, vec2) /
                  (np.linalg.norm(vec1) * np.linalg.norm(vec2)))
    return dist1
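# Worked example (illustrative sentences, not project data): if the token
# lists are ['国家', '主席'] and ['国家', '总理'], the vocabulary has three
# words, the vectors are permutations of [1, 1, 0] and [1, 0, 1], and
# cos = 1 / (sqrt(2) * sqrt(2)) = 0.5, clearing the 0.1 threshold in findnews.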
##########################################################################
def index(request):
    # Render the search page.
    return render(request, 'index.html')
def test(request):
    # The front end posts either a date ("YYYY-MM-DD") or a keyword in the
    # 'num' field; respond with a JSON list of formatted result lines.
    s1 = request.POST['num']
    data = []
    relatedDict = {}
    if is_valid_date(s1):
        # Date query: convert "YYYY-MM-DD" to 'yymmdd' and report that day's top ten news.
        importdate = s1[2:4] + s1[5:7] + s1[8:]
        print(importdate)
        importDict, impComDict = findImportant(importdate, 10)
        for key, value in importDict.items():
            posPro, nevPro = handleMood(impComDict[key], key)
            sentence = str(key) + "\t\t\t评论人数:" + str(value) + \
                "\t积极态度:" + str(posPro) + "\t消极态度:" + str(nevPro)
            data.append(sentence)
    else:
        # Keyword query: search the last ten days' files for related news.
        print('keyword query', s1)
        relatedDict, commentDict, CountDict = findnews(s1)
        for key, value in relatedDict.items():
            print(key)
            posPro, nevPro = handleMood(commentDict[key], key)
            sentence = str(key) + "\t日期:" + str(value) + "\t评论人数:" + str(
                CountDict[key]) + "\t积极态度:" + str(posPro) + "\t消极态度:" + str(nevPro)
            data.append(sentence)
    return HttpResponse(json.dumps({'data': data}))
##########################################################################
'''
if __name__ == "__main__":
    keyword = '主席'
    finalDict = findnews(keyword)
    print(finalDict)
'''
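A quick way to exercise the test view without the front end is Django's test client (run from python manage.py shell). A minimal sketch, assuming the routing above and that the day's JSON files (such as 250101.json and 250101count.json) sit in the working directory:

import json
from django.test import Client

client = Client()
# Date query: that day's ten most-commented news with sentiment shares
resp = client.post('/test/', {'num': '2025-01-01'})
print(json.loads(resp.content)['data'])
# Keyword query: related news from the last ten days' files
resp = client.post('/test/', {'num': '主席'})
print(json.loads(resp.content)['data'])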