To reuse the web pages of the original search engine for the recommender-system course project, the views file under the untitle folder is recorded below. To switch to news recommendation, simply copy this views.py file into place.
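Before the listing, a minimal sketch of how the two views might be routed; the URL paths and the app import here are assumptions, not taken from the original project:

# urls.py (hypothetical) -- wire index() to the search page and test() to the AJAX endpoint
from django.urls import path
from untitle import views

urlpatterns = [
    path('', views.index, name='index'),     # renders index.html
    path('test/', views.test, name='test'),  # POST endpoint; expects a 'num' field
]

The recorded views.py follows: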
# -*- coding: utf-8 -*-
from django.http import HttpResponse
from django.shortcuts import render
from snownlp import SnowNLP
import matplotlib.pyplot as plt
import numpy as np
import datetime
import jieba
import json
import os
import re
import time
# Create your views here.
# Internal helpers (not exposed as URLs)
def is_valid_date(str_date):
    # Return True if str_date parses as "YYYY-MM-DD" or "YYYY-MM-DD HH:MM:SS".
    try:
        if ":" in str_date:
            time.strptime(str_date, "%Y-%m-%d %H:%M:%S")
        else:
            time.strptime(str_date, "%Y-%m-%d")
        return True
    except ValueError:
        return False
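# Example: is_valid_date('2025-01-01') -> True, is_valid_date('主席') -> False.
# The test() view below relies on this to tell date queries from keyword queries.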
'''
Process the per-day JSON files and pick out the news items
with the most comments on a given day.
'''
# Find the top `num` news items with the most comments on a given day.
def findImportant(date, num):
    Maxnum = num              # keep only the most-commented items
    count = 0
    importDict = {}           # title -> comment count
    importCommentDict = {}    # title -> list of sentiment scores
    openfileName = str(date) + "count.json"
    with open(openfileName, 'r') as load_f:
        load_dict = json.load(load_f)
    # Sort titles by comment count, highest first
    sortedDict = sorted(
        load_dict.items(),
        key=lambda item: item[1],
        reverse=True)
    commentOpenFile = str(date) + ".json"
    with open(commentOpenFile, 'r') as load_g:
        comment_loaddict = json.load(load_g)
    for key, value in sortedDict:
        if count < Maxnum:
            importDict[key] = value
            importCommentDict[key] = comment_loaddict[key]
            count = count + 1
        else:
            break
    print('import count', importDict)
    print('import comment', importCommentDict)
    return importDict, importCommentDict
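# The reads above assume two JSON files per day (layout inferred from this
# code, not documented elsewhere), e.g. for the date '250101':
#   250101count.json : {"news title A": 128, "news title B": 57, ...}
#   250101.json      : {"news title A": [0.91, 0.23, ...], ...}
# where each list holds one sentiment score per comment.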
# Summarise one news item's sentiment scores: plot their histogram and
# return the share of positive and negative comments.
def handleMood(sentimentslist, key):
    plt.hist(sentimentslist, bins=np.arange(0, 1, 0.01), facecolor="b")
    plt.xlabel("probability")
    plt.ylabel("number")
    titlename = str(key) + '.png'
    plt.title('the react from the crowd')
    # plt.savefig(titlename)  # alternatively persist the figure to disk
    plt.show()                # note: opens a GUI window and may block a server process
    posCount = 0      # number of positive comments (score > 0.7)
    nevCount = 0.0    # number of negative comments (score < 0.4)
    Count = 0.0       # total number of comments (float keeps the division exact on Python 2)
    for i in sentimentslist:
        if i > 0.7:
            posCount += 1
        elif i < 0.4:
            nevCount += 1
        Count += 1
    print('pos', posCount)
    print('nev', nevCount)
    if Count == 0:
        return '0.00%', '0.00%'   # guard against an empty score list
    posPro = ('%.2f%%' % ((posCount / Count) * 100))
    nevPro = ('%.2f%%' % ((nevCount / Count) * 100))
    return posPro, nevPro
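# Example with illustrative scores (not project data):
#   handleMood([0.95, 0.88, 0.10, 0.55], 'some-title')
# plots the histogram and returns ('50.00%', '25.00%'):
# two of the four scores exceed 0.7 and one falls below 0.4.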
# Collect the full path of every file under a folder, recursively.
def get_all_file(folder_path):
    file_list = []
    if folder_path is None:
        raise Exception("folder_path is None")
    for dirpath, dirnames, filenames in os.walk(folder_path):
        for name in filenames:
            file_list.append(os.path.join(dirpath, name))  # portable join instead of '\\'
    return file_list
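# Example (hypothetical folder): get_all_file('./data')
# -> ['./data/250101.json', './data/250101count.json', ...]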
# Locate '<filename>.json' inside any 'comments' directory under path.
def walk_dir(path, filename):
    filter_file_name = str(filename) + '.json'
    print(filter_file_name)
    for root, dirs, files in os.walk(path):
        for dir_item in dirs:
            if dir_item == 'comments':
                value_dir = os.path.join(root, dir_item, filter_file_name)
                print('found', value_dir)  # the file could be opened here
# Get today's date as a 'yymmdd' string (the JSON file naming scheme).
def getdate():
    today = datetime.date.today()
    formatted_today = today.strftime('%y%m%d')
    return formatted_today
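# Example: on 2025-01-01 getdate() returns '250101', matching file names
# such as '250101.json' and '250101count.json'.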
# Walk the files of the last ten days and gather, for every news item whose
# title relates to the keyword, its comment count and sentiment scores.
def findnews(keyword):
    num = 10
    # Related title -> the 'yymmdd' date of the file it was found in.
    contextDict = {}
    today = datetime.date.today()
    for i in range(num, -1, -1):
        # Use real date arithmetic so month boundaries are handled correctly.
        date = (today - datetime.timedelta(days=i)).strftime('%y%m%d')
        filename = date + '.json'
        try:
            with open(filename, 'r') as load_g:
                comment_dict = json.load(load_g)  # title -> sentiment scores
            for key in comment_dict:
                print('key', key)
                numcos = cos_simlarity(keyword, key)
                if numcos > 0.1:  # keep titles similar enough to the query
                    print(key)
                    contextDict[key] = date
        except Exception:
            continue  # no readable file for that day
    # With the related titles in hand, look up each one's comments and
    # comment count by key and return everything as three dicts.
    commentdict = {}  # title -> list of sentiment scores
    countdict = {}    # title -> comment count
    for key, value in contextDict.items():
        commentsFile = str(value) + '.json'
        countFile = str(value) + 'count.json'
        with open(commentsFile, 'r') as load_g:
            comment_dict = json.load(load_g)
            commentdict[key] = comment_dict[key]
        with open(countFile, 'r') as load_f:
            count_dict = json.load(load_f)
            countdict[key] = count_dict[key]
    return contextDict, commentdict, countdict
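# Example return shapes (illustrative values):
#   contextDict : {'某新闻标题': '250101'}           # title -> date found
#   commentdict : {'某新闻标题': [0.91, 0.23, ...]}  # title -> sentiment scores
#   countdict   : {'某新闻标题': 128}                # title -> comment count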
# Build bag-of-words frequency vectors for two sentences over the union
# of their vocabularies.
def get_word_vector(s1, s2):
    # Tokenise with jieba
    list_word1 = list(jieba.cut(s1))
    list_word2 = list(jieba.cut(s2))
    # The vocabulary is the union of both token lists
    key_word = list(set(list_word1 + list_word2))
    # Zero-filled vectors, one slot per vocabulary word
    word_vector1 = np.zeros(len(key_word))
    word_vector2 = np.zeros(len(key_word))
    # Term frequency: count each vocabulary word's occurrences in each sentence
    for i in range(len(key_word)):
        for j in range(len(list_word1)):
            if key_word[i] == list_word1[j]:
                word_vector1[i] += 1
        for k in range(len(list_word2)):
            if key_word[i] == list_word2[k]:
                word_vector2[i] += 1
    return word_vector1, word_vector2
# Cosine similarity of the two term-frequency vectors:
# cos(v1, v2) = (v1 . v2) / (|v1| * |v2|), in [0, 1] for non-negative counts.
def cos_simlarity(s1, s2):
    vec1, vec2 = get_word_vector(s1, s2)
    dist1 = float(np.dot(vec1, vec2) /
                  (np.linalg.norm(vec1) * np.linalg.norm(vec2)))
    return dist1
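# Worked example (illustrative sentences, not project data): if the token
# lists are ['国家', '主席'] and ['国家', '总理'], the vocabulary has three
# words, the vectors are permutations of [1, 1, 0] and [1, 0, 1], and
# cos = 1 / (sqrt(2) * sqrt(2)) = 0.5, clearing the 0.1 threshold in findnews.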
##########################################################################
def index(request):
    # Render the search page.
    return render(request, 'index.html')
def test(request):
    # The front end posts either a date ("YYYY-MM-DD") or a keyword in the
    # 'num' field; respond with a JSON list of formatted result lines.
    s1 = request.POST['num']
    data = []
    relatedDict = {}
    if is_valid_date(s1):
        # Date query: convert "YYYY-MM-DD" to 'yymmdd' and report that day's top ten news.
        importdate = s1[2:4] + s1[5:7] + s1[8:]
        print(importdate)
        importDict, impComDict = findImportant(importdate, 10)
        for key, value in importDict.items():
            posPro, nevPro = handleMood(impComDict[key], key)
            sentence = str(key) + "\t\t\t评论人数:" + str(value) + \
                "\t积极态度:" + str(posPro) + "\t消极态度:" + str(nevPro)
            data.append(sentence)
    else:
        # Keyword query: search the last ten days' files for related news.
        print('keyword query', s1)
        relatedDict, commentDict, CountDict = findnews(s1)
        for key, value in relatedDict.items():
            print(key)
            posPro, nevPro = handleMood(commentDict[key], key)
            sentence = str(key) + "\t日期:" + str(value) + "\t评论人数:" + str(
                CountDict[key]) + "\t积极态度:" + str(posPro) + "\t消极态度:" + str(nevPro)
            data.append(sentence)
    return HttpResponse(json.dumps({'data': data}))
##########################################################################
'''
if __name__ == "__main__":
    keyword = '主席'
    finalDict = findnews(keyword)
    print(finalDict)
'''
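A quick way to exercise the test view without the front end is Django's test client (run from python manage.py shell). A minimal sketch, assuming the routing above and that the day's JSON files (such as 250101.json and 250101count.json) sit in the working directory:

import json
from django.test import Client

client = Client()
# Date query: that day's ten most-commented news with sentiment shares
resp = client.post('/test/', {'num': '2025-01-01'})
print(json.loads(resp.content)['data'])
# Keyword query: related news from the last ten days' files
resp = client.post('/test/', {'num': '主席'})
print(json.loads(resp.content)['data'])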