Having some spare time, I tidied up an earlier project of mine: a Weibo public-opinion sentiment analysis system built with Python.
1. Frameworks and Packages Used
jieba
Django
snownlp
sqlite
pandas
tqdm
scikit-learn
pyecharts
scrapy
query
Pillow
pymongo
2. System Features and Implementation
2.1 Login Page
UI preview: (screenshot omitted)
The login page offers both login and registration. The corresponding implementations are:
Registration:
from django.shortcuts import render, redirect
from .models import User

def register(request):
    if request.method == "POST":
        name = request.POST.get('name')
        tel = request.POST.get('tel')
        pwd = request.POST.get('pwd')
        # print(name, tel, pwd)  # debug output
        # Refuse duplicate registrations keyed on the phone number
        if User.objects.filter(tel=tel).exists():
            msg = "You already have an account, please log in"
        else:
            User.objects.create(name=name, tel=tel, password=pwd)
            msg = "Registration successful, please log in!"
        return render(request, 'login.html', locals())
    else:
        msg = ""
        return render(request, 'register.html', locals())
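Note that this view stores the password in plain text. A minimal hardening sketch, assuming the same User model, would hash it with Django's built-in helpers (make_password / check_password):

from django.contrib.auth.hashers import make_password, check_password

# At registration time, store a salted hash instead of the raw password
User.objects.create(name=name, tel=tel, password=make_password(pwd))

# At login time, compare the submitted password against the stored hash
user = User.objects.filter(tel=tel).first()
if user and check_password(pwd, user.password):
    ...  # proceed with login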
Login:
def login(request):
    if request.method == "POST":
        tel, pwd = request.POST.get('tel'), request.POST.get('pwd')
        user_qs = User.objects.filter(tel=tel, password=pwd)
        if user_qs.exists():
            obj = redirect('/')  # redirect the user to the site home page
            # Remember the user for 24 hours via a uid cookie
            obj.set_cookie('uid', user_qs[0].id, max_age=60 * 60 * 24)
            return obj
        else:
            msg = "Incorrect user credentials, please try again!"
            return render(request, 'login.html', locals())
    else:
        return render(request, 'login.html', locals())
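Because a raw uid cookie can be tampered with client-side, a safer variant (a sketch using Django's signed-cookie API, reusing the user_qs queryset from the view above, not the project's actual code) would sign it:

# Sign the cookie so the client cannot forge another user's id
obj.set_signed_cookie('uid', user_qs[0].id, salt='uid', max_age=60 * 60 * 24)

# When reading it back, a failed signature check falls back to the default
uid = request.get_signed_cookie('uid', default=-1, salt='uid')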
2.2 Feature Page: Topic Selection
The currently trending topics can be selected through a drop-down menu. The index view below builds the topic list, reads the logged-in user from the uid cookie, and paginates the matching posts.
def index(request):
    # Build the topic list from all Weibo records (topics are comma-separated)
    topic_raw = [item.topic for item in WeiBo.objects.all() if item.topic]
    topic_list = []
    for item in topic_raw:
        topic_list.extend(item.split(','))
    topic_list = list(set(topic_list))
    # User info, read from the uid cookie
    uid = int(request.COOKIES.get('uid', -1))
    if uid != -1:
        username = User.objects.filter(id=uid)[0].name
    # Selected topic (default: the first topic, showing all records)
    if 'key' not in request.GET:
        key = topic_list[0]
        raw_data = WeiBo.objects.all()
    else:
        key = request.GET.get('key')
        raw_data = WeiBo.objects.filter(topic__contains=key)
    # Pagination, 20 records per page
    page = int(request.GET.get('page', 1))
    data_list = raw_data[(page - 1) * 20: page * 20]
    return render(request, 'index.html', locals())
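The manual slicing works, but as a sketch of an alternative (names match the view above), Django's built-in Paginator gives page counts for free:

from django.core.paginator import Paginator

paginator = Paginator(raw_data, 20)          # 20 records per page
page_obj = paginator.get_page(request.GET.get('page', 1))
data_list = page_obj.object_list             # records for the current page
total_pages = paginator.num_pages            # handy for rendering page links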
2.3 Positive/Negative Sentiment Classification View
from django.http import JsonResponse
from snownlp import SnowNLP
from tqdm import tqdm

def fenlei(request):
    for item in tqdm(WeiBo.objects.all()):
        # Run SnowNLP sentiment analysis on each post's content; scores above
        # 0.45 are labelled '正向' (positive), the rest '负向' (negative)
        emotion = '正向' if SnowNLP(item.content).sentiments > 0.45 else '负向'
        WeiBo.objects.filter(id=item.id).update(emotion=emotion)
    return JsonResponse({'status': 1, 'msg': 'Operation successful'})
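As a quick standalone check of how the 0.45 threshold behaves (the sample sentences below are made up for illustration):

from snownlp import SnowNLP

for text in ['这部电影太好看了', '服务态度差,再也不来了']:
    score = SnowNLP(text).sentiments  # probability of positive sentiment, in [0, 1]
    print(text, score, '正向' if score > 0.45 else '负向')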
2.4 K-means Clustering Analysis
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.cluster import KMeans

def kmeansPlot(request):
    uid = int(request.COOKIES.get('uid', -1))
    if uid != -1:
        username = User.objects.filter(id=uid)[0].name
    # Number of clusters (default 2)
    num = int(request.GET.get('num', 2))
    ### Training
    # Clean the text (clearTxt and sent2word are helper functions defined
    # elsewhere in the project: text cleaning and word segmentation)
    clean_data = [item.content for item in WeiBo.objects.all()]
    clean_data = [clearTxt(item) for item in clean_data]
    clean_data = [sent2word(item) for item in clean_data]
    # Convert the documents into a term-frequency matrix: element a[i][j]
    # is the frequency of word j in document i
    vectorizer = CountVectorizer(max_features=20000)
    X = vectorizer.fit_transform(clean_data)
    # Re-weight the raw counts with TF-IDF
    X_tfidf = TfidfTransformer().fit_transform(X)
    tfidf_matrix = X_tfidf.toarray()
    # Cluster into `num` groups
    clf = KMeans(n_clusters=num)
    clf.fit(tfidf_matrix)
    result_list = list(clf.predict(tfidf_matrix))
    div_id_list = [f'container{i + 1}' for i in range(num)]
    data_list = []
    for label, name in enumerate(div_id_list):
        tmp = {'id': name, 'data': [], 'title': f'Cluster {label + 1}'}
        # Concatenate the cleaned text of every post in this cluster
        tmp_text = ''
        for la, text in zip(result_list, clean_data):
            if la == label:
                tmp_text += ' ' + text
        words = [item for item in tmp_text.split(' ') if item.strip() != '']
        # Rank the words by frequency and keep the top 100
        rank_data = [
            {'value': words.count(item), 'name': item}
            for item in set(words)
        ]
        rank_data = sorted(rank_data, key=lambda x: x['value'], reverse=True)[:100]
        tmp['data'] = rank_data
        data_list.append(tmp)
    return render(request, 'kmeansPlot.html', locals())
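The cluster count num comes straight from the query string. As an offline sketch for choosing it (not part of the project's code, reusing tfidf_matrix from above), the elbow method over KMeans inertia_ is a common starting point:

from sklearn.cluster import KMeans

# Try several k values and watch where the inertia curve flattens out
for k in range(2, 10):
    km = KMeans(n_clusters=k).fit(tfidf_matrix)
    print(k, km.inertia_)  # within-cluster sum of squared distances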