数据下载地址:https://download.csdn.net/download/weixin_43906500/14141832
通过对json数据进行处理,利用pyecharts库进行可视化展示
1.每月发帖数及关键词变化趋势展示
代码如下所示
import json
from collections import Counter
from pyecharts import Bar
import jieba
# Load the stop-word list (one entry per line) used to filter tokens.
# The context manager guarantees the file handle is closed after reading.
with open("stopword.txt", 'r', encoding="utf-8") as stopword_file:
    stopwords = [line.strip() for line in stopword_file]
# Also treat bare newline and space tokens as stop words.
stopwords_other = ['\n', ' ']
stopwords.extend(stopwords_other)
# Load the post records; each record is read for its 'time' and 'article' keys.
with open("data.json", 'r', encoding='utf-8') as load_f:
    load_dict = json.load(load_f)

# Build the lookup set once: testing membership against the stop-word list
# would cost a linear scan for every single token.
stopword_set = set(stopwords)

post_months = []  # one month key per post, counted below to get posts/month
dic_word = {}     # month key -> all tokens segmented from that month's articles
for post in load_dict:
    # First seven characters of the timestamp (presumably "YYYY-MM" -- confirm
    # against the data file).
    month = post['time'][0:7]
    post_months.append(month)
    # setdefault creates the list the first time a month is seen *and* still
    # records that first article's tokens (previously they were discarded).
    dic_word.setdefault(month, []).extend(jieba.lcut(post['article']))

# Number of posts per month, ordered by month key.
dic = dict(Counter(post_months))
d = sorted(dic.items(), key=lambda item: item[0])

# For every month pick the most frequent non-stop-word (among its top 12)
# that has not already been chosen for an earlier month.
# NOTE(review): if all 12 candidates of a month were already used, nothing is
# appended for that month, so key_word can end up shorter than d -- confirm
# that the code consuming key_word tolerates this.
key_word_used = []
key_word = []
for month, _post_count in d:
    word_count = Counter(
        token for token in dic_word[month] if token not in stopword_set
    )
    for word, _freq in word_count.most_common(12):
        if word not in key_word_used:
            key_word.append(word)
            key_word_used.append(word)
            break
columns = [i[0] for i in d]
data = [i[1] for i in d]
col = []
for i in range(len(columns)):
c1 =