1、https://download.csdn.net/download/qq_41635763/13489415或者在官网里下载wordcloud的whl文件,根据python版本下载,cp37m就是python3.7的版本
2、iPhone的聊天记录通过爱思助手导出,在爱思助手里面找到QQ.db之后,使用可以打开SQLite数据库的软件打开,推荐使用Navicat,打开过后根据QQ号即可找到对应的聊天消息
3、导出表为csv或者txt文件
4、编写python
import imageio
import jieba
import csv
import collections
from wordcloud import WordCloud
import re
import jieba.analyse
# Path of the chat-message CSV exported from the QQ.db table (step 3 above).
CSV_FILE_PATH="./tb_c2cMsg_xxxxxx.csv"
# Optional stop-words file (one word per line) used during data cleaning.
STOP_WORDS_FILE_PATH="./xxxxx.txt"
def read_csv_to_dict(index) -> list:
    """Read one column of the exported chat CSV and segment it with jieba.

    Args:
        index: zero-based column index of the message text in each CSV row.

    Returns:
        A flat list of segmented words from all kept rows.  (The original
        annotation said ``dict``, but a list has always been returned.)
    """
    # Encoding depends on how the table was exported: utf-8 / GB18030.
    with open(CSV_FILE_PATH, 'r', encoding='GB18030') as csvfile:
        reader = csv.reader(csvfile)
        words = []
        for columns in reader:
            # Guard against short/malformed rows.
            if index >= len(columns):
                continue
            cell = columns[index]
            # Skip rows that are markup (<...>) or URLs — not real chat text.
            if re.findall(r'<(.*)>(.*)', cell) or re.findall(r'http(.*)', cell):
                continue
            words += jieba.lcut(cell)
        return words
def analysis_sina_content():
    """Build a word cloud from the chat-message column and save it as a PNG.

    Reads column 1 of the exported CSV, cleans the segmented words, prints
    the most frequent ones, then renders a masked word cloud to disk.
    """
    # Column 1 holds the message text.
    words = read_csv_to_dict(1)
    # Stop words to drop during cleaning.  To load them from a file instead,
    # e.g.:
    #   with open(STOP_WORDS_FILE_PATH, 'r', encoding='gb18030') as fp:
    #       excludes.update(fp.read().split())
    excludes = set()
    # Data cleaning: keep words of length 2–5 that are not stop words.
    kept = [w for w in words if w not in excludes and 1 < len(w) < 6]
    # Join once instead of quadratic += concatenation.
    word_list = ' ' + ' '.join(kept)
    # Counter replaces the manual dict-and-sort bookkeeping
    # (collections was imported but previously unused).
    word_counts = collections.Counter(kept)
    # Print the top 101 entries (indices 0–100), as before.
    print(word_counts.most_common(101))
    # The cloud is drawn inside the shape of this mask image.
    color_mask = imageio.imread("1.png")
    wordcloud = WordCloud(
        background_color='white',
        font_path='/Library/Fonts/Arial Unicode.ttf',
        width=1000,
        height=600,
        collocations=False,
        mask=color_mask,
    ).generate(word_list)
    image_1 = wordcloud.to_image()
    # Save the rendered cloud to a file, then display it.
    wordcloud.to_file("xxxxx.png")
    image_1.show()
# Script entry point: run the analysis when the file is executed.
analysis_sina_content()
1.png
结果: