一、效果预览
年度报告
数据可视化
每月消息趋势
消息数量
饼图
一周内消息分布
一天中消息分布
每月信息数量
热力图1
热力图2
二、工具准备
1、python环境
https://www.python.org/ftp/python/3.12.1/python-3.12.1-embed-amd64.zip
2、pycharm工具
PyCharm:JetBrains 面向专业开发人员的 Python IDE
3、MemoTrace
GitHub - LC044/WeChatMsg: 提取微信聊天记录,将其导出成HTML、Word、CSV文档永久保存,对聊天记录进行分析生成年度聊天报告
点开链接后,点击页面右下角的Releases
然后滑到最下方,将exe文件下载到电脑上
需要科学上网,没有的话私信发给你
4、微信电脑版Windows(升级到最新版)
三、数据处理
1、微信记录迁移
首先确保手机和电脑处于同一无线网内,然后手机打开微信——我——设置——通用设置——聊天记录迁移与备份——迁移——迁移到电脑,将想要处理的聊天记录迁移到电脑上
2、微信记录导出
迁移完聊天记录后,重启微信,然后以管理员身份运行刚才下载的exe文件
点击获取信息
然后点击开始启动
点击左侧好友,找到想要导出的好友
这里就有一些相应的功能
3、数据可视化
1、预处理
首先需要在上一步,导出CSV格式聊天记录
需要导入的库
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.font_manager import FontProperties
读取聊天记录文件
# Location of the chat log exported to CSV in the previous step; point this at your own file.
csv_path = r"C:\Users\x\Desktop\data\聊天记录\****_utf8.csv"
df = pd.read_csv(csv_path)
将C:\Users\x\Desktop\data\聊天记录\****_utf8.csv修改为你导出的聊天记录文件路径
2、每月消息趋势
# Figure 1: bubble chart of how many messages were exchanged in each month.
title_style = {'fontname': 'Times New Roman', 'fontsize': 22}
label_style = {'fontname': 'Times New Roman', 'fontsize': 20}
tick_style = {'fontname': 'Times New Roman', 'fontsize': 15}

# Tag every message with its month number, then tally messages per month.
df['month'] = pd.to_datetime(df['StrTime']).dt.month
month_counts = df['month'].value_counts().sort_index()
# Bubble area scales with the month's message count.
scaled_sizes = month_counts * 0.08

plt.figure(facecolor='white')
plt.title('Figure 1: Monthly Trends in Message Counts', **title_style)
plt.xlabel('Month', **label_style)
plt.ylabel('Messages', **label_style)
plt.xticks(range(1, 13), **tick_style)
plt.yticks(**tick_style)
plt.scatter(month_counts.index, month_counts.values, color='#80BCBD', marker='o', s=scaled_sizes)
plt.grid(True, linestyle='solid', linewidth=1, color='lightgrey', axis='y')

# Resize to the export size, write the PNG, then display the chart.
fig = plt.gcf()
fig.set_size_inches(15, 8)
fig.savefig('chat_month.png', dpi=100)
plt.show()
3、消息数量
# Figure 2: monthly message counts for each side of the conversation.
# Rows with IsSender == 1 are plotted as XiaoSong, rows with IsSender == 0 as TiTi.
months = pd.to_datetime(df['StrTime']).dt.month
month_counts_xiaosong = months[df['IsSender'] == 1].value_counts().sort_index()
month_counts_titi = months[df['IsSender'] == 0].value_counts().sort_index()

# Create the figure at its final size first so tight_layout() below is
# computed for the canvas that is actually saved.
fig, ax = plt.subplots(figsize=(15, 8))

# (legend label, counts, line color, horizontal offset of the "Max" note)
senders = (
    ('TiTi', month_counts_titi, '#FFC0D9', 0.5),
    ('XiaoSong', month_counts_xiaosong, '#8ACDD7', 0.4),
)
for name, counts, color, x_offset in senders:
    if counts.empty:
        # A side with no messages has nothing to plot and no peak to mark.
        continue
    counts.plot(ax=ax, kind='line', marker='o', label=name, color=color)
    # Point an arrow at this sender's busiest month.
    peak_month = counts.idxmax()
    peak_count = counts.max()
    ax.annotate(f'Max: {peak_count}', xy=(peak_month, peak_count),
                xytext=(peak_month + x_offset, peak_count + 10),
                arrowprops=dict(facecolor='black', arrowstyle='->'),
                fontsize=18, fontname='Times New Roman')

plt.title('Figure 2: Trends in Monthly Message Counts', fontname='Times New Roman', fontsize=22)
plt.xlabel('Month', fontname='Times New Roman', fontsize=20)
plt.ylabel('Messages', fontname='Times New Roman', fontsize=20)
plt.xticks(range(1, 13), fontname='Times New Roman', fontsize=15)
plt.yticks(fontname='Times New Roman', fontsize=15)
plt.grid(True, linestyle='solid', linewidth=0.5, color='lightgrey')
font_prop = FontProperties(family='Times New Roman')
# Legend entries come from the label given to each plotted line.
plt.legend(loc="best", prop=font_prop)
plt.tight_layout()
fig.savefig('chat_plot.png', dpi=100)
plt.show()
4、饼图
# Figure 3: share of all messages sent by each side.
# value_counts() orders its result by frequency, so pin the order to
# IsSender 1, 0; otherwise the labels and colors below can end up on the
# wrong wedge (1 -> XiaoSong, 0 -> TiTi).
value_counts = df['IsSender'].value_counts().reindex([1, 0], fill_value=0)
labels = ['XiaoSong', 'TiTi']
colors = ['#8ACDD7', '#FFC0D9']
explode = (0.1, 0)  # pull the first wedge slightly out of the pie


def func(pct, allvals):
    """Format a wedge label as its percentage plus the absolute message count."""
    # Round instead of truncating: pct is a float, so the product can land
    # just below the true integer count.
    absolute = int(round(pct / 100. * np.sum(allvals)))
    return f"{pct:.1f}%\n({absolute:d})"


plt.figure(figsize=(15, 8))
plt.pie(value_counts, explode=explode, labels=labels, colors=colors,
        autopct=lambda pct: func(pct, value_counts), shadow=True, startangle=80,
        textprops={'style': 'italic', 'fontsize': 18})
plt.title('Figure 3: Distribution of Messages: TiTi vs. XiaoSong', fontname='Times New Roman', fontsize=22)
font_prop = FontProperties(family='Times New Roman')
plt.legend(labels, loc="best", prop=font_prop)
plt.axis('equal')  # keep the pie circular
fig = plt.gcf()
fig.savefig('chat_pie.png', dpi=100)
plt.show()
5、一周内消息分布
# Figure 4: share of messages falling on each day of the week.
dates = pd.to_datetime(df['StrTime'])
weekdays = dates.dt.day_name()
# Sorted by frequency, so the first wedge is the busiest weekday.
weekday_counts = weekdays.value_counts()
colors = ['#FF90BC', '#FFC0D9', '#F9F9E0', '#8ACDD7', '#EEE7DA', '#88AB8E', '#AFC8AD']
# Pull out only the first wedge. The list is sized from the data because
# plt.pie() rejects an explode sequence whose length differs from the number
# of wedges, which happens when some weekday has no messages at all.
explode = [0.1 if position == 0 else 0 for position in range(len(weekday_counts))]

plt.figure(figsize=(15, 8))
plt.pie(weekday_counts, explode=explode, labels=weekday_counts.index, colors=colors,
        autopct='%1.1f%%', shadow=True, startangle=90, textprops={'fontsize': 18})
plt.title('Figure 4: The Distribution of Messages during the Week', fontname='Times New Roman', fontsize=22)
font_prop = FontProperties(family='Times New Roman')
plt.legend(labels=weekday_counts.index, loc="best", prop=font_prop)
plt.axis('equal')  # keep the pie circular
fig = plt.gcf()
fig.savefig('chat_pie_2.png', dpi=100)
plt.show()
6、一天内信息分布
# Figure 5: histogram of messages by hour of the day.
df['hour'] = pd.to_datetime(df['StrTime']).dt.hour

# Select the seaborn style before the figure exists: axes that were created
# earlier keep whatever style was active at their creation.
sns.set_style('darkgrid')
plt.figure(figsize=(15, 8))
# Explicit bin edges 0, 1, ..., 24 give exactly one bar per clock hour and
# keep the bars aligned with the hour ticks, even when some hours have no
# messages (bins=24 would instead split the observed range into 24 parts).
sns.histplot(df['hour'], bins=np.arange(25), kde=True, color='lightcoral')
plt.title('Figure 5: The Distribution of Messages throughout the Day', fontname='Times New Roman', fontsize=18)
plt.xlabel('Time', fontname='Times New Roman', fontsize=18)
plt.ylabel('Number of messages', fontname='Times New Roman', fontsize=18)
plt.xticks(np.arange(0, 25, 1.0), fontname='Times New Roman', fontsize=15)
plt.yticks(fontname='Times New Roman', fontsize=15)
fig = plt.gcf()
fig.savefig('chat_time.png', dpi=100)
plt.show()
7、每月信息数量
# Figure 6: messages per day, one line per month of the chosen year.
year = 2023  # calendar year to chart; change it to match your chat history

# Index a copy by timestamp so the shared df keeps its original index and columns.
stamped = df.set_index(pd.to_datetime(df['StrTime']))

labels = [f'{year}-{month:02d}' for month in range(1, 13)]
colors = ['#FF9843', '#3468C0', '#D63484', '#402B3A', '#1f77b4', '#ff7f0e',
          '#2ca02c', '#d62728', '#9467bd', '#3468C0', '#8c564b', '#17becf']
# Fixed month -> color mapping, so a month keeps its color even when others are skipped.
month_colors = dict(zip(labels, colors))

# Daily message counts for every month that actually contains messages.
monthly_counts = {}
for month, month_str in enumerate(labels, start=1):
    in_month = (stamped.index.year == year) & (stamped.index.month == month)
    if not in_month.any():
        # Nothing was sent this month: there is no line to draw and no peak to mark.
        continue
    monthly_counts[month_str] = stamped[in_month].resample('D').size()

plt.figure(figsize=(15, 8))
for month_str, count_data in monthly_counts.items():
    plt.plot(count_data.index.day, count_data.values, marker='o', linestyle='-',
             color=month_colors[month_str], label=month_str)
    # Point an arrow at the busiest day of this month.
    max_value = count_data.max()
    max_day = count_data.idxmax().day
    plt.annotate(f'Max: {max_value}', xy=(max_day, max_value), xytext=(max_day + 1.2, max_value + 1),
                 arrowprops=dict(facecolor='black', arrowstyle='->'),
                 fontsize=18, fontname='Times New Roman')

plt.title(f'Figure 6: The Number of Messages Distributed Each Month from January {year} to December {year}',
          fontname='Times New Roman', fontsize=22)
plt.xlabel('Day', fontname='Times New Roman', fontsize=20)
plt.ylabel('Messages', fontname='Times New Roman', fontsize=20)
plt.xticks(range(1, 32), fontname='Times New Roman', fontsize=15)  # one tick per day of the month
plt.yticks(fontname='Times New Roman', fontsize=15)
font_prop = FontProperties(family='Times New Roman')
# Legend entries come from the label given to each plotted line.
plt.legend(loc="best", prop=font_prop)
plt.grid(True, linestyle='solid', linewidth=0.5, color='lightgrey')  # light grid lines
plt.tight_layout()
fig = plt.gcf()
fig.savefig('chat_plot_2.png', dpi=100)
plt.show()
8、热力图1
# Figure 7: one heatmap row per calendar day, shaded by that day's message count.
df['Date'] = pd.to_datetime(df['StrTime']).dt.date
# Count messages per day and order the rows chronologically.
heatmap_data = df['Date'].value_counts().sort_index().rename_axis('Date').to_frame('Chat_Count')

# Create the figure at its final size so tight_layout() below is computed
# for the canvas that is actually saved.
plt.figure(figsize=(15, 8))
sns.heatmap(heatmap_data, cmap="Reds", linewidths=0.5, linecolor='gray', xticklabels=False)
plt.title('Figure 7: Chat Counts Heatmap', fontname='Times New Roman', fontsize=22)
plt.ylabel('Date', fontname='Times New Roman', fontsize=20)
plt.yticks(fontname='Times New Roman')
plt.tight_layout()
fig = plt.gcf()
fig.savefig('heatmap_1.png', dpi=100)
plt.show()
9、热力图2
# Figure 8: day-of-month (rows) by month (columns) heatmap of message counts.
df['Date'] = pd.to_datetime(df['StrTime'])
df['Month'] = df['Date'].dt.month
# Each cell counts the messages sent on that day of that month.
heatmap_data = df.pivot_table(index=df['Date'].dt.day, columns='Month', values='StrTime', aggfunc='count')

# Start a fresh figure at its final size: the heatmap then never lands on a
# previously used figure, and tight_layout() below is computed for the
# canvas that is actually saved.
plt.figure(figsize=(15, 8))
sns.heatmap(heatmap_data, cmap="GnBu", linewidths=0.5, linecolor='gray')
plt.title('Figure 8: Chat Counts Heatmap by Month', fontname='Times New Roman', fontsize=22)
plt.xlabel('Month', fontname='Times New Roman', fontsize=20)
plt.ylabel('Day of Month', fontname='Times New Roman', fontsize=20)
plt.xticks(fontname='Times New Roman', fontsize=15)
plt.yticks(fontname='Times New Roman', fontsize=15)
plt.tight_layout()
fig = plt.gcf()
fig.savefig('heatmap_2.png', dpi=100)
plt.show()
4、完整代码
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.font_manager import FontProperties
# Location of the exported CSV chat log; point this at your own file.
csv_path = r"C:\Users\x\Desktop\data\聊天记录\****_utf8.csv"
df = pd.read_csv(csv_path)

# Figure 1: bubble chart of how many messages were exchanged in each month.
title_style = {'fontname': 'Times New Roman', 'fontsize': 22}
label_style = {'fontname': 'Times New Roman', 'fontsize': 20}
tick_style = {'fontname': 'Times New Roman', 'fontsize': 15}

# Tag every message with its month number, then tally messages per month.
df['month'] = pd.to_datetime(df['StrTime']).dt.month
month_counts = df['month'].value_counts().sort_index()
# Bubble area scales with the month's message count.
scaled_sizes = month_counts * 0.08

plt.figure(facecolor='white')
plt.title('Figure 1: Monthly Trends in Message Counts', **title_style)
plt.xlabel('Month', **label_style)
plt.ylabel('Messages', **label_style)
plt.xticks(range(1, 13), **tick_style)
plt.yticks(**tick_style)
plt.scatter(month_counts.index, month_counts.values, color='#80BCBD', marker='o', s=scaled_sizes)
plt.grid(True, linestyle='solid', linewidth=1, color='lightgrey', axis='y')

# Resize to the export size, write the PNG, then display the chart.
fig = plt.gcf()
fig.set_size_inches(15, 8)
fig.savefig('chat_month.png', dpi=100)
plt.show()
# Figure 2: monthly message counts for each side of the conversation.
# Rows with IsSender == 1 are plotted as XiaoSong, rows with IsSender == 0 as TiTi.
months = pd.to_datetime(df['StrTime']).dt.month
month_counts_xiaosong = months[df['IsSender'] == 1].value_counts().sort_index()
month_counts_titi = months[df['IsSender'] == 0].value_counts().sort_index()

# Create the figure at its final size first so tight_layout() below is
# computed for the canvas that is actually saved.
fig, ax = plt.subplots(figsize=(15, 8))

# (legend label, counts, line color, horizontal offset of the "Max" note)
senders = (
    ('TiTi', month_counts_titi, '#FFC0D9', 0.5),
    ('XiaoSong', month_counts_xiaosong, '#8ACDD7', 0.4),
)
for name, counts, color, x_offset in senders:
    if counts.empty:
        # A side with no messages has nothing to plot and no peak to mark.
        continue
    counts.plot(ax=ax, kind='line', marker='o', label=name, color=color)
    # Point an arrow at this sender's busiest month.
    peak_month = counts.idxmax()
    peak_count = counts.max()
    ax.annotate(f'Max: {peak_count}', xy=(peak_month, peak_count),
                xytext=(peak_month + x_offset, peak_count + 10),
                arrowprops=dict(facecolor='black', arrowstyle='->'),
                fontsize=18, fontname='Times New Roman')

plt.title('Figure 2: Trends in Monthly Message Counts', fontname='Times New Roman', fontsize=22)
plt.xlabel('Month', fontname='Times New Roman', fontsize=20)
plt.ylabel('Messages', fontname='Times New Roman', fontsize=20)
plt.xticks(range(1, 13), fontname='Times New Roman', fontsize=15)
plt.yticks(fontname='Times New Roman', fontsize=15)
plt.grid(True, linestyle='solid', linewidth=0.5, color='lightgrey')
font_prop = FontProperties(family='Times New Roman')
# Legend entries come from the label given to each plotted line.
plt.legend(loc="best", prop=font_prop)
plt.tight_layout()
fig.savefig('chat_plot.png', dpi=100)
plt.show()
# Figure 3: share of all messages sent by each side.
# value_counts() orders its result by frequency, so pin the order to
# IsSender 1, 0; otherwise the labels and colors below can end up on the
# wrong wedge (1 -> XiaoSong, 0 -> TiTi).
value_counts = df['IsSender'].value_counts().reindex([1, 0], fill_value=0)
labels = ['XiaoSong', 'TiTi']
colors = ['#8ACDD7', '#FFC0D9']
explode = (0.1, 0)  # pull the first wedge slightly out of the pie


def func(pct, allvals):
    """Format a wedge label as its percentage plus the absolute message count."""
    # Round instead of truncating: pct is a float, so the product can land
    # just below the true integer count.
    absolute = int(round(pct / 100. * np.sum(allvals)))
    return f"{pct:.1f}%\n({absolute:d})"


plt.figure(figsize=(15, 8))
plt.pie(value_counts, explode=explode, labels=labels, colors=colors,
        autopct=lambda pct: func(pct, value_counts), shadow=True, startangle=80,
        textprops={'style': 'italic', 'fontsize': 18})
plt.title('Figure 3: Distribution of Messages: TiTi vs. XiaoSong', fontname='Times New Roman', fontsize=22)
font_prop = FontProperties(family='Times New Roman')
plt.legend(labels, loc="best", prop=font_prop)
plt.axis('equal')  # keep the pie circular
fig = plt.gcf()
fig.savefig('chat_pie.png', dpi=100)
plt.show()
# Figure 4: share of messages falling on each day of the week.
dates = pd.to_datetime(df['StrTime'])
weekdays = dates.dt.day_name()
# Sorted by frequency, so the first wedge is the busiest weekday.
weekday_counts = weekdays.value_counts()
colors = ['#FF90BC', '#FFC0D9', '#F9F9E0', '#8ACDD7', '#EEE7DA', '#88AB8E', '#AFC8AD']
# Pull out only the first wedge. The list is sized from the data because
# plt.pie() rejects an explode sequence whose length differs from the number
# of wedges, which happens when some weekday has no messages at all.
explode = [0.1 if position == 0 else 0 for position in range(len(weekday_counts))]

plt.figure(figsize=(15, 8))
plt.pie(weekday_counts, explode=explode, labels=weekday_counts.index, colors=colors,
        autopct='%1.1f%%', shadow=True, startangle=90, textprops={'fontsize': 18})
plt.title('Figure 4: The Distribution of Messages during the Week', fontname='Times New Roman', fontsize=22)
font_prop = FontProperties(family='Times New Roman')
plt.legend(labels=weekday_counts.index, loc="best", prop=font_prop)
plt.axis('equal')  # keep the pie circular
fig = plt.gcf()
fig.savefig('chat_pie_2.png', dpi=100)
plt.show()
# Figure 5: histogram of messages by hour of the day.
df['hour'] = pd.to_datetime(df['StrTime']).dt.hour

# Select the seaborn style before the figure exists: axes that were created
# earlier keep whatever style was active at their creation.
sns.set_style('darkgrid')
plt.figure(figsize=(15, 8))
# Explicit bin edges 0, 1, ..., 24 give exactly one bar per clock hour and
# keep the bars aligned with the hour ticks, even when some hours have no
# messages (bins=24 would instead split the observed range into 24 parts).
sns.histplot(df['hour'], bins=np.arange(25), kde=True, color='lightcoral')
plt.title('Figure 5: The Distribution of Messages throughout the Day', fontname='Times New Roman', fontsize=18)
plt.xlabel('Time', fontname='Times New Roman', fontsize=18)
plt.ylabel('Number of messages', fontname='Times New Roman', fontsize=18)
plt.xticks(np.arange(0, 25, 1.0), fontname='Times New Roman', fontsize=15)
plt.yticks(fontname='Times New Roman', fontsize=15)
fig = plt.gcf()
fig.savefig('chat_time.png', dpi=100)
plt.show()
# Figure 6: messages per day, one line per month of the chosen year.
year = 2023  # calendar year to chart; change it to match your chat history

# Index a copy by timestamp so the shared df keeps its original index and columns.
stamped = df.set_index(pd.to_datetime(df['StrTime']))

labels = [f'{year}-{month:02d}' for month in range(1, 13)]
colors = ['#FF9843', '#3468C0', '#D63484', '#402B3A', '#1f77b4', '#ff7f0e',
          '#2ca02c', '#d62728', '#9467bd', '#3468C0', '#8c564b', '#17becf']
# Fixed month -> color mapping, so a month keeps its color even when others are skipped.
month_colors = dict(zip(labels, colors))

# Daily message counts for every month that actually contains messages.
monthly_counts = {}
for month, month_str in enumerate(labels, start=1):
    in_month = (stamped.index.year == year) & (stamped.index.month == month)
    if not in_month.any():
        # Nothing was sent this month: there is no line to draw and no peak to mark.
        continue
    monthly_counts[month_str] = stamped[in_month].resample('D').size()

plt.figure(figsize=(15, 8))
for month_str, count_data in monthly_counts.items():
    plt.plot(count_data.index.day, count_data.values, marker='o', linestyle='-',
             color=month_colors[month_str], label=month_str)
    # Point an arrow at the busiest day of this month.
    max_value = count_data.max()
    max_day = count_data.idxmax().day
    plt.annotate(f'Max: {max_value}', xy=(max_day, max_value), xytext=(max_day + 1.2, max_value + 1),
                 arrowprops=dict(facecolor='black', arrowstyle='->'),
                 fontsize=18, fontname='Times New Roman')

plt.title(f'Figure 6: The Number of Messages Distributed Each Month from January {year} to December {year}',
          fontname='Times New Roman', fontsize=22)
plt.xlabel('Day', fontname='Times New Roman', fontsize=20)
plt.ylabel('Messages', fontname='Times New Roman', fontsize=20)
plt.xticks(range(1, 32), fontname='Times New Roman', fontsize=15)  # one tick per day of the month
plt.yticks(fontname='Times New Roman', fontsize=15)
font_prop = FontProperties(family='Times New Roman')
# Legend entries come from the label given to each plotted line.
plt.legend(loc="best", prop=font_prop)
plt.grid(True, linestyle='solid', linewidth=0.5, color='lightgrey')  # light grid lines
plt.tight_layout()
fig = plt.gcf()
fig.savefig('chat_plot_2.png', dpi=100)
plt.show()
# Figure 7: one heatmap row per calendar day, shaded by that day's message count.
df['Date'] = pd.to_datetime(df['StrTime']).dt.date
# Count messages per day and order the rows chronologically.
heatmap_data = df['Date'].value_counts().sort_index().rename_axis('Date').to_frame('Chat_Count')

# Create the figure at its final size so tight_layout() below is computed
# for the canvas that is actually saved.
plt.figure(figsize=(15, 8))
sns.heatmap(heatmap_data, cmap="Reds", linewidths=0.5, linecolor='gray', xticklabels=False)
plt.title('Figure 7: Chat Counts Heatmap', fontname='Times New Roman', fontsize=22)
plt.ylabel('Date', fontname='Times New Roman', fontsize=20)
plt.yticks(fontname='Times New Roman')
plt.tight_layout()
fig = plt.gcf()
fig.savefig('heatmap_1.png', dpi=100)
plt.show()
# Figure 8: day-of-month (rows) by month (columns) heatmap of message counts.
df['Date'] = pd.to_datetime(df['StrTime'])
df['Month'] = df['Date'].dt.month
# Each cell counts the messages sent on that day of that month.
heatmap_data = df.pivot_table(index=df['Date'].dt.day, columns='Month', values='StrTime', aggfunc='count')

# Start a fresh figure at its final size: the heatmap then never lands on a
# previously used figure, and tight_layout() below is computed for the
# canvas that is actually saved.
plt.figure(figsize=(15, 8))
sns.heatmap(heatmap_data, cmap="GnBu", linewidths=0.5, linecolor='gray')
plt.title('Figure 8: Chat Counts Heatmap by Month', fontname='Times New Roman', fontsize=22)
plt.xlabel('Month', fontname='Times New Roman', fontsize=20)
plt.ylabel('Day of Month', fontname='Times New Roman', fontsize=20)
plt.xticks(fontname='Times New Roman', fontsize=15)
plt.yticks(fontname='Times New Roman', fontsize=15)
plt.tight_layout()
fig = plt.gcf()
fig.savefig('heatmap_2.png', dpi=100)
plt.show()