利用 Python 进行 QQ 聊天记录分析

该代码使用Python对聊天记录进行处理,包括解析文件、筛选2023年后的记录、进行情感分析、计算对话间的时间差以及统计词频。通过SnowNLP进行情感分析,使用jieba进行分词,并生成词云图展示高频词汇。
摘要由CSDN通过智能技术生成
import re
import jieba
import pandas as pd
import numpy as np
from datetime import datetime
from snownlp import SnowNLP
from collections import Counter
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from datetime import timedelta


# 解析聊天记录文件
def parse_chat_records(file_path, min_year=2023):
    """Parse an exported chat-log text file into (datetime, user, message) tuples.

    Each record in the file is expected to look like:
        ``YYYY-MM-DD HH:MM:SS <user>\\n<message>\\n``

    Args:
        file_path: path to the UTF-8 chat-log file.
        min_year: keep only records from this year onward
            (defaults to 2023, matching the original hard-coded filter).

    Returns:
        List of (date_time_str, user, message) tuples.
    """
    with open(file_path, encoding='utf-8') as f:
        content = f.read()

    record_re = re.compile(r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) (.*?)\n(.*?)\n')
    # The timestamp is zero-padded ISO-like text, so the first 4 chars are the year.
    return [
        (date_time, user, message)
        for date_time, user, message in record_re.findall(content)
        if int(date_time[:4]) >= min_year
    ]

# 情感分析
def sentiment_analysis(messages):
    """Score each non-empty message with SnowNLP.

    Args:
        messages: iterable of (date_time, user, message) tuples.

    Returns:
        List of sentiment scores in [0, 1], one per non-blank message.
    """
    return [
        SnowNLP(text).sentiments
        for _, _, text in messages
        if text.strip()  # blank/whitespace-only messages carry no sentiment
    ]

# 计算时间差值
def time_differences(messages):
    """Compute gaps (in seconds) between consecutive messages.

    Gaps longer than 8 hours are treated as separate conversations and
    dropped; negative gaps (out-of-order timestamps) are dropped too,
    because ``timedelta.seconds`` on a negative delta would previously
    yield a bogus value near 86400.

    Args:
        messages: list of (date_time_str, user, message) tuples, where
            date_time_str is formatted '%Y-%m-%d %H:%M:%S'.

    Returns:
        List of gap lengths in seconds (floats).
    """
    fmt = '%Y-%m-%d %H:%M:%S'
    max_gap = timedelta(hours=8)
    diffs = []
    for i in range(1, len(messages)):
        time1 = datetime.strptime(messages[i - 1][0], fmt)
        time2 = datetime.strptime(messages[i][0], fmt)
        delta = time2 - time1

        # Keep only non-negative gaps within the same-conversation window.
        if timedelta(0) <= delta <= max_gap:
            # total_seconds() is correct for any delta, unlike .seconds.
            diffs.append(delta.total_seconds())
    return diffs

# 词频统计
def word_frequency(messages, stopwords_path=None):
    """Count word frequencies across all messages with jieba segmentation.

    Args:
        messages: iterable of (date_time, user, message) tuples.
        stopwords_path: optional path to a UTF-8 file with one stopword
            per line; these are filtered in addition to the built-ins.

    Returns:
        List of (word, count) pairs, most common first.
    """
    # Built-in noise words (chat-client placeholders and filler words).
    # Folding them into one set gives a single O(1) membership test per
    # token instead of scanning an inline list for every word.
    stopwords = {'图片', '表情', '这些', '那些', '就是', '那个', '之前', '一个', '现在'}

    if stopwords_path:
        with open(stopwords_path, encoding='utf-8') as f:
            stopwords.update(line.strip() for line in f)

    words = [
        word
        for _, _, message in messages
        for word in jieba.cut(message)
        # Single-character tokens are mostly punctuation/particles.
        if len(word) > 1 and word not in stopwords
    ]
    return Counter(words).most_common()

# 生成词云
def generate_wordcloud(words, file_path):
    """Render a word cloud from (word, count) pairs and save it to an image.

    Args:
        words: sequence of (word, frequency) pairs, e.g. Counter.most_common().
        file_path: output image path.
    """
    if not words:
        print('没有足够的词汇生成词云。')
        return

    wc = WordCloud(font_path='simhei.ttf', background_color='white', width=1920, height=1080)
    wc.generate_from_frequencies(dict(words))
    # Draw on a dedicated figure and close it afterwards, so pyplot's
    # global current-figure state does not leak into later plots
    # (otherwise a subsequent chart would be drawn over this image).
    fig = plt.figure()
    plt.imshow(wc, interpolation='bilinear')
    plt.axis('off')
    plt.savefig(file_path)
    plt.close(fig)
# 生成圆饼图
def generate_pie_chart(labels, values, file_path):
    """Draw a pie chart and save it to an image file.

    Args:
        labels: sequence of slice labels.
        values: sequence of slice sizes (same length as labels).
        file_path: output image path.
    """
    # Use a dedicated figure so this chart never inherits (or pollutes)
    # pyplot's global current figure shared with other plotting helpers.
    fig = plt.figure()
    plt.pie(values, labels=labels, autopct='%1.1f%%')
    plt.savefig(file_path)
    plt.close(fig)
    
    
# 主程序
def analyze_chat_records(file_path):
    """Run the full analysis pipeline on a chat-log file and print results.

    Parses the log, scores sentiment, measures reply gaps, counts word
    frequencies, and writes a word cloud to 'wordcloud.png'.

    Args:
        file_path: path to the UTF-8 chat-log file.
    """
    messages = parse_chat_records(file_path)
    sentiment_scores = sentiment_analysis(messages)
    time_diffs = time_differences(messages)
    words = word_frequency(messages)
    generate_wordcloud(words, 'wordcloud.png')

    # np.mean([]) emits a RuntimeWarning and returns nan; guard explicitly
    # so an empty log (e.g. no post-2023 messages) prints cleanly.
    avg_sentiment = np.mean(sentiment_scores) if sentiment_scores else float('nan')
    avg_gap = np.mean(time_diffs) if time_diffs else float('nan')

    # 输出结果
    print('情感分析(平均分):', avg_sentiment)
    print('每次对话之间的时间差值(平均秒数):', avg_gap)
    print('词频统计(前10):', words[:10])



# Script entry point: analyze the sample chat log in the working directory.
if __name__ == '__main__':
    analyze_chat_records('test.txt')

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值