python数据分析微信_Python——微信数据分析

最新推荐文章于 2024-04-30 18:24:33 发布

weixin_39565390

最新推荐文章于 2024-04-30 18:24:33 发布

阅读量164

点赞数

文章标签： python数据分析微信

数据可视化：http://echarts.baidu.com/echarts2/doc/example.html

import re

from wxpy import *

import jieba

import numpy

import pandas as pd

import matplotlib.pyplot as plt

from scipy.misc import imread

from wordcloud import WordCloud, ImageColorGenerator

# Initialize the wxpy bot

bot = Bot()

# Fetch all of your friends' information (per the original note, login is done by scanning a QR code)

My_friends = bot.friends()

print(type(My_friends))

def friends_count(friends=None):
    """Count friends by gender, print the tally and return it.

    Args:
        friends: Iterable of objects exposing a ``sex`` attribute. When
            omitted, the module-level ``My_friends`` collection is used.

    Returns:
        A dict of the form ``{'male': int, 'female': int}``. Entries whose
        ``sex`` is neither 1 nor 2 are left out of the tally.
    """
    if friends is None:
        friends = My_friends

    sex_dict = {'male': 0, 'female': 0}
    # This code counts sex == 1 as male and sex == 2 as female.
    labels = {1: 'male', 2: 'female'}
    for friend in friends:
        label = labels.get(friend.sex)
        if label is not None:
            sex_dict[label] += 1

    print(sex_dict)
    return sex_dict

def province_count(friends=None):
    """Count friends per province, print the result and return it.

    Args:
        friends: Iterable of objects exposing a ``province`` attribute. When
            omitted, the module-level ``My_friends`` collection is used.

    Returns:
        A list of ``{'name': province, 'value': count}`` dicts, one per
        province in the fixed table below and in the table's order.
        Provinces not present in the table are ignored.
    """
    if friends is None:
        friends = My_friends

    # Tally of friends for each known province, all starting at zero.
    province_dict = {
        '北京': 0, '上海': 0, '天津': 0, '重庆': 0,
        '河北': 0, '山西': 0, '吉林': 0, '辽宁': 0, '黑龙江': 0,
        '陕西': 0, '甘肃': 0, '青海': 0, '山东': 0, '福建': 0,
        '浙江': 0, '台湾': 0, '河南': 0, '湖北': 0, '湖南': 0,
        '江西': 0, '江苏': 0, '安徽': 0, '广东': 0, '海南': 0,
        '四川': 0, '贵州': 0, '云南': 0,
        '内蒙古': 0, '新疆': 0, '宁夏': 0, '广西': 0, '西藏': 0,
        '香港': 0, '澳门': 0,
    }

    for friend in friends:
        if friend.province in province_dict:
            province_dict[friend.province] += 1

    # Emit a JSON-array-like structure so the data is easy to feed to a chart.
    data = [
        {'name': name, 'value': count}
        for name, count in province_dict.items()
    ]
    print(data)
    return data

def write_txt_file(path, txt):
    """Append ``txt`` to the text file at ``path``.

    The file is opened in append mode with GB18030 encoding, so it is created
    when missing and existing content is kept. ``newline=''`` means newline
    characters in ``txt`` are written without translation.

    Args:
        path: Path of the file to append to.
        txt: Text to write.
    """
    with open(path, 'a', encoding='gb18030', newline='') as f:
        f.write(txt)

def read_txt_file(path):
    """Return the full contents of the GB18030-encoded text file at ``path``.

    ``newline=''`` means line endings are returned exactly as stored.

    Args:
        path: Path of the file to read.

    Returns:
        The decoded file contents as a single string.
    """
    with open(path, 'r', encoding='gb18030', newline='') as f:
        return f.read()

def show_signature(My_friends):
    """Build, save and display a word cloud of the friends' signatures.

    Steps: strip everything but Chinese characters from each signature, store
    the cleaned text in ``signatures.txt``, segment it with jieba, drop the
    words listed in ``stopwords.txt``, count the remaining words, and render
    the 100 most frequent ones into a word cloud shaped and coloured by
    ``background.jfif``. The image is written to ``output.png`` and shown
    with matplotlib.

    Args:
        My_friends: Iterable of objects exposing a ``signature`` attribute.
    """
    signature_path = 'signatures.txt'

    # Data cleaning: keep only runs of Chinese characters so punctuation and
    # other symbols do not distort the word frequencies. Compiled once,
    # outside the loop.
    pattern = re.compile(r'[一-龥]+')
    cleaned = ''.join(
        ''.join(pattern.findall(friend.signature or ''))
        for friend in My_friends
    )

    # write_txt_file appends, so empty the file first; otherwise every rerun
    # would pile the same signatures on top of the previous run's text and
    # inflate the counts.
    with open(signature_path, 'w', encoding='gb18030', newline=''):
        pass
    write_txt_file(signature_path, cleaned)

    # Read the text back and segment it into words.
    content = read_txt_file(signature_path)
    segment = jieba.lcut(content)
    words_df = pd.DataFrame({'segment': segment})

    # Load the stop words and filter them out.
    stopwords = pd.read_csv("stopwords.txt", index_col=False, quoting=3,
                            sep=" ", names=['stopword'], encoding='utf-8')
    words_df = words_df[~words_df.segment.isin(stopwords.stopword)]
    print(words_df)

    # Count occurrences per word. groupby().size() replaces the
    # dict-renaming form of agg(), which pandas 1.0+ rejects.
    words_stat = (
        words_df.groupby('segment')
        .size()
        .reset_index(name='计数')
        .sort_values(by=['计数'], ascending=False)
    )

    # Word cloud settings.
    # scipy.misc.imread was removed in SciPy 1.2, so the image is loaded with
    # matplotlib's reader instead.
    # NOTE(review): the file-level `from scipy.misc import imread` still has
    # to be dropped for the script to import under a modern SciPy.
    color_mask = plt.imread('background.jfif')
    wordcloud = WordCloud(
        font_path="simhei.ttf",    # font capable of rendering Chinese
        background_color="white",  # background colour
        max_words=100,             # maximum number of words drawn
        mask=color_mask,           # background image used as the mask
        max_font_size=100,         # largest font size
        random_state=42,
        # Default image size; when a background image is used the saved image
        # follows that image's size instead. margin is the spacing around
        # each word.
        width=1000, height=860, margin=2,
    )

    # generate() accepts raw text; since the frequencies are already known,
    # generate_from_frequencies() is used instead.
    top_words = words_stat.head(100)
    word_frequence = {
        word: int(count)
        for word, count in zip(top_words['segment'], top_words['计数'])
    }
    print(word_frequence)
    wordcloud.generate_from_frequencies(word_frequence)

    # Derive the colours from the background image and recolour the cloud.
    image_colors = ImageColorGenerator(color_mask)
    wordcloud.recolor(color_func=image_colors)

    # Save the image, then display it.
    wordcloud.to_file('output.png')
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()

# Script entry point: build and show the signature word cloud for the friends fetched above.
show_signature(My_friends)

weixin_39565390

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python数据分析微信_Python——微信数据分析

数据可视化：http://echarts.baidu.com/echarts2/doc/example.html 代码开头：import re；from wxpy import *；import jieba；import numpy；import pandas as pd；import matplotlib.pyplot as plt；from scipy.misc import imread；from wordcloud...
复制链接

扫一扫

评论

被折叠的条评论为什么被折叠?

到【灌水乐园】发言

查看更多评论

添加红包

成就一亿技术人!

hope_wisdom

发出的红包

实付元

使用余额支付

点击重新获取

扫码支付

钱包余额 0

抵扣说明：

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、付费专栏及课程。