# Scrape WeChat friends' nicknames, gender, location, and signatures, and build a word cloud from the signatures
#coding:utf-8
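"""
WeChat scraper: collects each WeChat friend's nickname, gender, location, and signature.
Workflow: after starting the program, scan the generated QR code with WeChat. The script then
writes the friends' information to a CSV file (data.csv, which can be opened in Excel)
and produces a word cloud of the keywords extracted from all friends' signatures.
Requirements: Anaconda3 interpreter.
To install a library, run `pip install <library name>` on the command line (cmd).
"""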
#导入库
import itchat #微信接口
import re
import jieba
import matplotlib.pyplot as plt
from wordcloud import WordCloud,ImageColorGenerator
import numpy as np
import PIL.Image as Image
from pandas import DataFrame
# Log in to WeChat; per the module docstring this is done by scanning a QR code.
itchat.login()
# Fetch the friend list and keep a sliced copy of it ([0:]).
# NOTE(review): update=True presumably forces a fresh fetch instead of a cached
# list - confirm against the itchat docs.
# NOTE(review): the gender tally further down skips index 0 - presumably that
# entry is the logged-in account itself; confirm.
friends=itchat.get_friends(update=True)[0:]
# Tally friends by gender and print each group's share of the total.
# The "Sex" field is 1 for male and 2 for female; any other value (0 when no
# gender is set) is counted as "other". The first entry of `friends` is skipped.
# NOTE(review): presumably index 0 is the logged-in account itself - confirm.
male = female = other = 0
for friend in friends[1:]:
    sex = friend["Sex"]
    if sex == 1:
        male += 1
    elif sex == 2:
        female += 1
    else:
        other += 1
total = male + female + other
# With no friends every count is 0, so dividing by 1 instead of 0 prints 0.00%
# for each group rather than raising ZeroDivisionError.
divisor = total or 1
print("男性好友:%.2f%%" % (float(male) / divisor * 100) + "\n" +
      "女性好友:%.2f%%" % (float(female) / divisor * 100) + "\n" +
      "其他:%.2f%%" % (float(other) / divisor * 100))
def get_var(var, source=None):
    """Collect one field from every contact record.

    Args:
        var: Key to read from each record (e.g. "NickName").
        source: Optional iterable of dict-like records. When omitted, the
            module-level ``friends`` list is used, so existing
            ``get_var(name)`` calls behave exactly as before.

    Returns:
        A list containing ``record[var]`` for each record, in iteration order.

    Raises:
        KeyError: If a dict record does not contain ``var``.
    """
    # Resolve the global lazily so the function can also be used (and tested)
    # with an explicit record list.
    records = friends if source is None else source
    return [record[var] for record in records]
# Pull each exported field out of the friend list as its own column.
NickName, Sex, Province, City, Signature = [
    get_var(field)
    for field in ("NickName", "Sex", "Province", "City", "Signature")
]
data = {
    'NickName': NickName,
    'Sex': Sex,
    'Province': Province,
    'City': City,
    'Signature': Signature,
}
# Write the table to data.csv as UTF-8, keeping the row index as a column.
# NOTE(review): if the file is meant to be opened directly in Excel, a BOM
# ("utf-8-sig") may be needed for non-ASCII text to display - confirm.
frame = DataFrame(data)
frame.to_csv('data.csv', index=True, encoding="utf-8")
# Build the word-cloud corpus from every friend's signature.
# Each signature is stripped of surrounding whitespace and of the literal
# words "span", "class" and "emoji"; the regex then removes tokens that start
# with "1f" followed by digits, plus the characters < > / =.
# NOTE(review): these look like leftovers of HTML emoji markup - confirm.
# The pattern is a raw string (no invalid-escape warnings for \d and \w) and is
# compiled once, outside the loop.
rep = re.compile(r"1f\d+\w*|[<>/=]")
siglist = []
for friend in friends:
    signature = (
        friend["Signature"]
        .strip()
        .replace("span", "")
        .replace("class", "")
        .replace("emoji", "")
    )
    siglist.append(rep.sub("", signature))
text = "".join(siglist)
# Segment the combined text with jieba (cut_all=True) and join the tokens with
# single spaces; this string is what the word cloud is generated from.
wordlist = jieba.cut(text, cut_all=True)
word_space_split = " ".join(wordlist)
# Render the word cloud, using bg.jpg both as the mask and as the color source.
# The image is opened in a `with` block so the file is closed as soon as its
# pixels have been copied into the NumPy array.
with Image.open("bg.jpg") as mask_image:
    coloring = np.array(mask_image)
my_wordcloud = WordCloud(
    background_color="white",
    max_words=2000,
    mask=coloring,
    max_font_size=60,
    random_state=42,
    scale=2,
    font_path="simhei.ttf",
).generate(word_space_split)
image_colors = ImageColorGenerator(coloring)
# recolor() recolors the cloud in place and returns it, so one imshow call is
# enough; drawing my_wordcloud a second time would only stack an identical
# image on the same axes.
plt.imshow(my_wordcloud.recolor(color_func=image_colors))
plt.axis("off")
plt.show()
# Full version download: https://download.csdn.net/download/qq_37913997/10711595
# Contact QQ: 2422035338