问题:参照课上讲解的吴迪老师微信好友数据分析,请选择你的微信好友数据,或者你的QQ好友数据,或者你的班级同学数据,或者你能获取的其他人物数据
作为分析对象。然后利用(但不限于)课上讲的技术对其进行数据分析。比如分析微信好友数据时,可以可视化好友男女比例分布,可视化省份来源,可视化签名的情感强度值等等。
文档
1.柱状图学生省份和城市分析
from pyecharts.charts import Bar
from pyecharts import options as opts
import xlrd
def getProvince(filename):
    """Count students per province for each workbook.

    Args:
        filename: Iterable of ``.xls`` workbook paths (a list of paths,
            despite the singular name). Any number of files is accepted.

    Returns:
        A list holding one ``{province: count}`` dict per workbook, in the
        same order as ``filename``.
    """
    province_col = 6  # zero-based column whose cell value is tallied
    per_file_counts = []
    for path in filename:
        workbook = xlrd.open_workbook(path)
        counts = {}
        for sheet in workbook.sheets():
            # Start at row 1: row 0 is skipped (presumably the header row).
            for row in range(1, sheet.nrows):
                province = sheet.cell(row, province_col).value
                counts[province] = counts.get(province, 0) + 1
        # A fresh dict per file keeps files from bleeding into each other.
        per_file_counts.append(counts)
    return per_file_counts
def Provincepyechart(province_list):
    """Render a grouped bar chart of per-province student counts.

    Args:
        province_list: Sequence of ``{province: count}`` dicts, one per
            class. Up to the first three are plotted, in order, as the
            series 软件181学生 / 软件191学生 / 软件201学生.

    Writes ``181,191,201级学生省份分析.html``; returns ``None``.
    """
    series_names = ['软件181学生', '软件191学生', '软件201学生']
    plotted = list(zip(series_names, province_list))

    # Build one shared category axis (first-seen order) so every series can
    # be aligned to it; values are looked up by province, never by position.
    categories = []
    seen = set()
    for _, counts in plotted:
        print(list(counts))
        for province in counts:
            if province not in seen:
                seen.add(province)
                categories.append(province)

    bar = Bar().add_xaxis(categories)
    for series_name, counts in plotted:
        # A class with no student from a province contributes 0 there.
        bar.add_yaxis(series_name, [counts.get(p, 0) for p in categories])
    bar.set_global_opts(
        title_opts=opts.TitleOpts(title="18,19,20级学生省份分析"),
        xaxis_opts=opts.AxisOpts(
            # Rotate the labels so long category names do not overlap.
            axislabel_opts=opts.LabelOpts(rotate=45),
        ),
    )
    bar.render('181,191,201级学生省份分析.html')
def getCity(filename):
    """Count students per city for each workbook.

    Args:
        filename: Iterable of ``.xls`` workbook paths (a list of paths,
            despite the singular name). Any number of files is accepted.

    Returns:
        A list holding one ``{city: count}`` dict per workbook, in the same
        order as ``filename``.
    """
    city_col = 7  # zero-based column whose cell value is tallied
    per_file_counts = []
    for path in filename:
        workbook = xlrd.open_workbook(path)
        counts = {}
        for sheet in workbook.sheets():
            # Start at row 1: row 0 is skipped (presumably the header row).
            for row in range(1, sheet.nrows):
                city = sheet.cell(row, city_col).value
                counts[city] = counts.get(city, 0) + 1
        # A fresh dict per file keeps files from bleeding into each other.
        per_file_counts.append(counts)
    return per_file_counts
def cityPyechart(city_list):
    """Render a grouped bar chart of per-city student counts.

    Args:
        city_list: Sequence of ``{city: count}`` dicts, one per class. Up to
            the first three are plotted, in order, as the series
            软件181学生 / 软件191学生 / 软件201学生.

    Writes ``181,191,201级学生城市分析.html``; returns ``None``.
    """
    series_names = ['软件181学生', '软件191学生', '软件201学生']
    plotted = list(zip(series_names, city_list))

    # Build one shared category axis (first-seen order) so every series can
    # be aligned to it; values are looked up by city, never by position.
    categories = []
    seen = set()
    for _, counts in plotted:
        print(list(counts))
        for city in counts:
            if city not in seen:
                seen.add(city)
                categories.append(city)

    bar = Bar().add_xaxis(categories)
    for series_name, counts in plotted:
        # A class with no student from a city contributes 0 there.
        bar.add_yaxis(series_name, [counts.get(c, 0) for c in categories])
    bar.set_global_opts(
        # The title names cities, matching the data and the output file name.
        title_opts=opts.TitleOpts(title="18,19,20级学生城市分析"),
        xaxis_opts=opts.AxisOpts(
            # Rotate the labels so long category names do not overlap.
            axislabel_opts=opts.LabelOpts(rotate=45),
        ),
    )
    bar.render('181,191,201级学生城市分析.html')
# Section 1 driver: tally provinces and cities for the three class rosters
# and render one bar chart for each.
roster_files = [
    '软件181学生详细名单.xls',
    '软件191学生详细名单.xls',
    '软件201学生详细名单.xls',
]

province_list = getProvince(list(roster_files))
Provincepyechart(province_list)

City_list = getCity(list(roster_files))
cityPyechart(City_list)
2.学生情感分析
import re
import jieba
import xlrd
from snownlp import SnowNLP
import matplotlib.pyplot as plt
import numpy as np
# Set jieba's logger threshold to INFO.
jieba.setLogLevel(jieba.logging.INFO)
# Select the SimHei font for matplotlib text (presumably so the Chinese
# labels used below render correctly — requires the font to be installed).
plt.rcParams['font.sans-serif'] = ['SimHei']
# Draw minus signs with the ASCII hyphen instead of the Unicode minus glyph.
plt.rcParams['axes.unicode_minus'] = False
def getStrong(filename,save):
    """Label every signature in a workbook as positive or negative.

    Reads column 2 of every data row (row 0 of each sheet is skipped) in
    ``filename``, strips emoji/HTML markup residue, scores the remaining text
    with SnowNLP and appends one line per non-empty signature to ``save``::

        <raw cell value> : <cleaned signature> : 积极!|消极!

    Args:
        filename: Path of the ``.xls`` workbook to read.
        save: Path of the UTF-8 text file to append to.

    Returns:
        None.
    """
    # NOTE(review): the label before the first " : " is read from the same
    # column (2) as the signature itself; if a separate name column was
    # intended, confirm its index against the spreadsheet layout.
    signature_col = 2
    data = xlrd.open_workbook(filename, encoding_override='utf-8')
    raw_values = []
    for sheet in data.sheets():
        for row in range(1, sheet.nrows):
            raw_values.append(sheet.row_values(row)[signature_col])

    # Compiled once, outside the loop: matches "lf<digits>..." tokens and the
    # markup characters < > / =.
    markup = re.compile(r"lf\d+\w*|[<>/=]")

    # NOTE: append mode is kept on purpose for compatibility — running the
    # script twice therefore duplicates the lines in ``save``.
    with open(f"{save}", 'a', encoding='utf-8') as out:
        for raw in raw_values:
            signature = raw.strip().replace("emoji", "").replace("span", "").replace("class", "")
            signature = markup.sub("", signature)
            if signature == "":
                continue
            out.write(raw + " : " + signature)
            # SnowNLP sentiment is a score in [0, 1]; above 0.5 is positive.
            if SnowNLP(signature).sentiments > 0.5:
                out.write(" : 积极!\n")
            else:
                out.write(" : 消极!\n")
def counter(save):
    """Count how often the first two words of ``./qingxu.txt`` occur in a file.

    The text of ``save`` is segmented with jieba (with ``./qingxu.txt`` loaded
    as a user dictionary so its words are kept as single tokens). Every
    dictionary word that occurs at least once is printed with its count.

    Args:
        save: Path of the UTF-8 text file to analyse.

    Returns:
        A tuple ``(a, b)``: the occurrence counts of the first and the second
        whitespace-separated word of ``./qingxu.txt``; 0 when a word never
        occurs.

    Raises:
        IndexError: If ``./qingxu.txt`` contains fewer than two words.
    """
    with open(f"{save}", encoding="utf-8") as text_file:
        txt = text_file.read()
    with open("./qingxu.txt", encoding="utf-8") as words_file:
        find = words_file.read().split()

    jieba.load_userdict('./qingxu.txt')
    counts = {}
    for word in jieba.lcut(txt):
        counts[word] = counts.get(word, 0) + 1

    for word in find:
        if word in counts:
            print(word, counts[word])

    # .get keeps a label that never occurs from raising KeyError.
    return counts.get(find[0], 0), counts.get(find[1], 0)
# Section 2 driver: build the labelled signature file of each class, count
# the two labels per class, and compare them in one bar chart.
signature_jobs = [
    ("./软件201学生详细名单.xls", "./201-Signature.txt"),
    ("./软件202学生详细名单.xls", "./202-Signature.txt"),
]
for roster_path, signature_path in signature_jobs:
    getStrong(roster_path, signature_path)

# counter() returns the counts of the first two words in ./qingxu.txt;
# the bar labels below assume that order is (positive, negative).
a, b = counter("./201-Signature.txt")
c, d = counter("./202-Signature.txt")

bar_colors = ["hotpink", "lightblue", "hotpink", "lightblue"]
x = np.array(['软件201积极', '软件202积极', '软件201消极', '软件202消极'])
y = np.array([a, c, b, d])
plt.bar(x, y, color=bar_colors, width=0.5)
plt.title('201与202班签名情感强度分析对比')
plt.show()
3.关系图
import xlrd
from pyecharts import options as opts
from pyecharts.charts import Graph
def getRoom(filename):
    """Map column 3 to column 9 for every data row of the first sheet.

    Args:
        filename: Path of the ``.xls`` workbook to read.

    Returns:
        A dict ``{row[3]: row[9]}`` built from every row after row 0. The
        caller treats keys as student names and values as room identifiers
        (column meanings are not verifiable from this file — confirm against
        the spreadsheet). A repeated key keeps the value of its last row.
    """
    # encoding_override is passed by keyword; the second positional
    # parameter of xlrd.open_workbook is ``logfile``, not a file mode.
    data = xlrd.open_workbook(filename, encoding_override='utf-8')
    table = data.sheets()[0]
    Room = {}
    for row in range(1, table.nrows):
        values = table.row_values(row)
        Room[values[3]] = values[9]
    return Room
def RoomSee(Room):
    """Render a force-layout graph linking students who share a room.

    Args:
        Room: Mapping of node name -> room identifier. Every key becomes a
            node; two nodes are linked when their values are equal.

    Prints the link list and writes ``./关系图.html``; returns ``None``.
    """
    nodes = [{"name": student, "symbolSize": 5} for student in Room]

    # Group students by room first so pairs are only formed inside a room.
    roommates = {}
    for student, room in Room.items():
        roommates.setdefault(room, []).append(student)

    # One link per unordered pair: no self-links and no mirrored duplicates.
    links = []
    for members in roommates.values():
        for pos, source in enumerate(members):
            for target in members[pos + 1:]:
                links.append({"source": source, "target": target})
    print(links)

    (
        Graph()
        .add(
            "",
            nodes,
            links,
            categories=None,
            is_focusnode=True,
            is_roam=True,
            is_rotate_label=True,
            layout="force",
            edge_length=300,
            gravity=0.5,
            repulsion=100,
            label_opts=opts.LabelOpts(is_show=True),
            linestyle_opts=opts.LineStyleOpts(curve=0.2),
        )
        .render("./关系图.html")
    )
# Section 3 driver: load the name -> room mapping of class 201 and draw it.
room_by_student = getRoom("./软件201学生详细名单.xls")
RoomSee(room_by_student)
4.好友男女比例
from pyecharts.charts import Pie, Bar
from pyecharts import options as opts
import csv
def getSex(filename):
    """Return column 4 of every row of a CSV file, header row included.

    Args:
        filename: Path of the CSV file (opened with the platform's default
            text encoding).

    Returns:
        A list of the column-4 strings in file order. Rows with fewer than
        five fields (e.g. blank lines) are skipped.
    """
    sex_col = 4
    # newline='' lets the csv module handle line endings itself, as the csv
    # documentation requires for files passed to csv.reader.
    with open(filename, 'r', newline='') as fr:
        return [row[sex_col] for row in csv.reader(fr) if len(row) > sex_col]
def VisualSexpyechart(lstsex):
    """Print the friend gender ratio and render it as a pie chart.

    Args:
        lstsex: List of gender codes whose first element is skipped as the
            header; ``'1'`` counts as male, ``'2'`` as female and anything
            else as unknown.

    Writes ``好友性别比例.html``; returns ``None``.
    """
    # Start every category at 0 so an absent category cannot raise KeyError.
    sex = {'man': 0, 'women': 0, 'unknown': 0}
    for code in lstsex[1:]:
        if code == '1':
            sex['man'] += 1
        elif code == '2':
            sex['women'] += 1
        else:
            sex['unknown'] += 1
    total = len(lstsex[1:])

    def percent(count):
        # Guard against an empty friend list (total == 0).
        return float(count) / total * 100 if total else 0.0

    print('\n'.join([
        "男性好友:%.2f%%" % percent(sex['man']),
        "女性好友:%.2f%%" % percent(sex['women']),
        "不明性别好友:%.2f%%" % percent(sex['unknown']),
    ]))

    attr = ['男性好友', '女性好友', '不明性别好友']
    value = [sex['man'], sex['women'], sex['unknown']]
    data_pair = [list(pair) for pair in zip(attr, value)]
    (
        Pie(init_opts=opts.InitOpts(bg_color="white"))
        .add(
            series_name="性别分析",
            data_pair=data_pair,
            rosetype='',
            radius="55%",
            center=["50%", "50%"],
            label_opts=opts.LabelOpts(is_show=True, position="center"),
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title="微信好友性别比例",
                pos_left="center",
                pos_top="20",
                title_textstyle_opts=opts.TextStyleOpts(color="black"),
            ),
            legend_opts=opts.LegendOpts(is_show=True),
        )
        .set_series_opts(
            tooltip_opts=opts.TooltipOpts(
                trigger="item", formatter="{a} <br/>{b}: {c} ({d}%)"
            ),
            label_opts=opts.LabelOpts(color="black"),
        )
        .render('好友性别比例.html')
    )
def VisualSexpyechart2(lstsex):
    """Print the friend gender ratio and render it as a bar chart.

    Args:
        lstsex: List of gender codes whose first element is skipped as the
            header; ``'1'`` counts as male, ``'2'`` as female and anything
            else as unknown.

    Writes ``好友性别比例2.html``; returns ``None``.
    """
    # Start every category at 0 so an absent category cannot raise KeyError.
    sex = {'man': 0, 'women': 0, 'unknown': 0}
    for code in lstsex[1:]:
        if code == '1':
            sex['man'] += 1
        elif code == '2':
            sex['women'] += 1
        else:
            sex['unknown'] += 1
    total = len(lstsex[1:])

    def percent(count):
        # Guard against an empty friend list (total == 0).
        return float(count) / total * 100 if total else 0.0

    print('\n'.join([
        "男性好友:%.2f%%" % percent(sex['man']),
        "女性好友:%.2f%%" % percent(sex['women']),
        "不明性别好友:%.2f%%" % percent(sex['unknown']),
    ]))

    attr = ['男性好友', '女性好友', '不明性别好友']
    value = [sex['man'], sex['women'], sex['unknown']]
    (
        Bar()
        .add_xaxis(attr)
        .add_yaxis("amount", value, color='green')
        .set_global_opts(
            title_opts=opts.TitleOpts(title='微信数据分析'),
            yaxis_opts=opts.AxisOpts(name="amount"),
            xaxis_opts=opts.AxisOpts(name="sex"),
        )
        .render('好友性别比例2.html')
    )
# Section 4 driver: read the gender column once, then draw both charts.
friend_csv = './我的微信好友信息.csv'
lstsex = getSex(friend_csv)
for draw_chart in (VisualSexpyechart, VisualSexpyechart2):
    draw_chart(lstsex)
5.好友省份地图可视化
import csv
from collections import Counter
from pyecharts import options as opts
from pyecharts.charts import Map
def getInfo(filename,index):
    """Return one column of a CSV file, header row included.

    Args:
        filename: Path of the CSV file (opened with the platform's default
            text encoding).
        index: Column index to extract from every row.

    Returns:
        A list of the selected field of each row, in file order. Rows that
        have no field at ``index`` (e.g. blank lines) are skipped.
    """
    lstdata = []
    # newline='' lets the csv module handle line endings itself, as the csv
    # documentation requires for files passed to csv.reader.
    with open(filename, 'r', newline='') as fr:
        for row in csv.reader(fr):
            try:
                lstdata.append(row[index])
            except IndexError:
                # Blank or short row: nothing to collect for this column.
                continue
    return lstdata
def province_Map(lstprovince):
    """Render the 15 most common provinces on a map of China.

    Args:
        lstprovince: List of province-name strings. Empty and
            whitespace-only entries are ignored.

    Prints the ``(province, count)`` pairs and writes ``./好友省份分布.html``;
    returns ``None``.
    """
    # NOTE(review): the list is counted as given. If it still contains the
    # CSV header cell, that cell is counted like a province — confirm whether
    # the caller should drop it first.
    named = [province for province in lstprovince if province.strip()]
    data = Counter(named).most_common(15)
    print(data)
    (
        Map()
        .add(
            series_name="好友省份",
            data_pair=data,
            maptype='china',
            is_selected=True,
            is_roam=True,
            is_map_symbol_show=True,
            itemstyle_opts={
                "normal": {"areaColor": "white", "borderColor": "red"},
                "emphasis": {"areaColor": "rgba(0,0,0,1)"}
            }
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title="省份地图",
                subtitle="好友省份数据",
                pos_left="left",
                pos_top="20",
                title_textstyle_opts=opts.TextStyleOpts(color="#404a59", font_size=24)
            ),
            legend_opts=opts.LegendOpts(is_show=True),
            visualmap_opts=opts.VisualMapOpts(max_=60, is_piecewise=True)
        )
        .set_series_opts(
            label_opts=opts.LabelOpts(
                is_show=True,
                color='blue')
        )
        .render("./好友省份分布.html")
    )
# Section 5 driver: extract column 3 of the friend CSV (used here as the
# province column) and plot it on the map.
province_column = 3
lstdata = getInfo("./我的微信好友信息.csv", province_column)
province_Map(lstdata)
6.词云
from pyecharts import options
from pyecharts.charts import WordCloud
import csv
import re
import jieba
# Set jieba's logger threshold to INFO.
jieba.setLogLevel(jieba.logging.INFO)
def getInfo(filename,index):
    """Append the cleaned text of one CSV column to ``./sign.txt``.

    Each selected field is stripped, has the substrings ``emoji``, ``span``
    and ``class`` removed, then has ``lf<digits>...`` tokens and the markup
    characters ``< > / =`` removed, and is written as one UTF-8 line.

    Args:
        filename: Path of the CSV file (opened with the platform's default
            text encoding).
        index: Column index to extract from every row.

    Returns:
        None. Rows that have no field at ``index`` (e.g. blank lines) are
        skipped.
    """
    lstdata = []
    # newline='' lets the csv module handle line endings itself.
    with open(filename, 'r', newline='') as fr:
        for row in csv.reader(fr):
            try:
                lstdata.append(row[index])
            except IndexError:
                # csv.reader yields [] for blank lines; skip those rows.
                continue

    # Compiled once, outside the loop.
    markup = re.compile(r"lf\d+\w*|[<>/=]")
    # The with-block guarantees the text is flushed and closed before any
    # later step reads ./sign.txt. Append mode is kept for compatibility.
    with open('./sign.txt', 'a', encoding='utf-8') as out:
        for field in lstdata:
            signature = field.strip().replace("emoji", '').replace("span", '').replace("class", '')
            signature = markup.sub("", signature)
            out.write(signature + "\n")
def create_word_cloud(filename):
    """Render a word cloud from the text file ``./<filename>.txt``.

    The text is segmented with jieba in full mode (``cut_all=True``) and each
    token's frequency becomes its weight in the cloud.

    Args:
        filename: Base name (without the ``.txt`` suffix) of a UTF-8 text
            file in the current directory.

    Writes ``./词云.html``; returns ``None``.
    """
    with open("./{}.txt".format(filename), encoding='utf-8') as text_file:
        text = text_file.read()

    counts = {}
    for word in jieba.lcut(text, cut_all=True):
        # Skip single characters, and also empty / whitespace-only tokens,
        # which would otherwise show up as blank entries in the cloud.
        if len(word) == 1 or not word.strip():
            continue
        counts[word] = counts.get(word, 0) + 1

    words = list(counts.items())
    (
        WordCloud()
        .add("", words)
        .set_global_opts(title_opts=options.TitleOpts(title="词云"))
        .render('./词云.html')
    )
# Section 6 driver: dump column 5 of the friend CSV (used here as the
# signature column) into ./sign.txt, then build the word cloud from it.
signature_column = 5
getInfo("./我的微信好友信息.csv", signature_column)
create_word_cloud("sign")
7.广告好友比例仪表板和水球图
import re
from pyecharts import options as opts
import jieba
import xlrd
from pyecharts.charts import Gauge
from pyecharts.charts import Liquid
# Set jieba's logger threshold to INFO.
jieba.setLogLevel(jieba.logging.INFO)
# Module-level state shared by the functions below: the workbook is opened
# once at import time.
workbook = xlrd.open_workbook('./coor.xls')
# Sheet1: get_Name() reads its column 2 as the nickname list.
sheet1 = workbook.sheet_by_name('Sheet1')
num_rows1 = sheet1.nrows
# Sheet2: lexicon_deall() matches words against its column 0.
sheet2 = workbook.sheet_by_name('Sheet2')
num_rows2 = sheet2.nrows
def lexicon_deall(word,num_rows):
    """Tell whether ``word`` occurs in the lexicon column of ``sheet2``.

    Args:
        word: Text to look for (substring match).
        num_rows: Number of rows of ``sheet2`` to scan, starting at row 0.

    Returns:
        True as soon as ``word`` is contained in a column-0 cell of one of
        the scanned rows, otherwise False.
    """
    return any(word in sheet2.cell_value(row, 0) for row in range(num_rows))
def get_Name():
    """Collect column 2 of every row of ``sheet1`` after row 0.

    Returns:
        A list of the cell values, in row order.
    """
    return [sheet1.cell_value(row, 2) for row in range(1, num_rows1)]
def create_nickname(NickName):
    """Find "advertising" friends by nickname and chart their share.

    A nickname is put in the phone list when its cleaned text contains a
    ``1`` followed by ten digits, and in the advertising list when any of its
    jieba tokens is accepted by ``lexicon_deall`` (every accepted token is
    printed). Both lists are written to ``昵称分析.txt``; the advertising share
    is rendered as a gauge and as a liquid chart.

    Args:
        NickName: List of nickname strings. An empty list yields a share
            of 0 instead of a division error.

    Reads the module-level ``num_rows2``. Writes ``昵称分析.txt``,
    ``./仪表板广告好友比例.html`` and ``./水球图广告好友比例.html``; returns ``None``.
    """
    num_list = []
    advertise_list = []
    cop = re.compile("[^\u4e00-\u9fa5^.^a-z^A-Z^0-9]")
    phone = re.compile(r"1\d{10}")  # hoisted: same pattern for every nickname

    for val in NickName:
        v1 = val.strip().replace("emoji", "").replace("span", "").replace("class", "")
        v2 = cop.sub("", v1)
        if phone.search(v2):
            num_list.append(val)
        # Check every token (no short-circuit) so each match is printed.
        matched = [c for c in jieba.cut(v2) if lexicon_deall(c, num_rows2)]
        for c in matched:
            print(c)
        if matched:
            advertise_list.append(val)

    with open("昵称分析.txt", 'w', encoding="utf-8") as f:
        f.write("以下是昵称中带手机号的好友:\n")
        f.write(str(num_list))
        f.write("这部分好友有自己的生意,需要大家关注手机号,联系方便\n")
        f.write("以下是昵称中带职业的好友:\n")
        f.write(str(advertise_list))
        f.write("这部分好友有自己的事业\n")

    # Share of advertising friends in [0, 1]; 0.0 when there are no friends.
    ratio = len(advertise_list) / len(NickName) if NickName else 0.0

    # NOTE(review): the gauge shows a percentage on a 0-200 scale — confirm
    # that max_=200 (rather than 100) is intended.
    (
        Gauge()
        .add(
            series_name="广告好友指标",
            data_pair=[("所占比率", ratio * 100)],
            radius="100%",
            min_=0, max_=200,
            title_label_opts=opts.GaugeTitleOpts(
                font_size=20, color="black", font_family="Microsoft YaHei"
            ),
            axisline_opts=opts.AxisLineOpts(
                linestyle_opts=opts.LineStyleOpts(
                    color=[(0.3, "#67e0e3"), (0.7, "#37a2da"), (1, "#fd666d")], width=30
                )
            ),
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title="广告好友比例"
            ),
            legend_opts=opts.LegendOpts(
                is_show=False
            ),
        )
        .render("./仪表板广告好友比例.html")
    )
    # NOTE(review): the second liquid value is the share times 100, which can
    # exceed 1 — confirm this second wave is intended.
    (
        Liquid()
        .add("广告好友", [ratio, ratio * 100], is_outline_show=False)
        .set_global_opts(title_opts=opts.TitleOpts(title="广告好友比例"))
        .render("./水球图广告好友比例.html")
    )
# Section 7 driver: read the nicknames from Sheet1, then analyse them and
# render the gauge and liquid charts.
NickName = get_Name()
create_nickname(NickName)