《青春有你2》选手数据分析
任务描述
基于第二天实践中使用Python从百度百科爬取的《青春有你2》所有参赛选手的信息,进行数据可视化分析。
import sys
sys.path.append('/home/aistudio/external-libraries')
# 下载中文字体
!wget https://mydueros.cdn.bcebos.com/font/simhei.ttf
# 将字体文件复制到matplotlib字体路径
!cp simhei.ttf /opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/
# 一般只需要将字体文件复制到系统字体目录下即可,但是在aistudio上该路径没有写权限,所以此方法不能用
# !cp simhei.ttf /usr/share/fonts/
# 创建系统字体文件路径
!mkdir .fonts
# 复制文件到该路径
!cp simhei.ttf .fonts/
!rm -rf .cache/matplotlib
绘制选手区域分布柱状图
import matplotlib.pyplot as plt
import numpy as np
import json
import matplotlib.font_manager as font_manager
#显示matplotlib生成的图形
%matplotlib inline
with open('data/data31557/20200422.json', 'r', encoding='UTF-8') as file:
json_array = json.loads(file.read())
#绘制小姐姐区域分布柱状图,x轴为地区,y轴为该区域的小姐姐数量
zones = []
for star in json_array:
zone = star['zone']
zones.append(zone)
print(len(zones))
print(zones)
zone_list = []
count_list = []
for zone in zones:
if zone not in zone_list:
count = zones.count(zone)
zone_list.append(zone)
count_list.append(count)
print(zone_list)
print(count_list)
# 设置显示中文
plt.rcParams['font.sans-serif'] = ['SimHei'] # 指定默认字体
plt.figure(figsize=(20,15))
plt.bar(range(len(count_list)), count_list,color='r',tick_label=zone_list,facecolor='#9999ff',edgecolor='white')
# 这里是调节横坐标的倾斜度,rotation是度数,以及设置刻度字体大小
plt.xticks(rotation=45,fontsize=20)
plt.yticks(fontsize=20)
!mkdir ./work/result
plt.legend()
plt.title('''《青春有你2》参赛选手''',fontsize = 24)
plt.savefig('/home/aistudio/work/result/bar_result.jpg')
plt.show()
方法2(使用pandas)
import matplotlib.pyplot as plt
import numpy as np
import json
import matplotlib.font_manager as font_manager
import pandas as pd
#显示matplotlib生成的图形
%matplotlib inline
df = pd.read_json('data/data31557/20200422.json')
#print(df)
grouped=df['name'].groupby(df['zone'])
s = grouped.count()
zone_list = s.index
count_list = s.values
# 设置显示中文
plt.rcParams['font.sans-serif'] = ['SimHei'] # 指定默认字体
plt.figure(figsize=(20,15))
plt.bar(range(len(count_list)), count_list,color='r',tick_label=zone_list,facecolor='#9999ff',edgecolor='white')
# 这里是调节横坐标的倾斜度,rotation是度数,以及设置刻度字体大小
plt.xticks(rotation=45,fontsize=20)
plt.yticks(fontsize=20)
!mkdir ./work/result
plt.legend()
plt.title('''《青春有你2》参赛选手''',fontsize = 24)
plt.savefig('/home/aistudio/work/result/bar_result02.jpg')
plt.show()
请在下面完成作业,对选手体重分布进行可视化,绘制饼状图
import matplotlib.pyplot as plt
import numpy as np
import json
import matplotlib.font_manager as font_manager
#显示matplotlib生成的图形
%matplotlib inline
with open('data/data31557/20200422.json', 'r', encoding='UTF-8') as file:
json_array = json.loads(file.read())
#绘制小姐姐区域分布柱状图,x轴为地区,y轴为该区域的小姐姐数量
weight_list= []
for star in json_array:
weight = star['weight'].strip('kg')
weight_list.append(weight)
# print(len(weight_list))
# print(weight_list)
weight_label = ['<=45kg', '45~50kg', '50~55kg', '>55kg']
count_list = [0,0,0,0]
for weight in weight_list:
weight=float(weight)
if weight <= 45:
count_list[0]=count_list[0]+1
continue
if weight>45 and weight<=50:
count_list[1]=count_list[1]+1
continue
if weight > 50 and weight <= 55:
count_list[2]=count_list[2]+1
continue
if weight>55:
count_list[3]=count_list[3]+1
continue
print(weight_label)
print(count_list)
# 设置显示中文
plt.rcParams['font.sans-serif'] = ['SimHei'] # 指定默认字体
plt.figure(figsize=(6,6))
# plt.pie(x=count_list ,labels=weight_label,autopct='%1.1f%%',shadow=False,startangle=150)
explode = (0, 0.1, 0, 0)
plt.pie(x=count_list ,explode=explode, labels=weight_label,autopct='%1.1f%%', shadow=False, startangle=90)
plt.axis('equal')
plt.legend()
plt.title('''《青春有你2》参赛选手''',fontsize = 24)
plt.savefig('/home/aistudio/work/result/pie_result.jpg')
plt.show()
方法2
import matplotlib.pyplot as plt
import numpy as np
import json
import matplotlib.font_manager as font_manager
#显示matplotlib生成的图形
%matplotlib inline
df = pd.read_json('data/data31557/20200422.json')
weights = df['weight']
arrs = weights.values
for i in range(len(arrs)):
arrs[i] = float(arrs[i] [0:-2])
bin = [0,45,50,55,100]
sel = pd.cut(arrs,bin)
#pandas的value_counts()函数可以对series里面的每个值进行计数并且排序
pd.value_counts(sel)
labels = '<= 45kg','45~50kg','50~55kg','>55kg'
sizes = pd.value_counts(sel)
explode = (0.1,0.1,0,0)
fig1,ax1 = plt.subplots()
ax1.pie(sizes,explode = explode, labels = labels, autopct = '%1.1f%%',shadow = True,startangle = 90)
ax1.axis('equal')
plt.legend(loc = "best")
plt.title('''《青春有你2》参赛选手''',fontsize = 24)
plt.savefig('/home/aistudio/work/result/pie_result.jpg')
plt.show()
以上两种方法都可!
写到这吧!感觉状态不好,睡了!
人生苦短,我选Python!