任务描述:
基于第二天的实践,使用Python爬取百度百科中《青春有你2》所有参赛选手的信息,并对这些数据进行可视化分析。
# # For a persistent install, use a persistent path, as in the example below:
# !mkdir /home/aistudio/external-libraries
# !pip install matplotlib -t /home/aistudio/external-libraries
# Also add the code below, so that it is the only thing to run each time the environment (kernel) starts:
import sys
sys.path.append('/home/aistudio/external-libraries')
# # Download a Chinese font
# !wget https://mydueros.cdn.bcebos.com/font/simhei.ttf
# # Copy the font file into matplotlib's font directory
# !cp simhei.ttf /opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/matplotlib/mpl-data/fonts/ttf/
# # Normally copying the font file into the system font directory is enough, but on aistudio that path is not writable, so this approach cannot be used
# # !cp simhei.ttf /usr/share/fonts/
# # Create a font directory
# !mkdir .fonts
# # Copy the file into that directory
# !cp simhei.ttf .fonts/
# !rm -rf .cache/matplotlib
绘制选手区域分布柱状图
import json
import os
import re
from collections import Counter

import matplotlib.font_manager as font_manager
import matplotlib.pyplot as plt
import numpy as np
#显示matplotlib生成的图形
%matplotlib inline
with open('data/data31557/20200422.json', 'r', encoding='UTF-8') as file:
json_array = json.loads(file.read())
# Data for the zone bar chart: the x axis is the zone, the y axis is the
# number of contestants from that zone.
zones = [star['zone'] for star in json_array]

# Counter tallies every zone in a single pass and keeps first-seen order, so
# zone_list[i] and count_list[i] stay aligned in the order zones first appear.
zone_counts = Counter(zones)
zone_list = list(zone_counts.keys())
count_list = list(zone_counts.values())
# Draw the zone distribution on a 10x10 inch figure.
plt.figure(figsize=(10, 10))
# One bar per zone, labelled with the zone name.
plt.bar(range(len(count_list)), count_list, tick_label=zone_list,
        color='#9999ff', edgecolor='white')
# Tilt the x tick labels by 45 degrees and set the tick label font sizes.
plt.xticks(rotation=45, fontsize=10)
plt.yticks(fontsize=20)
# Font file used to render the Chinese title.
myfont = font_manager.FontProperties(fname='/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc')
# Text property keywords are lower-case; recent Matplotlib releases reject
# mixed-case spellings of them.
plt.title('《青春有你2》参赛选手', fontproperties=myfont)
# savefig does not create missing directories, so make sure the target exists.
os.makedirs('work/result', exist_ok=True)
plt.savefig('work/result/bar_result.jpg')
plt.show()
请在下面完成作业,对选手体重分布进行可视化,绘制饼状图
# Data for the weight pie chart: count the contestants in each weight bucket.
weights = [star['weight'] for star in json_array]

# Bucket labels; count_list[i] is the number of contestants in weight_list[i].
weight_list = ['<=45kg', '45~50kg', '50~55kg', '>55kg']
count_list = [0, 0, 0, 0]
for weight in weights:
    # Read the whole leading number (decimals included) rather than the first
    # two characters, so values such as "45.5kg" land in the right bucket.
    # Assumes each weight is a string like "46kg" -- TODO confirm against data.
    number = re.search(r'\d+(?:\.\d+)?', weight)
    if number is None:
        raise ValueError('cannot parse weight: {!r}'.format(weight))
    kg = float(number.group())
    if kg <= 45:
        count_list[0] += 1
    elif kg <= 50:
        count_list[1] += 1
    elif kg <= 55:
        count_list[2] += 1
    else:
        count_list[3] += 1
# Pie chart of the weight buckets: one wedge per label, annotated with its
# percentage share, starting at 90 degrees.
plt.pie(count_list, labels=weight_list, autopct='%1.1f%%', shadow=False, startangle=90)
# Equal axis scaling keeps the pie circular.
plt.axis('equal')
# The title is Chinese, so render it with the same CJK font as the bar chart.
plt.title('选手体重分布情况图', fontproperties=myfont)
plt.show()