Python matplotlib 练习题

matplotlib —— 课后练✋

%matplotlib inline
import matplotlib as mpl
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

练习1:航班乘客变化分析

  • 分析年度乘客总量变化情况(折线图)
  • 分析乘客在一年中各月份的分布(柱状图)
# Load seaborn's "flights" example dataset and preview its first rows.
data = sns.load_dataset("flights")
data.head()
# Columns: year, month, passengers (passenger count)
   year     month  passengers
0  1949   January         112
1  1949  February         118
2  1949     March         132
3  1949     April         129
4  1949       May         121

年度变化

# Total passengers per year. Only the 'passengers' column is aggregated, so the
# non-numeric 'month' column is never summed (recent pandas versions may raise
# a TypeError when asked to sum a categorical column).
year_group = data.groupby('year')[['passengers']].sum()

# Line chart of the yearly totals.
fig, ax = plt.subplots()
ax.plot(year_group.index, year_group['passengers'])
ax.set_xlabel('year')
ax.set_ylabel('passengers')
ax.set_title('Annual Variation Trend of Passengers')
<matplotlib.text.Text at 0x7f89cacfaf50>

这里写图片描述

各月份之间的差异

# Total passengers per calendar month, summed over every year in the dataset.
# Only 'passengers' is aggregated; a sum of the 'year' column has no meaning.
month_group = data.groupby('month')[['passengers']].sum()
# Integer bar positions 0..n-1, one per month row.
month_group['month_num'] = np.arange(len(month_group))

fig1, ax1 = plt.subplots()
ax1.bar(month_group['month_num'], month_group['passengers'], align='center')
ax1.set_xlabel('month')
ax1.set_ylabel('passengers')
ax1.set_xticks(month_group['month_num'].values)
# Three-letter month abbreviations as tick labels; the loop variable is not
# called `str`, so the builtin stays usable.
month_names = [str(name)[:3] for name in month_group.index]
ax1.set_xticklabels(month_names)
ax1.set_title('Monthly Distribution of Passengers')
<matplotlib.text.Text at 0x7f89cabdad10>

这里写图片描述

练习2:鸢尾花花型尺寸分析

  • 萼片(sepal)和花瓣(petal)的大小关系(散点图)
  • 不同种类(species)鸢尾花萼片和花瓣的大小关系(分类散点子图)
  • 不同种类鸢尾花萼片和花瓣大小的分布情况(柱状图或者箱式图)
# Load seaborn's "iris" example dataset and preview its first rows.
data = sns.load_dataset("iris")
data.head()
# Columns: sepal length, sepal width, petal length, petal width, species
   sepal_length  sepal_width  petal_length  petal_width species
0           5.1          3.5           1.4          0.2  setosa
1           4.9          3.0           1.4          0.2  setosa
2           4.7          3.2           1.3          0.2  setosa
3           4.6          3.1           1.5          0.2  setosa
4           5.0          3.6           1.4          0.2  setosa
# Approximate the size of each flower part as length times width.
data['sepal_size'] = data['sepal_length'].mul(data['sepal_width'])
data['petal_size'] = data['petal_length'].mul(data['petal_width'])

花瓣与萼片的关系

fig, ax2_1 = plt.subplots()
# Sepal size on the x axis, petal size on the y axis, all species together.
ax2_1.scatter(x=data['sepal_size'], y=data['petal_size'])

# Title and axis labels.
ax2_1.set_title('Size of Sepal vs Size of Petal')
ax2_1.set_xlabel('size of sepal')
ax2_1.set_ylabel('size of petal')
<matplotlib.text.Text at 0x7f89caa496d0>

这里写图片描述

# Distinct species labels, in order of first appearance in the data.
species=data['species'].unique()
species
array(['setosa', 'versicolor', 'virginica'], dtype=object)
# One sub-frame per species, in the same order as the `species` array.
data1, data2, data3 = (data[data['species'] == species[k]] for k in range(3))

不同种类之间萼片与花瓣的关系

fig, ax2_2 = plt.subplots()

# One scatter layer per species, each with its own colour and legend entry.
species_frames = (data1, data2, data3)
species_colors = ('#ff0000', '#00ff00', '#0000ff')
for k in range(3):
    ax2_2.scatter(species_frames[k]['sepal_size'], species_frames[k]['petal_size'],
                  color=species_colors[k], label=species[k])
ax2_2.legend(loc='best')

# Title and axis labels.
ax2_2.set_title('Size of Sepal vs Size of Petal')
ax2_2.set_xlabel('size of sepal')
ax2_2.set_ylabel('size of petal')
<matplotlib.text.Text at 0x7f89ca98b990>

这里写图片描述

不同种类的花瓣与萼片大小

def boxplot(x_data, y_data, base_color, median_color, x_label, y_label, title):
    """Draw one filled box plot per entry of ``y_data`` on a new figure.

    Args:
        x_data: Tick labels for the x axis, one per box.
        y_data: Sequence of value collections; each one becomes a box.
        base_color: Colour of the box outline, the median line and the
            whisker caps.
        median_color: Colour of the box fill and of the whiskers. Note that,
            despite its name, it is not the colour of the median line.
        x_label: Label of the x axis.
        y_label: Label of the y axis.
        title: Title of the plot.
    """
    axes = plt.subplots()[1]

    # Styling for each part of the box plot.
    box_style = {'color': base_color, 'facecolor': median_color}
    median_style = {'color': base_color}
    whisker_style = {'color': median_color}
    cap_style = {'color': base_color}

    axes.boxplot(
        y_data,
        patch_artist=True,  # fill the boxes with colour
        medianprops=median_style,
        boxprops=box_style,
        whiskerprops=whisker_style,
        capprops=cap_style,
    )

    # Label the boxes with the entries of x_data.
    axes.set_xticklabels(x_data)
    axes.set_ylabel(y_label)
    axes.set_xlabel(x_label)
    axes.set_title(title)


bp_data = [frame['sepal_size'] for frame in (data1, data2, data3)]

# Box plot of sepal size, one box per species.
boxplot(
    x_data=species,
    y_data=bp_data,
    base_color='b',
    median_color='r',
    x_label='Species',
    y_label='Size of Sepal',
    title='Size Distribution of Sepal By Species',
)

这里写图片描述

bp_data = [frame['petal_size'] for frame in (data1, data2, data3)]

# Box plot of petal size, one box per species.
boxplot(
    x_data=species,
    y_data=bp_data,
    base_color='b',
    median_color='r',
    x_label='Species',
    y_label='Size of Petal',
    title='Size Distribution of Petal By Species',
)

这里写图片描述

练习3:餐厅小费情况分析

  • 小费和总消费之间的关系(散点图)
  • 男性顾客和女性顾客,谁更慷慨(分类箱式图)
  • 抽烟与否是否会对小费金额产生影响(分类箱式图)
  • 工作日和周末,什么时候顾客给的小费更慷慨(分类箱式图)
  • 午饭和晚饭,哪一顿顾客更愿意给小费(分类箱式图)
  • 就餐人数是否会对慷慨度产生影响(分类箱式图)
  • 性别+抽烟的组合因素对慷慨度的影响(分组柱状图)
# Load seaborn's "tips" example dataset and preview its first rows.
data = sns.load_dataset("tips")
data.head()
# Columns: total bill, tip, sex, smoker or not, day of week, meal time, party size
   total_bill   tip     sex smoker  day    time  size
0       16.99  1.01  Female     No  Sun  Dinner     2
1       10.34  1.66    Male     No  Sun  Dinner     3
2       21.01  3.50    Male     No  Sun  Dinner     3
3       23.68  3.31    Male     No  Sun  Dinner     2
4       24.59  3.61  Female     No  Sun  Dinner     4

小费与总消费的关系

# Scatter plot of tip against total bill. The total bill goes on the x axis
# and the tip on the y axis so that the data match the axis labels below.
_, ax3_1 = plt.subplots()
ax3_1.scatter(data['total_bill'], data['tip'])
ax3_1.set_title('Tip vs Total bill')
ax3_1.set_xlabel('total bill')
ax3_1.set_ylabel('tip')
<matplotlib.text.Text at 0x7f89ca689150>

这里写图片描述

男性与女性

sex = data['sex'].unique()
# Tips of each sex, in the same order as the `sex` labels.
bp_data = [data.loc[data['sex'] == sex[k], 'tip'] for k in (0, 1)]

# Box plot of tips, one box per sex.
boxplot(
    x_data=sex,
    y_data=bp_data,
    base_color='b',
    median_color='r',
    x_label='Sex',
    y_label='Tip',
    title='Distribution of Tip By Sex',
)

这里写图片描述

抽烟与否

smoker = data['smoker'].unique()
# Tips of each smoker group, in the same order as the `smoker` labels.
bp_data = [data.loc[data['smoker'] == smoker[k], 'tip'] for k in (0, 1)]

# Box plot of tips, one box per smoker status.
boxplot(
    x_data=smoker,
    y_data=bp_data,
    base_color='b',
    median_color='r',
    x_label='Smoke or Not',
    y_label='Tip',
    title='Distribution of Tip By Smoker',
)

这里写图片描述

工作日与周末

# Split tips into weekend and weekday groups. The weekend days are named
# explicitly rather than taken from the order in which unique() happens to
# return the day labels, so the box labels cannot get out of sync with the data.
# NOTE(review): assumes the dataset spells the weekend days 'Sat' and 'Sun'.
weekend_days = ['Sat', 'Sun']
is_weekend = data['day'].isin(weekend_days)
bp_data = [data.loc[is_weekend, 'tip'], data.loc[~is_weekend, 'tip']]

# Box plot of tips: weekend vs weekday.
boxplot(
    x_data=['weekend', 'weekday'],
    y_data=bp_data,
    base_color='b',
    median_color='r',
    x_label='Day',
    y_label='Tip',
    title='Distribution of Tip By Day',
)

这里写图片描述

午餐与晚餐

time = data['time'].unique()
# Tips of each meal time, in the same order as the `time` labels.
bp_data = [data.loc[data['time'] == time[k], 'tip'] for k in (0, 1)]

# Box plot of tips, one box per meal time.
boxplot(
    x_data=time,
    y_data=bp_data,
    base_color='b',
    median_color='r',
    x_label='Time',
    y_label='Tip',
    title='Distribution of Tip By Time',
)

这里写图片描述

就餐人数

# Distinct party sizes in ascending order, so the boxes read left to right from
# the smallest to the largest party (unique() alone returns them in order of
# first appearance).
size = np.sort(data['size'].unique())
bp_data = [data.loc[data['size'] == party_size, 'tip'] for party_size in size]

# Box plot of tips, one box per party size.
boxplot(
    x_data=size,
    y_data=bp_data,
    base_color='b',
    median_color='r',
    x_label='Size',
    y_label='Tip',
    title='Distribution of Tip By Size',
)

这里写图片描述

性别+抽烟

# Mean tip for every (sex, smoker) combination. 'tip' is selected before
# averaging so the non-numeric columns are never passed to mean() (recent
# pandas versions may raise a TypeError for them). unstack() then turns the
# smoker level into columns, giving a sex x smoker table.
tip_by_sex_smoke = data.groupby(['sex', 'smoker'])['tip'].mean().unstack()
tip_by_sex_smoke
smoker       Yes        No
sex
Male    3.051167  3.113402
Female  2.931515  2.773519
def groupedbarplot(x_data, y_data_list, y_data_names, colors, x_label, y_label, title):
    """Draw a grouped (side-by-side) bar chart on a new figure.

    Args:
        x_data: Numeric centre position of each group on the x axis.
        y_data_list: One sequence of bar heights per series.
        y_data_names: Legend label of each series, indexed like y_data_list.
        colors: Bar colour of each series, indexed like y_data_list.
        x_label: Label of the x axis.
        y_label: Label of the y axis.
        title: Title of the plot.
    """
    axes = plt.subplots()[1]

    # Every group occupies 0.8 x-units, shared equally between the series.
    group_width = 0.8
    bar_width = group_width / len(y_data_list)
    half_group = group_width / 2
    half_bar = bar_width / 2
    # Offset of each series' bar centre from the centre of its group.
    offsets = np.arange(half_bar - half_group, half_group + half_bar, bar_width)

    # Draw the series one by one, each shifted sideways by its own offset.
    for i, heights in enumerate(y_data_list):
        axes.bar(x_data + offsets[i], heights, color=colors[i],
                 label=y_data_names[i], width=bar_width)

    axes.set_ylabel(y_label)
    axes.set_xlabel(x_label)
    axes.set_title(title)
    axes.legend(loc='upper right')



# Grouped bars: one group per sex, one bar per smoker status.
groupedbarplot(
    x_data=range(2),
    y_data_list=[tip_by_sex_smoke[status] for status in ('Yes', 'No')],
    y_data_names=['Yes', 'No'],
    colors=['#539caf', '#7663b0'],
    x_label='sex',
    y_label='tip',
    title='Tip By Smoker and Sex',
)
# Replace the numeric group positions with the sex labels.
ax = plt.gca()
ax.set_xticks(range(2))
ax.set_xticklabels(tip_by_sex_smoke.index.values)
[<matplotlib.text.Text at 0x7f89ca39cdd0>,
 <matplotlib.text.Text at 0x7f89ca3a7e90>]

这里写图片描述

练习4:泰坦尼克号海难幸存状况分析

  • 不同仓位等级中幸存和遇难的乘客比例(堆积柱状图)
  • 不同性别的幸存比例(堆积柱状图)
  • 幸存和遇难乘客的票价分布(分类箱式图)
  • 幸存和遇难乘客的年龄分布(分类箱式图)
  • 不同上船港口的乘客仓位等级分布(分组柱状图)
  • 幸存和遇难乘客兄弟姐妹及配偶(sibsp)的数量分布(分类箱式图)
  • 幸存和遇难乘客父母子女的数量分布(分类箱式图)
  • 单独乘船与否和幸存之间有没有联系(堆积柱状图或者分组柱状图)
# Load seaborn's "titanic" example dataset and preview its first rows.
data = sns.load_dataset("titanic")
data.head()
# Columns: survived (0/1), ticket class, sex, age, siblings/spouses aboard (sibsp), parents/children aboard (parch), fare, embarkation port code, ticket class name, person category, adult male or not, deck, embarkation town, alive (yes/no), travelling alone or not
   survived  pclass     sex   age  sibsp  parch     fare embarked  class    who  adult_male deck  embark_town alive  alone
0         0       3    male  22.0      1      0   7.2500        S  Third    man        True  NaN  Southampton    no  False
1         1       1  female  38.0      1      0  71.2833        C  First  woman       False    C    Cherbourg   yes  False
2         1       3  female  26.0      0      0   7.9250        S  Third  woman       False  NaN  Southampton   yes   True
3         1       1  female  35.0      1      0  53.1000        S  First  woman       False    C  Southampton   yes  False
4         0       3    male  35.0      0      0   8.0500        S  Third    man        True  NaN  Southampton    no   True

不同仓位等级幸存比例

# your code
def stackedbarplot(x_data, y_data_list, y_data_names, colors, x_label, y_label, title):
    """Draw a stacked bar chart on a new figure.

    Each series is drawn on top of the sum of all series before it, so any
    number of series stacks without overlapping.

    The heights are drawn as given. To get bars that all reach 1, the caller
    must pass values already normalised to proportions.

    Args:
        x_data: Position of each bar on the x axis.
        y_data_list: One sequence of bar heights per series, bottom series first.
        y_data_names: Legend label of each series, indexed like y_data_list.
        colors: Bar colour of each series, indexed like y_data_list.
        x_label: Label of the x axis.
        y_label: Label of the y axis.
        title: Title of the plot.
    """
    _, ax = plt.subplots()
    # Running top edge of the stack at every x position; the first series
    # starts from the axis baseline.
    stack_top = 0.0
    for i, y_data in enumerate(y_data_list):
        # Plain float array, so the running sum is positional and does not
        # depend on any pandas index alignment.
        heights = np.asarray(y_data, dtype=float)
        ax.bar(x_data, heights, color=colors[i], bottom=stack_top,
               align='center', label=y_data_names[i])
        stack_top = stack_top + heights
    ax.set_ylabel(y_label)
    ax.set_xlabel(x_label)
    ax.set_title(title)
    ax.legend(loc='upper right')  # legend position
# Count passengers for every (pclass, survived) pair; unstack() moves the
# survived level (0/1) into the columns.
pclass_survived=data.groupby(['pclass','survived']).size().unstack()
pclass_survived
survived    0    1
pclass
1          80  136
2          97   87
3         372  119
# Class totals, then the share of survivors and non-survivors within each class.
pclass_survived['sum'] = pclass_survived[0].add(pclass_survived[1])
pclass_survived['yes_prop'] = pclass_survived[1].div(pclass_survived['sum'])
pclass_survived['no_prop'] = pclass_survived[0].div(pclass_survived['sum'])
pclass_survived
survived    0    1  sum  yes_prop   no_prop
pclass
1          80  136  216  0.629630  0.370370
2          97   87  184  0.472826  0.527174
3         372  119  491  0.242363  0.757637
# Stacked bars of the survival proportions per ticket class. The y label and
# title say "Proportion" because the plotted values are the yes_prop/no_prop
# shares, not head counts.
stackedbarplot(
    x_data=pclass_survived.index.values,
    y_data_list=[pclass_survived['yes_prop'], pclass_survived['no_prop']],
    y_data_names=['Survived', 'Not survived'],
    colors=['#539caf', '#7663b0'],
    x_label='Pclass',
    y_label='Proportion of People',
    title='Proportion of People By Survived Or Not and Pclass',
)

# Put one tick on every bar; the bars sit at the pclass values themselves.
ax = plt.gca()
ax.set_xticks(pclass_survived.index.values)
ax.set_xticklabels(pclass_survived.index.values)
[<matplotlib.text.Text at 0x7f89ca2f1890>,
 <matplotlib.text.Text at 0x7f89ca27a410>,
 <matplotlib.text.Text at 0x7f89ca26d6d0>]

这里写图片描述

不同性别幸存比例

# Count passengers for every (sex, survived) pair; unstack() moves the
# survived level (0/1) into the columns.
sex_survived=data.groupby(['sex','survived']).size().unstack()
sex_survived
survived    0    1
sex
female     81  233
male      468  109
# Totals per sex, then the share of survivors and non-survivors within each sex.
sex_survived['sum'] = sex_survived[0].add(sex_survived[1])
sex_survived['yes_prop'] = sex_survived[1].div(sex_survived['sum'])
sex_survived['no_prop'] = sex_survived[0].div(sex_survived['sum'])
sex_survived
survived    0    1  sum  yes_prop   no_prop
sex
female     81  233  314  0.742038  0.257962
male      468  109  577  0.188908  0.811092
# Stacked bars of the survival proportions per sex. The y label and title say
# "Proportion" because the plotted values are the yes_prop/no_prop shares, not
# head counts. Bar positions are derived from the table instead of hard-coded.
sex_positions = np.arange(len(sex_survived))
stackedbarplot(
    x_data=sex_positions,
    y_data_list=[sex_survived['yes_prop'], sex_survived['no_prop']],
    y_data_names=['Survived', 'Not survived'],
    colors=['#539caf', '#7663b0'],
    x_label='Sex',
    y_label='Proportion of People',
    title='Proportion of People By Survived Or Not and Sex',
)
# Replace the numeric bar positions with the sex labels.
ax = plt.gca()
ax.set_xticks(sex_positions)
ax.set_xticklabels(sex_survived.index.values)
[<matplotlib.text.Text at 0x7f89ca1c3a10>,
 <matplotlib.text.Text at 0x7f89ca1ce0d0>]

这里写图片描述

幸存or遇难の票价分布

survived = data['survived'].unique()
# Fares of each survival group, in the same order as the `survived` labels.
bp_data = [data.loc[data['survived'] == survived[k], 'fare'] for k in (0, 1)]

# Box plot of fares, one box per survival outcome.
boxplot(
    x_data=survived,
    y_data=bp_data,
    base_color='b',
    median_color='r',
    x_label='survived',
    y_label='fare',
    title='Distribution of Fare By Survived',
)

这里写图片描述

幸存or遇难の年龄分布

survived = data['survived'].unique()
# Ages of each survival group with missing values dropped. Replacing missing
# ages by 0 would add fake newborns and pull the boxes towards zero, and an
# in-place fill would also permanently alter `data` for the cells below.
bp_data = [data.loc[data['survived'] == flag, 'age'].dropna() for flag in survived]

# Box plot of ages, one box per survival outcome.
boxplot(
    x_data=survived,
    y_data=bp_data,
    base_color='b',
    median_color='r',
    x_label='survived',
    y_label='age',
    title='Distribution of Age By Survived',
)

这里写图片描述

不同上船港口の仓位等级

# Count passengers for every (embark_town, pclass) pair; unstack() moves the
# pclass level into the columns.
embark_pclass=data.groupby(['embark_town','pclass']).size().unstack()
# Disabled: embark_pclass.fillna(0,inplace=True)
embark_pclass
pclass         1    2    3
embark_town
Cherbourg     85   17   66
Queenstown     2    3   72
Southampton  127  164  353
# One Series of per-town counts for each of the three ticket-class columns.
pclass_list = [embark_pclass.iloc[:, k] for k in range(3)]
pclass_list
[embark_town Cherbourg 85 Queenstown 2 Southampton 127 Name: 1, dtype: int64, embark_town Cherbourg 17 Queenstown 3 Southampton 164 Name: 2, dtype: int64, embark_town Cherbourg 66 Queenstown 72 Southampton 353 Name: 3, dtype: int64]

# Grouped bars: one group per embarkation town, one bar per ticket class.
groupedbarplot(
    x_data=range(3),
    y_data_list=pclass_list,
    y_data_names=embark_pclass.columns,
    colors=['#539caf', '#7663b0', '#00ff00'],
    x_label='embark_town',
    y_label='counts of pclass',
    title='Counts of Pclass vs Embark Town',
)

# Replace the numeric group positions with the town names.
ax = plt.gca()
ax.set_xticks(range(3))
ax.set_xticklabels(embark_pclass.index.values)
[<matplotlib.text.Text at 0x7f89c9f488d0>,
 <matplotlib.text.Text at 0x7f89ca045b10>,
 <matplotlib.text.Text at 0x7f89c9eec150>]

这里写图片描述

幸存or遇难の兄弟姐妹及配偶(sibsp)数量分布

survived = data['survived'].unique()
# sibsp values of each survival group, in the same order as the `survived` labels.
bp_data = [data.loc[data['survived'] == survived[k], 'sibsp'] for k in (0, 1)]

# Box plot of sibsp, one box per survival outcome.
boxplot(
    x_data=survived,
    y_data=bp_data,
    base_color='b',
    median_color='r',
    x_label='survived',
    y_label='sibsp',
    title='Distribution of Sibsp By Survived',
)

这里写图片描述

幸存or遇难の父母子女数量分布

survived = data['survived'].unique()
# parch values of each survival group, in the same order as the `survived` labels.
bp_data = [data.loc[data['survived'] == survived[k], 'parch'] for k in (0, 1)]

# Box plot of parch, one box per survival outcome.
boxplot(
    x_data=survived,
    y_data=bp_data,
    base_color='b',
    median_color='r',
    x_label='survived',
    y_label='parch',
    title='Distribution of Parch By Survived',
)

这里写图片描述

单独乘船 vs 幸存

# Count passengers for every (alone, survived) pair; unstack() moves the
# survived level (0/1) into the columns.
alone_survived=data.groupby(['alone','survived']).size().unstack()
alone_survived
survived    0    1
alone
False     175  179
True      374  163
# Grouped bars: one group per value of 'alone', with the not-survived and
# survived counts side by side.
_, ax = plt.subplots()
width = 0.4
index = alone_survived.index.values
# Explicit numeric group centres. The boolean index values are not used as
# coordinates, and the bars are centred explicitly so the layout does not
# depend on matplotlib's default bar alignment.
positions = np.arange(len(index))
ax.bar(positions - width / 2, alone_survived[0], color='#ff0000',
       label='Not survived', width=width, align='center')
ax.bar(positions + width / 2, alone_survived[1], color='#00ff00',
       label='Survived', width=width, align='center')

ax.set_ylabel('numbers of People')
ax.set_xlabel('alone')
ax.set_title('People Survived vs Alone')
ax.legend(loc='upper right')
# One tick in the middle of each pair of bars, labelled False / True.
plt.xticks(positions, index)
([<matplotlib.axis.XTick at 0x7f89ca2d73d0>,
  <matplotlib.axis.XTick at 0x7f89ca85f4d0>],
 <a list of 2 Text xticklabel objects>)

这里写图片描述

评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值