Matplotlib学习_柱状图&条形图

最新推荐文章于 2023-10-07 17:24:01 发布
sinat_15355869
最新推荐文章于 2023-10-07 17:24:01 发布
阅读量2k
点赞数 1
分类专栏： Matplotlib
本文链接：https://blog.csdn.net/sinat_15355869/article/details/79699888
版权
Matplotlib 专栏收录该内容
3 篇文章 0 订阅
订阅专栏
# coding: utf-8

# In[37]:


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Render figures inline in the notebook. ``InteractiveShell.magic`` is
# deprecated, so the line magic is invoked through ``run_line_magic``.
get_ipython().run_line_magic('matplotlib', 'inline')


# ## 4.1 柱状图
#     1. 主要参数介绍
#     2. 添加x轴标签tick label
#     3. 添加数据标签
#     4. 实例讲解
#     5. 其他柱状图类型

# 主要参数：
# bar(x, height, width=0.8, bottom=None, **kwargs)   （旧版本 Matplotlib 中第一个参数名为 left）
#     
#     1.x（旧称 left）为和分类数量一致的序列，序列里的元素个数决定了柱子的个数，数值大小决定了柱子在x轴上距离0点的位置
#     2.height为分类变量的数值大小，决定了柱子的高度
#     3.width决定了柱子的宽度，仅代表形状宽度而已
#     4.bottom决定了柱子距离x轴的高度，默认为None，即表示与x轴距离为0
# 

# In[50]:


# Build a small demo table: one category label and one numeric value per row.
data = pd.DataFrame({
    "label": list("abcde"),
    "value": [30, 20, 15, 25, 10],
})
# Category names go on the x axis; their values become the bar heights.
x, y = (data[column] for column in ("label", "value"))
# Create the figure (size in inches plus resolution) and a single Axes on it.
fig, ax = plt.subplots(dpi=80, figsize=(8, 6))
# Alternative 1: numeric positions instead of the labels themselves
#   ax.bar(np.arange(len(data["label"])), data["value"], width=0.3)
# Alternative 2: numeric positions plus a per-bar vertical offset
#   ax.bar(np.arange(len(x)), y, width=0.3, bottom=[5, 0, 0, 8, 0])
# Alternative 3 (used here): pass the labels directly as x and lift the
# first and fourth bars off the x axis through ``bottom``.
bar_offsets = [5, 0, 0, 8, 0]
ax.bar(x, y, width=0.9, bottom=bar_offsets)
# Axis titles and the chart title.
ax.set(title="Label & Value", xlabel="X axis", ylabel="Y axis")


# In[44]:


data


# 函数：len()的用法  它经常搭配range函数使用
# 
# 1：作用：返回字符串、列表、字典、元组等长度
# 
# 2：语法：len(str)
# 
# 3：参数： str：要计算的字符串、列表、字典、元组等
# 
# 4：返回值：字符串、列表、字典、元组等元素的长度
# 
# #计算字符串长度
# >s = "hello good boy doiido" 
#    
# >len(s)
#    
# >21
# 
# #计算列表元素个数
# >l = ['h','e','l','l','o'] 
#     
# >len(l)
#     
# >5
# 
# #计算字典的总长度
# >d = {'num':123,'name':"doiido"}
#     
# >len(d)
#     
# >2
# 
# #计算元组元素个数
# >t = ('G','o','o','d') 
#     
# >len(t)
#     
# >4
#     

# range()函数：
# 
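# 它能返回一系列连续增加的整数，它的工作方式类似于分片。在 Python 2 中 range() 直接返回一个列表；在 Python 3 中 range() 返回的是惰性的 range 对象（一种可迭代的不可变序列，并不是列表，也不是迭代器），需要列表时可以写 list(range(n))。range函数大多数时候出现在for循环中，在for循环中可做为索引使用。其实它也可以出现在任何需要整数序列的环境中。下面示例中的输出均为 Python 2 的显示形式。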
# 
# 1）range()函数内只有一个参数，则表示会产生从0开始计数的整数列表：
# >range(4)
# 
# >[0, 1, 2, 3] #python 返回值
# 
# 2）当传入两个参数时，则将第一个参数做为起始位，第二个参数为结束位：
# >range(0,5)
# 
# >[0, 1, 2, 3, 4]
# 
# 3)range()函数内可以填入三个参数，第三个参数是步进值（步进值默认为1）：
# > range(0,10,3)
# 
# >[0, 3, 6, 9]
# 
# 4)range函数的参数和结果也并非一定要是正数或是递增的，好比下面两个例子：
# > range(-4,4)
# 
# >[-4, -3, -2, -1, 0, 1, 2, 3]
# 
# 5)range可以根据给定的次数，重复动作，来看一个range与for循环最简单的例子：
# > x = 'iplaypython'
# 
# > for i in x:
# 
# >...   print i,
# 
# >... i p l a y p y t h o n
# 
# > range(len(x))
# 
# > [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# 
# > len(x)
# 
# > 11
# 
# > for i in range(len(x)):
# 
# >...   print x[i],
# 
# >... i p l a y p y t h o n
# >

# #### numpy.arange  与 range()的区别
# np.arange()官方文档：https://docs.scipy.org/doc/numpy-1.10.1/reference/generated/numpy.arange.html
# 
# CSDN参考：https://blog.csdn.net/lanchunhui/article/details/49493633

# ### 添加X轴标签 

# In[51]:


# Signature reminder: bar(x, height, width=0.8, bottom=None, **kwargs)
data = pd.DataFrame(
    {"label": list("abcde"), "value": [30, 20, 15, 25, 10]}
)

# The label column supplies the x tick labels, the value column the heights.
x, y = (data[column] for column in ("label", "value"))

fig, ax = plt.subplots(dpi=80, figsize=(8, 6))

# Passing the string labels as x makes matplotlib use them as tick labels.
ax.bar(x, y, width=0.9)
ax.set(title="This is title", xlabel="X", ylabel="Y")


# ### 添加数据标签

# In[55]:


# Six categories "a".."f" with the values 1..6.
data = pd.DataFrame({"data": list("abcdef"), "value": list(range(1, 7))})
data


# In[62]:


x, y = data["data"], data["value"]

fig, ax = plt.subplots(dpi=80, figsize=(8, 6))

ax.bar(x, y, width=0.5)
# Leave headroom above the tallest bar for the value labels.
ax.set_ylim(0, 10)

# Write each bar's value one unit above its top, centred on the bar.
for category, height in zip(x, y):
    ax.text(category, height + 1, height, fontsize=10, ha="center")


# ### 实例练习 - Titanic 船舱等级与获救人数图表绘制

# In[63]:


import os


# In[65]:


# Switch the working directory to the dataset folder.
os.chdir('../参考资料_数据集')


# In[69]:


# df = pd.read_csv("../参考资料_数据集/titanic.csv")
# Passing the path straight to read_csv failed with
# "OSError: Initializing from file failed" -- presumably because the path
# contains Chinese characters -- so the file is opened explicitly below.


# pandas "Initializing from file failed" when reading a file,
#
# workaround:
# read failure explained: https://blog.csdn.net/qq1483661204/article/details/78524714

# In[97]:


# Open the file ourselves and hand the file object to pandas; the ``with``
# block guarantees the handle is closed once the CSV has been parsed.
with open("../参考资料_数据集/titanic.csv") as f:
    df = pd.read_csv(f)
df.head()


# In[79]:


# 我想知道在泰坦尼克号事故中不同等级舱位的获救人数


# 提前了解一些常识: 
# 
# python中的numpy包中：np.random.randint() / np.unique()
# 
# CSDN资料numpy.unique:https://blog.csdn.net/liulina603/article/details/78782990/
# 
# 
# python中的set()：http://www.runoob.com/python/python-func-set.html
# 
# numpy.random.randint():https://blog.csdn.net/zenghaitao0128/article/details/78556535
# 
# Pandas分组统计函数：groupby:https://blog.csdn.net/elecjack/article/details/50760736
# https://blog.csdn.net/youngbit007/article/details/54288603

# In[93]:


# numpy.random.randint(low, high=None, size=None, dtype=...)
#   low  - lower bound (inclusive)
#   high - upper bound (exclusive); when omitted the range is [0, low)
#   size - shape of the returned array
#   dtype - integer type of the result
# Returns random integers drawn from the half-open interval [low, high).
a = np.random.randint(low=0, high=6, size=8)  # eight random ints in 0..5
a


# In[94]:


# np.unique returns the distinct values of the array, sorted ascending.
c = np.unique(a)
c


# In[95]:


# A plain Python set removes duplicates as well.
d = {value for value in a}
d


# In[96]:


# return_index=True   -> for each unique value, the index of its first
#                        occurrence in the original array.
# return_inverse=True -> for each original element, the index of its value
#                        in the unique array (enough to rebuild the input).
e = np.unique(a, return_inverse=True, return_index=True)
e


# In[98]:


#好了，说了那么多废话，现在开始处理数据
#问题是：要寻找不同舱位与获救人数的关系，画图
#想一下：X轴应该为每种舱位的名称：比如1等舱，2等舱，3等舱（注意不能重复，直接调用df["Pclass"]会把舱位重复画出）
#想一下：Y轴应该是每种舱位存活人数的总数：使用groupby["xx"].sum()计算,返回series


# In[103]:


# Total number of survivors per cabin class; groupby(...).sum() on a single
# column returns a Series indexed by Pclass.
y = df.groupby("Pclass")["Survived"].sum()
type(y)


# In[110]:


# The key step is choosing matching x and y data: one x entry per distinct
# cabin class, and the survivor total of that class as the bar height.
x = np.unique(df["Pclass"])
y = df.groupby("Pclass")["Survived"].sum()

# The rest is the usual plotting routine.
# Create the figure and axes.
fig, ax = plt.subplots(figsize=(8, 6), dpi=80)
# Draw the bars at the class values and label the ticks with them.
ax.bar(x, y, width=0.5, tick_label=x)
# Axis labels and title.
ax.set(xlabel="Pclass", ylabel="Survived Person", title="The number of rescued on Titanic")
# Fix the y range so there is room for the labels above the bars.
ax.set_ylim(0, 160)

# Write each total just above its bar. The bars are positioned at the values
# in ``x`` (the class numbers), so the labels must use those same x values;
# using 0..n-1 here would shift every label off its bar.
for a, b in zip(x, y):
    ax.text(a, b + 3, b, ha="center", fontsize=10)


# 结论：从图中我们可以知道1等舱的获救人数最多，3等舱次之，2等舱获救人数最少

# In[113]:


# The same chart can be drawn straight from pandas.
df.groupby("Pclass")["Survived"].sum().plot.bar(
    title="The number of rescued on Titanic",
    figsize=(7, 5),
    rot=0,
)


# ### A quick look at pandas plotting
# 

# In[114]:


df1 = pd.DataFrame({"class": list("abc"), "score": [80, 90, 100]})
df1


# In[116]:


# Column "class" supplies the categories, column "score" the bar heights.
df1.plot.bar(x="class", y="score", rot=0, title="Class&Score")


# In[120]:


# Survivors per cabin class split by sex -- pandas approach.
df.groupby(["Pclass", "Sex"])["Survived"].sum().plot.bar(
    rot=0,
    figsize=(8, 5),
    title="The number of rescued on Titanic",
)


# In[124]:


# Same question with plain matplotlib -- open question from the author: how
# to select the Sex values that belong to each Pclass?
x = np.unique(df["Pclass"].values)


# ### pandas绘图+matplotlib面向对象绘图

# In[126]:


fig, ax = plt.subplots(figsize=(8, 6), dpi=80)

# Aggregate: survivors for every (cabin class, sex) combination.
df_ps = df.groupby(["Pclass", "Sex"])["Survived"].sum()

# Let pandas draw the aggregated data onto our own Axes object.
# NOTE(review): figsize is given here as well as in plt.subplots above;
# presumably pandas resizes the figure to (8, 5) -- confirm.
df_ps.plot(kind="bar", rot=0, figsize=(8, 5), ax=ax, edgecolor="w")
ax.set_title("The number of rescued on Titanic", fontsize=14)

# Value label above every bar; the labels use positions 0..n-1, one per
# group in df_ps.
for a, b in enumerate(df_ps.values):
    ax.text(a, b + 1, b, ha="center")

# Horizontal line at the mean number of survivors per (class, sex) group.
# The mean is taken over the groups that actually exist instead of dividing
# by a hard-coded group count.
avg = df_ps.mean()
ax.axhline(y=avg, color=(210/255, 199/255, 180/255), linestyle="--")
# Anchor the caption at the last bar, whatever the number of groups is.
ax.text(len(df_ps) - 1, avg + 5, "average is :" + str(int(avg)))

# Hide the left, right and top spines and drop the y ticks.
ax.spines["left"].set_color("none")
ax.spines["right"].set_color("none")
ax.spines["top"].set_color("none")
ax.set_yticks([])


# ### 堆积柱状图与并列柱状图

# In[127]:


# Use the SimHei font so the Chinese labels render.
plt.rcParams['font.sans-serif']=['SimHei']

sale8 = [5,20,15,25,10]
sale9 = [10,15,25,30,5]
labels = ["{}号衣服".format(i) for i in range(1,6)]

fig,ax = plt.subplots(figsize=(8,5),dpi=80)
width_1 = 0.4
# Left edge position of every group; the second series is shifted right by
# one bar width so the two bars of a group sit side by side.
positions = np.arange(len(labels))

ax.bar(positions, sale8, width=width_1, label="8月")
ax.bar(positions + width_1, sale9, width=width_1, label="9月")
# Put one tick per group, centred between its two bars. Passing tick_label
# to both bar() calls would leave the labels under the second series only,
# because the later call replaces the ticks set by the earlier one.
ax.set_xticks(positions + width_1 / 2)
ax.set_xticklabels(labels)
ax.legend()


# In[128]:


# Use the SimHei font so the Chinese labels render.
plt.rcParams["font.sans-serif"] = ["SimHei"]

sale8 = [10, 20, 30, 15, 18]
sale9 = [10, 12, 24, 32, 8]
labels = list(map("{}号衣服".format, range(1, 6)))

fig, ax = plt.subplots(dpi=80, figsize=(8, 5))

# Both series share the same x positions; the second one is stacked on top
# of the first by using the first series as its ``bottom``.
positions = np.arange(len(sale8))
for series_name, sales, base in (("8月", sale8, None), ("9月", sale9, sale8)):
    ax.bar(positions, sales, bottom=base, tick_label=labels, label=series_name)
ax.legend()


# ## 5.1 条形图

# In[2]:


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Render figures inline in the notebook. ``InteractiveShell.magic`` is
# deprecated, so the line magic is invoked through ``run_line_magic``.
get_ipython().run_line_magic('matplotlib', 'inline')


# In[3]:


# List the style sheets the installed Matplotlib provides.
plt.style.available


# In[4]:


# Apply the "bright" seaborn style. Newer Matplotlib releases ship it as
# "seaborn-v0_8-bright" and no longer accept the old "seaborn-bright" name,
# so use whichever spelling this installation lists. If neither is present
# the current style is simply left unchanged.
for _style_name in ("seaborn-v0_8-bright", "seaborn-bright"):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break


# ### 1.一般条形图
# 
# 条形图一般用于表示分类变量的数值大小，或者各类别频数的多少，条形图一般用于数量较多的分类比较，而柱状图一般用于数量较少的分类比较
# 
# barh(y, width, height=0.8, left=None, **kwargs)   （旧版本 Matplotlib 中第一个参数名为 bottom）

# In[6]:


# 做一个top10票价与对应人名的条形图
# 导入数据
import os


# In[8]:


# Switch the working directory to the dataset folder.
os.chdir('../Matplotlib/Matplotlib学习/参考资料_数据集')


# In[9]:


# Open the file explicitly and pass the handle to pandas; the ``with`` block
# closes it as soon as the CSV has been parsed.
with open("../参考资料_数据集/titanic.csv") as g:
    df2 = pd.read_csv(g)
df2.head()


# DataFrame中sort_values:https://blog.csdn.net/flyfrommath/article/details/77225733
# 
# pandas.DataFrame.sort_values官方说明：https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.sort_values.html

# In[13]:


# Top 10 passengers by total fare:
#   1. group the rows by passenger name and sum only the Fare column
#      (summing every column would also try to aggregate the text columns),
#   2. sort the totals from highest to lowest and keep the first ten,
#   3. reset_index() turns the result back into a two-column
#      DataFrame with "Name" and "Fare".
df2_top10 = (
    df2.groupby("Name")["Fare"]
    .sum()
    .sort_values(ascending=False)
    .head(10)
    .reset_index()
)
df2_top10


# In[16]:


# Sort the whole table by fare once, so that names and fares stay paired.
# Sorting only the Fare column would leave the names in their original order
# and attach them to the wrong bars.
top10_sorted = df2_top10.sort_values(by="Fare")

# y positions of the bars
labels_length = np.arange(len(top10_sorted["Name"]))
# x data, i.e. the length of every bar
values = top10_sorted["Fare"].round(1)
# y tick labels, in the same row order as ``values``
labels = top10_sorted["Name"]

# Create the figure and axes.
fig,ax = plt.subplots(figsize=(8,5),dpi=80)

# Draw the horizontal bars, then add the x label and the title.
ax.barh(labels_length,values,tick_label=labels)
ax.set_xlabel("Fare")
ax.set_title("top10 of Fare")

# Value label to the right of every bar.
for a,b in zip(labels_length,values):
    ax.text(b+21,a,b,ha="center")


# 旋风图（正负条形图）

# In[18]:


# Use the SimHei font so the Chinese labels render,
plt.rcParams["font.sans-serif"] = ["SimHei"]
# and keep the minus sign displayable with that font.
plt.rcParams["axes.unicode_minus"] = False

# Data: survivors per (sex, cabin class), split into one Series per sex.
df_grouped1 = df2.groupby(["Sex", "Pclass"])["Survived"].sum()
female = df_grouped1.loc["female"]
male = df_grouped1.loc["male"]
labels = male.index

# Create the figure and axes.
fig, ax = plt.subplots(dpi=80, figsize=(8, 5))

# Female counts extend to the right, male counts (negated) to the left.
female_rows = np.arange(len(female))
male_rows = np.arange(len(male))
ax.barh(female_rows, female, height=0.5, label="female")
ax.barh(male_rows, -male, height=0.5, label="male", tick_label=labels)

# Axis labels, title, symmetric x range and legend.
ax.set(title="不同舱位的男女获救人数", xlabel="获救人数", ylabel="等级舱")
ax.set_xlim(-100, 100)
ax.legend()

# Value labels just beyond the end of every bar.
for row, count in enumerate(female):
    ax.text(count + 4, row, count, fontsize=10, ha="center")

for row, count in enumerate(male):
    ax.text(-count - 4, row, count, fontsize=10, ha="center")