数据分析之matplotlib
绘制折线图
需求1
假设一天中每隔两个小时(range(2, 25, 2))的气温(℃)分别是[15, 13, 14.5, 17, 20, 25, 26, 26, 27, 22, 18, 15]
import matplotlib.pyplot as plt
# Temperature readings taken every two hours: x is the hour (2, 4, ..., 24)
# and y is the reading for that hour.
y = [15, 13, 14.5, 17, 20, 25, 26, 26, 27, 22, 18, 15]
x = range(2, 26, 2)

# Draw the line chart.
plt.plot(x, y)

# Write the image to disk first ...
plt.savefig("./t1.png")

# ... and only then open the display window.
plt.show()
运行效果
注意:先保存图片再展示图片
升级版
import matplotlib.pyplot as plt
# Temperatures sampled every two hours (hours 2 through 24).
x = range(2, 26, 2)
y = [15, 13, 14.5, 17, 20, 25, 26, 26, 27, 22, 18, 15]

# Create the figure with an explicit size and dpi before drawing on it.
plt.figure(figsize=(20, 8), dpi=80)

# Draw the line chart.
plt.plot(x, y)

# x-axis ticks: candidate positions every 0.5 from 2.0 to 24.0, of which
# every second one is kept (a list step thins out ticks that are too dense).
# Other options: plt.xticks(li), plt.xticks(range(2, 25)) or plt.xticks(x).
li = [i / 2 for i in range(4, 49)]
plt.xticks(li[::2])

# y-axis ticks: one per whole number between the lowest and highest reading.
y_low, y_high = min(y), max(y)
plt.yticks(range(y_low, y_high + 1))

# Save the image first, then display it.
plt.savefig("./t1.png")
plt.show()
运行效果
需求2
列表a表示10点到12点的每一分钟的气温,如何绘制折线图观察每分钟气温的变化情况?
a= [random.randint(20,35) for i in range(120)]
import matplotlib.pyplot as plt
import random
# One random integer temperature in [20, 35] for each of the 120 minutes.
x = range(120)
y = [random.randint(20, 35) for _ in x]

# Plot minute index against temperature and display the chart.
plt.plot(x, y)
plt.show()
运行效果
横坐标用字符串表示
import matplotlib.pyplot as plt
import random
# 120 minutes of random integer temperatures in [20, 35].
x = range(120)
y = [random.randint(20, 35) for _ in x]

plt.figure(figsize=(20, 8), dpi=60)
plt.plot(x, y)

# Label every 10th x position with a string instead of a number.
# xticks() pairs positions with labels one-to-one, so both sequences
# must have the same length.
_x = list(x[::10])
_xticks_tables = list(map('hello{}'.format, _x))
plt.xticks(_x, _xticks_tables)

plt.show()
import matplotlib.pyplot as plt
import random
# 120 minutes of random integer temperatures in [20, 35].
x = range(120)
y = [random.randint(20, 35) for _ in x]

plt.figure(figsize=(20, 8), dpi=60)
plt.plot(x, y)

# One label per minute: the 60 minutes of the 10 o'clock hour followed by
# the 60 minutes of the 11 o'clock hour.
_xticks_tables = [
    template.format(minute)
    for template in ('10点{}分', '11点{}分')
    for minute in range(60)
]

# Slice positions and labels with the same step so they stay paired
# one-to-one; rotation is the label angle in degrees.
tick_positions = list(x[::5])
plt.xticks(tick_positions, _xticks_tables[::5], rotation=30)

# One y tick per whole degree between the lowest and highest value.
plt.yticks(range(min(y), max(y) + 1))
plt.show()
注意 此时中文有乱码
设置中文显示
import matplotlib.pyplot as plt
import random
from matplotlib import font_manager
# Load the Microsoft YaHei font so that Chinese tick labels can be rendered.
my_font = font_manager.FontProperties(fname="C:/Windows/Fonts/msyh.ttf")

# 120 minutes of random integer temperatures in [20, 35].
x = range(120)
y = [random.randint(20, 35) for _ in x]

plt.figure(figsize=(20, 8), dpi=100)
plt.plot(x, y)

# Minute labels for the 10 o'clock hour, then for the 11 o'clock hour.
_xticks_tables = []
for hour_template in ('10点{}分', '11点{}分'):
    _xticks_tables.extend(map(hour_template.format, range(60)))

# Every 5th position is paired with every 5th label (equal lengths);
# rotation is in degrees and fontproperties selects the Chinese font.
plt.xticks(list(x)[::5], _xticks_tables[::5], rotation=30, fontproperties=my_font)
plt.yticks(range(min(y), max(y) + 1))
plt.show()
运行效果
添加图形描述信息
import matplotlib.pyplot as plt
import random
from matplotlib import font_manager
# Microsoft YaHei font for Chinese text.  A backslash path also works when
# written as a raw string: r"C:\Windows\Fonts\msyh.ttf".
my_font = font_manager.FontProperties(fname="C:/Windows/Fonts/msyh.ttf")

# 120 minutes of random integer temperatures in [20, 35].
x = range(120)
y = [random.randint(20, 35) for _ in x]

plt.figure(figsize=(20, 8), dpi=100)
plt.plot(x, y)

# Per-minute labels, built hour by hour and then concatenated.
hour_10 = ['10点{}分'.format(minute) for minute in range(60)]
hour_11 = ['11点{}分'.format(minute) for minute in range(60)]
_xticks_tables = hour_10 + hour_11

# Same step for positions and labels keeps them paired one-to-one.
plt.xticks(list(x)[::5], _xticks_tables[::5], rotation=30, fontproperties=my_font)
plt.yticks(range(min(y), max(y) + 1))

# Descriptive text: axis labels and chart title, all in the Chinese font.
plt.xlabel("时间", fontproperties=my_font)
plt.ylabel("温度 单位('C)", fontproperties=my_font)
plt.title("10点到12点每分钟的气温变化情况", fontproperties=my_font)

plt.show()
运行效果
需求3
假设大家在30岁的时候,根据自己的实际情况统计出来从11岁到30岁每年交的女(男)朋友的数量如a,请绘制出该数据的折线图,以便分析每年交女(男)朋友的数量走势
a = [1,0,1,1,2,5,3,2,3,4,4,5,6,5,4,3,3,1,1,1]
要求:
y轴表示个数
x轴表示岁数,比如11岁,12岁
import matplotlib.pyplot as plt
from matplotlib import font_manager
# Font used for every piece of Chinese text in the figure.
my_font = font_manager.FontProperties(fname="C:/Windows/Fonts/msyh.ttf")

# Ages 11 through 30 and the count recorded for each age.
x = range(11, 31)
y = [1, 0, 1, 1, 2, 5, 3, 2, 3, 4, 4, 5, 6, 5, 4, 3, 3, 1, 1, 1]

plt.figure(figsize=(10, 5), dpi=100)
plt.plot(x, y)

# Show every age on the x axis, paired one-to-one with its text label.
_xticks_tables = list(map('{}岁'.format, x))
plt.xticks(list(x), _xticks_tables, fontproperties=my_font)

# Axis labels and title.
plt.xlabel("岁数", fontproperties=my_font)
plt.ylabel("男女朋友个数", fontproperties=my_font)
plt.title("每年交女(男)朋友的数量走势", fontproperties=my_font)
plt.show()
运行效果
绘制网格 添加图例
import matplotlib.pyplot as plt
from matplotlib import font_manager
# Microsoft YaHei is used for all Chinese text in the figure.
my_font = font_manager.FontProperties(fname="C:/Windows/Fonts/msyh.ttf")

# Ages 11 through 30 and two series of counts to compare.
x = range(11, 31)
y1 = [1, 0, 1, 1, 2, 5, 3, 2, 3, 4, 4, 5, 6, 5, 4, 3, 3, 1, 1, 1]
y2 = [1, 0, 3, 1, 2, 2, 3, 3, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1]

# Figure size and resolution.
plt.figure(figsize=(10, 5), dpi=100)

# Draw both series on the same axes; each `label` is the text the legend
# shows for that line.
series = (
    (y1, {'label': '自己', 'color': 'red', 'linestyle': '-.', 'linewidth': 2}),
    (y2, {'label': '同桌', 'color': 'green', 'linestyle': ':', 'linewidth': 4}),
)
for values, line_style in series:
    plt.plot(x, values, **line_style)

# x-axis ticks: one per age, paired with its text label.
_xticks_tables = ['{}岁'.format(age) for age in x]
plt.xticks(list(x), _xticks_tables, fontproperties=my_font)

# Descriptive text.
plt.xlabel("岁数", fontproperties=my_font)
plt.ylabel("男女朋友个数", fontproperties=my_font)
plt.title("每年交女(男)朋友的数量走势", fontproperties=my_font)

# Grid lines with alpha (opacity) 0.4.
plt.grid(alpha=0.4)

# Legend in the upper-left corner; the legend takes its font through `prop`.
plt.legend(prop=my_font, loc='upper left')

# Display the figure.
plt.show()
运行效果
总结
绘制散点图
需求1
假设通过爬虫你获取到了某地3月和10月每天白天的最高气温(分别位于列表a,b),那么此时如何寻找出气温随时间(天)变化的某种规律?
a = [11,17,16,11,12,6,6,7,8,9,8,12,15,14,17,18,21,15,17,20,14,15,15,15,19,21,22,22,22,23,20]
b = [26,26,28,19,21,20,19,17,16,19,18,20,20,19,22,23,17,20,22,15,11,15,5,13,17,10,11,13,12,13,6]
import matplotlib.pyplot as plt
from matplotlib import font_manager
# Microsoft YaHei is used for all Chinese text in the figure.
my_font = font_manager.FontProperties(fname="C:/Windows/Fonts/msyh.ttf")

# March occupies x positions 1-31; October is shifted to 41-71 so that the
# two months sit side by side on one axis with a gap between them.
x_3 = range(1, 32)
x_10 = range(41, 72)
y_3 = [11, 17, 16, 11, 12, 6, 6, 7, 8, 9, 8, 12, 15, 14, 17, 18, 21, 15, 17,
       20, 14, 15, 15, 15, 19, 21, 22, 22, 22, 23, 20]
y_10 = [26, 26, 28, 19, 21, 20, 19, 17, 16, 19, 18, 20, 20, 19, 22, 23, 17,
        20, 22, 15, 11, 15, 5, 13, 17, 10, 11, 13, 12, 13, 6]

# Figure size and resolution.
plt.figure(figsize=(10, 5), dpi=100)

# One scatter series per month.
plt.scatter(x_3, y_3, label='3月')
plt.scatter(x_10, y_10, label='10月')

# Tick positions cover both months; labels are built in the same order.
# October positions are offset by 40, so subtract 40 to get the day number.
_x = [*x_3, *x_10]
_xticks_tables = list(map('3月{}日'.format, x_3))
_xticks_tables.extend('10月{}日'.format(pos - 40) for pos in x_10)

# Keep every 3rd position/label pair to avoid crowding the axis.
plt.xticks(_x[::3], _xticks_tables[::3], rotation=45, fontproperties=my_font)

# Descriptive text.
plt.xlabel("时间", fontproperties=my_font)
plt.ylabel("温度", fontproperties=my_font)
plt.title("气温和随时间(天)变化的某种规律", fontproperties=my_font)

# Legend in the upper-left corner.
plt.legend(prop=my_font, loc='upper left')

# Display the figure.
plt.show()
运行效果
绘制条形图
需求1
假设你获取了2018年内地电影票房前20的电影(列表a)和电影票房数据(列表b),那么如何更加直观地展示该数据?
a =["战狼2","红海行动","美人鱼","唐人街神探2","我不是药神","速度与激情8","西虹市首富","速度与激情7","捉妖记","复仇者联盟3:无限战争","捉妖记2","羞羞的铁拳","变形金刚4:绝迹求生","前任3:再见前任","功夫瑜伽","侏罗纪世界2"]
b = [56.32,36.22,33.9,33.71,30.75,26.46,25.25,24.26,24.21,23.7,22.19,21.9,19.79,19.26,17.53,16.79]
from matplotlib import pyplot as plt
from matplotlib import font_manager
# Film titles (a) and their box-office figures (b), aligned by index.
a = [
    "战狼2", "红海行动", "美人鱼", "唐人街神探2", "我不是药神", "速度与激情8",
    "西虹市首富", "速度与激情7", "捉妖记", "复仇者联盟3:无限战争", "捉妖记2",
    "羞羞的铁拳", "变形金刚4:绝迹求生", "前任3:再见前任", "功夫瑜伽", "侏罗纪世界2",
]
b = [56.32, 36.22, 33.9, 33.71, 30.75, 26.46, 25.25, 24.26, 24.21, 23.7,
     22.19, 21.9, 19.79, 19.26, 17.53, 16.79]

# Draw the bars at numeric positions 0 .. len(a) - 1 (numbers, not titles,
# are passed as the x values).
positions = range(len(a))
plt.bar(positions, b)

# Display the figure.
plt.show()
运行效果
from matplotlib import pyplot as plt
from matplotlib import font_manager
# Font for the Chinese film titles on the x axis.
my_font = font_manager.FontProperties(fname="C:/Windows/Fonts/msyh.ttf")

# Film titles (a) and their box-office figures (b), aligned by index.
a = [
    "战狼2", "红海行动", "美人鱼", "唐人街神探2", "我不是药神", "速度与激情8",
    "西虹市首富", "速度与激情7", "捉妖记", "复仇者联盟3:无限战争", "捉妖记2",
    "羞羞的铁拳", "变形金刚4:绝迹求生", "前任3:再见前任", "功夫瑜伽", "侏罗纪世界2",
]
b = [56.32, 36.22, 33.9, 33.71, 30.75, 26.46, 25.25, 24.26, 24.21, 23.7,
     22.19, 21.9, 19.79, 19.26, 17.53, 16.79]

plt.figure(figsize=(20, 8), dpi=80)

# Bars sit at numeric positions 0 .. 15 (one per title), each 0.3 wide.
positions = range(len(a))
plt.bar(positions, b, width=0.3)

# Pair each numeric position with its title; rotate the labels 45 degrees.
plt.xticks(positions, a, rotation=45, fontproperties=my_font)

# Display the figure.
plt.show()
运行效果
绘制横版条形图
from matplotlib import pyplot as plt
from matplotlib import font_manager
# Font for the Chinese titles and labels.
my_font = font_manager.FontProperties(fname="C:/Windows/Fonts/msyh.ttf")

# Film titles (a) and their box-office figures (b), aligned by index.
a = [
    "战狼2", "红海行动", "美人鱼", "唐人街神探2", "我不是药神", "速度与激情8",
    "西虹市首富", "速度与激情7", "捉妖记", "复仇者联盟3:无限战争", "捉妖记2",
    "羞羞的铁拳", "变形金刚4:绝迹求生", "前任3:再见前任", "功夫瑜伽", "侏罗纪世界2",
]
b = [56.32, 36.22, 33.9, 33.71, 30.75, 26.46, 25.25, 24.26, 24.21, 23.7,
     22.19, 21.9, 19.79, 19.26, 17.53, 16.79]

plt.figure(figsize=(20, 8), dpi=80)

# Horizontal bars: numeric positions run along the y axis, so the bar
# thickness is given with `height` rather than `width`.
positions = range(len(a))
plt.barh(positions, b, height=0.3)

# Pair each numeric y position with its film title.
plt.yticks(positions, a, fontproperties=my_font)

# Descriptive text.
plt.xlabel("票房数据", fontproperties=my_font)
plt.ylabel("电影", fontproperties=my_font)
plt.title("2018年国内地电影票房前20的电影和电影票房数据", fontproperties=my_font)

# Grid lines with alpha (opacity) 0.3.
plt.grid(alpha=0.3)

# Display the figure.
plt.show()
运行效果
需求2
假设列表a中电影分别在2017-09-14(b_14),2017-09-15(b_15),2017-09-16(b_16)三天的票房,为展示列表中电影本身的票房以及同其他电影的数据对比情况,应该如何直观呈现该数据?
a = ["星球崛起3:终极之战","敦刻尔克","英雄归来","战狼2"]
b_16 =[15746,312,4497,319]
b_15 =[12357,156,2045,168]
b_14 =[2358,399,2358,362]
from matplotlib import pyplot as plt
from matplotlib import font_manager
# Font for Chinese text.  The path uses forward slashes because, in a normal
# (non-raw) string literal, "\W", "\F" and "\m" are unrecognized escape
# sequences that Python warns about.
my_font = font_manager.FontProperties(fname="C:/Windows/Fonts/msyh.ttf")

# Film titles and their box-office figures on 2017-09-16, -15 and -14.
a = ["星球崛起3:终极之战", "敦刻尔克", "英雄归来", "战狼2"]
b_16 = [15746, 312, 4497, 319]
b_15 = [12357, 156, 2045, 168]
b_14 = [2358, 399, 2358, 362]

# Figure size and resolution.
plt.figure(figsize=(20, 8), dpi=80)

# First step: plot a single day (2017-09-16) at positions 0 .. len(a) - 1.
plt.bar(range(len(a)), b_16, width=0.3)

# Display the figure.
plt.show()
运行效果
from matplotlib import pyplot as plt
from matplotlib import font_manager
# Font for Chinese text.  The path uses forward slashes because, in a normal
# (non-raw) string literal, "\W", "\F" and "\m" are unrecognized escape
# sequences that Python warns about.
my_font = font_manager.FontProperties(fname="C:/Windows/Fonts/msyh.ttf")

# Film titles and their box-office figures on 2017-09-16, -15 and -14.
a = ["星球崛起3:终极之战", "敦刻尔克", "英雄归来", "战狼2"]
b_16 = [15746, 312, 4497, 319]
b_15 = [12357, 156, 2045, 168]
b_14 = [2358, 399, 2358, 362]

# Figure size and resolution.
plt.figure(figsize=(20, 8), dpi=80)

# Shift each later day's bars right by one bar width so that the three
# bars of a film sit side by side instead of overlapping.
bar_width = 0.2
x_14 = list(range(len(a)))                # [0, 1, 2, 3]
x_15 = [i + bar_width for i in x_14]      # [0.2, 1.2, 2.2, 3.2]
x_16 = [i + bar_width * 2 for i in x_14]  # [0.4, 1.4, 2.4, 3.4]

plt.bar(x_14, b_14, width=bar_width)
plt.bar(x_15, b_15, width=bar_width)
plt.bar(x_16, b_16, width=bar_width)

# Display the figure.
plt.show()
运行效果
from matplotlib import pyplot as plt
from matplotlib import font_manager
# Font for Chinese text.  The path uses forward slashes because, in a normal
# (non-raw) string literal, "\W", "\F" and "\m" are unrecognized escape
# sequences that Python warns about.
my_font = font_manager.FontProperties(fname="C:/Windows/Fonts/msyh.ttf")

# Film titles and their box-office figures on 2017-09-16, -15 and -14.
a = ["星球崛起3:终极之战", "敦刻尔克", "英雄归来", "战狼2"]
b_16 = [15746, 312, 4497, 319]
b_15 = [12357, 156, 2045, 168]
b_14 = [2358, 399, 2358, 362]

# Figure size and resolution.
plt.figure(figsize=(20, 8), dpi=80)

# Shift each later day's bars right by one bar width so that the three
# bars of a film sit side by side instead of overlapping.
bar_width = 0.2
x_14 = list(range(len(a)))                # [0, 1, 2, 3]
x_15 = [i + bar_width for i in x_14]      # [0.2, 1.2, 2.2, 3.2]
x_16 = [i + bar_width * 2 for i in x_14]  # [0.4, 1.4, 2.4, 3.4]

# One bar series per day; `label` is the text shown in the legend.
plt.bar(x_14, b_14, width=bar_width, label='2017-09-14')
plt.bar(x_15, b_15, width=bar_width, label='2017-09-15')
plt.bar(x_16, b_16, width=bar_width, label='2017-09-16')

# x-axis ticks: put each film title under the middle (09-15) bar of its group.
plt.xticks(x_15, a, fontproperties=my_font)

# Descriptive text.
plt.xlabel("电影", fontproperties=my_font)
plt.ylabel("票房数据", fontproperties=my_font)
plt.title("电影3天分析图", fontproperties=my_font)

# Legend in the upper-left corner.
plt.legend(prop=my_font, loc='upper left')

# Display the figure.
plt.show()
运行效果
绘制直方图
直方图,形状类似柱状图却有着与柱状图完全不同的含义。直方图牵涉统计学的概念,首先要对数据进行分组,然后统计每个分组内数据元的数量。 在坐标系中,横轴标出每个组的端点,纵轴表示频数,每个矩形的高代表对应的频数,称这样的统计图为频数分布直方图。
相关概念:
- 组数:在统计数据时,我们把数据按照不同的范围分成几个组,分成的组的个数称为组数
- 组距:每一组两个端点的差
把数据分为多少组进行统计??
组数要适当,太少会有较大的统计误差,太多规律不明显
组数:将数据分组,当数据在100个以内时,按数据多少常分5-12组。
组距:指每个小组的两个端点的距离。
组数 = 极差/组距 = (max(a)-min(a))/ bin_width
需求1
假设你获取了250部电影的时长a列表,希望从这些电影时长的分布状态(比如时长100分钟到120分钟电影的数量出现的频率)等信息你应该如何分析呈现的数据?
a=[131,99,126,129,142,120,113,90,94,135,131,129,136,129,102,120,103,90,114,135,121,119,136,119,112,120,113,100,94,115,101,99,126,129,142,120,133,90,94,135,161,99,126,129,142,120,143,90,94,135,141,99,126,129,162,120,113,90,94,135,101,99,126,129,142,120,112,90,94,135,130,99,126,129,142,140,113,90,94,135,136,99,126,129,162,120,113,90,94,135,134,99,126,129,142,120,113,120,94,135,135,99,126,129,142,120,133,90,94,135,136,99,126,129,142,124,113,90,94,135,137,99,126,129,142,120,113,95,135,111,138,99,126,129,142,126,113,90,94,135,139,99,126,129,142,128,113,96,94,135,131,99,126,129,142,129,113,90,94,135,133,99,126,129,142,120,113,90,94,135,121,99,126,129,142,130,113,92,94,135,131,99,126,129,142,120,113,90,94,135,141,99,126,129,142,120,113,90,94,135,151,99,126,129,142,120,113,90,94,135,131,99,126,129,142,140,113,90,94,135,131,99,126,129,142,120,113,91,94,135,131,99,126,129,142,120,113,90,94,135,131,161,99,126,129,142,120,113,90,111]
from matplotlib import pyplot as plt
a = [131,99,126,129,142,120,113,90,94,135,131,129,136,129,102,120,103,90,114,135,121,119,136,119,112,120,113,100,94,115,101,99,126,129,142,120,133,90,94,135,161,99,126,129,142,120,143,90,94,135,141,99,126,129,162,120,113,90,94,135,101,99,126,129,142,120,112,90,94,135,130,99,126,129,142,140,113,90,94,135,136,99,126,129,162,120,113,90,94,135,134,99,126,129,142,120,113,120,94,135,135,99,126,129,142,120,133,90,94,135,136,99,126,129,142,124,113,90,94,135,137,99,126,129,142,120,113,95,135,111,138,99,126,129,142,126,113,90,94,135,139,99,126,129,142,128,113,96,94,135,131,99,126,129,142,129,113,90,94,135,133,99,126,129,142,120,113,90,94,135,121,99,126,129,142,130,113,92,94,135,131,99,126,129,142,120,113,90,94,135,141,99,126,129,142,120,113,90,94,135,151,99,126,129,142,120,113,90,94,135,131,99,126,129,142,140,113,90,94,135,131,99,126,129,142,120,113,91,94,135,131,99,126,129,142,120,113,90,94,135,131,161,99,126,129,142,120,113,90,111]
# Histogram of the film durations in `a`, split into 20 bins.
plt.hist(a, bins=20)
plt.show()
运行效果
from matplotlib import pyplot as plt
a = [131,99,126,129,142,120,113,90,94,135,131,129,136,129,102,120,103,90,114,135,121,119,136,119,112,120,113,100,94,115,101,99,126,129,142,120,133,90,94,135,161,99,126,129,142,120,143,90,94,135,141,99,126,129,162,120,113,90,94,135,101,99,126,129,142,120,112,90,94,135,130,99,126,129,142,140,113,90,94,135,136,99,126,129,162,120,113,90,94,135,134,99,126,129,142,120,113,120,94,135,135,99,126,129,142,120,133,90,94,135,136,99,126,129,142,124,113,90,94,135,137,99,126,129,142,120,113,95,135,111,138,99,126,129,142,126,113,90,94,135,139,99,126,129,142,128,113,96,94,135,131,99,126,129,142,129,113,90,94,135,133,99,126,129,142,120,113,90,94,135,121,99,126,129,142,130,113,92,94,135,131,99,126,129,142,120,113,90,94,135,141,99,126,129,142,120,113,90,94,135,151,99,126,129,142,120,113,90,94,135,131,99,126,129,142,140,113,90,94,135,131,99,126,129,142,120,113,91,94,135,131,99,126,129,142,120,113,90,94,135,131,161,99,126,129,142,120,113,90,111]
# Bin width: one bar for every 3 units of duration.
d = 3
# Number of bins = (largest value - smallest value) // bin width.
value_span = max(a) - min(a)
num_bins = value_span // d

plt.figure(figsize=(20, 8), dpi=80)
plt.hist(a, bins=num_bins)

# x-axis ticks start at min(a) and advance in steps of d; the stop value
# max(a) + d keeps max(a) itself in the sequence when the span is a
# multiple of d.
plt.xticks(range(min(a), max(a) + d, d))
plt.grid()
plt.show()
运行效果
频数分布直方图
from matplotlib import pyplot as plt
a = [131,99,126,129,142,120,113,90,94,135,131,129,136,129,102,120,103,90,114,135,121,119,136,119,112,120,113,100,94,115,101,99,126,129,142,120,133,90,94,135,161,99,126,129,142,120,143,90,94,135,141,99,126,129,162,120,113,90,94,135,101,99,126,129,142,120,112,90,94,135,130,99,126,129,142,140,113,90,94,135,136,99,126,129,162,120,113,90,94,135,134,99,126,129,142,120,113,120,94,135,135,99,126,129,142,120,133,90,94,135,136,99,126,129,142,124,113,90,94,135,137,99,126,129,142,120,113,95,135,111,138,99,126,129,142,126,113,90,94,135,139,99,126,129,142,128,113,96,94,135,131,99,126,129,142,129,113,90,94,135,133,99,126,129,142,120,113,90,94,135,121,99,126,129,142,130,113,92,94,135,131,99,126,129,142,120,113,90,94,135,141,99,126,129,142,120,113,90,94,135,151,99,126,129,142,120,113,90,94,135,131,99,126,129,142,140,113,90,94,135,131,99,126,129,142,120,113,91,94,135,131,99,126,129,142,120,113,90,94,135,131,161,99,126,129,142,120,113,90,111]
# Bin width: one bar for every 3 units of duration.
d = 3
# Number of bins = (largest value - smallest value) // bin width.
num_bins = (max(a) - min(a)) // d

plt.figure(figsize=(20, 8), dpi=80)

# Normalised histogram.  `density=True` replaces the old `normed=True`
# keyword, which recent Matplotlib releases no longer accept.
plt.hist(a, num_bins, density=True)

# x-axis ticks start at min(a) and advance in steps of d; the stop value
# max(a) + d keeps max(a) itself in the sequence when the span is a
# multiple of d.
plt.xticks(range(min(a), max(a) + d, d))
plt.grid()
plt.show()
运行效果
频率分布直方图
需求2
美国2004年人口普查发现,有124million人在离家较远的地方工作,根据他们从家到上班地点所需要的时间,通过抽样统计(最后一列)出以下表的数据,这些数据能绘制成直方图吗?
Data by absolute numbers
Interval | Width | Quantity | Quantity/width |
---|---|---|---|
0 | 5 | 4180 | 836 |
5 | 5 | 13687 | 2737 |
10 | 5 | 18618 | 3723 |
15 | 5 | 19634 | 3926 |
20 | 5 | 17981 | 3596 |
25 | 5 | 7190 | 1438 |
30 | 5 | 16369 | 3273 |
35 | 5 | 3212 | 642 |
40 | 5 | 4122 | 824 |
45 | 15 | 9200 | 613 |
60 | 30 | 6461 | 215 |
90 | 60 | 3435 | 57 |
interval = [0,5,10,15,20,25,30,35,40,45,60,90]
width = [5,5,5,5,5,5,5,5,5,15,30,60]
quantity = [836,2737,3723,3926,3596,1438,3273,642,824,613,215,57]
前面的问题问的是:哪些数据能够绘制直方图
给出的数据都是统计之后的数据
所以为了达到直方图的效果 需要绘制条形图
所以: 一般来说能够使用plt.hist的方法的是那些没有经过统计过的数据
from matplotlib import pyplot as plt
# Already-aggregated data, so a bar chart is used rather than plt.hist().
# interval: start of each time bucket (x axis); width: size of each bucket;
# quantity: value plotted for each bucket.
interval = [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 60, 90]
width = [5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 30, 60]
quantity = [836, 2737, 3723, 3926, 3596, 1438, 3273, 642, 824, 613, 215, 57]

# Sanity check: the three lists should all have the same length.
print(*map(len, (interval, width, quantity)))

plt.figure(figsize=(20, 8), dpi=80)

# One bar per bucket at positions 0 .. 11.
positions = range(len(quantity))
plt.bar(positions, quantity)
plt.grid()
plt.show()
运行效果
from matplotlib import pyplot as plt
# Already-aggregated data, so a bar chart is used rather than plt.hist().
# interval: start of each time bucket (x axis); width: size of each bucket;
# quantity: value plotted for each bucket.
interval = [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 60, 90]
width = [5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 30, 60]
quantity = [836, 2737, 3723, 3926, 3596, 1438, 3273, 642, 824, 613, 215, 57]

# Sanity check: the three lists should all have the same length.
print(*map(len, (interval, width, quantity)))

plt.figure(figsize=(20, 8), dpi=80)

# Bars at positions 0 .. 11 with width 1, so neighbouring bars touch the
# way histogram bars do.
positions = range(len(quantity))
plt.bar(positions, quantity, width=1)
plt.grid()
plt.show()
运行效果
from matplotlib import pyplot as plt
# Already-aggregated data, so a bar chart is used rather than plt.hist().
# interval: start of each time bucket (x axis); width: size of each bucket;
# quantity: value plotted for each bucket.
interval = [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 60, 90]
width = [5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 30, 60]
quantity = [836, 2737, 3723, 3926, 3596, 1438, 3273, 642, 824, 613, 215, 57]

plt.figure(figsize=(20, 8), dpi=80)

# Touching bars (width 1) at positions 0 .. 11.
positions = range(len(quantity))
plt.bar(positions, quantity, width=1)

# Tick positions are shifted half a bar to the left of each bar position:
# [-0.5, 0.5, 1.5, ..., 10.5].  Each tick is labelled with the bucket's
# start value, paired one-to-one.
_x = [pos - 0.5 for pos in positions]
_xtick_lables = interval
plt.xticks(_x, _xtick_lables)

plt.grid()
plt.show()
运行效果
from matplotlib import pyplot as plt
# Already-aggregated data, so a bar chart is used rather than plt.hist().
# interval: start of each time bucket (x axis); width: size of each bucket;
# quantity: value plotted for each bucket.
interval = [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 60, 90]
width = [5, 5, 5, 5, 5, 5, 5, 5, 5, 15, 30, 60]
quantity = [836, 2737, 3723, 3926, 3596, 1438, 3273, 642, 824, 613, 215, 57]

plt.figure(figsize=(20, 8), dpi=80)

# Touching bars (width 1) at positions 0 .. 11.
bar_count = len(quantity)
plt.bar(range(bar_count), quantity, width=1)

# One more tick than there are bars, so the last bar also gets a tick on
# its right-hand side: [-0.5, 0.5, 1.5, ..., 10.5, 11.5].
_x = [pos - 0.5 for pos in range(bar_count + 1)]

# The extra label is the end of the last bucket: its start (90) plus its
# width (60).
_xtick_lables = [*interval, 90 + 60]
plt.xticks(_x, _xtick_lables)

plt.grid()
plt.show()
运行效果
直方图更多应用场景:
用户的年龄分布状态
一段时间内用户点击次数的分布
用户活跃时间的分布状态
饼图
注意显示的百分比的位数
plt.pie(x, labels=..., autopct=..., colors=...)
- x:数量,自动算百分比
- labels:每部分名称
- autopct:占比显示指定%1.2f%%
- colors:每部分颜色
需求
显示不同的电影的排片占比
import matplotlib.pyplot as plt
from matplotlib import font_manager
# Film names and their screening counts, aligned by index; `colors` gives
# the wedge colour for each entry.
movie_name = ['雷神3:诸神黄昏', '正义联盟', '东方快车谋杀案', '寻梦环游记', '全球风暴',
              '降魔传', '追捕', '七十七天', '密战', '狂兽', '其它']
place_count = [60605, 54546, 45819, 28243, 13270, 9945, 7679, 6799, 6101, 4621, 20105]
colors = ['b', 'r', 'g', 'y', 'c', 'm', 'y', 'k', 'c', 'g', 'y']

# Font for Chinese text.
my_font = font_manager.FontProperties(fname="C:/Windows/Fonts/msyh.ttf")

# Figure size and resolution.
plt.figure(figsize=(15, 10), dpi=80)
plt.axis('equal')  # equal axis scaling keeps the pie circular

# Draw the pie; autopct formats each share as a percentage with two decimals.
pie = plt.pie(place_count, labels=movie_name, autopct="%1.2f%%", colors=colors)

# The label texts inside the chart cannot be configured through rcParams,
# so the font is applied to each label object (second item returned by
# plt.pie) individually.  The loop body must be indented under the `for`.
for font in pie[1]:
    font.set_fontproperties(my_font)

# Legend in the upper-left corner.
plt.legend(prop=my_font, loc='upper left')

# Chart title.
plt.title("电影排片占比", fontproperties=my_font)

# Display the figure.
plt.show()
运行效果
对比常用统计图
折线图:
以折线的上升或下降来表示统计数量的增减变化的统计图
特点:能够显示数据的变化趋势,反映事物的变化情况。(变化)
直方图:
由一系列高度不等的纵向条纹或线段表示数据分布的情况。一般用横轴表示数据范围,纵轴表示分布情况。
特点:绘制连续性的数据,展示一组或者多组数据的分布状况(统计)
条形图:
排列在工作表的列或行中的数据可以绘制到条形图
特点:绘制离散的数据,能够一眼看出各个数据的大小,比较数据之间的差别(统计)
散点图:
用两组数据构成多个坐标点,考察坐标点的分布,判断两变量之间是否存在某种关联或总结坐标点的分布模式。
特点:判断变量之间是否存在数量关联趋势,展示离群点(分布规律)
饼图:
用于表示不同分类的占比情况,通过弧度大小来对比各种分类。
特点:分类数据的占比情况(占比)
Plot的图表函数
plt.plot(x,y , fmt) :绘制坐标图
plt.bar(x, height, width, bottom) : 绘制条形图
plt.barh(y, width, height, left) : 绘制横向条形图
plt.pie(data, explode) : 绘制饼图
plt.scatter(x, y) :绘制散点图
plt.hist(x, bins, density) : 绘制直方图