目录
各类图表绘制参考文档
https://matplotlib.org/stable/api/pyplot_summary.html
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
% matplotlib inline
一、plt.plot()
DataFrame.plot(x=None, y=None, kind='line', ax=None, subplots=False, sharex=None, sharey=False, layout=None, figsize=None, use_index=True, title=None, grid=None, legend=True, style=None, logx=False, logy=False, loglog=False, xticks=None, yticks=None, xlim=None, ylim=None, rot=None, fontsize=None, colormap=None, table=False, yerr=None, xerr=None, secondary_y=False, sort_columns=False, **kwds)
- kind: 图表类型,line, bar, barh…(折线图,柱状图,柱状图-横…)
- figsize: 图表大小
- title: 图名
- legend: 是否显示图例,一般直接用plt.legend()
- label: 图例标签,一般仅Series设置label,DataFrame以columns为label
- xlim, ylim: x,y轴边界
- xticks, yticks: x,y轴刻度值
- grid: 是否显示网格,一般直接用plt.grid()
- style: 风格字符串,包括linestyle(-), marker(.), color(g)
- color: 颜色;若style中已包含颜色符号(如"--g"),不能再同时传入color,否则新版pandas会抛出ValueError
- alpha: 透明度,0-1
- rot: 旋转刻度值,0-360,垂直图旋转xticks,水平图旋转yticks
- use_index: 是否将索引设置为刻度标签,默认为True,False时刻度标签为数字,表示第几个值
以下属性对DataFrame有效
- colormap: 可自动根据颜色板中的颜色自动为不同的项设置不同的颜色,颜色板链接如下:
colormaps_reference
在颜色板名称后面添加"_r",可按照反向顺序设置颜色
- subplots: 是否将各个列绘制为不同图表,默认False
Series.plot():默认index为横坐标,value为纵坐标
# Series.plot() demo: a cumulative sum of normal noise indexed by 100 daily dates.
ts = pd.Series(np.random.randn(100), index=pd.date_range("2021-01-01", periods=100))
ts = ts.cumsum()
# plt.plot(ts) can be used as well.
ts.plot(
    kind="line",
    figsize=(6, 4),
    title="title",
    legend=True,
    label="label",
    ylim=[-20, 20],
    yticks=list(range(-20, 20, 5)),
    grid=True,
    # pandas raises ValueError when `style` carries a colour symbol and
    # `color` is passed too, so the style holds only the line type here.
    style="--",
    color="red",
    alpha=0.8,
    rot=45,
    use_index=True,
)
DataFrame.plot()
# DataFrame.plot() demo: four cumulative-sum columns over 100 daily dates.
df = pd.DataFrame(
    np.random.randn(100, 4),
    columns=list("ABCD"),
    index=pd.date_range("2021-01-01", periods=100),
)
df = df.cumsum()
# plt.plot(df) can be used as well.
df.plot(
    kind="line",
    figsize=(6, 4),
    title="title",
    legend=True,
    ylim=[-20, 20],
    yticks=list(range(-20, 20, 10)),
    grid=False,
    # No colour symbol in the style string: a colour given there takes over
    # every column and the colormap below would have no visible effect.
    style="--",
    colormap="cool",
    alpha=0.8,
    rot=45,
    use_index=True,
    subplots=True,
)
二、柱状图、堆叠图
1、Series/DataFrame.plot(kind = 'bar'/'barh', stacked = True)
新版本也可以使用 Series/DataFrame.plot.bar() 和 .plot.barh()
# Sample data for the bar-chart demos: seven random integers in [0, 10)
# labelled a-g, and a 10x3 frame of uniform random values.
s = pd.Series(
    data=np.random.randint(low=0, high=10, size=7),
    index=list("abcdefg"),
)
df = pd.DataFrame(data=np.random.rand(10, 3), columns=["a", "b", "c"])
单系列柱状图:
# Vertical bars for the single series, drawn through the .plot.bar accessor.
s.plot.bar(figsize=(4, 2), color="k", alpha=0.5)
单系列横向柱状图:
# Horizontal bars for the single series, drawn through the .plot.barh accessor.
s.plot.barh(figsize=(4, 2), color="k", alpha=0.5)
多系列柱状图:
# Grouped bars: one bar per column at every row position.
df.plot.bar(figsize=(6, 3), colormap="Reds_r")
堆叠图:
# Stacked bars: the columns of each row are piled on top of one another.
df.plot.bar(stacked=True, figsize=(6, 3), colormap="Reds_r")
2、plt.bar()
plt.bar(x, height, width=0.8, bottom=None, *, align='center', data=None, **kwargs)
plt.barh(y, width, height=0.8, left=None, *, align='center', **kwargs)
对于barh,width和height互换,bottom改为left,yerr改为xerr
- x: x坐标
- height: 条形的高度,即y值
- width: 宽度,0-1,默认0.8
- bottom: y轴起始坐标,相当于将条形平移,bottom扩展即可化为甘特图
- align: 条形的对齐方式,默认为"center",表示条形中心在坐标所在位置;也可设置为"edge",表示条形左边缘在坐标所在位置
- facecolor: 条形填充颜色
- edgecolor: 条形边框颜色
- yerr: y方向误差棒
添加注解:plt.text()
zip()函数用于将可迭代的对象作为参数,将对象中对应的元素打包成一个个元组;在Python 3中返回的是由这些元组组成的迭代器(需要列表时用list()转换)。
# Mirrored bar chart: y1 holds positive heights, y2 negative ones, and every
# y1 bar is annotated with its value.
plt.figure(figsize=(10, 4))
x = np.arange(10)
y1 = np.random.rand(10)
y2 = -np.random.rand(10)
plt.bar(x, y1, width=1, align="center", facecolor="yellowgreen", edgecolor="white", yerr=y1 * 0.1)
plt.bar(x, y2, width=1, facecolor="lightskyblue", edgecolor="white")
# The plt.text call has to be indented under the loop; written flush-left it
# is an IndentationError.
for i, j in zip(x, y1):
    # Offsets place the label slightly left of and below the bar top.
    plt.text(i - 0.25, j - 0.15, f"{j:0.2f}", color="white")
三、面积图 DataFrame.plot.area()
- stacked: 是否堆叠,默认为True
stacked为True时,为了产生堆积面积图,每列必须全部是正值或负值,否则会报错
当有数据为NaN时,自动填充0,所以需清洗缺失值
# Area chart of four uniform-random columns named a-d.
area_columns = list("abcd")
df = pd.DataFrame(data=np.random.rand(10, len(area_columns)), columns=area_columns)
df.plot.area(alpha=0.5, colormap="Greens_r")
四、填图 plt.fill()、plt.fill_between()
plt.fill():对函数与坐标轴之间的区域进行填充
plt.fill_between():对两个函数之间的区域进行填充
# Two damped sine curves, mirror images of each other, filled in one call.
x = np.linspace(0, 1, 500)
y1 = np.sin(4 * np.pi * x) * np.exp(-5 * x)
y2 = -np.sin(4 * np.pi * x) * np.exp(-5 * x)
plt.fill(x, y1, "r", x, y2, "g", alpha=0.5)
# Alternative kept as real comments (a bare triple-quoted string is an
# evaluated expression, not a comment): fill each curve in its own call so
# that each one can carry a legend label.
# plt.fill(x, y1, 'r', alpha=0.5, label='y1')
# plt.fill(x, y2, 'g', alpha=0.5, label='y2')
# plt.legend()
# Shade the region between sin(x) and the constant level 0.5.
x = np.linspace(0, 5 * np.pi, 1000)
y1, y2 = np.sin(x), 0.5
plt.fill_between(x, y1, y2, alpha=0.5, color="b")
五、饼图 plt.pie()
plt.pie(x, explode=None, labels=None, colors=None, autopct=None, pctdistance=0.6, shadow=False, labeldistance=1.1, startangle=None,
radius=None, counterclock=True, wedgeprops=None, textprops=None, center=(0, 0), frame=False, hold=None, data=None)
- x: 数据
- explode: 如果不是None,则是len(x)数组,该数组指定偏移量占半径的比例
- labels: 标签
- colors: 颜色
- autopct: 标签的显示格式
- pctdistance: 通过autopct生成的文本的开头距离中心的距离,默认为0.6
- shadow: 是否显示阴影
- labeldistance: 饼图标签的半径大小,相对于饼图半径的比例,默认为1.1
- startangle: 饼图开始角度,x轴逆时针,默认为0
- radius: 半径
- counterclock: 楔形块的排列方向,默认为True,即逆时针;设置为False时为顺时针
- frame: 图框
# Pie chart of four random values labelled a-d.
s = pd.Series(3 * np.random.rand(4), index=["a", "b", "c", "d"], name="series")
plt.axis("equal")  # equal axis scaling; otherwise the pie may not be a true circle
pie_options = {
    "explode": [0.1, 0, 0, 0],
    "labels": s.index,
    "colors": ["r", "g", "b", "c"],
    "autopct": "%0.2f%%",
    "pctdistance": 0.8,
    "shadow": True,
    "labeldistance": 1.2,
    "startangle": 45,
    "radius": 0.8,
    "counterclock": False,
    "frame": False,
}
plt.pie(s, **pie_options)
六、直方图 plt.hist()、密度图 Series.plot(kind = 'kde')
1、直方图 plt.hist()
plt.hist(x, bins=10, range=None, density=False, weights=None, cumulative=False, bottom=None, histtype='bar', align='mid', orientation='vertical', rwidth=None, log=False, color=None, label=None, stacked=False, hold=None, data=None, **kwargs)
- bins: 定义范围内等宽宽度的bin数,默认为10
- density: 标准化,直方图下的面积积分为1(旧版本参数名为normed,新版Matplotlib中已移除)
- histtype: 直方图类型
“bar”,默认,条形直方图;
“barstacked”,堆叠的条形直方图,不常用;
“step”,未填充的线图;
"stepfilled",填充的线图
- orientation: 方向,垂直'vertical'(默认)或水平'horizontal'
- align: 对齐方式,默认'mid',{'left', 'mid', 'right'}
# Histogram of 1000 standard-normal samples in 20 bins.
s = pd.Series(np.random.randn(1000))
hist_options = {
    "bins": 20,
    "histtype": "bar",
    "orientation": "vertical",
    "alpha": 0.5,
}
s.hist(**hist_options)
# Normalised histogram with the kernel density estimate drawn on top.
# `density=True` replaces the `normed` keyword, which current Matplotlib
# no longer accepts.
s.hist(
    bins=20,
    histtype="bar",
    orientation="vertical",
    alpha=0.5,
    density=True,
)
s.plot(kind="kde", style="k--")
2、堆叠直方图 DataFrame.plot.hist(stacked = True)
# Four normal samples shifted by +1, 0, -1 and -2, shown first as one stacked
# histogram and then as one histogram per column.
column_shift = {"a": 1, "b": 0, "c": -1, "d": -2}
df = pd.DataFrame(
    {name: np.random.randn(1000) + shift for name, shift in column_shift.items()},
    columns=list(column_shift),
)
df.plot.hist(stacked=True, bins=20, colormap="Greens_r", alpha=0.5)
df.hist(bins=50)  # draws a separate histogram for every column
七、散点图
1、散点图 plt.scatter()
plt.scatter(x, y, s=20, c=None, marker='o', cmap=None, norm=None, vmin=None, vmax=None, alpha=None, linewidths=None, verts=None, edgecolors=None, hold=None, data=None, **kwargs)
其中s和c可以更加多维地展示结果
- marker: 标记的样式
- s: 散点的大小
- c: 散点的颜色
- cmap: colormap
- vmin, vmax: 亮度设置
# Scatter plot of 1000 normal points; marker size is random and marker colour
# follows the y value through the "Reds" colormap.
x = np.random.randn(1000)
y = np.random.randn(1000)
plt.scatter(
    x,
    y,
    # Marker sizes must be non-negative. Raw randn values are negative about
    # half the time and those markers are not drawn, so take the magnitude.
    s=np.abs(np.random.randn(1000)) * 100,
    c=y,
    marker=".",
    cmap="Reds",
    alpha=0.8,
)
2、矩阵散点图 pd.plotting.scatter_matrix()(旧版本为pd.scatter_matrix(),新版pandas中已移除)
比较列与列之间的相关性,对角线为每个列的数据情况
pd.plotting.scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, diagonal='hist', marker='.', density_kwds=None, hist_kwds=None, range_padding=0.05, **kwds)
- frame: 矩阵
- diagonal: 在直方图‘hist’和密度图‘kde’中选择一个
- range_padding: 图像在x轴、y轴附近的留白(padding),该值越大,留白距离越大
# Scatter-plot matrix of three normal columns with a density curve on the diagonal.
df = pd.DataFrame(np.random.randn(100, 3), columns=["a", "b", "c"])
# scatter_matrix lives in pandas.plotting; the top-level pd.scatter_matrix
# alias no longer exists in current pandas.
pd.plotting.scatter_matrix(
    df,
    alpha=0.5,
    figsize=(5, 3),
    diagonal="kde",
    marker="o",
    range_padding=0.2,
)
八、极坐标图
# Shared inputs for the polar demos: the integers 0-19 and angles covering one
# full turn in steps of 0.02 rad.
full_turn = 2 * np.pi
s = pd.Series(data=np.arange(20))
theta = np.arange(0, full_turn, 0.02)
1、plt.polar()
plt.polar(theta, r, **kwargs),theta为弧度制角度,支持多个theta、r参数
# Draw s with the single-argument form, then the spiral r = 3 * theta.
spiral_radius = theta * 3
grid_angles = np.arange(0.0, 360.0, 90)
grid_labels = ['0', '90', 'pi', '270']
plt.polar(s)
plt.polar(theta, spiral_radius)
plt.thetagrids(grid_angles, grid_labels)  # angular grid lines and their labels
plt.ylim(5, 15)
2、plt.subplot(121, projection = "polar") 或fig.add_subplot(121, polar = True)
# Same two curves on a polar subplot (left) and a Cartesian subplot (right).
fig = plt.figure(figsize=(8, 4))
ax1 = plt.subplot(121, projection="polar")  # create the polar subplot
# Also possible: ax1 = fig.add_subplot(121, polar=True)
ax2 = plt.subplot(122)
ax1.plot(s)
ax1.plot(theta, theta * 3)
ax2.plot(s)
ax2.plot(theta, theta * 3)
# Angular direction: 1 (default) is counter-clockwise, -1 is clockwise.
ax1.set_theta_direction(-1)
# Angular offset in radians; a positive value shifts counter-clockwise.
ax1.set_theta_offset(np.pi / 2)
# Angular grid lines and labels; they correspond one to one.
ax1.set_thetagrids(np.arange(0.0, 360.0, 90), ['0', '90', 'pi', '270'])
# Radial grid lines; the values must be positive, 0 is not allowed.
ax1.set_rgrids(np.arange(1, 10, 2))
# Clip the visible radial range. set_rlim exists only on polar axes, so it is
# called on ax1; calling it on the Cartesian ax2 raises AttributeError.
ax1.set_rlim(5, 15)
# Equivalent to:
# ax1.set_rmin(5)   # smallest visible radius
# ax1.set_rmax(15)  # largest visible radius
# Radial tick positions; overlaps with set_rgrids above:
# ax1.set_rticks(np.arange(1, 10, 3))
3、雷达图:极坐标的折线图+填图
# Radar chart = polar line plot + fill. Left: open outline; right: outline
# closed by repeating the first point at the end.
fig = plt.figure(figsize=(8, 4))
ax1 = fig.add_subplot(121, projection="polar")
ax2 = fig.add_subplot(122, projection="polar")
data = np.random.randint(1, 10, 10)
theta = np.arange(0, 2 * np.pi, 2 * np.pi / 10)

# Open outline: the last point is not joined back to the first.
ax1.plot(theta, data, '.--')
ax1.fill(theta, data, alpha=0.2)

# Closed outline: append the first radius and the first angle.
data = np.append(data, data[0])
theta = np.append(theta, theta[0])
ax2.plot(theta, data, '.--')
ax2.fill(theta, data, alpha=0.2)
4、极轴图:极坐标的柱状图
# Polar bar chart: ten bars spread evenly around the circle, each coloured by
# its height.
# Use a fresh figure and polar axes; reusing ax1 from an earlier figure would
# draw the bars on top of that figure's content.
fig = plt.figure(figsize=(4, 4))
ax1 = plt.subplot(111, projection="polar")
data = np.random.randint(1, 10, 10)
theta = np.arange(0, 2 * np.pi, 2 * np.pi / 10)
bars = ax1.bar(theta, data, alpha=0.5)
# A separate loop name keeps the bar container from being shadowed.
for r, patch in zip(data, bars):
    patch.set_facecolor(plt.cm.jet(r / 10.))  # map height (1-9) into the jet colormap