目录:
画图小tips
散点图
柱状图
热力图
折线图
堆积柱状图
1、画图小tips
目录:
1.1 保存多张图片+清空
1.2 画图标题/图例显示中文
1.3 图中加注释
1.4双坐标轴
1.5坐标轴显示百分比
1.6横坐标轴时间刻度间隔显示
1.1 保存多张图片+清空
plt.savefig(ticker_name + '.png') # 保存图片
plt.cla() # 清空
如果不加上 plt.cla(),后面的图会叠加画在前一张图的同一个坐标轴上,图就乱了。注意 plt.cla() 只清空当前坐标轴;如果一张图里有多个子图,可以改用 plt.clf() 清空整个画布,或者用 plt.close() 关闭当前图。
1.2 画图标题/图例显示中文
matplotlib 的默认字体不包含中文字形,标题/图例里的中文会显示成方框。要正常显示中文,需要换成支持中文的字体(例如 SimHei,需要系统里已安装该字体),加上下面两行代码即可(第二行用于保证换字体后负号仍能正常显示):
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
如果数据本身的列名/标签不是中文,而图例想显示中文,可以在 legend 函数中直接传入中文标签,同时加上上面两行代码:
plt.legend((u'没有对冲国债市值', u'对冲后国债市值'), loc='upper right')
1.3 图中加注释
# 添加注释, 'xy='为注释需要添加到的位置坐标
plt.annotate('Annotation', xy=(max(X)+1, max(Y)+1))
1.4双坐标轴
import matplotlib.pyplot as plt
# Draw the two market-value curves first; holding_MV is a table defined elsewhere
fig = plt.figure(figsize=(18, 8)) # set the canvas size
holding_MV[u'没有对冲国债市值'].plot(color='red', linewidth=3)
holding_MV[u'对冲后国债市值'].plot(color='blue', linewidth=3)
# First legend call: covers the two curves drawn so far
plt.legend()
plt.xlabel('time')
plt.ylabel('market_value')
# secondary_y=True asks pandas to draw the bars against a second y-axis
holding_MV[u'国债期货累积收益'].plot(kind='bar', secondary_y=True, color='green')
# NOTE(review): presumably this label lands on the secondary axis created above - confirm
plt.ylabel('return')
# Second legend call, nudged slightly above the upper-right corner
plt.legend(loc='upper right', bbox_to_anchor=(1, 1.008))
# Use the SimHei font for the Chinese legend text and keep the minus sign displayable
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
plt.xticks(rotation=30)
plt.show()
但是这种做法,图例那块一直没有解决好,相当于是两个图例叠在一起显示的。
下面是官方做法,但两个都是折线图,没有柱状图,还没研究好怎么改。
from mpl_toolkits.axes_grid1 import host_subplot
import matplotlib.pyplot as plt
# host is the primary axes; par is a twin axes created from it with its own y-axis
host = host_subplot(111)
par = host.twinx()
host.set_xlabel("Distance")
host.set_ylabel("Density")
par.set_ylabel("Temperature")
# Keep the line handles so their colors can be reused below
p1, = host.plot([0, 1, 2], [0, 1, 2], label="Density")
p2, = par.plot([0, 1, 2], [0, 3, 2], label="Temperature")
leg = plt.legend()
# Color each y-axis label and its legend entry like the corresponding line
host.yaxis.get_label().set_color(p1.get_color())
leg.texts[0].set_color(p1.get_color())
par.yaxis.get_label().set_color(p2.get_color())
leg.texts[1].set_color(p2.get_color())
plt.show()
1.5坐标轴显示百分比
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
X = [1,2,3,4,5,6,7,8,9,10]
Y1 = [42.5, 42, 43, 44.7, 42.6, 44, 41.3, 40.3, 40.7, 40.1]
plt.plot(X, Y1, 'o', markersize=6, color='gray') # 2016H2
def to_percent(temp, position):
    """Format a tick value as a whole-number percentage string, e.g. 40.0 -> '40%'.

    Args:
        temp: the numeric tick value to format.
        position: the tick position passed in by FuncFormatter; unused here but
            required by the formatter callback signature.

    Returns:
        The value rounded to zero decimals, followed by a percent sign.
    """
    return '%1.0f%%' % temp
plt.gca().yaxis.set_major_formatter(FuncFormatter(to_percent))
plt.show()
1.6横坐标轴时间刻度间隔显示
x_list = pd.date_range(date[0], date[-1], freq='Y')
x_list = x_list.map(lambda x:x.strftime("%Y-%m-%d"))
plt.xticks(x_list, rotation=45)
2.散点图
import matplotlib.pyplot as plt
X = [8, -2, 5, -10, -4, 2]
Y = [2, 7, -2, 3, 1, 0]
# Move the axes: hide the top/right spines and make the bottom/left spines
# cross at the data origin (0, 0)
ax = plt.gca()
ax.spines['top'].set_color('none')
ax.spines['right'].set_color('none')
ax.xaxis.set_ticks_position('bottom')
ax.spines['bottom'].set_position(('data', 0))
ax.yaxis.set_ticks_position('left')
ax.spines['left'].set_position(('data', 0))
# Draw the scatter plot ('ro' = red circle markers, no connecting line)
plt.plot(X, Y, 'ro')
# Set the visible range of each axis: first argument is the minimum, second the maximum
plt.xlim(min(X)-2, max(X)+2)
plt.ylim(min(Y)-2, max(Y)+2)
# Add an annotation; 'xy=' is the coordinate the annotation is attached to
plt.annotate('Annotation', xy=(max(X)+1, max(Y)+1))
plt.show()
画散点图的同时画出拟合曲线
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm

# Yield beta = slope of the regression of futures price changes on portfolio
# price changes. portfolio_price / future_price are price series defined elsewhere.
# The first pct_change() row has no previous value, so it is dropped.
portfolio_return = portfolio_price.pct_change()[1:]
future_return = future_price.pct_change()[1:]
X = sm.add_constant(portfolio_return)  # prepend a constant (intercept) column
model = sm.OLS(future_return, X)  # regress futures returns on portfolio returns
fitted = model.fit()
# params follow the column order of X: position 0 is the intercept added by
# add_constant, position 1 is the slope, which is the beta we want.
yield_beta = fitted.params.iloc[1]
plt.plot(portfolio_return, future_return, 'ro')  # scatter plot
plt.plot(portfolio_return, fitted.fittedvalues, 'r', linewidth=1)  # fitted line
plt.xlabel('portfolio_return')
plt.ylabel('future_return')
plt.annotate('Yield beta = {}'.format(yield_beta), xy=(-0.003, 0))
plt.show()
3.柱状图
# Grouped horizontal bars: four series per style factor, drawn side by side in
# two subplots. style_factor and the *_corr_list* inputs are defined elsewhere.
idx1 = np.arange(len(style_factor))  # y positions of the first series
width = 0.2  # bar thickness, also the offset between neighbouring series
idx2 = idx1 + width  # y positions of the second series
idx3 = idx2 + width  # y positions of the third series
idx4 = idx3 + width  # y positions of the fourth series
# The four bars are centred at idx1, idx1+width, idx1+2*width and idx1+3*width,
# so the middle of each group is idx1 + 1.5*width.
group_center = idx1 + width * 1.5
# One row with two subplots that share both the x-axis and the y-axis
fig, axs = plt.subplots(nrows=1, ncols=2, sharex=True, sharey=True, figsize=(12, 6))
ax1 = axs[0]
ax1.barh(idx1, future_return_corr_list1, width, color='#465D4C', label='0-1m')
ax1.barh(idx2, future_return_corr_list2, width, color='#FFBA84', label='1-2m')
ax1.barh(idx3, future_return_corr_list3, width, color='#24936E', label='2-3m')
ax1.barh(idx4, future_return_corr_list4, width, color='#B5495B', label='3-4m')
ax1.set_yticks(group_center)  # put the tick in the middle of the four bars
ax1.set_yticklabels(style_factor)
ax1.invert_yaxis()  # reverse the y-axis direction so the first factor is at the top
ax1.set_title('Correlation with future factor returns', fontweight='bold')  # bold title
ax2 = axs[1]  # second subplot
ax2.barh(idx1, future_volatility_corr_list1, width, color='#465D4C', label='0-1m')
ax2.barh(idx2, future_volatility_corr_list2, width, color='#FFBA84', label='1-2m')
ax2.barh(idx3, future_volatility_corr_list3, width, color='#24936E', label='2-3m')
ax2.barh(idx4, future_volatility_corr_list4, width, color='#B5495B', label='3-4m')
ax2.set_yticks(group_center)
ax2.invert_yaxis()
ax2.set_title('Correlation with future factor volatility', fontweight='bold')
# Show the legend and place it outside the plotting area, to the right
plt.legend(loc=2, bbox_to_anchor=(1.05, 1.0), borderaxespad=0., title='Future Horizon')
plt.show()
4.热力图
# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import matplotlib.cm as cm
# Tick positions (column indices) and the label shown at each position
delay_reference_list = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
delay_reference_label = ('15', '30', '60', '100', '150', '200', '250', '300', '500', '700', '1000', '1440', '2880', '4320')
# NOTE(review): the empty strings presumably pad the automatically chosen
# in-between ticks so that only the three row positions get a label - confirm
OutofExpectation_threshold_label = ('', '0.1', '', '0.05', '', '0', '')
# Data matrix: 3 rows x 14 columns
sig_final_all_list = [[6.55, 6.76, 7.32, 5.6, 5.94, 1.36, 1.41, 0.8, 0.98, 1.36, 13.4, 9.06, 12.24, 13.26],
[7.32, 6.55, 6.76, 5.6, 1.36, 0.8, 0.98, 1.36, 13.4, 5.94, 9.06, 12.24, 13.26, 1.41],
[5.94, 7.32, 6.55, 6.76, 5.6, 1.36, 1.41, 0.8, 1.36, 13.4, 9.06, 12.24, 0.98, 13.26]]
# Segment data for a custom colormap: one tuple list per color channel
cdict = {'red': ((0., 1, 1),
(0.05, 1, 1),
(0.11, 0, 0),
(0.66, 1, 1),
(0.89, 1, 1),
(1, 0.5, 0.5)),
'green': ((0., 1, 1),
(0.05, 1, 1),
(0.11, 0, 0),
(0.375, 1, 1),
(0.64, 1, 1),
(0.91, 0, 0),
(1, 0, 0)),
'blue': ((0., 1, 1),
(0.05, 1, 1),
(0.11, 1, 1),
(0.34, 1, 1),
(0.65, 0, 0),
(1, 0, 0))}
# Create a figure with a white background
fig_sig = plt.figure(facecolor='w')
ax_sig = fig_sig.add_subplot(2, 1, 1)
# Set the axis tick labels; rotation is the angle by which the labels are rotated
# ax_sig.set_xticklabels(delay_reference_label, rotation=-15)
plt.xticks(delay_reference_list, delay_reference_label, rotation=-15)
ax_sig.set_yticklabels(OutofExpectation_threshold_label)
# Build the colormap from cdict with 256 levels
cmap_sig = cm.colors.LinearSegmentedColormap('sig_colormap', cdict, 256)
# vmin and vmax bound the color scale; the range should match the range of the data
map_sig = ax_sig.imshow(sig_final_all_list, interpolation="nearest", cmap=cmap_sig, aspect='auto', vmin=0, vmax=15)
cb_sig = plt.colorbar(mappable=map_sig)
plt.xlabel('delay reference')
plt.ylabel('OutofExpectation threshold')
plt.title('the heatmap of sig')
plt.show()
横坐标轴标签显示不全
如果设置横纵坐标轴标签时这样写,图就会变成这样。
ax_sig.set_xticklabels(delay_reference_label, rotation=-15)
ax_sig.set_yticklabels(OutofExpectation_threshold_label)
纵坐标轴标签显示不全
如果设置横纵坐标轴标签时这样写,图就会变成这样。
OutofExpectation_threshold_label = ('0.1', '0.05', '0')
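目前我还不知道这样写为什么就会在中间加上一个刻度,即那个 0.05。一个可能的原因是:set_yticklabels 只是按顺序把标签贴到已有的刻度上,并不会改变刻度的位置;而 imshow 画 3 行数据时,自动生成的刻度通常不止 0、1、2 这三个位置(例如还有 0.5、1.5 等),所以 3 个标签会依次贴到前几个刻度上,位置就对不上了。比较稳妥的写法是先用 ax_sig.set_yticks([0, 1, 2]) 固定刻度位置,再调用 ax_sig.set_yticklabels(('0.1', '0.05', '0'))。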
模糊热力图
还有一个地方,这里如果去掉 interpolation="nearest",就会变成下图。
map_sig = ax_sig.imshow(sig_final_all_list, cmap=cmap_sig, aspect='auto', vmin=0, vmax=15)
进阶版热力图
下面的例子来自参考文献7,我觉得图很好看诶,还是NBA数据,哈哈哈。但是我没怎么看懂。。
ppg2008.csv的数据是这样的:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

nba = pd.read_csv('ppg2008.csv', index_col=0)
# Normalize data columns
nba_norm = (nba - nba.mean()) / (nba.max() - nba.min())
# DataFrame.sort() was removed from pandas; sort_values() is its replacement
nba_sort = nba_norm.sort_values('PTS', ascending=True)
# Plot it out
fig, ax = plt.subplots(figsize=(8, 11))
ax.pcolor(nba_sort, cmap=plt.cm.Blues, alpha=0.8)
ax.set_frame_on(False)  # turn off the frame
# put the major ticks at the middle of each cell
ax.set_yticks(np.arange(nba_sort.shape[0]) + 0.5, minor=False)
ax.set_xticks(np.arange(nba_sort.shape[1]) + 0.5, minor=False)
# want a more natural, table-like display
ax.invert_yaxis()
ax.xaxis.tick_top()
# Set the labels
# label source:https://en.wikipedia.org/wiki/Basketball_statistics
labels = [
    'Games', 'Minutes', 'Points', 'Field goals made', 'Field goal attempts', 'Field goal percentage', 'Free throws made', 'Free throws attempts', 'Free throws percentage',
    'Three-pointers made', 'Three-point attempt', 'Three-point percentage', 'Offensive rebounds', 'Defensive rebounds', 'Total rebounds', 'Assists', 'Steals', 'Blocks', 'Turnover', 'Personal foul']
# note I could have used nba_sort.columns but made "labels" instead
ax.set_xticklabels(labels, minor=False)
ax.set_yticklabels(nba_sort.index, minor=False)
plt.xticks(rotation=90)
ax.grid(False)
# Turn off all the tick marks but keep the labels. Tick.tick1On / tick2On no
# longer exist in current matplotlib, so assigning to them has no effect.
ax.tick_params(axis='both', which='major',
               bottom=False, top=False, left=False, right=False)
plt.show()
下面是我自己后来应用的实例:
def plot_blue_heatmap(FeatureImportance_list, reference_label, DateList, name):
    """Draw a blue table-style heatmap, save it as '<name>_heatmap.png' and show it.

    Args:
        FeatureImportance_list: 2-D array-like of values; it gets one y tick
            per row and one x tick per column.
        reference_label: labels for the x ticks (drawn along the top, rotated
            90 degrees).
        DateList: labels for the y ticks.
        name: prefix of the output file name.
    """
    # np.shape works for arrays, DataFrames and nested lists alike
    n_rows, n_cols = np.shape(FeatureImportance_list)
    fig, ax = plt.subplots(figsize=(30, 8))
    ax.pcolor(FeatureImportance_list, cmap=plt.cm.Blues)
    ax.set_frame_on(False)
    # put the major ticks at the middle of each cell
    ax.set_yticks(np.arange(n_rows) + 0.5, minor=False)
    ax.set_xticks(np.arange(n_cols) + 0.5, minor=False)
    # want a more natural, table-like display
    ax.invert_yaxis()
    ax.xaxis.tick_top()
    # Set the labels
    ax.set_xticklabels(reference_label, minor=False)
    ax.set_yticklabels(DateList, minor=False)
    plt.xticks(rotation=90)
    ax.grid(False)
    # Turn off all the tick marks but keep the labels. Tick.tick1On / tick2On no
    # longer exist in current matplotlib, so assigning to them has no effect.
    ax.tick_params(axis='both', which='major',
                   bottom=False, top=False, left=False, right=False)
    plt.tight_layout()
    plt.savefig(name + '_heatmap.png')
    plt.show()
5、折线图
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv('data.csv')
df['close'].plot() # draw a single curve
# df[['close', 'adj close']].plot() # draw two curves at once
plt.show()
# Plot every column of df, then label the axes through the returned axes object
# NOTE(review): fontsize=2 looks very small - confirm it is intended
ax = df.plot(title='stock prices', fontsize=2)
ax.set_xlabel('date')
ax.set_ylabel('price')
plt.show()
fig = plt.figure(figsize=(18, 15)) # set the canvas size
ax1 = fig.add_subplot(111)
# Draw two curves; date, future_price and CTD_price are defined elsewhere
plt.plot(date, future_price, color='blue')
plt.plot(date, CTD_price, color='red')
plt.xticks(rotation=30)
# Legend entries are given in the order the curves were drawn
plt.legend(['futures price', 'TF1906 CTD price'], loc='upper left')
plt.xlabel('time')
plt.ylabel('price')
plt.show()
6.堆积柱状图
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib

# Three parallel lists: the group on the x-axis, the style within the group,
# and the value for that (group, style) pair.
data = [[1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7,8,8,8,9,9,9,10,10,10],
        ['大盘', '中盘', '小盘','大盘', '中盘', '小盘', '大盘', '中盘', '小盘', '大盘', '中盘', '小盘',
         '大盘', '中盘', '小盘', '大盘', '中盘', '小盘', '大盘', '中盘', '小盘', '大盘', '中盘', '小盘',
         '大盘', '中盘', '小盘', '大盘', '中盘', '小盘'],
        [0.21,0.58,0.21,0.23,0.47,0.30,0.15,0.64,0.21,0.13,0.48,0.39,0.21,0.55,0.24,0.22,0.46,0.32,0.15,0.63,0.22,
         0.29,0.55,0.16,0.21,0.49,0.30,0.34,0.34,0.32]]
rows = list(zip(data[0], data[1], data[2]))
headers = ['size', 'style', 'Value']
df = pd.DataFrame(rows, columns=headers)
fig, ax = plt.subplots(figsize=(8, 5))
styles = df['style'].drop_duplicates()
# Running top of the stack for every x position; starts at zero
margin_bottom = np.zeros(len(df['size'].drop_duplicates()))
colors = ["orange", "#91B493", "#F6C555"]
# Draw one layer per style, each starting where the previous layer ended
for num, style in enumerate(styles):
    layer = df[df['style'] == style]
    layer.plot.bar(x='size', y='Value', ax=ax, stacked=True,
                   bottom=margin_bottom, color=colors[num], label=style)
    # Rebind instead of += so the array already handed to the drawn bars is not mutated
    margin_bottom = margin_bottom + layer['Value'].to_numpy()
plt.title('2016H2')
# Use the SimHei font for the Chinese legend text and keep the minus sign displayable
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
plt.legend((u'大盘', u'中盘', u'小盘'))
plt.show()
参考文献: