老师主要介绍了三个库文件(pandas / matplotlib / seaborn)的用途、使用要点,以及读取 Excel 文件的方法。感谢暨南大学刘倩老师(武汉大学刘丽群老师的得意门生)、深圳大学史旻昱老师、香港城市大学万天娇老师,还有我的同桌西安工程大学丛红艳老师、广州大学魏晨捷老师、鲁迅美术学院于静老师的指点和鼓励。我死磕 PDF(NumPy / Matplotlib / Pandas),又睇 Cheat Sheet(Python / Matplotlib / Seaborn / NumPy),忙乎了一整天,耗尽了脑力,终于完成了这次作业!
1 简单的绘图
案例:正弦波(来源:嵩天)
# Imports
import matplotlib.pyplot as plt
import numpy as np

# Plotting code
# Sample points from 0.0 up to (but not including) 5.0 in steps of 0.02.
t = np.arange(0.0, 5.0, 0.02)
# NOTE(review): the title string below calls this a sine wave, while the
# plotted function is a cosine — confirm which one is intended.
wave = np.cos(2 * np.pi * t)
# 'r--' draws the curve as a red dashed line.
plt.plot(t, wave, 'r--')

# Both axis labels share the same font settings.
label_font = {'fontproperties': 'simhei', 'fontsize': 20}
plt.xlabel('横轴:时间', **label_font)
plt.ylabel('纵轴:振幅', **label_font)
plt.title(r'正弦波实例 $y=cos(2\pi x)$', fontproperties='simhei', fontsize=25)

# Arrow pointing at (2, 1) with its text placed at (3, 1.5).
arrow_style = {'facecolor': 'black', 'shrink': 0.1, 'width': 2}
plt.annotate(r'$\mu=100$', xy=(2, 1), xytext=(3, 1.5), arrowprops=arrow_style)

# Visible range: x from -1 to 6, y from -2 to 2.
plt.axis([-1, 6, -2, 2])

# Output code
plt.grid(True)
plt.show()
案例:图像处理(来源:matplotlib)
#这是头文件
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
#读取图片
img = mpimg.imread('teacher_z.JPG')
#选择一个数据通道:
lum_img = img[:, :, 0]
#输出对比图
fig = plt.figure()
a = fig.add_subplot(1, 2, 1)
imgplot = plt.imshow(lum_img)
a.set_title('Before')
plt.colorbar(ticks=[0.1, 0.3, 0.5, 0.7], orientation='horizontal')
a = fig.add_subplot(1, 2, 2)
imgplot = plt.imshow(lum_img)
imgplot.set_clim(0.0, 0.7)
a.set_title('After')
plt.colorbar(ticks=[0.1, 0.3, 0.5, 0.7], orientation='horizontal')
2 各种华丽的绘图
案例:
# KDE + histogram
# Imports
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

# Load the data.
ccss = pd.read_excel("ccss_sample.xlsx")
# head has to be called; the bare attribute only references the method.
ccss.head()

# Output: histogram of column s3 with a KDE curve and a rug overlaid.
# NOTE(review): seaborn deprecated distplot in v0.11 in favour of
# histplot/displot — consider migrating once the target version is known.
ax = sns.distplot(
    ccss.s3,
    rug=True,
    rug_kws={"color": "g"},
    kde_kws={"color": "k", "lw": 3, "label": "KDE"},
    hist_kws={"linewidth": 3, "color": "y", "label": "Hist"},
)
案例:模糊的散点图
# Blurred (density-shaded) scatter plot
# Imports
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

# Chinese font
plt.rcParams["font.family"] = "SimHei"

# Load the data.
ccss = pd.read_excel("ccss_sample.xlsx")
# head has to be called; the bare attribute only references the method.
ccss.head()
# Cross-tabulate s4 (rows) against O1 (columns).
tmpdf = pd.crosstab(index=ccss.s4, columns=ccss.O1)
tmpdf

# Output: filled two-dimensional density of s3 against index1 with a colour bar.
# seaborn >= 0.11 takes the two variables as x=/y= keywords (a positional
# second vector is rejected from 0.12 on); fill and levels replace the
# deprecated shade and n_levels arguments.
sns.kdeplot(x=ccss.s3, y=ccss.index1, fill=True, cbar=True, levels=40)
案例
# Imports
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

# Chinese font
plt.rcParams["font.family"] = "SimHei"

# Load the data.
ccss = pd.read_excel("ccss_sample.xlsx")
# head has to be called; the bare attribute only references the method.
ccss.head()

# Count of each distinct s5 value (value_counts orders by descending count).
data = ccss.s5.value_counts()
data
# Running share of the total, in the same order as the counts.
prob = data.cumsum() / data.sum()
prob

# Output: blue bars for the counts, plus a green marker line for the
# cumulative share drawn against a secondary y axis.
data.plot.bar(color='b')
prob.plot(color='g', secondary_y=True, style='-o', linewidth=2)
案例:热力图
# Heatmap
# Imports
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

# Chinese font
plt.rcParams["font.family"] = "SimHei"

# Load the data.
ccss = pd.read_excel("ccss_sample.xlsx")
# head has to be called; the bare attribute only references the method.
ccss.head()

# Count of each distinct s5 value and its running share of the total.
data = ccss.s5.value_counts()
data
prob = data.cumsum() / data.sum()
prob

# Bin s3 into five-wide intervals between 15 and 65.
ccss['s3cls'] = pd.cut(ccss.s3,
                       bins=[15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65])
ccss.s3cls.head()

# Rebind data to the s9 x s5 table of index1; pivot_table aggregates with
# the mean by default.
data = ccss.pivot_table(index='s9', columns='s5', values='index1')
data.head()

# Output: heatmap with square cells.
sns.heatmap(data, square=True)
案例:热力图
# Imports
import pandas as pd
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns  # sns.heatmap is used below

# Load the data.
ccss = pd.read_excel("ccss_sample.xlsx")
# head has to be called; the bare attribute only references the method.
ccss.head()

# NOTE(review): data2 is not created anywhere in this snippet. The 'All'
# label and the dropped last row suggest a table with margins built in an
# earlier cell — confirm and define it here before running this on its own.
# Reorder the columns by the values found in the row labelled 'All'.
data2.sort_values(by='All', axis='columns', inplace=True)
data2

# Plot / output
plt.figure(figsize=(10, 6))
# Draw every row except the last one, printing each cell value with three
# significant digits.
sns.heatmap(data2.iloc[:-1, :], cmap='YlGnBu',
            annot=True, fmt='.3g')
案例:抖动图(来源:pandas)
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.close('all')

# Data
# ts supplies the date index used by df. It is defined as in the pandas
# visualization guide this example is taken from: 1000 daily timestamps
# starting on 2000-01-01.
ts = pd.Series(np.random.randn(1000),
               index=pd.date_range('1/1/2000', periods=1000))
# Four columns of standard-normal draws sharing that date index.
df = pd.DataFrame(np.random.randn(1000, 4),
                  index=ts.index, columns=list('ABCD'))
# Turn the independent draws into running totals.
df = df.cumsum()

# Plot: one line per column.
plt.figure();
df.plot();
案例:箱线图(来源:pandas)
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Close every figure that is still open.
plt.close('all')

# Data: ten rows of uniform random values in three numeric columns, plus
# two label columns X and Y used for grouping.
value_columns = ['Col1', 'Col2', 'Col3']
df = pd.DataFrame(np.random.rand(10, 3), columns=value_columns)
# X: five 'A' labels followed by five 'B' labels.
df['X'] = pd.Series(['A'] * 5 + ['B'] * 5)
# Y: 'A' and 'B' alternating, starting with 'A'.
df['Y'] = pd.Series(['A', 'B'] * 5)

# Plot: box plots of Col1 and Col2 for each (X, Y) combination.
plt.figure()
bp = df.boxplot(column=['Col1', 'Col2'], by=['X', 'Y'])
案例:结构化的热力图(来源:seaborn)
import pandas as pd
import seaborn as sns

sns.set()

# Load the brain networks example dataset (three header rows, first column
# used as the index).
df = sns.load_dataset("brain_networks", header=[0, 1, 2], index_col=0)

# Keep only the columns whose "network" level is one of the chosen networks.
used_networks = [1, 5, 6, 7, 8, 12, 13, 17]
network_level = df.columns.get_level_values("network")
used_columns = network_level.astype(int).isin(used_networks)
df = df.loc[:, used_columns]

# Categorical palette: one colour per chosen network, keyed by the network
# number written as a string.
network_pal = sns.husl_palette(8, s=0.45)
network_lut = {
    str(network): colour
    for network, colour in zip(used_networks, network_pal)
}

# Map every remaining column to its network colour; these vectors are drawn
# along the sides of the matrix.
networks = df.columns.get_level_values("network")
network_colors = pd.Series(networks, index=df.columns).map(network_lut)

# Draw the full plot: clustered correlation matrix with the network colours
# on both rows and columns.
corr_matrix = df.corr()
sns.clustermap(
    corr_matrix,
    center=0,
    cmap="vlag",
    row_colors=network_colors,
    col_colors=network_colors,
    linewidths=0.75,
    figsize=(13, 13),
)
压轴图