直方图
首先需要区分清楚概念:直方图和条形图。
- 条形图:条形图用长条形表示每一个类别,长条形的长度表示类别的频数,宽度表示类别。
- 直方图:直方图是一种统计报告图,形式上也是一个个的长条形,但是直方图用长条形的面积表示频数,所以长条形的高度表示 $\frac{\text{频数}}{\text{组距}}$,宽度表示组距,其长度和宽度均有意义。当宽度相同时,一般就用长条形长度表示频数。
直方图一般用来描述等距数据,条形图一般用来描述名称(类别)数据或顺序数据。直观上,直方图各个长条形是衔接在一起的,表示数据间的数学关系;条形图各长条形之间留有空隙,用来区分不同的类别。
单分布
# Draw 1000 samples from a normal distribution (mean 0, standard deviation 20).
data = np.random.normal(0, 20, 1000)
# 41 equally spaced edges give 40 bins of width 5 covering [-100, 100].
# np.arange(-100, 100, 5) would stop at 95, leaving the range lopsided and
# silently dropping any sample that falls in [95, 100).
bins = np.linspace(-100, 100, 41)
plt.hist(data, bins=bins, edgecolor="black")
# Pad the x-axis by 5 units beyond the smallest and largest sample.
plt.xlim([data.min() - 5, data.max() + 5])
plt.title("直方图")
多分布
import random
# Two Gaussian samples of 500 points each: (mean 15, sd 10) and (mean 5, sd 5).
data1 = [random.gauss(15, 10) for _ in range(500)]
data2 = [random.gauss(5, 5) for _ in range(500)]
# 41 equally spaced edges give 40 bins of width 2.5 covering [-50, 50].
# np.arange(-50, 50, 2.5) would stop at 47.5, so samples in [47.5, 50)
# would be left out of the plot without any warning.
bins = np.linspace(-50, 50, 41)
# Both histograms share the same bins; the low alpha keeps overlapping bars visible.
plt.hist(data1, bins=bins, label='class 1', alpha=0.3, edgecolor="black")
plt.hist(data2, bins=bins, label='class 2', alpha=0.3, edgecolor="black")
plt.legend(loc='best')
散点图
# Mean vector and covariance matrix of the base bivariate normal distribution.
mu_vec1 = np.array([0, 0])
cov_mat1 = np.array([[2, 0], [0, 2]])

# Draw 100 points per group with np.random.multivariate_normal. Each group
# adds the same scalar shift to every entry of both the mean vector and the
# covariance matrix (0 for the first group, then 0.2 and 0.4).
x1_samples, x2_samples, x3_samples = (
    np.random.multivariate_normal(mu_vec1 + shift, cov_mat1 + shift, 100)
    for shift in (0, 0.2, 0.4)
)

plt.figure(figsize=(8, 6))
# One scatter call per group: column 0 on the x-axis, column 1 on the y-axis.
for samples, marker, color, label in (
    (x1_samples, 'x', 'blue', 'x1'),
    (x2_samples, 'o', 'red', 'x2'),
    (x3_samples, '^', 'green', 'x3'),
):
    plt.scatter(
        samples[:, 0], samples[:, 1],
        marker=marker, color=color, alpha=0.6, label=label,
    )
plt.legend(loc='best')
plt.show()
饼图
# Raw values for the two groups, converted to shares of their combined total.
m = 51212.
f = 40742.
total = m + f
m_perc = m / total
f_perc = f / total

colors = ['navy', 'lightcoral']
labels = ["Male", "Female"]

plt.figure(figsize=(5, 5))
# explode sets how far each wedge is pulled away from the centre (only the
# second wedge is offset here); autopct prints the percentage on each wedge.
patches, texts, autotexts = plt.pie(
    [m_perc, f_perc],
    labels=labels,
    autopct='%1.1f%%',
    explode=[0, 0.05],
    colors=colors,
)
# Category labels only need the larger font size.
for text in texts:
    text.set_fontsize(15)
# Percentage labels get the same size and are drawn in white.
for text in autotexts:
    text.set_fontsize(15)
    text.set_color('white')
嵌套组合
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
# Data labels for the bars
def autolabel(rects, ax=None):
    """Write each bar's height as a text label just above the bar.

    Args:
        rects: Iterable of bar artists that provide ``get_x()``,
            ``get_width()`` and ``get_height()``, e.g. the container
            returned by ``Axes.bar``.
        ax: Axes to draw the labels on. When omitted, each label is drawn on
            the axes its bar belongs to (``rect.axes``), so the function no
            longer depends on a global ``ax1`` being defined.
    """
    for rect in rects:
        height = rect.get_height()
        target = rect.axes if ax is None else ax
        target.text(
            # Centre the label horizontally and place it 2% above the bar top.
            rect.get_x() + rect.get_width() / 2., 1.02 * height,
            # Thousands separators, e.g. 1000.5 -> "1,000.5".
            "{:,}".format(float(height)),
            ha='center', va='bottom', fontsize=18,
        )
# Build the data.
# Each top-10 country is paired with its arrivals value so the two parallel
# lists below cannot drift out of step.
_top10_arrivals = [
    ('CANADA', 16.625687),
    ('MEXICO', 15.378026),
    ('UNITED\nKINGDOM', 3.934508),
    ('JAPAN', 2.999718),
    ('CHINA', 2.618737),
    ('GERMANY', 1.769498),
    ('SOUTH\nKOREA', 1.628563),
    ('FRANCE', 1.419409),
    ('BRAZIL', 1.393710),
    ('AUSTRALIA', 1.136974),
]
top10_arrivals_countries = [country for country, _ in _top10_arrivals]
top10_arrivals_values = [value for _, value in _top10_arrivals]

# Each region is paired with its percentage share of arrivals.
_arrivals_by_region = [
    ('WESTERN\nEUROPE', 36.9),
    ('ASIA', 30.4),
    ('SOUTH\nAMERICA', 13.8),
    ('OCEANIA', 4.4),
    ('CARIBBEAN', 4.0),
    ('MIDDLE\nEAST', 3.6),
    ('CENTRAL\nAMERICA', 2.9),
    ('EASTERN\nEUROPE', 2.6),
    ('AFRICA', 1.5),
]
arrivals_countries = [region for region, _ in _arrivals_by_region]
arrivals_percent = [share for _, share in _arrivals_by_region]
# Main figure: one bar per top-10 country.
fig, ax1 = plt.subplots(figsize=(20, 12))
bar_positions = range(10)
# Note: despite its name, `ax` holds the bars returned by ax1.bar, not an Axes.
ax = ax1.bar(bar_positions, top10_arrivals_values, color='blue')
plt.xticks(bar_positions, top10_arrivals_countries, fontsize=18)

# Inset axes (width 6, height 6, location code 5) nested inside the bar chart
# to host the regional pie chart.
ax2 = inset_axes(ax1, width=6, height=6, loc=5)
# The first two wedges are pulled out slightly further than the other seven.
explode = (0.08,) * 2 + (0.05,) * 7
patches, texts, autotexts = ax2.pie(
    arrivals_percent,
    labels=arrivals_countries,
    autopct='%1.1f%%',
    explode=explode,
)
# Same font size for the region labels and the percentage labels.
for pie_text in [*texts, *autotexts]:
    pie_text.set_fontsize(16)

# Hide the frame around the bar chart.
for spine in ax1.spines.values():
    spine.set_visible(False)

# Annotate every bar with its value.
autolabel(ax)
3D
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure()
# Create the 3D axes through the figure. Constructing Axes3D(fig) directly no
# longer attaches the axes to the figure on current matplotlib releases, which
# leaves the figure blank. The mplot3d import above still registers the '3d'
# projection on older releases.
ax = fig.add_subplot(projection='3d')
# Sample a 32 x 32 grid over [-4, 4) in both x and y with a step of 0.25.
x = np.arange(-4, 4, 0.25)
y = np.arange(-4, 4, 0.25)
X, Y = np.meshgrid(x, y)
# The surface to plot: z = x^2 + y^2.
Z = X**2 + Y**2
ax.plot_surface(X, Y, Z, cmap='rainbow')
# Project the contour lines along the z direction onto the plane z = -2.
# The keyword is `zdir`; the previous `zdim` spelling is not a contour option.
ax.contour(X, Y, Z, zdir='z', offset=-2, cmap='rainbow')
# Optional, left disabled as in the original: ax.set_zlim(-2, 2)
plt.show()