下面通过代码来学习直方图的绘制:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
# Histogram of 1000 standard-normal samples, followed by a density curve.
s = pd.Series(np.random.randn(1000))
s.hist(
    bins=20,                 # number of bins
    histtype='bar',          # style: bar, barstacked, step, stepfilled
    align='mid',             # bar alignment: left, mid, right
    orientation='vertical',  # vertical or horizontal bars
    alpha=0.5,               # transparency
    density=True,            # plot a density instead of raw counts
)
# Add a kernel density estimate, drawn as a dashed black line.
s.plot.kde(style='k--', grid=True)
堆叠直方图
# Stacked histogram, drawn with DataFrame.plot.hist()
# (Series.plot.hist() is the single-column counterpart).
df = pd.DataFrame(
    {
        'a': np.random.randn(1000) + 1,
        'b': np.random.randn(1000),
        'c': np.random.randn(1000) - 1,
        'd': np.random.randn(1000) - 2,
    },
    columns=['a', 'b', 'c', 'd'],
)
df.plot.hist(
    # DataFrame.plot opens a figure of its own when no `ax` is given, so a
    # figure created beforehand with plt.figure() would be left blank.
    # Create the axes explicitly and hand them to pandas instead.
    ax=plt.figure().add_subplot(111),
    stacked=True,          # stack the columns on top of each other
    bins=20,
    colormap='Greens_r',   # the "_r" suffix selects the reversed colormap
    alpha=0.5,
    grid=True,
)
下面是另一种风格的写法(直接调用 plt.hist),多看多写,印象会更深刻:
import random
# Two Gaussian samples of 500 draws each: (mean 15, std 10) and (mean 5, std 5).
data1 = [random.gauss(15, 10) for _ in range(500)]
data2 = [random.gauss(5, 5) for _ in range(500)]

# Both histograms share the same bin edges (-50 to 50, step 2.5) so that
# their bars line up and can be compared directly.
bins = np.arange(-50, 50, 2.5)
plt.hist(data1, bins=bins, alpha=0.3, label='class 1')
plt.hist(data2, bins=bins, alpha=0.3, label='class 2')
plt.legend(loc='best')
plt.show()
输出结果图:
散点图
# Three 2-D Gaussian point clouds of 100 samples each. The second and third
# add 0.2 and 0.4 to every entry of the mean vector and covariance matrix.
mu_vec1 = np.array([0, 0])              # mean vector
cov_mat1 = np.array([[2, 0], [0, 2]])   # covariance matrix

x1_samples = np.random.multivariate_normal(mu_vec1, cov_mat1, size=100)
x2_samples = np.random.multivariate_normal(mu_vec1 + 0.2, cov_mat1 + 0.2, size=100)
x3_samples = np.random.multivariate_normal(mu_vec1 + 0.4, cov_mat1 + 0.4, size=100)

plt.figure(figsize=(8, 6))
# Transposing gives one row per coordinate, which unpacks into the x and y
# arguments of scatter.
plt.scatter(*x1_samples.T, label='x1', color='blue', marker='x', alpha=0.6)
plt.scatter(*x2_samples.T, label='x2', color='red', marker='o', alpha=0.6)
plt.scatter(*x3_samples.T, label='x3', color='green', marker='^', alpha=0.6)
plt.legend(loc='best')
plt.show()
输出结果图:
在散点图上标注出每个点的坐标:
# Scatter seven points and label each one with its own coordinates.
x_coords = [0.13, 0.22, 0.39, 0.59, 0.68, 0.74, 0.93]
y_coords = [0.75, 0.34, 0.44, 0.52, 0.80, 0.25, 0.55]
plt.figure(figsize=(8, 6))
plt.scatter(x_coords, y_coords, marker='s', s=50)
for x, y in zip(x_coords, y_coords):
    # xy:         the data point being annotated
    # xytext:     where the label text goes; with textcoords='offset points'
    #             it is an offset from xy in points (here 15 points below)
    # textcoords: the coordinate system used to interpret xytext
    # ha:         horizontal alignment of the label text
    plt.annotate('(%s,%s)' % (x, y), xy=(x, y), xytext=(0, -15),
                 textcoords='offset points', ha='center')
plt.show()
输出结果图:
根据点到原点 (0,0) 的距离来设置点的大小(离原点越远,点越大):
# 500 samples from a 2-D Gaussian with zero mean and identity covariance,
# drawn with marker areas proportional to the squared distance from (0, 0).
mu_vec1 = np.array([0, 0])
cov_mat1 = np.array([[1, 0], [0, 1]])
X = np.random.multivariate_normal(mu_vec1, cov_mat1, size=500)

fig = plt.figure(figsize=(8, 6))
R = X * X                   # element-wise squares of the coordinates
R_sum = np.sum(R, axis=1)   # x^2 + y^2 per point: larger when farther from the origin
plt.scatter(*X.T, s=20 * R_sum, marker='o', color='grey', alpha=0.5)
plt.show()
输出结果图: