数据处理之Matplotlib
使用 Anaconda 环境,借助 NumPy、Matplotlib 库来完成一些数据统计操作
1.绘制正弦曲线,并设置标题、坐标轴名称、坐标轴范围
import numpy as np
import matplotlib.pyplot as plt
from pylab import mpl
# Demo 1: plot a sine curve with a title, axis labels and explicit axis limits.

# Select the FangSong font for sans-serif text (the title and labels below are
# Chinese) and switch off the Unicode minus sign for tick labels.
mpl.rcParams['font.sans-serif'] = ['FangSong']
mpl.rcParams['axes.unicode_minus'] = False

# Sample sin(x) on [0, 2*pi) with a step of 0.01.
x = np.arange(0, 2*np.pi, 0.01)
y = np.sin(x)
plt.plot(x, y)

plt.title(u'正弦曲线', fontdict={'size':20})  # figure title
plt.xlabel(u'弧度', fontdict={'size':16})  # x-axis label
plt.ylabel(u'正弦值', fontdict={'size':16})  # y-axis label
# Axis limits as [xmin, xmax, ymin, ymax], slightly wider than the data range.
plt.axis([-0.1*np.pi, 2.1*np.pi, -1.1, 1.1])
plt.show()
2.同一坐标系中绘制多种曲线并通过样式、宽度、颜色加以区分
import numpy as np
import matplotlib.pyplot as plt
from pylab import mpl
# Demo 2: draw several curves in one coordinate system and tell them apart by
# line style and colour.

# Select the FangSong font for sans-serif text (the title below is Chinese)
# and switch off the Unicode minus sign for tick labels.
mpl.rcParams['font.sans-serif'] = ['FangSong']
mpl.rcParams['axes.unicode_minus'] = False

# 200 evenly spaced sample points on [-4, 4].
x = np.linspace(-4, 4, 200)
f1 = np.power(10, x)
f2 = np.exp(x)  # e**x
f3 = np.power(2, x)

# One colour and one line style per curve; the labels feed the legend.
plt.plot(x, f1, 'r', ls='-', linewidth=2, label='$10^x$')
plt.plot(x, f2, 'b', ls='--', linewidth=2, label='$e^x$')
plt.plot(x, f3, 'g', ls=':', linewidth=2, label='$2^x$')

# Axis limits as [xmin, xmax, ymin, ymax]; the curves are cut off above y = 8.
plt.axis([-4, 4, -0.5, 8])

# Text annotations placed near the top of the visible area.
plt.text(1, 7.5, r'$10^x$', fontsize=16)
plt.text(2.2, 7.5, r'$e^x$', fontsize=16)
plt.text(3.2, 7.5, r'$2^x$', fontsize=16)

# a**x with a fixed base is an exponential function, so the title names the
# curves as exponential rather than power functions.
plt.title('指数函数曲线', fontsize=16)
plt.legend(loc='upper left')
plt.show()
3.绘制多轴图,即将多幅子图绘制在同一画板
import matplotlib.pyplot as plt
# Demo 3: several subplots on one figure.

# Every panel uses the same limits, given as [xmin, xmax, ymin, ymax].
view_limits = [-1, 2, -1, 2]

# Panel 1 -- position 1 of a 2x2 grid: two horizontal lines at y = 0.5.
plt.subplot(2, 2, 1)
plt.axis(view_limits)
plt.axhline(0.5, color='b')
plt.axhline(0.5, xmin=0.25, xmax=0.75, color='r')

# Panel 2 -- position 2 of a 2x2 grid: two thick vertical lines.
plt.subplot(2, 2, 2)
plt.axis(view_limits)
plt.axvline(0, ymin=0, linewidth=4, color='r')
# NOTE(review): ymin/ymax of axvline are presumably axes fractions in 0..1;
# -0.5 lies outside that range -- confirm the intended extent.
plt.axvline(1.0, ymin=-0.5, ymax=0.5, linewidth=4, color='g')

# Panel 3 -- position 2 of a 2x1 grid (the bottom row): one vertical and one
# horizontal shaded span, both half transparent.
plt.subplot(2, 1, 2)
plt.axis(view_limits)
plt.axvspan(xmin=1.25, xmax=1.55, facecolor='g', alpha=0.5)
plt.axhspan(ymin=0.25, ymax=0.75, facecolor='0.5', alpha=0.5)

plt.show()
4.直方图的绘制
import numpy as np
import matplotlib.pyplot as plt
# Demo 4: histograms.

# 1000 draws from a normal distribution with loc 5.0 and scale 3.0.
samples = np.random.normal(loc=5.0, scale=3.0, size=1000)

# First histogram: default binning.
plt.hist(samples)

# Second histogram, drawn on the same axes: custom bin edges -5, -4, ..., 15.
bin_edges = np.arange(-5, 16)
plt.hist(samples, bins=bin_edges)

plt.show()
5.绘制散点图
import numpy as np
import matplotlib.pyplot as plt
# Demo 5: scatter plot with random positions, marker sizes and colours.
n_points = 50

# Positions: uniform random values in [0, 1).
xs = np.random.rand(n_points)
ys = np.random.rand(n_points)

# Marker areas: pi * r**2 for random radii in [0, 15).
radii = 15 * np.random.rand(n_points)
marker_area = np.pi * radii**2

# Colour values in [0, 2*pi), mapped through the hsv colormap.
hue = 2 * np.pi * np.random.rand(n_points)

plt.scatter(xs, ys, s=marker_area, c=hue, cmap=plt.cm.hsv, alpha=0.5)
plt.show()
6.绘制盒状图
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
def list_generator(mean, dis, number):
    """Draw ``number`` samples from a normal distribution.

    Small wrapper used to generate the demo data for the box plot.

    Args:
        mean: Mean of the distribution.
        dis: Standard deviation of the distribution.
        number: How many samples to generate.

    Returns:
        The array returned by ``np.random.normal``.
    """
    # np.random.normal expects the standard deviation as its second argument,
    # so dis is passed through as-is; squaring it would supply the variance.
    return np.random.normal(mean, dis, number)
# Generate four experimental data sets of 100 values each and collect them,
# keyed by measure name, for a pandas DataFrame.
n_samples = 100
scores = {
    "Hausdorff": list_generator(0.8531, 0.0956, n_samples),
    "City-block": list_generator(0.8631, 0.0656, n_samples),
    "Wasserstein": list_generator(0.8731, 0.1056, n_samples),
    "KL-divergence": list_generator(0.8831, 0.0756, n_samples),
}
data = pd.DataFrame(scores)

# pandas draws one box per DataFrame column.
data.boxplot()

# Axis titles.
plt.xlabel("Dissimilarity Measures")
plt.ylabel("ARI")
plt.show()
7. 2D绘图(等值线图)
import numpy as np
import matplotlib.pyplot as plt
# Demo 7: contour plots of z = x * exp(-x**2 - y**2).

# Open grid: y spans [-2, 2] with 200 points, x spans [-3, 3] with 300 points;
# broadcasting the two in the expression below produces the 2-D array z.
y, x = np.ogrid[-2:2:200j, -3:3:300j]
exponent = -x**2 - y**2
z = x * np.exp(exponent)

# Data bounds as [xmin, xmax, ymin, ymax].
extent = [x.min(), x.max(), y.min(), y.max()]

# Left panel: contour lines with labels, positioned via extent.
plt.subplot(1, 2, 1)
cs = plt.contour(z, 10, extent=extent)
plt.clabel(cs)

# Right panel: filled contours with the coordinates passed as flat 1-D arrays.
plt.subplot(1, 2, 2)
plt.contourf(x.ravel(), y.ravel(), z, 20)

plt.show()
8. 3D绘图
import numpy as np
import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d
# Demo 8: 3-D surface of z = x * exp(-x**2 - y**2).

# Dense 50x50 grid covering [-2, 2] in both directions.
x, y = np.mgrid[-2:2:50j, -2:2:50j]
exponent = -x**2 - y**2
z = x * np.exp(exponent)

# A single subplot using the 3-D projection.
ax = plt.subplot(1, 1, 1, projection='3d')
ax.plot_surface(
    x, y, z,
    rstride=2,
    cstride=1,
    cmap=plt.cm.coolwarm,
    alpha=0.8,
)

# Label the three axes.
for set_label, text in (
    (ax.set_xlabel, 'x'),
    (ax.set_ylabel, 'y'),
    (ax.set_zlabel, 'z'),
):
    set_label(text)

plt.show()