1、3D图的绘制:
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
# Draw the surface z = sin(sqrt(x^2 + y^2)) in 3D and project its contour
# lines onto the floor of the plot.
fig = plt.figure()
# Create the 3D axes through the figure so they are actually attached to it:
# recent Matplotlib releases no longer add a bare ``Axes3D(fig)`` to the
# figure automatically, which leaves the window empty.
ax = fig.add_subplot(111, projection='3d')
x = np.arange(-4, 4, 0.25)
y = np.arange(-4, 4, 0.25)
X, Y = np.meshgrid(x, y)  # build the evaluation grid
Z = np.sin(np.sqrt(X**2 + Y**2))
# rstride=1 / cstride=1 sample every grid row and column (densest mesh).
ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap='rainbow')
# Project the contour lines along the z direction onto the plane z = -2.
# The keyword is ``zdir``; ``zdim`` is not an argument of Axes3D.contour.
ax.contour(X, Y, Z, zdir='z', offset=-2, cmap='rainbow')
ax.set_zlim(-2, 2)  # leave room below the surface for the projection
plt.show()
首先可以查看一下坐标系:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# Show an empty set of 3D axes to inspect the coordinate system.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection="3d")  # single cell of a 1x1 grid
plt.show()
然后导入数据画图:
# Plot a parametric curve in 3D: the radius grows with z**2 while the angle
# sweeps from -4*pi to 4*pi.
fig = plt.figure()
# Figure.gca() no longer accepts keyword arguments in current Matplotlib, so
# request the 3D projection through add_subplot instead.
ax = fig.add_subplot(111, projection='3d')
theta = np.linspace(-4 * np.pi, 4 * np.pi, 100)
z = np.linspace(-2, 2, 100)
r = z**2 + 1
x = r * np.sin(theta)
y = r * np.cos(theta)
ax.plot(x, y, z)
plt.show()
同理,画三维散点图只需要把两个坐标轴改为三个坐标轴:
np.random.seed(1)  # fixed seed so the random samples are reproducible


def randrange(n, vmin, vmax):
    """Return ``n`` random floats scaled into the range ``vmin``..``vmax``.

    The samples come from ``np.random.rand`` (uniform on [0, 1)) and are
    stretched by ``vmax - vmin`` and shifted by ``vmin``.

    Args:
        n: Number of samples to draw.
        vmin: Lower bound of the range.
        vmax: Upper bound of the range.

    Returns:
        A 1-D NumPy array of length ``n``.
    """
    return (vmax - vmin) * np.random.rand(n) + vmin
# 3D scatter plot: one random point cloud per (colour, marker, z-low, z-high).
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
n = 100
# The z bounds are plain integers for both styles, so no int() conversion is
# needed when they are handed to randrange.
for c, m, zlow, zhigh in [('r', 'o', -50, -25), ('b', 'x', -30, -5)]:
    xs = randrange(n, 23, 32)
    ys = randrange(n, 0, 100)
    zs = randrange(n, zlow, zhigh)
    ax.scatter(xs, ys, zs, c=c, marker=m)
plt.show()
三维条形图:
# 3D bar chart: four coloured rows of 20 random-height bars each.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
xs = np.arange(20)  # the x positions are the same for every row
for c, z in zip(['r', 'g', 'b', 'y'], [30, 20, 10, 0]):
    ys = np.random.rand(20)
    cs = [c] * len(xs)  # one colour entry per bar
    # zdir='y' places this row at y = z, so the bars stand upright along
    # the vertical axis.
    ax.bar(xs, ys, zs=z, zdir='y', color=cs, alpha=0.5)
plt.show()
2、饼图
# Two-slice pie chart of the male/female shares.
m = 51212.
f = 40742.
m_perc = m / (m + f)
f_perc = f / (m + f)
colors = ['navy', 'lightcoral']
labels = ["Male", "Female"]
plt.figure(figsize=(8, 8))
# autopct='%1.1f%%' prints each share as a percentage; explode=[0, 0.05]
# pulls the second wedge slightly away from the first.
# plt.pie returns the wedges, the label texts and the percentage texts.
patches, texts, autotexts = plt.pie(
    [m_perc, f_perc], labels=labels, autopct='%1.1f%%',
    explode=[0, 0.05], colors=colors)
for text in texts + autotexts:  # enlarge labels and percentages alike
    text.set_fontsize(20)
for text in autotexts:  # make the percentages readable on the dark wedges
    text.set_color('white')
3、子图布局:
排列布局:
# Lay five axes out on a 3x3 grid. ``loc`` is the (row, column) of the
# top-left cell; rowspan/colspan give how many rows/columns an axes covers.
ax1 = plt.subplot2grid(shape=(3, 3), loc=(0, 0))
ax2 = plt.subplot2grid(shape=(3, 3), loc=(1, 0))
ax3 = plt.subplot2grid(shape=(3, 3), loc=(0, 2), rowspan=3)  # full right column
ax4 = plt.subplot2grid(shape=(3, 3), loc=(2, 0), colspan=2)  # bottom row, two columns wide
ax5 = plt.subplot2grid(shape=(3, 3), loc=(0, 1), rowspan=2)  # middle column, two rows tall
嵌套图:
# Nested plot: x**2 on the main axes with sin(x**2) in a small inset axes.
x = np.linspace(0, 10, 1000)
y1 = x**2
y2 = np.sin(x**2)
fig, ax1 = plt.subplots()
ax1.plot(x, y1)
# Inset rectangle in figure-fraction coordinates.
left, bottom, width, height = 0.22, 0.45, 0.3, 0.35
ax2 = fig.add_axes((left, bottom, width, height))  # extra axes inside the figure
ax2.plot(x, y2)
或者也可以用inset_axes,如在柱状图里画饼图:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
def autolabel(rects, ax=None):
    """Write each bar's height as a text label just above the bar.

    Args:
        rects: Iterable of bar patches (for example the container returned
            by ``Axes.bar``). Each item must provide ``get_x``,
            ``get_width`` and ``get_height``.
        ax: Axes to draw the labels on. When omitted, each label is drawn
            on the axes its bar belongs to (``rect.axes``), so the function
            does not depend on a global ``ax1``.
    """
    for rect in rects:
        height = rect.get_height()
        target = rect.axes if ax is None else ax
        # Centre the label horizontally on the bar, 2% above its top edge.
        target.text(rect.get_x() + rect.get_width() / 2., 1.02 * height,
                    "{:,}".format(float(height)),
                    ha='center', va='bottom', fontsize=18)
# Bar chart of the top-10 arrival countries with a pie chart of arrivals by
# region drawn inside it.
top10_arrivals_countries = ['CANADA', 'MEXICO', 'UNITED\nKINGDOM',
                            'JAPAN', 'CHINA', 'GERMANY', 'SOUTH\nKOREA',
                            'FRANCE', 'BRAZIL', 'AUSTRALIA']
top10_arrivals_values = [16.625687, 15.378026, 3.934508, 2.999718,
                         2.618737, 1.769498, 1.628563, 1.419409,
                         1.393710, 1.136974]
arrivals_countries = ['WESTERN\nEUROPE', 'ASIA', 'SOUTH\nAMERICA',
                      'OCEANIA', 'CARIBBEAN', 'MIDDLE\nEAST',
                      'CENTRAL\nAMERICA', 'EASTERN\nEUROPE', 'AFRICA']
arrivals_percent = [36.9, 30.4, 13.8, 4.4, 4.0, 3.6, 2.9, 2.6, 1.5]
fig, ax1 = plt.subplots(figsize=(20, 12))
tang = ax1.bar(range(10), top10_arrivals_values, color='blue')
plt.xticks(range(10), top10_arrivals_countries, fontsize=18)
# 6x6 inch inset axes; loc=5 is the location code for "right".
ax2 = inset_axes(ax1, width=6, height=6, loc=5)
# Radial offset of each wedge, i.e. the gap between the slices.
explode = (0.08, 0.08, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05)
patches, texts, autotexts = ax2.pie(arrivals_percent,
                                    labels=arrivals_countries,
                                    autopct='%1.1f%%', explode=explode)
for text in texts + autotexts:
    text.set_fontsize(16)
for spine in ax1.spines.values():  # hide the frame around the bar chart
    spine.set_visible(False)
autolabel(tang)  # print each bar's value above it
4、甚至可以在坐标图里画卡通图案:
闲着没事干,画只小猫:
import numpy as np
from matplotlib.patches import Circle, Wedge, Polygon, Ellipse
from matplotlib.collections import PatchCollection
import matplotlib.pyplot as plt
# Draw a cartoon cat out of matplotlib patches (wedges, circles, polygons
# and an ellipse) collected into a single PatchCollection.
fig, ax = plt.subplots()
patches = []
# Full and ring sectors drawn by Wedge((x,y),r,deg1,deg2)
leftstripe = Wedge((.46, .5), .15, 90, 100)  # Full sector by default
midstripe = Wedge((.5, .5), .15, 85, 95)
rightstripe = Wedge((.54, .5), .15, 80, 90)
lefteye = Wedge((.36, .46), .06, 0, 360, width=0.03)  # Ring sector drawn when width <1
righteye = Wedge((.63, .46), .06, 0, 360, width=0.03)
nose = Wedge((.5, .32), .08, 75, 105, width=0.03)
mouthleft = Wedge((.44, .4), .08, 240, 320, width=0.01)
mouthright = Wedge((.56, .4), .08, 220, 300, width=0.01)
patches += [leftstripe, midstripe, rightstripe, lefteye, righteye, nose,
            mouthleft, mouthright]
# Circles
leftiris = Circle((.36, .46), 0.04)
rightiris = Circle((.63, .46), 0.04)
patches += [leftiris, rightiris]
# Polygons drawn by passing coordinates of vertices. ``closed`` is passed by
# keyword because current Matplotlib no longer accepts it positionally.
leftear = Polygon([[.2, .6], [.3, .8], [.4, .64]], closed=True)
rightear = Polygon([[.6, .64], [.7, .8], [.8, .6]], closed=True)
topleftwhisker = Polygon([[.01, .4], [.18, .38], [.17, .42]], closed=True)
bottomleftwhisker = Polygon([[.01, .3], [.18, .32], [.2, .28]], closed=True)
toprightwhisker = Polygon([[.99, .41], [.82, .39], [.82, .43]], closed=True)
bottomrightwhisker = Polygon([[.99, .31], [.82, .33], [.81, .29]], closed=True)
patches += [leftear, rightear, topleftwhisker, bottomleftwhisker,
            toprightwhisker, bottomrightwhisker]
# Ellipse drawn by Ellipse((x,y),width,height)
body = Ellipse((0.5, -0.18), 0.6, 0.8)
patches.append(body)
# Draw the patches
colors = 100 * np.random.rand(len(patches))  # set random colors
p = PatchCollection(patches, alpha=0.4)
p.set_array(colors)  # colors is already an ndarray; mapped through the colormap
ax.add_collection(p)
# Show the figure
plt.show()
不过搞数据科学的应该不会这么闲吧……
5、数据框处理示例
对于一个df:
# pandas is used below but is not imported by any earlier snippet.
import pandas as pd

# Stacked bar chart of three random "condition" columns.
np.random.seed(0)  # fixed seed so the example data is reproducible
df = pd.DataFrame({'Condition 1': np.random.rand(20),
                   'Condition 2': np.random.rand(20)*0.9,
                   'Condition 3': np.random.rand(20)*1.1})
fig, ax = plt.subplots()
# stacked=True piles the columns of each row on top of one another instead
# of drawing them side by side.
df.plot.bar(ax=ax, stacked=True)
plt.show()
可以算每个因变量各自所占百分比:
from matplotlib.ticker import FuncFormatter
# Convert every row to shares of its row total, then draw the shares as a
# stacked bar chart with a percentage y axis.
df_ratio = df.divide(df.sum(axis="columns"), axis="index")
fig, ax = plt.subplots()
df_ratio.plot.bar(ax=ax, stacked=True)
# Render the y tick values as whole-number percentages.
ax.yaxis.set_major_formatter(
    FuncFormatter(lambda value, _pos: '{:.0%}'.format(value)))
plt.show()
对于一个有缺失值的数据框:
# pandas is used below but is not imported by any earlier snippet.
import pandas as pd
# ``sklearn.preprocessing.Imputer`` has been removed from scikit-learn;
# ``sklearn.impute.SimpleImputer`` is its replacement.
from sklearn.impute import SimpleImputer

# Load a data set that contains missing values and fill them in.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00383/risk_factors_cervical_cancer.csv'
df = pd.read_csv(url, na_values="?")  # treat "?" cells as missing
# Replace every missing value with the mean of its column, keeping the
# original column labels and index on the result.
impute = pd.DataFrame(SimpleImputer(strategy="mean").fit_transform(df),
                      columns=df.columns, index=df.index)
impute.head()  # preview the first rows (displayed in a notebook cell)