数据可视化
线图:
# Imports come first so the snippets below run from a fresh interpreter
# (the original used np/plt before importing them).
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

# Basic line plot: sin(weight) sampled at 50 evenly spaced points in [0, 100],
# drawn as a solid red line with a circle marker at every sample.
weight = np.linspace(0, 100, 50)
height = np.sin(weight)
plt.plot(weight, height, linestyle='-', color='red', marker='o')
plt.show()
# 1000 daily observations of standard-normal noise, starting 2000-01-01.
ts = pd.Series(
    data=np.random.randn(1000),
    index=pd.date_range(start='20000101', periods=1000),
)
# Running total of the noise, i.e. a random walk over the same dates.
ats = ts.cumsum()
ats.describe()
Out[8]:
count 1000.000000
mean 24.155624
std 16.227862
min -5.519669
25% 8.058423
50% 24.760727
75% 38.461114
max 54.866714
dtype: float64
ats.plot()
Out[9]: <matplotlib.axes._subplots.AxesSubplot at 0x4cd129d828>
plt.show()
# Plot the cumulative series (ats): the title says 'cumsum', but the original
# call plotted the raw noise in ts. 'r-' is a solid red line.
ats.plot(title='cumsum', style='r-', figsize=(8, 6))
plt.show()
# Four independent random walks (columns A-D) sharing the date index of ats.
df = pd.DataFrame(
    np.random.randn(1000, 4),
    index=ats.index,
    columns=list('ABCD'),
).cumsum()
df.describe()
Out[16]:
A B C D
count 1000.000000 1000.000000 1000.000000 1000.000000
mean -5.286443 -14.942259 24.056395 -17.585319
std 7.552125 10.697173 9.433380 11.237561
min -26.149702 -32.121641 -1.869861 -50.323512
25% -8.743886 -24.111616 18.178664 -21.977234
50% -4.435576 -17.054723 24.370001 -16.041080
75% 0.068311 -4.981979 30.465245 -9.708967
max 8.670913 6.512569 45.166084 1.463544
df.plot()
Out[17]: <matplotlib.axes._subplots.AxesSubplot at 0x4cd3fdf710>
plt.show()
df.plot(subplots=True,figsize=(6,12),sharey=True)
Out[19]:
array([<matplotlib.axes._subplots.AxesSubplot object at 0x0000004CD407D6D8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x0000004CD45D84A8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x0000004CD41DB780>,
<matplotlib.axes._subplots.AxesSubplot object at 0x0000004CD4201A58>],
dtype=object)
plt.show()
# Add the integer ID column (0 .. len(df)-1) that the summary below shows and
# that the following plot uses as its x axis; the notes omitted this step.
df['ID'] = np.arange(len(df))
df.describe()
Out[24]:
A B C D ID
count 1000.000000 1000.000000 1000.000000 1000.000000 1000.000000
mean -5.286443 -14.942259 24.056395 -17.585319 499.500000
std 7.552125 10.697173 9.433380 11.237561 288.819436
min -26.149702 -32.121641 -1.869861 -50.323512 0.000000
25% -8.743886 -24.111616 18.178664 -21.977234 249.750000
50% -4.435576 -17.054723 24.370001 -16.041080 499.500000
75% 0.068311 -4.981979 30.465245 -9.708967 749.250000
max 8.670913 6.512569 45.166084 1.463544 999.000000
df.plot(x='ID',y=['A','C'])
Out[25]: <matplotlib.axes._subplots.AxesSubplot at 0x4cd44c46d8>
plt.show()
柱状图:
# Simple bar chart: one red bar of width 0.5 per value, at positions 0..4.
height = [20, 10, 15, 54, 23]
x = np.arange(len(height))
plt.bar(x, height, width=0.5, color='red')
plt.show()
# Grouped bar chart: the second series is shifted right by one bar width so
# each pair of bars sits side by side.
y = [20, 10, 15, 54, 23]
y2 = [10, 5, 20, 60, 30]
bar_width = 0.3
x = np.arange(len(y))
plt.bar(x, y, width=bar_width, color='b')
plt.bar(x + bar_width, y2, width=bar_width, color='r')
plt.show()
# Stacked bar chart: the red series starts where the blue one ends (bottom=y).
y = [20, 10, 15, 54, 23]
y2 = [10, 5, 20, 60, 30]
bar_width = 0.3
x = np.arange(len(y))
plt.bar(x, y, width=bar_width, color='b')
plt.bar(x, y2, width=bar_width, bottom=y, color='r')
plt.show()
df=pd.DataFrame(np.random.randn(10,4),columns=list('ABCD'))
df
Out[28]:
A B C D
0 -1.000899 -1.459471 -0.951918 0.476966
1 -0.250823 0.357176 0.354575 2.201217
2 -0.472851 0.970697 -1.199046 1.653239
3 -0.738507 1.166673 1.029219 -1.603170
4 0.404418 -0.577106 -0.783924 0.942183
5 -1.494829 1.739991 1.826403 -0.510799
6 -0.685105 -1.589427 0.507934 0.463677
7 0.128637 0.505620 -1.981238 1.248609
8 1.422470 0.426650 -1.366355 -2.374405
9 -1.648767 -0.177603 -1.077124 0.130565
df.iloc[0]
Out[30]:
A -1.000899
B -1.459471
C -0.951918
D 0.476966
Name: 0, dtype: float64
df.iloc[0].plot(kind='bar')
Out[31]: <matplotlib.axes._subplots.AxesSubplot at 0x4cd43cf048>
plt.show()
df.plot.bar(stacked=True)
Out[35]: <matplotlib.axes._subplots.AxesSubplot at 0x4cd4bea438>
plt.show()
df.plot.barh(stacked=True)
Out[37]: <matplotlib.axes._subplots.AxesSubplot at 0x4cd5db5c50>
plt.show()
直方图hist:
from matplotlib import pyplot as plt
import numpy as np
# Histogram of 2000 samples drawn from N(mu, sigma^2).
mu = 100
sigma = 20
x = mu + sigma * np.random.randn(2000)
# density=True scales the bars so the histogram integrates to 1; it replaces
# the 'normed' keyword, which was removed in Matplotlib 3.1.
plt.hist(x, bins=100, color='green', density=True)
plt.show()
# 2-D histogram of two independent normal samples centred at (2, 3),
# binned on a 40 x 40 grid.
x = 2 + np.random.randn(1000)
y = 3 + np.random.randn(1000)
plt.hist2d(x, y, bins=40)
plt.show()
# Three normal samples of 1000 points each, centred at +1, 0 and -1.
df = pd.DataFrame(
    {
        'a': np.random.randn(1000) + 1,
        'b': np.random.randn(1000),
        'c': np.random.randn(1000) - 1,
    },
    columns=['a', 'b', 'c'],
)
df['a'].hist()
Out[43]: <matplotlib.axes._subplots.AxesSubplot at 0x4cd4ba6978>
plt.show()
df['a'].hist(bins=20)
Out[45]: <matplotlib.axes._subplots.AxesSubplot at 0x4cd6169da0>
plt.show()
df.plot.hist(subplots=True,sharex=True,sharey=True)
Out[47]:
array([<matplotlib.axes._subplots.AxesSubplot object at 0x0000004CD66E1DD8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x0000004CD6718AC8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x0000004CD6455CC0>],
dtype=object)
plt.show()
概率密度图:
df.plot.kde()
Out[49]: <matplotlib.axes._subplots.AxesSubplot at 0x4cd653e278>
plt.show()
散点图:
# The frame printed below has 10 rows, columns a-d and values in [0, 1), so it
# is not the histogram frame from earlier; the notes omitted its creation.
# NOTE(review): presumably built from uniform samples as here - confirm.
df = pd.DataFrame(np.random.rand(10, 4), columns=list('abcd'))
df
Out[52]:
a b c d
0 0.806606 0.146134 0.267457 0.528070
1 0.416368 0.600725 0.587314 0.919670
2 0.991003 0.541265 0.861808 0.753876
3 0.317595 0.531358 0.680400 0.857542
4 0.808783 0.986448 0.860283 0.586517
5 0.908003 0.491612 0.078318 0.722666
6 0.972409 0.134909 0.097019 0.174766
7 0.282438 0.206912 0.837568 0.452598
8 0.372139 0.948273 0.412657 0.465505
9 0.574312 0.472374 0.379385 0.989896
df.plot.scatter(x='a',y='b')
Out[53]: <matplotlib.axes._subplots.AxesSubplot at 0x4cd652c048>
plt.show()
# Scatter plot of two independent standard-normal samples, drawn as large
# semi-transparent red circles.
height = np.random.randn(1000)
weight = np.random.randn(1000)
plt.scatter(x=weight, y=height, s=100, c='r', marker='o', alpha=0.5)
plt.show()
饼图:
# Pie chart with percentage labels; slices B and C are pulled slightly out of
# the pie via their non-zero explode offsets.
labels = ('A', 'B', 'C', 'D')
fracs = [15, 30, 45, 10]
explode = [0, 0.05, 0.08, 0]
plt.pie(fracs, explode=explode, labels=labels, autopct='%.0f%%', shadow=True)
plt.show()
# Four uniform samples scaled to [0, 3), labelled a-d, used as pie slice sizes.
s = pd.Series(
    data=np.random.rand(4) * 3,
    index=['a', 'b', 'c', 'd'],
    name='series',
)
s
Out[56]:
a 2.033608
b 2.959749
c 2.153261
d 1.090337
Name: series, dtype: float64
s.plot.pie(figsize=(6,6),labels=['AA','BB','CC','DD'],autopct='%0.2f',fontsize=12,colors=list('rgbc'))
Out[57]: <matplotlib.axes._subplots.AxesSubplot at 0x4cd7750550>
plt.show()
箱型图:
# Single box plot of 1000 samples drawn from a normal distribution (mean 0, std 1).
data = np.random.normal(loc=0, scale=1, size=1000)
# sym='o' draws outlier points as circles; whis=1.2 sets the whisker reach
# to 1.2 times the inter-quartile range.
plt.boxplot(data, sym='o', whis=1.2)
plt.show()
# Four box plots side by side: one per column of the (1000, 4) sample array.
data = np.random.normal(loc=0, scale=1, size=(1000, 4))
label = list('ABCD')
# NOTE(review): newer Matplotlib releases rename `labels` to `tick_labels` -
# confirm against the installed version.
plt.boxplot(data, sym='o', whis=1.2, labels=label)
plt.show()
样式字符串:
x = [1, 2, 3]
y = [3, 2, 1]
# The format string packs color, marker and line style in one argument:
# 'c' = cyan, 'x' = x-shaped marker, '--' = dashed line.
plt.plot(x, y, 'cx--')
plt.show()
pylab 接口(用法类似 MATLAB,不推荐使用):
# The star import is what lets plot/title/show be called unqualified below.
# NOTE(review): wildcard imports hide where names come from; these notes
# themselves mark the pylab style as not recommended.
from pylab import *
x = [1, 2, 3]
y = [3, 2, 1]
# Same 'cx--' format string as the pyplot example: cyan, x markers, dashed line.
plot(x, y, 'cx--')
title('pylab')
show()