数据可视化

#绘图和可视化
import matplotlib.pyplot as plt
import numpy as np
data = np.arange(10)
data
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
plt.plot(data)
[<matplotlib.lines.Line2D at 0x187c0ce60f0>]

在这里插入图片描述

# Create an empty figure and place three axes in cells 1-3 of a 2x2 grid;
# the fourth cell is left unused.
fig = plt.figure()
ax1, ax2, ax3 = (fig.add_subplot(2, 2, cell) for cell in (1, 2, 3))
<IPython.core.display.Javascript object>
plt.plot(np.random.randn(50).cumsum(),'k--')
[<matplotlib.lines.Line2D at 0x187c22704a8>]
ax1.hist(np.random.randn(100), bins=20, color='k', alpha=0.3)
(array([ 1.,  3.,  2.,  3.,  2.,  8.,  1.,  4., 11., 11.,  7., 13., 10.,
         3.,  6.,  2.,  7.,  3.,  1.,  2.]),
 array([-2.62147564, -2.3737514 , -2.12602716, -1.87830293, -1.63057869,
        -1.38285445, -1.13513022, -0.88740598, -0.63968174, -0.39195751,
        -0.14423327,  0.10349097,  0.3512152 ,  0.59893944,  0.84666368,
         1.09438791,  1.34211215,  1.58983639,  1.83756062,  2.08528486,
         2.3330091 ]),
 <a list of 20 Patch objects>)
 ax2.scatter(np.arange(30), np.arange(30) + 3 * np.random.randn(30))
<matplotlib.collections.PathCollection at 0x187c22789b0>
fig,axes = plt.subplots(3,3)
axes
<IPython.core.display.Javascript object>
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x00000187C7249B00>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x00000187C770BB00>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x00000187C773A1D0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x00000187C775F860>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x00000187C7786F28>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x00000187C77B65C0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x00000187C77DEC50>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x00000187C780F320>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x00000187C78369B0>]],
      dtype=object)
# Adjusting the spacing around subplots.
# Reference signature:
# plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)
fig, axex = plt.subplots(2, 2, sharex=True, sharey=True)
# Draw a histogram of 500 standard-normal samples in every panel, row by row.
for panel in axex.flat:
    panel.hist(np.random.randn(500), bins=50, color='k', alpha=0.5)
# Zero horizontal and vertical padding between the panels.
plt.subplots_adjust(wspace=0, hspace=0)
<IPython.core.display.Javascript object>
#颜色,标记和线型
plt.plot(np.random.randn(30).cumsum(),'k--')
<IPython.core.display.Javascript object>
[<matplotlib.lines.Line2D at 0x187c2549a20>]
plt.plot(np.random.randn(30).cumsum(), color='r', linestyle='dashed', marker='o')
<IPython.core.display.Javascript object>
[<matplotlib.lines.Line2D at 0x187ca07f390>]
data = np.random.randn(30).cumsum()

plt.plot(data,'k-',drawstyle='steps-post',label='steps-post')
<IPython.core.display.Javascript object>
[<matplotlib.lines.Line2D at 0x187caf1bc50>]
plt.plot(data,'k--',label='Default')
[<matplotlib.lines.Line2D at 0x187cb1e17f0>]
plt.legend(loc='best')    #标签
<matplotlib.legend.Legend at 0x187cb1edda0>
# Ticks, labels and legends.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.set_title("My first matplotlib plot")
ax.set_xlabel('Stages')
# Place the x ticks explicitly before the data is drawn.
ax.set_xticks([0, 250, 500, 750, 1000])
# A 1000-step random walk.
ax.plot(np.random.randn(1000).cumsum())
[<matplotlib.lines.Line2D at 0x2af31855470>]

在这里插入图片描述

# Build the legend from the label attached to each line.
from numpy.random import randn
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
# (format string, legend label) for each random walk, drawn in this order.
for line_format, line_label in (('k', 'one'), ('k--', 'two'), ('k.', 'three')):
    ax.plot(randn(1000).cumsum(), line_format, label=line_label)
ax.legend(loc='best')
<matplotlib.legend.Legend at 0x2af319c55c0>

png

# Annotations and drawing on a subplot.
# ax.annotate draws a text label (optionally with an arrow) at given x/y data coordinates.
from datetime import datetime
import pandas as pd
fig = plt.figure()
ax = fig.add_subplot(111)
# Load the CSV with its first column as a parsed date index and plot the 'SPX' column.
data = pd.read_csv('examples/spx.csv',index_col=0,parse_dates=True)
spx = data['SPX']
spx.plot(ax=ax,style='k-')
# (date, caption) pairs for the events to mark on the chart.
crisis_data = [
    (datetime(2007,10,11),'Peak of bull market'),
    (datetime(2008,3,12),'Bear Stearns Fails'),
    (datetime(2008,9,15),'Lehman Bankruptcy')
]
for date,label in crisis_data:  # add one annotation per event
    # The arrow tip sits 75 units above the series value looked up with
    # spx.asof(date); the caption is placed 225 units above that value.
    ax.annotate(label, xy=(date, spx.asof(date) + 75),
                xytext=(date, spx.asof(date) + 225),
                arrowprops=dict(facecolor='black', headwidth=4, width=2,
                                headlength=4),
                horizontalalignment='left', verticalalignment='top')
# Set the start and end limits of both axes (zoom in on 2007-2010).
ax.set_xlim(['1/1/2007', '1/1/2011'])
ax.set_ylim([600, 1800])

ax.set_title('Important dates in the 2008-2009 financial crisis')
Text(0.5,1,'Important dates in the 2008-2009 financial crisis')

在这里插入图片描述

# Drawing shapes (patches) on an axes.
fig = plt.figure()
ax = fig.add_subplot(111)

# A translucent black rectangle, a blue circle and a green triangle.
rect = plt.Rectangle((0.2, 0.75), 0.4, 0.15, color='k', alpha=0.3)
circ = plt.Circle((0.7, 0.2), 0.15, color='b', alpha=0.3)
pgon = plt.Polygon(
    [[0.15, 0.15], [0.35, 0.4], [0.2, 0.6]],
    color='g',
    alpha=0.5,
)

# Patches only show up once they are attached to the axes.
for shape in (rect, circ, pgon):
    ax.add_patch(shape)

<matplotlib.patches.Polygon at 0x2af32aff0b8>

png

#保存图片到本地
#plt.savefig('figpath.png', dpi=400, bbox_inches='tight')
#rc配置图片属性
#一,matplotlib简介
plt.plot([1,2,3],[5,7,4])
plt.show()

在这里插入图片描述

# 2. Legend, title and axis labels
x = [1,2,3]
y = [5,7,4]

x2 = [1,2,3]
y2 = [10,14,12]

plt.plot(x,y,label='First Line')  # label= names the line for the legend
plt.plot(x2,y2,label='Second Line')
plt.xlabel('Plot Number')  # name the axes
plt.ylabel('Important var')
# Title spelling fixed ('Interting' -> 'Interesting') to match the other figures.
plt.title('Interesting Graph\nCheck it out')
plt.legend()  # draw the legend from the labels above
plt.show()

在这里插入图片描述

# 3. Bar charts and histograms
# Bar chart: plt.bar
# Histogram: plt.hist
# Two bar series on interleaved x positions (odd vs. even) so they sit side by side.
plt.bar([1,3,5,7,9],[5,2,7,8,2],label='Example one')
plt.bar([2,4,6,8,10],[8,6,2,5,6],label='Example two',color='g')
plt.legend()
plt.xlabel('bar number')
plt.ylabel('bar height')
plt.title('Epic Graph\nAnother Line!')
plt.show()

在这里插入图片描述

# Histogram of ages grouped into 10-year bins.
population_ages = [22,55,62,45,21,22,34,42,42,4,99,102,110,120,121,122,130,111,115,112,80,75,65,54,44,43,42,48]
bins = [0,10,20,30,40,50,60,70,80,90,100,110,120,130]

# rwidth=0.8 leaves a gap between neighbouring bars. The label gives
# plt.legend() an entry to show; without any labelled artist the legend is
# empty and matplotlib emits a warning.
plt.hist(population_ages, bins, histtype='bar', rwidth=0.8, label='Population ages')

plt.xlabel('x')
plt.ylabel('y')
plt.title("Interesting Graph\nCheck it out")
plt.legend()
plt.show()

在这里插入图片描述

# 4. Scatter plot
# Scatter plot: plt.scatter
x = [1,2,3,4,5,6,7,8]
y = [5,2,4,2,1,4,5,2]
# Black circular markers of size 25; the label feeds the legend below.
plt.scatter(x,y,label='skitscat',color='k',s=25,marker='o')

plt.xlabel('x')
plt.ylabel('y')
plt.title('Interesting Graph\nCheck it out')
plt.legend()
plt.show()

在这里插入图片描述

# 5. Stack plot
# Stack plot: plt.stackplot
# A stack plot shows how the parts of a whole change over time.
days = [1,2,3,4,5]

# Hours spent on each activity, one value per day.
sleeping = [7,8,6,11,7]
eating =   [2,3,4,3,2]
working =  [7,8,7,2,2]
playing =  [8,5,7,8,13]

# The series are stacked in the order given; colors are matched by position.
plt.stackplot(days,sleeping,eating,working,playing,colors=['m','c','r','k'])
plt.xlabel('x')
plt.ylabel('y')
plt.title('Interesting Graph\nCheck it out')
plt.show()

在这里插入图片描述

# Stack plot with one legend entry per activity.
days = [1, 2, 3, 4, 5]

# Hours spent on each activity, one value per day.
sleeping = [7, 8, 6, 11, 7]
eating = [2, 3, 4, 3, 2]
working = [7, 8, 7, 2, 2]
playing = [8, 5, 7, 8, 13]

# stackplot accepts labels= directly, so no empty proxy lines are needed to
# populate the legend; labels and colors are matched to the series by position.
plt.stackplot(days, sleeping, eating, working, playing,
              colors=['m', 'c', 'r', 'k'],
              labels=['Sleeping', 'Eating', 'Working', 'Playing'])
plt.xlabel('x')
plt.ylabel('y')
plt.title('Interesting Graph\nCheck it out')
plt.legend()
plt.show()

在这里插入图片描述

# 6. Pie chart
# Pie chart: plt.pie
slices = [7,2,2,13]
activities = ['sleeping','eating','working','playing']
cols=['c','m','r','b']
plt.pie(slices,labels=activities,colors=cols,startangle=90,shadow=True,explode=(0,0.1,0,0),autopct='%1.1f%%')
# startangle: starting angle; shadow: draw a shadow; explode: pull a slice out
# (here the second one, 'eating'); autopct: format used to print percentages
plt.title('Interesting Graph\nCheck it out')
plt.show()

在这里插入图片描述

#七,从文件中加载数据
#从网络加载数据
#时间戳的转换
#3D绘图
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import style
# Use the 'ggplot' style sheet for the figures that follow.
style.use('ggplot')

fig = plt.figure()
# projection='3d' requests a 3D axes.
ax1 = fig.add_subplot(111, projection='3d')

# Position of each bar: x3/y3 vary, z3 is all zeros.
x3 = [1,2,3,4,5,6,7,8,9,10]
y3 = [5,6,7,8,2,5,6,3,7,2]
z3 = np.zeros(10)

# Size of each bar: 1 along x and y, and 1..10 along z.
dx = np.ones(10)
dy = np.ones(10)
dz = [1,2,3,4,5,6,7,8,9,10]

ax1.bar3d(x3, y3, z3, dx, dy, dz)


ax1.set_xlabel('x axis')
ax1.set_ylabel('y axis')
ax1.set_zlabel('z axis')

plt.show()

在这里插入图片描述

from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
from matplotlib import style

# 3D scatter of two point clouds on one axes.
style.use('ggplot')

fig = plt.figure()
ax1 = fig.add_subplot(1, 1, 1, projection='3d')

# First cloud: positive x/y coordinates.
x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
y = [5, 6, 7, 8, 2, 5, 6, 3, 7, 2]
z = [1, 2, 6, 3, 2, 7, 3, 3, 7, 2]

# Second cloud: the same points with x and y negated.
x2 = [-1, -2, -3, -4, -5, -6, -7, -8, -9, -10]
y2 = [-5, -6, -7, -8, -2, -5, -6, -3, -7, -2]
z2 = [1, 2, 6, 3, 2, 7, 3, 3, 7, 2]

# Green for the first cloud, red for the second.
for xs, ys, zs, colour in ((x, y, z, 'g'), (x2, y2, z2, 'r')):
    ax1.scatter(xs, ys, zs, c=colour, marker='o')

for set_axis_label, axis_text in (
    (ax1.set_xlabel, 'x axis'),
    (ax1.set_ylabel, 'y axis'),
    (ax1.set_zlabel, 'z axis'),
):
    set_axis_label(axis_text)

plt.show()

在这里插入图片描述

from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import style
style.use('ggplot')

fig = plt.figure()
ax1 = fig.add_subplot(111, projection='3d')

x, y, z = axes3d.get_test_data()

print(axes3d.__file__)
ax1.plot_wireframe(x,y,z, rstride = 3, cstride = 3)

ax1.set_xlabel('x axis')
ax1.set_ylabel('y axis')
ax1.set_zlabel('z axis')

plt.show()

C:\Anaconda\lib\site-packages\mpl_toolkits\mplot3d\axes3d.py

在这里插入图片描述

from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plot
import numpy as np
from matplotlib import style

# Wireframe of the paraboloid z = x^2 + y^2 on a 101x101 grid over [-10, 10].
grid_axis = np.linspace(-10, 10, 101)
x, y = np.meshgrid(grid_axis, grid_axis)
z = x * x + y * y
ax = plot.subplot(111, projection='3d')
ax.plot_wireframe(x, y, z)
plot.show()


在这里插入图片描述

# Wireframe of the unit sphere from its spherical-coordinate parametrization:
# t runs over [0, 2*pi] and s over [0, pi], 100 samples each.
t = np.linspace(0, np.pi * 2, 100)
s = np.linspace(0, np.pi, 100)
t, s = np.meshgrid(t, s)
x = np.cos(t) * np.sin(s)
y = np.sin(t) * np.sin(s)
z = np.cos(s)
ax = plot.subplot(111, projection='3d')
ax.plot_wireframe(x, y, z)
plot.show()

在这里插入图片描述

#使用pandas和seaborn绘图
#线型图
import pandas as pd
s = pd.Series(np.random.randn(10).cumsum(),index=np.arange(0,100,10))
s.plot() #索引为x轴
<matplotlib.axes._subplots.AxesSubplot at 0x1911c648eb8>

在这里插入图片描述

# Column-wise cumulative sums of uniform random numbers: 10 rows indexed
# 0, 10, ..., 90 and four columns A-D.
df = pd.DataFrame(
    np.random.rand(10, 4).cumsum(axis=0),
    index=np.arange(0, 100, 10),
    columns=list('ABCD'),
)
df
           A         B         C         D
0   0.597104  0.589393  0.735777  0.767290
10  0.796663  0.869799  1.512095  1.604585
20  1.040885  1.346473  1.774127  2.543832
30  1.362510  1.362242  2.673032  2.946192
40  1.581508  2.174047  3.090430  3.386457
50  2.256305  2.629983  3.372889  4.060371
60  2.385461  3.130221  4.222364  4.311171
70  2.896483  3.595163  4.260149  4.468304
80  3.099516  4.335802  5.207953  5.096282
90  3.680995  4.779841  5.865332  5.155942
df.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x1911dcc1400>

在这里插入图片描述

# Bar plots
# Two stacked axes: vertical bars on top, horizontal bars below.
fig,axes = plt.subplots(2,1)
data = pd.Series(np.random.rand(16),index=list('abcdefghijklmnop'))
data.plot.bar(ax=axes[0],color='k',alpha=0.7)  # vertical bars: index along the x axis
data.plot.barh(ax=axes[1],color='k',alpha=0.7)  # horizontal bars: index along the y axis
<matplotlib.axes._subplots.AxesSubplot at 0x1911d7e25c0>

在这里插入图片描述

# Grouped bar plot input: 6 rows of uniform random numbers in four columns,
# with the column index named 'Genus'.
data = pd.DataFrame(
    np.random.rand(6, 4),
    columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'),
    index=['one', 'two', 'three', 'four', 'five', 'six'],
)
data
Genus         A         B         C         D
one    0.594853  0.189521  0.130311  0.998033
two    0.247427  0.221326  0.663043  0.141174
three  0.398459  0.242788  0.112028  0.985286
four   0.365095  0.167354  0.545588  0.185118
five   0.024861  0.389038  0.277269  0.950699
six    0.708134  0.898126  0.259565  0.452563
data.plot.bar()
<matplotlib.axes._subplots.AxesSubplot at 0x1911dcc1b00>

在这里插入图片描述

#堆积柱状图
df.plot.barh(stacked=True, alpha=0.5)
<matplotlib.axes._subplots.AxesSubplot at 0x1911def7898>

在这里插入图片描述

tips = pd.read_csv('examples/tips.csv')
party_counts = pd.crosstab(tips['day'],tips['size'])
party_counts
size  1   2   3   4  5  6
day
Fri   1  16   1   1  0  0
Sat   2  53  18  13  1  0
Sun   0  39  15  18  3  1
Thur  1  48   4   5  1  3
# Keep only the columns labelled 2 through 5, then divide each row by its own
# total so every row sums to 1.
party_counts = party_counts.loc[:, 2:5]
row_totals = party_counts.sum(axis=1)
party_pcts = party_counts.div(row_totals, axis=0)
party_pcts
size         2         3         4         5
day
Fri   0.888889  0.055556  0.055556  0.000000
Sat   0.623529  0.211765  0.152941  0.011765
Sun   0.520000  0.200000  0.240000  0.040000
Thur  0.827586  0.068966  0.086207  0.017241
party_pcts.plot.bar()
<matplotlib.axes._subplots.AxesSubplot at 0x1911e03da58>

在这里插入图片描述

#使用seaborn
import seaborn as sns
# Tip ratio: the tip divided by (total_bill - tip).
tips['tip_pct'] = tips['tip'] / (tips['total_bill'] - tips['tip'])
tips.head()

   total_bill   tip smoker  day    time  size   tip_pct
0       16.99  1.01     No  Sun  Dinner     2  0.063204
1       10.34  1.66     No  Sun  Dinner     3  0.191244
2       21.01  3.50     No  Sun  Dinner     3  0.199886
3       23.68  3.31     No  Sun  Dinner     2  0.162494
4       24.59  3.61     No  Sun  Dinner     4  0.172069
sns.barplot(x='tip_pct',y='day',data=tips,orient='h')
C:\Anaconda\lib\site-packages\scipy\stats\stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval





<matplotlib.axes._subplots.AxesSubplot at 0x1911ee03be0>

在这里插入图片描述

sns.barplot(x='tip_pct', y='day', hue='time', data=tips, orient='h')
C:\Anaconda\lib\site-packages\scipy\stats\stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval





<matplotlib.axes._subplots.AxesSubplot at 0x1911ee621d0>

在这里插入图片描述

#直方图和密度图
tips['tip_pct'].plot.hist(bins=50)
<matplotlib.axes._subplots.AxesSubplot at 0x1911eed9400>

在这里插入图片描述

#密度图
tips['tip_pct'].plot.density()
<matplotlib.axes._subplots.AxesSubplot at 0x1912003f748>

在这里插入图片描述

# Histogram with a density estimate
# Two normal samples of 200 points each: mean 0 / std 1 and mean 10 / std 2.
comp1 = np.random.normal(0,1,size=200)

comp2 = np.random.normal(10,2,size=200)

values = pd.Series(np.concatenate([comp1,comp2]))
# NOTE(review): newer seaborn releases deprecate distplot in favour of
# histplot/displot - confirm the installed version before migrating.
sns.distplot(values,bins=100,color='k')
C:\Anaconda\lib\site-packages\scipy\stats\stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval





<matplotlib.axes._subplots.AxesSubplot at 0x191202d5710>

在这里插入图片描述

# Scatter / point plots
macro = pd.read_csv('examples/macrodata.csv')
data = macro[['cpi', 'm1', 'tbilrate', 'unemp']]
# Log differences: take logs, difference consecutive rows, drop the NaN rows.
log_levels = np.log(data)
trans_data = log_levels.diff().dropna()
trans_data.tail(5)
          cpi        m1  tbilrate     unemp
198 -0.007904  0.045361 -0.396881  0.105361
199 -0.021979  0.066753 -2.277267  0.139762
200  0.002340  0.010286  0.606136  0.160343
201  0.008419  0.037461 -0.200671  0.127339
202  0.008894  0.012202 -0.405465  0.042560
# Scatter plot of the m1 and unemp columns with a fitted regression line.
# x and y are passed by keyword: current seaborn releases no longer accept
# them positionally, while the keyword form works on old and new versions.
sns.regplot(x='m1', y='unemp', data=trans_data)
plt.title('changes in log %s versus log %s'%('m1','unemp'))

C:\Anaconda\lib\site-packages\scipy\stats\stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval





Text(0.5,1,'changes in log m1 versus log unemp')

在这里插入图片描述

#散点图矩阵
sns.pairplot(trans_data, diag_kind='kde', plot_kws={'alpha': 0.2})

C:\Anaconda\lib\site-packages\scipy\stats\stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval





<seaborn.axisgrid.PairGrid at 0x191203fc2b0>

在这里插入图片描述

# Facet grid: one panel per 'smoker' value adds an extra grouping dimension.
# catplot is the current name of factorplot (the old name is deprecated);
# kind='bar' is passed explicitly, so the changed default kind has no effect.
# Rows with tip_pct >= 1 are filtered out before plotting.
sns.catplot(x='day', y='tip_pct', hue='time', col='smoker',
            kind='bar', data=tips[tips.tip_pct < 1])

C:\Anaconda\lib\site-packages\seaborn\categorical.py:3666: UserWarning: The `factorplot` function has been renamed to `catplot`. The original name will be removed in a future release. Please update your code. Note that the default `kind` in `factorplot` (`'point'`) has changed `'strip'` in `catplot`.
  warnings.warn(msg)
C:\Anaconda\lib\site-packages\scipy\stats\stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval





<seaborn.axisgrid.FacetGrid at 0x191209ed6d8>

在这里插入图片描述

# Box plot of tip_pct per day, restricted to rows with tip_pct < 0.5.
# catplot is the current name of factorplot (the old name is deprecated).
sns.catplot(x='tip_pct', y='day', kind='box',
            data=tips[tips.tip_pct < 0.5])
C:\Anaconda\lib\site-packages\seaborn\categorical.py:3666: UserWarning: The `factorplot` function has been renamed to `catplot`. The original name will be removed in a future release. Please update your code. Note that the default `kind` in `factorplot` (`'point'`) has changed `'strip'` in `catplot`.
  warnings.warn(msg)





<seaborn.axisgrid.FacetGrid at 0x19120bf3710>

在这里插入图片描述


  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值