import seaborn as sns
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import random
def sinplot(filp=1):
    """Plot six phase-shifted sine curves with matplotlib's pyplot interface.

    Args:
        filp: Multiplier applied to every curve; passing ``-1`` mirrors the
            curves about the x-axis. The parameter name is kept unchanged for
            existing callers (it looks like a misspelling of "flip").
    """
    # 100 evenly spaced sample points on [0, 14].
    x = np.linspace(0, 14, 100)
    # Curve i is shifted by i * 0.5 and scaled by (7 - i).
    for i in range(1, 7):
        plt.plot(x, np.sin(x + i * 0.5) * (7 - i) * filp)


sinplot()
组合曲线图
# Activate seaborn's default theme, then draw the sine curves again with it applied.
sns.set() # set the style / theme
sinplot()
风格主题一般有
- darkgrid
- whitegrid
- dark
- white
- ticks
箱线图
# Seed NumPy's global random generator so the data below is reproducible.
np.random.seed(0)
sns.set_style('ticks')

# 20x6 normal samples; adding arange(6) / 2 shifts column k upward by k / 2.
data = np.random.normal(size=(20, 6)) + np.arange(6) / 2
sns.boxplot(data=data)
<matplotlib.axes._subplots.AxesSubplot at 0x2598873b888>
# Letter-value (boxen) plot of the same data; despine removes the top and
# right spines, and offset moves the remaining spines away from the axes.
sns.boxenplot(data=data)
sns.despine(offset=10)

# Pass the array as `data=` like the sibling calls, so it is handled as
# wide-form input instead of being bound to the first positional parameter.
sns.violinplot(data=data)
sns.despine(offset=10)  # offset adjusts the distance of the spines

sns.set_style('whitegrid')
sns.boxplot(data=data, palette='deep')
# sns.despine(left=True) is deliberately not called here, so the left spine stays.
<matplotlib.axes._subplots.AxesSubplot at 0x25988609048>
# Box plot on the 'whitegrid' style, this time also hiding the left spine.
sns.set_style('whitegrid')
sns.boxplot(palette='deep', data=data)
sns.despine(left=True)  # explicitly name the extra spine to hide
## Multiple subplots
# NOTE(review): the indentation of this cell was lost in the export. The layout
# below assumes only the first subplot is created inside the temporary style
# context (the usual seaborn tutorial pattern) -- confirm against the notebook.
with sns.axes_style('ticks'):
    plt.subplot(211)
    sinplot()
plt.subplot(212)
sinplot(-1)
sns.set()  # reset to seaborn's default theme
调节线条和坐标轴大小
# Draw the same figure under three plotting contexts. The last entry also
# enlarges the fonts via font_scale and thickens the lines through the
# 'lines.linewidth' rc override.
for context_name, context_options in (
    ('paper', {}),
    ('poster', {}),
    ('notebook', {'font_scale': 5.5, 'rc': {'lines.linewidth': 10.5}}),
):
    sns.set_context(context_name, **context_options)
    plt.figure(figsize=(8, 6))
    sinplot()
调色
调色板
- color_palette()传入颜色
- set_palette()设置图的颜色
分类调色板
# Fetch the palette returned by color_palette() with no arguments and show its swatches.
current_palette = sns.color_palette()
sns.palplot(current_palette)
一共10个颜色,通过color_palette()调节个数
# Request 20 colors from the 'hls' color space.
sns.palplot(sns.color_palette('hls', 20))

# Set the default figure size so twenty boxes fit side by side.
sns.set(rc={'figure.figsize': (20, 8)})
data = np.random.normal(size=(20, 20)) + np.arange(20) / 2
sns.boxenplot(palette=sns.color_palette('hls', 20), data=data)
<matplotlib.axes._subplots.AxesSubplot at 0x2598a3da108>
控制颜色亮度和饱和度
- hls_palette()
- l = light亮度
- s = saturation饱和度
# hls_palette takes lightness (l) and saturation (s) directly.
sns.palplot(sns.hls_palette(n_colors=20, l=0.09, s=0.01))

# 'Paired' is meant for data that comes in pairs: each pair gets two similar colors.
sns.palplot(sns.color_palette('Paired', n_colors=20))
使用XKCD颜色来命名颜色
# When a specific fixed color is needed, look it up by name in sns.xkcd_rgb.
# Each tuple is (end y-value, xkcd color name, line width).
for y_end, color_name, line_width in (
    (3, 'red', 4),
    (4, 'green', 3),
    (5, 'blue', 2),
):
    plt.plot([0, 1], [2, y_end], sns.xkcd_rgb[color_name], lw=line_width)
[<matplotlib.lines.Line2D at 0x2598a7b7a88>]
# Alternatively, build a palette from a list of xkcd color names.
colors = [
    'windows blue',
    'amber',
    'purple',
]
sns.palplot(sns.xkcd_palette(colors))
连续调色
一般热力图使用
# Sequential palettes selected by name.
sns.palplot(sns.color_palette('Reds')) # note: the name is capitalized and plural
sns.palplot(sns.color_palette('Reds_r')) # same name with the "_r" suffix
# _r = reverse: the palette order is flipped
色调线性变换
- cubehelix_palette()
# The 'cubehelix' palette requested by name, with 10 colors.
sns.palplot(sns.color_palette('cubehelix', n_colors=10))

# cubehelix_palette with a custom start and rotation: reversed, then default order.
sns.palplot(sns.cubehelix_palette(10, start=-0.1, rot=-0.75, reverse=True))
sns.palplot(sns.cubehelix_palette(10, start=-0.1, rot=-0.75))

# Palettes built from the single color 'red': light (reversed) and dark.
sns.palplot(sns.light_palette('red', reverse=True))
sns.palplot(sns.dark_palette('red'))
轮廓图或者叫等高线图
# Draw 300 samples from a bivariate normal and transpose the result so that
# x and y unpack as two separate arrays.
np.random.seed(2)
x, y = np.random.multivariate_normal(
    [0, 0], [[1, -0.5], [-0.5, 1]], size=300
).T
pal = sns.light_palette('green', as_cmap=True)
# The keyword is `cmap`. The previous spelling `camp` was reported as an unused
# kwarg by the contour call, so the green colormap was never applied.
sns.kdeplot(x, y, cmap=pal)
C:\Users\kingS\anaconda3\lib\site-packages\seaborn\distributions.py:437: UserWarning: The following kwargs were not used by contour: 'camp'
cset = contour_func(xx, yy, z, n_levels, **kwargs)
<matplotlib.axes._subplots.AxesSubplot at 0x2598ad709c8>
# Scientific-computing helpers from SciPy.
from scipy import stats, integrate

# Theme setup: enable seaborn's short color codes.
sns.set(color_codes=True)
np.random.seed(sum(ord(ch) for ch in 'distributions'))
x = np.random.normal(size=100)

# kde toggles the kernel density estimate; here only the histogram is requested.
sns.distplot(x, kde=False)
<matplotlib.axes._subplots.AxesSubplot at 0x259906898c8>
# bins sets the number of histogram intervals; kde=True also requests the density estimate.
sns.distplot(x, kde=True, bins=5)
<matplotlib.axes._subplots.AxesSubplot at 0x2599066b448>
# fit=stats.gamma asks distplot to fit a gamma distribution; the KDE is switched off.
sns.distplot(x, fit=stats.gamma, kde=False)
<matplotlib.axes._subplots.AxesSubplot at 0x25990ed1408>
# With size=100 the call returns an array of 100 samples (loc/scale spelled out at their defaults).
np.random.normal(loc=0.0, scale=1.0, size=100)
array([-7.93670539e-02, 2.35623791e+00, 1.69823731e-01, -1.65983154e+00,
1.38854404e+00, -1.56926211e+00, -7.30554288e-01, -6.02577362e-01,
8.98263749e-01, 7.82067606e-01, -1.41254369e-01, -4.47245943e-01,
2.91969802e-01, 4.27114940e-01, 5.94314228e-01, 1.53917596e-01,
-1.53200567e+00, -5.69558781e-01, 7.88319084e-01, 2.82753716e-01,
-5.86497918e-01, -1.16397748e+00, 2.88947587e-01, -2.63436097e-01,
-1.50246105e+00, -1.75745611e+00, -1.36483802e+00, -7.33840531e-01,
-5.64724904e-02, 1.33945114e+00, 2.06119424e+00, 5.25737590e-01,
-1.97816956e-03, -3.53299540e-01, -3.12083544e-01, -7.51424413e-01,
5.79962214e-01, -1.76068456e-01, -9.22589067e-01, 4.57838865e-01,
4.67008945e-01, -8.33531122e-01, 2.33001609e-01, -1.61272298e+00,
1.48554200e-01, -6.34437667e-01, -2.68405188e-01, -2.83733400e-01,
-1.46665528e+00, -8.31031975e-01, 2.20334408e-01, -7.42256917e-01,
-1.03642212e+00, 1.46894035e-01, 1.83128376e+00, -7.97836688e-01,
-2.51106251e-02, -7.51063377e-01, 2.59559153e-01, 3.74713802e-01,
-1.62470969e+00, 1.25857468e+00, 1.11413325e-01, -9.66904388e-01,
5.24940304e-01, -1.04693257e+00, 1.19548873e+00, 8.69765920e-01,
-5.38073347e-01, 2.78726165e-01, 2.47195055e-01, 3.69496598e-01,
-2.80957399e-01, -1.70529391e+00, 9.04984230e-01, 1.60629597e+00,
-2.67589907e-01, 5.65918694e-01, -3.29983562e-01, -1.48961748e-02,
-1.49366521e+00, 5.76244481e-01, 1.41480402e+00, -3.64581041e-01,
4.19481870e-01, 6.01141850e-01, 9.62268465e-01, -2.15160561e-01,
8.14040853e-01, -1.20475982e+00, 5.40559848e-01, 7.75185845e-01,
1.73212150e+00, -2.17369452e-02, -1.71062699e+00, -1.28783899e+00,
9.02107169e-01, -1.00361959e+00, -8.49754217e-01, -2.09257982e+00])
# Without size=, the first positional argument is the mean (loc), so this
# returns a single sample centered on 100 rather than 100 samples.
np.random.normal(loc=100)
99.1868940499386
import pandas as pd

# Mean vector and covariance matrix of a two-dimensional normal distribution.
mean = [0, 1]
cov = [(1, 0.5), (0.5, 1)]

# 200 samples, wrapped in a DataFrame with columns 'x' and 'y'.
data = np.random.multivariate_normal(mean, cov, size=200)
df = pd.DataFrame(data, columns=['x', 'y'])
df
x | y | |
---|---|---|
0 | -0.579463 | 0.414803 |
1 | 0.880828 | 2.285103 |
2 | -0.307332 | 0.653860 |
3 | 1.029192 | 1.250403 |
4 | -0.892471 | 1.026103 |
... | ... | ... |
195 | 0.249314 | 1.051238 |
196 | -1.222601 | 0.894471 |
197 | 2.329006 | 2.152045 |
198 | -0.192728 | 1.552358 |
199 | -0.325603 | 1.476723 |
200 rows × 2 columns
散点图
# Joint plot of columns 'x' and 'y' from df, drawn in black ('k').
sns.jointplot(data=df, x='x', y='y', color='k')
<seaborn.axisgrid.JointGrid at 0x2599217f888>
蜂巢散点图
# 1000 draws from the same bivariate normal, transposed into separate x / y arrays.
x, y = np.random.multivariate_normal(mean, cov, 1000).T
# The export dropped the indentation of the `with` body; the hexbin joint plot
# is drawn inside the temporary 'ticks' style context.
with sns.axes_style('ticks'):
    sns.jointplot(x=x, y=y, kind='hex', color='k')
两两关系图-散点图组合
# Load the iris example dataset and draw its pair plot.
iris = sns.load_dataset('iris')
sns.pairplot(iris)
<seaborn.axisgrid.PairGrid at 0x259940ad288>
sns回归分析
# Seed derived from the character codes of the word 'regression'.
np.random.seed(sum(ord(ch) for ch in 'regression'))

# Load the tips example dataset and show its first five rows.
tips = sns.load_dataset('tips')
tips.head(n=5)
total_bill | tip | sex | smoker | day | time | size | |
---|---|---|---|---|---|---|---|
0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
regplot()和lmplot()都可以画回归关系的图
# Regression plot of tip against total_bill on an 8x6 figure.
plt.figure(figsize=(8, 6))
sns.regplot(data=tips, x='total_bill', y='tip')
<matplotlib.axes._subplots.AxesSubplot at 0x25994d9abc8>
# Set the default figure size, then regress tip on party size.
sns.set(rc={'figure.figsize': (8, 8)})
sns.regplot(x='size', y='tip', data=tips)
<matplotlib.axes._subplots.AxesSubplot at 0x25994b29188>
# x_jitter=0.05 adds a small random offset to the x values of the plotted points.
sns.regplot(data=tips, x='size', y='tip', x_jitter=0.05)
<matplotlib.axes._subplots.AxesSubplot at 0x2599539ad48>
## Visualizing categorical data
sns.set(style='whitegrid', color_codes=True)
np.random.seed(sum(ord(ch) for ch in 'categorical'))

# Example datasets used by the categorical plots.
titanic = sns.load_dataset('titanic')
tips = sns.load_dataset('tips')
iris = sns.load_dataset('iris')

# Strip plot of total_bill per day, with a jitter of .1 applied to the points.
sns.stripplot(data=tips, x='day', y='total_bill', jitter=.1)
<matplotlib.axes._subplots.AxesSubplot at 0x2599553ea08>
圣诞树图
形状像圣诞树一样;作用和 stripplot 的 jitter 类似,都是把重叠的点分散开,但 swarmplot 会自动排列,使点与点之间互不重叠
# Swarm plot of total_bill for each day.
sns.swarmplot(data=tips, x='day', y='total_bill')
<matplotlib.axes._subplots.AxesSubplot at 0x2599558ca08>
# Same swarm plot with one more variable: points are colored by 'sex'.
sns.swarmplot(data=tips, x='day', y='total_bill', hue='sex')
<matplotlib.axes._subplots.AxesSubplot at 0x259955e5188>
盒图
- IQR统计学概念四分位距
- 令 N = 1.5 × IQR,如果一个值 > Q3 + N 或者 < Q1 - N,则为离群点
# Box plot of total_bill per day, split by meal time.
sns.boxplot(data=tips, x='day', y='total_bill', hue='time')
<matplotlib.axes._subplots.AxesSubplot at 0x25996ad2e48>
# Violin plot with total_bill on the x-axis and day on the y-axis, split by meal time.
sns.violinplot(data=tips, x='total_bill', y='day', hue='time')
<matplotlib.axes._subplots.AxesSubplot at 0x25996a13f48>
# split=True is passed together with the two-level hue variable 'sex'.
sns.violinplot(data=tips, x='day', y='total_bill', hue='sex', split=True)
<matplotlib.axes._subplots.AxesSubplot at 0x25996b65288>
# Violin plot first, then a half-transparent white swarm plot of the same variables.
sns.violinplot(data=tips, x='day', y='total_bill')
sns.swarmplot(data=tips, x='day', y='total_bill', color='w', alpha=0.5)
<matplotlib.axes._subplots.AxesSubplot at 0x25996e4eec8>
条形图
表示集中趋势
# Bar plot of 'survived' by sex, with one bar per passenger class.
sns.barplot(data=titanic, x='sex', y='survived', hue='class')
<matplotlib.axes._subplots.AxesSubplot at 0x25996d325c8>
点图
点图可以更好地描述差异性
# Point plot of the same variables as the bar plot.
sns.pointplot(data=titanic, x='sex', y='survived', hue='class')
<matplotlib.axes._subplots.AxesSubplot at 0x25996db6c48>
点图的优化
- palette = {}颜色设置
- markers = []数据点样式
- linestyles=[]线条样式
# Point plot with explicit colors, markers and line styles for the hue levels.
sns.pointplot(
    data=titanic,
    x='class',
    y='survived',
    hue='sex',
    palette={'male': "g", 'female': 'm'},
    markers=['.', 'o'],
    linestyles=['-', '--'],
)
<matplotlib.axes._subplots.AxesSubplot at 0x2599725b048>
# orient selects the box direction: 'h' = horizontal, 'v' = vertical.
sns.boxplot(orient='v', data=iris)
<matplotlib.axes._subplots.AxesSubplot at 0x2599833a848>
多层面板分类
# `factorplot` was renamed to `catplot` and the old name is slated for removal.
# `kind` is passed explicitly, so the changed default kind does not matter here.
sns.catplot(data=tips, x='day', y='total_bill', hue='smoker', kind='bar')
C:\Users\kingS\anaconda3\lib\site-packages\seaborn\categorical.py:3669: UserWarning: The `factorplot` function has been renamed to `catplot`. The original name will be removed in a future release. Please update your code. Note that the default `kind` in `factorplot` (`'point'`) has changed `'strip'` in `catplot`.
warnings.warn(msg)
<seaborn.axisgrid.FacetGrid at 0x25998352788>
# One swarm-plot panel per meal time. Uses `catplot`, the current name of the
# deprecated `factorplot`; `kind` is explicit, so behavior is unchanged.
sns.catplot(
    data=tips, x='day', y='total_bill', hue='smoker', col='time', kind='swarm'
)
<seaborn.axisgrid.FacetGrid at 0x259985107c8>
# One box-plot panel per day. `factorplot` is replaced by its new name
# `catplot`, and the renamed `size` parameter is passed as `height`.
sns.catplot(
    data=tips,
    x='day',
    y='total_bill',
    hue='smoker',
    col='day',
    kind='box',
    height=4,
    aspect=0.5,
)
C:\Users\kingS\anaconda3\lib\site-packages\seaborn\categorical.py:3675: UserWarning: The `size` parameter has been renamed to `height`; please update your code.
warnings.warn(msg, UserWarning)
<seaborn.axisgrid.FacetGrid at 0x25998548488>
# Display the tips DataFrame as the cell output.
tips
total_bill | tip | sex | smoker | day | time | size | |
---|---|---|---|---|---|---|---|
0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
... | ... | ... | ... | ... | ... | ... | ... |
239 | 29.03 | 5.92 | Male | No | Sat | Dinner | 3 |
240 | 27.18 | 2.00 | Female | Yes | Sat | Dinner | 2 |
241 | 22.67 | 2.00 | Male | Yes | Sat | Dinner | 2 |
242 | 17.82 | 1.75 | Male | No | Sat | Dinner | 2 |
243 | 18.78 | 3.00 | Female | No | Thur | Dinner | 2 |
244 rows × 7 columns
# One column of panels per value of 'time'; each panel gets a histogram of 'tip'.
g = sns.FacetGrid(data=tips, col='time')
g.map(plt.hist, 'tip')
<seaborn.axisgrid.FacetGrid at 0x25998628b08>
# Panels by 'sex', colors by 'smoker'; scatter total_bill against tip.
g = sns.FacetGrid(data=tips, col='sex', hue='smoker')
g.map(plt.scatter, 'total_bill', 'tip', alpha=0.5)
g.add_legend()  # legend for the 'smoker' hue levels
<seaborn.axisgrid.FacetGrid at 0x259998e9948>
# Grid with rows by 'smoker' and columns by 'time'; each panel is a regression
# plot of total_bill on size, with x jitter of 0.1.
g = sns.FacetGrid(data=tips, row='smoker', col='time', margin_titles=True)
g.map(
    sns.regplot,
    'size',
    'total_bill',
    color='0.1',
    fit_reg=True,
    x_jitter=0.1,
)
<seaborn.axisgrid.FacetGrid at 0x25999ddd188>
# `size` was renamed to `height` in FacetGrid.
g = sns.FacetGrid(tips, col='day', height=4, aspect=0.5)
# Pin the category order explicitly so every facet places the bars identically;
# seaborn warns that mapping barplot without `order` can give an incorrect plot.
g.map(sns.barplot, 'sex', 'total_bill', order=['Male', 'Female'])
C:\Users\kingS\anaconda3\lib\site-packages\seaborn\axisgrid.py:243: UserWarning: The `size` parameter has been renamed to `height`; please update your code.
warnings.warn(msg, UserWarning)
C:\Users\kingS\anaconda3\lib\site-packages\seaborn\axisgrid.py:728: UserWarning: Using the barplot function without specifying `order` is likely to produce an incorrect plot.
warnings.warn(warning)
<seaborn.axisgrid.FacetGrid at 0x25999f2dcc8>
# Import pandas' Categorical type.
from pandas import Categorical

# Print the day labels in value_counts() order for reference...
ordered_days = tips.day.value_counts().index
print(ordered_days)
# ...then replace them with an explicit Thur -> Sun sequence for the facet rows.
ordered_days = Categorical(['Thur', 'Fri', 'Sat', 'Sun'])
# `size` was renamed to `height` in FacetGrid.
g = sns.FacetGrid(tips, row='day', row_order=ordered_days, height=2, aspect=4)
g.map(sns.boxplot, 'total_bill')
CategoricalIndex(['Sat', 'Sun', 'Thur', 'Fri'], categories=['Thur', 'Fri', 'Sat', 'Sun'], ordered=False, dtype='category')
C:\Users\kingS\anaconda3\lib\site-packages\seaborn\axisgrid.py:243: UserWarning: The `size` parameter has been renamed to `height`; please update your code.
warnings.warn(msg, UserWarning)
C:\Users\kingS\anaconda3\lib\site-packages\seaborn\axisgrid.py:728: UserWarning: Using the boxplot function without specifying `order` is likely to produce an incorrect plot.
warnings.warn(warning)
<seaborn.axisgrid.FacetGrid at 0x259a45378c8>
# Explicit color per meal time.
pal = dict(Lunch='seagreen', Dinner='gray')
# `size` was renamed to `height`; hue_kws supplies one marker per hue level.
g = sns.FacetGrid(
    tips,
    hue='time',
    palette=pal,
    height=5,
    hue_kws={'marker': ['*', 'v']},
)
# s sets the marker size of the scatter points.
g.map(
    plt.scatter,
    'total_bill',
    'tip',
    s=50,
    alpha=0.7,
    linewidth=0.5,
    edgecolor='w',
)
g.add_legend()
<seaborn.axisgrid.FacetGrid at 0x259a4d40308>
# NOTE(review): the indentation of this cell was lost in the export. Only the
# grid creation is placed inside the temporary 'white' style context here;
# confirm against the original notebook.
with sns.axes_style('white'):
    # `size` was renamed to `height` in FacetGrid.
    g = sns.FacetGrid(
        tips, row='sex', col='smoker', margin_titles=True, height=2.5
    )
g.map(plt.scatter, 'total_bill', 'tip', color='#ff3384', edgecolor='w', lw=0.5)
g.set_axis_labels('Total bill ($)', 'Tip')
g.set(xticks=[10, 30, 50], yticks=[2, 6, 10])  # explicit tick positions
g.fig.subplots_adjust(wspace=0.02, hspace=0.02)  # spacing between the subplots
# Pair grid over the iris dataset with a scatter plot mapped to every cell.
iris = sns.load_dataset('iris')
g = sns.PairGrid(data=iris)
g.map(plt.scatter)
<seaborn.axisgrid.PairGrid at 0x259a5290e08>
# Histograms on the diagonal, scatter plots everywhere else.
g = sns.PairGrid(data=iris)
g.map_diag(plt.hist)
g.map_offdiag(plt.scatter)
<seaborn.axisgrid.PairGrid at 0x259a26099c8>
# hue='species' adds the category used to tell the groups apart.
g = sns.PairGrid(data=iris, hue='species')
g.map_diag(plt.hist)
g.map_offdiag(plt.scatter)
g.add_legend()
<seaborn.axisgrid.PairGrid at 0x259a6e5b688>
# Restrict the grid to the listed variables and map a scatter plot onto it.
g = sns.PairGrid(
    data=iris,
    vars=['sepal_length', 'sepal_width'],
    hue='species',
)
g.map(plt.scatter)
<seaborn.axisgrid.PairGrid at 0x259a73e8608>
# Pair grid over tips, colored by 'size' with the 'GnBu_d' palette.
g = sns.PairGrid(data=tips, hue='size', palette='GnBu_d')
g.map(plt.scatter, edgecolor='w', s=50)
g.add_legend()
<seaborn.axisgrid.PairGrid at 0x259a7ff8bc8>
热力图
# Default theme, fixed seed, then a heatmap of a 3x3 matrix of normal samples.
sns.set()
np.random.seed(20)
uniform_data = np.random.normal(size=(3, 3))
print(uniform_data)
heatmap = sns.heatmap(data=uniform_data)
[[ 0.88389311 0.19586502 0.35753652]
[-2.34326191 -1.08483259 0.55969629]
[ 0.93946935 -0.97848104 0.50309684]]
# Same seeded 3x3 matrix as before, so only the color mapping differs.
np.random.seed(20)
uniform_data = np.random.normal(size=(3, 3))
print(uniform_data)
# vmin / vmax adjust the value interval covered by the color scale;
# center adjusts the value at which the color scale is centered.
heatmap = sns.heatmap(uniform_data, center=0, vmin=0.2, vmax=0.5)
[[ 0.88389311 0.19586502 0.35753652]
[-2.34326191 -1.08483259 0.55969629]
[ 0.93946935 -0.97848104 0.50309684]]
# Load the flights example dataset and display it as the cell output.
flights = sns.load_dataset('flights')
flights
year | month | passengers | |
---|---|---|---|
0 | 1949 | January | 112 |
1 | 1949 | February | 118 |
2 | 1949 | March | 132 |
3 | 1949 | April | 129 |
4 | 1949 | May | 121 |
... | ... | ... | ... |
139 | 1960 | August | 606 |
140 | 1960 | September | 508 |
141 | 1960 | October | 461 |
142 | 1960 | November | 390 |
143 | 1960 | December | 432 |
144 rows × 3 columns
flights = sns.load_dataset('flights')
# Reshape to a month x year table of passenger counts. The arguments are given
# by keyword because current pandas no longer accepts them positionally.
flights = flights.pivot(index='month', columns='year', values='passengers')
print(flights)
# To export the table: flights.to_csv("res.csv", sep=',', encoding='gbk')
sns.set(rc={'figure.figsize': (8, 8)})
ax = sns.heatmap(flights)
year 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 \
month
January 112 115 145 171 196 204 242 284 315 340 360
February 118 126 150 180 196 188 233 277 301 318 342
March 132 141 178 193 236 235 267 317 356 362 406
April 129 135 163 181 235 227 269 313 348 348 396
May 121 125 172 183 229 234 270 318 355 363 420
June 135 149 178 218 243 264 315 374 422 435 472
July 148 170 199 230 264 302 364 413 465 491 548
August 148 170 199 242 272 293 347 405 467 505 559
September 136 158 184 209 237 259 312 355 404 404 463
October 119 133 162 191 211 229 274 306 347 359 407
November 104 114 146 172 180 203 237 271 305 310 362
December 118 140 166 194 201 229 278 306 336 337 405
year 1960
month
January 417
February 391
March 419
April 461
May 472
June 535
July 622
August 606
September 508
October 461
November 390
December 432
flights = sns.load_dataset('flights')
# Pivot on these three columns: months as rows, years as columns, passenger
# counts as cell values. Keywords are used because current pandas no longer
# accepts these arguments positionally.
flights = flights.pivot(index='month', columns='year', values='passengers')
# annot=True writes the actual value into each cell; it was False before, which
# left fmt without effect and contradicted the intent to annotate the figure.
# fmt='d' formats the annotations as integers.
# linewidths sets the gap between cells; cmap selects the colormap.
ax = sns.heatmap(flights, annot=True, fmt='d', linewidths=0.05, cmap='PuRd')
cmap调色板参数请参照!
https://blog.csdn.net/qq_38048756/article/details/118724555