目录
1.绘制单变量分布
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
# Plot a univariate distribution: normalized histogram, KDE curve and rug ticks.
np.random.seed(0)  # fix the RNG seed so every run draws the same sample (and figure)
arr = np.random.randn(100)  # 100 draws from the standard normal distribution
print(arr)
# sns.distplot is deprecated and scheduled for removal from seaborn. histplot with
# stat="density" and kde=True draws the density-normalized histogram plus the KDE
# curve, and rugplot adds the per-observation ticks that rug=True used to draw.
ax = sns.histplot(arr, bins=10, stat="density", kde=True)
sns.rugplot(arr, ax=ax)
# Needed to display the figure when run as a plain script; omitting it does not
# raise an error, the window simply never appears.
plt.show()
2.绘制双变量分布
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Build a two-column DataFrame from two independent batches of 500 standard-normal draws.
x_samples = np.random.randn(500)
y_samples = np.random.randn(500)
dataframe_obj = pd.DataFrame({"x": x_samples, "y": y_samples})
# Joint distribution of x and y. Other renderings of the same data:
#   sns.jointplot(x="x", y="y", data=dataframe_obj, kind="scatter")  # scatter plot
#   sns.jointplot(x="x", y="y", data=dataframe_obj, kind="hex")      # hexagonal-bin plot
joint_options = {"x": "x", "y": "y", "data": dataframe_obj, "kind": "kde"}
sns.jointplot(**joint_options)  # kernel density estimate
plt.show()
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# Pairwise bivariate distributions: a 10 x 5 table of random marks, one column per subject.
subjects = ["语文", "数学", "英语", "政治", "体育"]
score = np.random.randint(40, 100, size=(10, 5))  # integers drawn from [40, 100)
stu = [f"同学{i}" for i in range(len(score))]  # one row label per row of the score table
data = pd.DataFrame(score, index=stu, columns=subjects)
print(data)
# Draw the grid of pairwise plots across all subject columns.
sns.pairplot(data)
plt.show()
3.分类散点图
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# Categorical scatter plot: total_bill points grouped by day and colored by time.
tips_csv = "F:/数学建模/机器学习/seaborn-data/tips.csv"
tips = pd.read_csv(tips_csv)
strip_options = {"x": "day", "y": "total_bill", "hue": "time"}
sns.stripplot(data=tips, **strip_options)
# Alternative: sns.swarmplot(x="day", y="total_bill", data=tips) places the points
# so that none of them overlap.
plt.show()
4.箱型图
要想查看各个分类中的数据分布,显而易见,散点图是不满足需求的,原因是它不够直观。
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# Box plot of total_bill for each day.
tips_csv = "F:/数学建模/机器学习/seaborn-data/tips.csv"
tips = pd.read_csv(tips_csv)
# Pass hue="time" as well to split every day's box by time.
sns.boxplot(data=tips, x="day", y="total_bill")
plt.show()
5.相关性基本分析
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# Basic correlation analysis on a handful of player statistics.
players = pd.read_csv("F:/数学建模/机器学习/nba_2017_nba_players_with_salary.csv")
print(players.describe())  # summary statistics (mean, min, max, ...) of the full table
selected_columns = ["Rk", "AGE", "MP", "FG", "FGA"]
data = players[selected_columns]
corr = data.corr()  # pairwise linear correlation between the selected columns
print(corr)
# Optional larger canvas: plt.figure(figsize=(20,8),dpi=100)
# annot=True writes each correlation value inside its cell.
sns.heatmap(corr, annot=True, square=True, linewidths=0.1)
plt.show()
6.表的高级操作
6.1降序排列
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# Keep five columns and list the rows by "Rk" from largest to smallest.
selected_columns = ["Rk", "AGE", "MP", "FG", "FGA"]
players = pd.read_csv("F:/数学建模/机器学习/nba_2017_nba_players_with_salary.csv")
subset = players[selected_columns]
data = subset.sort_values(by="Rk", ascending=False)  # ascending=False -> descending order
print(data)
6.2单变量分布情况
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# Univariate distributions of salary, RPM and age, stacked in one figure.
data = pd.read_csv("F:/数学建模/机器学习/nba_2017_nba_players_with_salary.csv")
# All three panels share a single 3x1 figure shown once. Opening a fresh figure and
# calling plt.show() per panel would leave each window with one plot squeezed into a
# third of the canvas and the other two grid slots empty.
plt.figure(figsize=(10, 10))
panels = [("SALARY_MILLIONS", "salary"), ("RPM", "RPM"), ("AGE", "AGE")]
for position, (column, label) in enumerate(panels, start=1):
    plt.subplot(3, 1, position)
    # sns.distplot is deprecated; histplot with stat="density" and kde=True draws
    # the density-normalized histogram together with the KDE curve.
    sns.histplot(data[column], stat="density", kde=True)
    plt.ylabel(label)
plt.tight_layout()  # keep the stacked panels' axis labels from overlapping
plt.show()
6.3双变量
import matplotlib.pyplot as plt  # required by plt.show() below
import pandas as pd
import seaborn as sns

# Bivariate view: hexagonal-bin joint plot of player age against salary.
data = pd.read_csv("F:/数学建模/机器学习/nba_2017_nba_players_with_salary.csv")
# x and y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally,
# so positional x/y vectors raise a TypeError there. The returned grid gets its own
# name so `data` keeps referring to the DataFrame.
grid = sns.jointplot(x="AGE", y="SALARY_MILLIONS", data=data, kind="hex")
plt.show()
6.4多变量
import matplotlib.pyplot as plt  # required by plt.show() below
import pandas as pd
import seaborn as sns

# Multivariate view: grid of pairwise plots over five player statistics.
players = pd.read_csv("F:/数学建模/机器学习/nba_2017_nba_players_with_salary.csv")
selected_columns = ["Rk", "AGE", "MP", "FG", "FGA"]
data = players.loc[:, selected_columns]
sns.pairplot(data)
plt.show()