# Univariate analysis (单变量分析)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import integrate, stats
import seaborn as sns
# Apply seaborn's default theme; color_codes=True remaps matplotlib's
# shorthand colour codes ("b", "g", "r", ...) to the seaborn palette.
sns.set(color_codes=True)

# Seed NumPy's global RNG with the sum of the code points of a fixed string
# so that the sample below is the same on every run.
np.random.seed(sum(map(ord, "distributions")))
x = np.random.normal(size=100)

# Each histogram gets its own figure.  Without an explicit plt.figure() both
# calls would draw onto the same implicit axes and the two binnings would be
# superimposed, which makes them impossible to compare.
plt.figure()
sns.distplot(x, kde=False)  # histogram only, default bin count

plt.figure()
sns.distplot(x, bins=20, kde=False)  # same data, forced to 20 bins

plt.show()
# Scatter plot: the relationship between two variables (散点图:两个变量之间的关系)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import integrate, stats
import seaborn as sns
# Seaborn default theme with the shorthand colour codes remapped.
sns.set(color_codes=True)

# Parameters of a 2-D normal distribution: mean vector and covariance matrix.
mean = [0, 1]
cov = [(1, 0.5), (0.5, 1)]

# Draw 200 samples and label the two columns "x" and "y".
data = np.random.multivariate_normal(mean, cov, size=200)
df = pd.DataFrame(data, columns=["x", "y"])
print(df)

# A scatter plot describes the relationship between the two features;
# jointplot also draws the marginal distribution of each of them.
sns.jointplot(data=df, x="x", y="y")
plt.show()
# Pairwise analysis of two variables (两个变量分析)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import integrate, stats
import seaborn as sns
# Seaborn default theme with the shorthand colour codes remapped.
sns.set(color_codes=True)

# Load seaborn's bundled "iris" example dataset.
iris = sns.load_dataset("iris")

# Plot every pairwise relationship between the columns of the dataset.
sns.pairplot(data=iris)
plt.show()
# Regression analysis (回归变量分析)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import integrate, stats
import seaborn as sns
# Seaborn default theme with the shorthand colour codes remapped.
sns.set(color_codes=True)

# Seed NumPy's global RNG from a fixed string so runs are repeatable.
np.random.seed(sum(map(ord, "regression")))

# Load seaborn's bundled "tips" example dataset and preview the first rows.
tips = sns.load_dataset("tips")
print(tips.head())

# Regression plots: use regplot (recommended) or lmplot.
# The two regressions use different x variables (bill amount vs. party
# size), so each one is drawn on its own figure; sharing a single axes would
# mix two unrelated x scales in one plot.
plt.figure()
sns.regplot(x="total_bill", y="tip", data=tips)

plt.figure()
sns.regplot(x="size", y="tip", data=tips)

plt.show()
# Multivariate analysis (多变量分析)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import integrate, stats
import seaborn as sns
# White-grid seaborn theme with the shorthand colour codes remapped.
sns.set(style="whitegrid", color_codes=True)

# Seed NumPy's global RNG from a fixed string so runs are repeatable.
np.random.seed(sum(map(ord, "categoricial")))

# Load seaborn's bundled example datasets.
titanic = sns.load_dataset("titanic")
tips = sns.load_dataset("tips")
iris = sns.load_dataset("iris")

# Each categorical scatter plot is drawn on its own figure; otherwise all
# three would pile their points onto the same axes.

# Strip plot with jitter, so points sharing a category do not overlap on a
# single vertical line.
# Variant without the explicit jitter argument:
#   sns.stripplot(x="day", y="total_bill", data=tips)
plt.figure()
sns.stripplot(x="day", y="total_bill", data=tips, jitter=True)

# Swarm plot: points are shifted along the categorical axis so that they do
# not overlap.
plt.figure()
sns.swarmplot(x="day", y="total_bill", data=tips)

# Same swarm plot with the points coloured by the "sex" column.
plt.figure()
sns.swarmplot(x="day", y="total_bill", hue="sex", data=tips)

plt.show()
# Box plots and violin plots (盒图与小提琴图)
# Box plot: observations beyond the whiskers are drawn as outlier points.
# The box plot is vertical (day on x) while the violin plot below is
# horizontal (day on y), so they must not share an axes: each is drawn on
# its own figure.
plt.figure()
sns.boxplot(x="day", y="total_bill", hue="time", data=tips)

# Violin plot of the same data, oriented horizontally.
plt.figure()
sns.violinplot(x="total_bill", y="day", hue="time", data=tips)

plt.show()
# Plotting categorical attributes (分类属性绘图)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import integrate, stats
import seaborn as sns
# White-grid seaborn theme with the shorthand colour codes remapped.
sns.set(color_codes=True, style="whitegrid")

# Seed NumPy's global RNG from the code points of a fixed string.
np.random.seed(sum(ord(ch) for ch in "categoricial"))

# Load seaborn's bundled example datasets.
titanic = sns.load_dataset("titanic")
tips = sns.load_dataset("tips")
iris = sns.load_dataset("iris")

# Draw the violins without any inner annotation, then overlay the individual
# observations as semi-transparent white points on the same axes.
sns.violinplot(data=tips, x="day", y="total_bill", inner=None)
sns.swarmplot(data=tips, x="day", y="total_bill", color="w", alpha=0.5)
plt.show()
# Bar plot: shows the central tendency of the values (条形图:显示值的集中趋势)
# Bar plot of the "survived" column grouped by sex and split by passenger
# class; each bar's height is barplot's default estimate (the mean) for that
# group.  The plot gets its own figure and an explicit plt.show(): without
# them it would never be displayed when the file runs as a script, and later
# plots would be drawn on top of it.
plt.figure()
sns.barplot(x="sex", y="survived", hue="class", data=titanic)
plt.show()
# Point plot: describes how values change (点图:描述变化)
# Point plots of the "survived" column.  The two plots put different
# variables on the x axis, so each is drawn on its own figure instead of
# being overlaid on one axes.

# Survival by sex, one line per passenger class.
plt.figure()
sns.pointplot(x="sex", y="survived", hue="class", data=titanic)

# Survival by passenger class, one line per sex, with an explicit colour,
# marker and line style for each hue level.
plt.figure()
sns.pointplot(
    x="class",
    y="survived",
    hue="sex",
    data=titanic,
    palette={"male": "g", "female": "m"},
    markers=["*", "o"],
    linestyles=["-", "--"],
)

# Display the figures; without this call nothing is shown when the file is
# run as a script.
plt.show()