import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
# Location of the Excel workbook that holds the data set.
path = 'C:/Users/统计学学习/data.xlsx'
data = pd.read_excel(path)

# Group the rows by the 'Embarked' column. The grouped object is reused for
# the aggregation below instead of grouping the frame a second time.
embark = data.groupby(['Embarked'])

# Per-group descriptive statistics for every non-grouping column; the result
# has two-level columns (original column, statistic).
embark_basic = embark.agg(['count', 'min', 'max', 'median', 'mean', 'var', 'std'])
age_basic = embark_basic['Age']
fare_basic = embark_basic['Fare']

# A bare expression statement shows nothing when run as a script, so the
# summary tables are printed explicitly.
print(age_basic)
print(fare_basic)
import seaborn as sns
# Set the colour palette for all subsequent plots, using the HLS colour space.
sns.set_palette("hls")

# Density-normalised histogram (10 bins) of the 'Age' column with a KDE curve
# on top. `sns.distplot` is deprecated; `histplot` with stat="density" and
# kde_kws={"cut": 3} is the replacement given in the seaborn migration notes.
sns.histplot(
    data['Age'],
    color="r",
    bins=10,
    kde=True,
    stat="density",
    kde_kws={"cut": 3},
)
plt.title('Age')
plt.xlim(-10, 80)
plt.grid(True)
plt.show()
from scipy import stats
from scipy import stats
age = data['Age']
plt.figure()
age.plot(kind='kde')  # kernel density estimate of the raw data

# Fit a normal distribution to the data; the result is (mean loc, standard
# deviation scale). Missing values are dropped first because `norm.fit`
# cannot fit data containing NaN, while the KDE plot above already ignores them.
M_S = stats.norm.fit(age.dropna())

# Frozen normal distribution built from the fitted parameters; this line only
# constructs the object, it does not draw anything.
normalDistribution = stats.norm(M_S[0], M_S[1])
x