# Exploratory analysis of the employee-turnover data: load the CSV, tidy the
# column names, then summarise turnover and draw a correlation heatmap.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as matplot
import seaborn as sns

# `%matplotlib inline` is an IPython magic, not Python syntax. Issue it through
# the IPython API so the file still parses and runs as a plain script.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    # Not running under IPython/Jupyter, so there is no magic to apply.
    pass

# DataFrame.from_csv was removed in pandas 1.0; read_csv is its replacement.
df = pd.read_csv('ch3-Turnover.csv', index_col=None)

# Quick sanity checks: which columns contain missing values, and a first peek.
df.isnull().any()
df.head()

# Give the columns shorter, consistently cased names. rename() silently skips
# keys that are not present, so a misspelled key renames nothing.
# NOTE(review): 'number_porject' looks like a typo for 'number_project'; both
# spellings are mapped so whichever the CSV uses is renamed. Please also
# confirm the casing of 'work_accident' against the CSV header. The target
# name 'deparment' is misspelled but kept, since code outside this section may
# already refer to it.
df = df.rename(columns={
    'satisfaction_level': 'satisfaction',
    'last_evaluation': 'evaluation',
    'number_porject': 'projectCount',
    'number_project': 'projectCount',
    'average_montly_hours': 'averageMonthlyHours',
    'time_spend_company': 'yearsAtCompany',
    "work_accident": 'workAccident',
    'promotion_last_5years': 'promotion',
    'sales': 'deparment',
    'left': 'turnover',
})

# Move the target column 'turnover' to the front of the frame.
front = df.pop('turnover')
df.insert(0, 'turnover', front)
df.head()

df.shape
df.dtypes

# Share of rows per turnover value.
# The original followed this with `turnover_rate(14999,10)`, which calls a
# Series and raises TypeError; it looks like pasted `df.shape` output.
turnover_rate = df.turnover.value_counts() / len(df)
turnover_rate

df.describe()

# Per-group means; numeric_only avoids a TypeError on the string columns.
turnover_Summary = df.groupby("turnover")
turnover_Summary.mean(numeric_only=True)

# Correlation is only defined for numeric data, so restrict to those columns
# explicitly instead of relying on version-dependent defaults of DataFrame.corr.
corr = df.select_dtypes(include=['number', 'bool']).corr()
sns.heatmap(
    corr,
    xticklabels=corr.columns.values,
    yticklabels=corr.columns.values,
)
# Set up the matplotlib figure: three side-by-side panels for the
# distribution plots.
f, axes = plt.subplots(ncols=3, figsize=(15, 6))

# One histogram per panel: satisfaction, evaluation, average monthly hours.
# The original plotted df.satisfaction in all three panels even though the
# titles name three different measures.
panels = (
    ('satisfaction', 'Employee Satisfaction Distribution'),
    ('evaluation', 'Employee Evaluation Distribution'),
    ('averageMonthlyHours', 'Employee Average Monthly Hours Distribution'),
)
for panel_ax, (column, title) in zip(axes, panels):
    # histplot is the axes-level function: it accepts `ax` and returns the
    # Axes. displot is figure-level and supports neither `ax` nor set_title.
    sns.histplot(df[column], kde=False, color='g', ax=panel_ax).set_title(title)
    panel_ax.set_ylabel('Employee Count')

# Salary level versus turnover. A single panel is enough; the original made
# three and drew into only one of them, leaving the other two empty.
f, ax = plt.subplots(figsize=(15, 4))
sns.countplot(y="salary", hue='turnover', data=df, ax=ax).set_title('Employ Salary Turnover Distribution')