import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib. pyplot as plt
import warnings
# Silence every warning raised from here on. NOTE(review): this is a blanket
# filter, so deprecation warnings from pandas/seaborn are hidden as well.
warnings.filterwarnings("ignore")

# Load the HR dataset from a path relative to the working directory.
df = pd.read_csv("./data/HR.csv")

# Keep only rows that pass all three checks. The masks are combined with `&`
# and applied in a single selection, so each mask is aligned with the frame it
# was computed from (chaining df[m1][m2][m3] forces pandas to reindex the later
# masks and emits a "Boolean Series key will be reindexed" UserWarning).
# Presumably "nme" and "sale" are dirty placeholder values in the raw file --
# TODO confirm against the data source.
df = df[
    (df["last_evaluation"] <= 1)
    & (df["salary"] != "nme")
    & (df["department"] != "sale")
]
柱状图
# Bar chart of how many rows fall into each "salary" category.
sns.countplot(data=df, x="salary")
<matplotlib.axes._subplots.AxesSubplot at 0x1a1873c048>
# Same per-salary counts, with each salary group split into one bar per
# "department" value via `hue`.
sns.countplot(data=df, x="salary", hue="department")
<matplotlib.axes._subplots.AxesSubplot at 0x1a182c0d30>
绘制直方图
# Fresh figure; draw into the first cell of a 1-row x 3-column subplot grid.
f = plt.figure()
f.add_subplot(1, 3, 1)
# Distribution of "satisfaction_level" in 10 bins, with distplot's other
# options (histogram and KDE curve) left at their defaults.
sns.distplot(df["satisfaction_level"], bins=10)
<matplotlib.axes._subplots.AxesSubplot at 0x1a17815748>
# Fresh figure; draw into the first cell of a 1-row x 3-column subplot grid.
f = plt.figure()
f.add_subplot(1, 3, 1)
# Histogram only: the KDE curve is switched off.
sns.distplot(df["satisfaction_level"], kde=False, bins=10)
<matplotlib.axes._subplots.AxesSubplot at 0x113840f98>
# Fresh figure; draw into the first cell of a 1-row x 3-column subplot grid.
f = plt.figure()
f.add_subplot(1, 3, 1)
# KDE curve only: the histogram bars are switched off (bins is still passed,
# exactly as in the original call).
sns.distplot(df["satisfaction_level"], hist=False, bins=10)
<matplotlib.axes._subplots.AxesSubplot at 0x1a17c3e9b0>
# One figure with three side-by-side distribution plots, one per column,
# filling the cells of a 1-row x 3-column grid from left to right.
f = plt.figure()
for position, column in enumerate(
    ("satisfaction_level", "last_evaluation", "average_monthly_hours"),
    start=1,
):
    f.add_subplot(1, 3, position)
    sns.distplot(df[column], bins=10)
<matplotlib.axes._subplots.AxesSubplot at 0x1a17d31160>
箱线图
# Vertical box plot: the "time_spend_company" values go on the y axis.
sns.boxplot(
    y=df["time_spend_company"],
)
<matplotlib.axes._subplots.AxesSubplot at 0x1a17f06128>
# Horizontal box plot of the same column (values on the x axis).
# whis=3 sets how far the whiskers reach, as a multiple of the IQR;
# saturation=0.75 is passed explicitly for the fill colour.
sns.boxplot(
    x=df["time_spend_company"],
    whis=3,
    saturation=0.75,
)
<matplotlib.axes._subplots.AxesSubplot at 0x1a18e94b70>
折线图
# Per-group mean of every numeric column, grouped by "time_spend_company".
# numeric_only=True skips the string columns explicitly: pandas >= 2.0 raises
# a TypeError when asked to average them, while older versions dropped them
# silently, so the result matches the original behaviour on old pandas.
sub_df = df.groupby("time_spend_company").mean(numeric_only=True)

# Plot the mean of "left" against each group key. x and y are passed by
# keyword because seaborn deprecated (0.11) and later rejected (>= 0.12)
# positional data arguments.
sns.pointplot(x=sub_df.index, y=sub_df["left"])
<matplotlib.axes._subplots.AxesSubplot at 0x1a19e08208>
# Same relationship drawn straight from the row-level data: seaborn aggregates
# "left" for each "time_spend_company" value itself.
sns.pointplot(data=df, x="time_spend_company", y="left")
<matplotlib.axes._subplots.AxesSubplot at 0x1a1919c6d8>
# Pie chart of each department's share of rows.
# Wedge labels come from the value_counts() index; wedge sizes are the
# normalized counts, so both follow the same ordering.
lbs = df["department"].value_counts().index
plt.pie(
    df["department"].value_counts(normalize=True),
    labels=lbs,
    autopct="%1.1f%%",  # print each share as a percentage with one decimal
)
plt.show()
# Department pie chart again, with the "sales" wedge pulled out of the pie.
lbs = df["department"].value_counts().index
# One offset per label: 0.1 for "sales", 0 for every other department.
explodes = [0 if label != "sales" else 0.1 for label in lbs]
plt.pie(
    df["department"].value_counts(normalize=True),
    labels=lbs,
    explode=explodes,
    autopct="%1.1f%%",  # print each share as a percentage with one decimal
)
plt.show()
# Pie chart of each salary category's share of rows, with the "low" wedge
# pulled out of the pie.
lbs = df["salary"].value_counts().index
# One offset per label: 0.1 for "low", 0 for every other category.
explodes = [0 if label != "low" else 0.1 for label in lbs]
plt.pie(
    df["salary"].value_counts(normalize=True),
    labels=lbs,
    explode=explodes,
    autopct="%1.1f%%",  # print each share as a percentage with one decimal
)
plt.show()