"""Train a decision tree on the HR dataset and flag likely leavers.

Run as a script, this reads ``HR_comma_sep.csv``, fits a shallow decision
tree that predicts the ``left`` column, and prints the employees who have
not left yet but whom the tree predicts will leave.
"""

import numpy as np
import pandas as pd

# Ordinal encoding applied to the categorical ``salary`` column.
SALARY_LEVELS = {"low": 0, "medium": 1, "high": 2}


def process_df_for_ml(df):
    """Split a dataframe into model features and target.

    The input dataframe is not modified; the work is done on a copy.

    Args:
        df: DataFrame containing at least the ``salary``, ``left`` and
            ``sales`` columns.

    Returns:
        A tuple ``(X, y)``: ``X`` is a DataFrame holding every column except
        ``left`` and ``sales``, with ``salary`` mapped from
        "low"/"medium"/"high" to 0/1/2 (any other value becomes NaN);
        ``y`` is the ``left`` column as a Series.

    Raises:
        KeyError: If ``salary``, ``left`` or ``sales`` is missing.
    """
    df = df.copy()
    df["salary"] = df["salary"].map(SALARY_LEVELS)
    X = df.drop(["left", "sales"], axis=1)
    y = df["left"]
    return X, y


def main(csv_path="HR_comma_sep.csv"):
    """Fit the tree on ``csv_path`` and print employees predicted to leave.

    Args:
        csv_path: Path to the HR CSV file. It must provide the columns
            required by ``process_df_for_ml``.
    """
    # scikit-learn is only needed for training, so it is imported here; this
    # keeps process_df_for_ml importable when only pandas is installed.
    from sklearn import tree
    from sklearn.model_selection import train_test_split

    # read_csv already yields a fresh default index, so no reindexing is
    # needed before use.
    df = pd.read_csv(csv_path)

    # Train a decision tree. Only the training part of the split is used.
    X, y = process_df_for_ml(df)
    X_train, _, y_train, _ = train_test_split(
        X, y, random_state=0, stratify=y
    )
    # Seed the tree as well as the split so repeated runs give the same
    # model even when several candidate splits are equally good.
    clftree = tree.DecisionTreeClassifier(max_depth=3, random_state=0)
    clftree.fit(X_train, y_train)

    # Apply the tree to people who haven't left yet.
    # NOTE: this subset is taken from the full dataframe, so it includes rows
    # the tree was trained on.
    notleftdf = df[df["left"] == 0].copy()
    X_current, _ = process_df_for_ml(notleftdf)

    # Plug in a new column with ones and zeroes from the prediction.
    notleftdf["will_leave"] = clftree.predict(X_current)

    # Print those with the will-leave flag on.
    print(notleftdf[notleftdf["will_leave"] == 1])


if __name__ == "__main__":
    main()
2908

被折叠的 条评论
为什么被折叠?



