# 思维导图 (mind map)
# 代码 (code)
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib. pyplot as plt
# Load the raw table and report its (rows, columns) shape.
train = pd.read_csv('D:\\pythondata\\result.csv')
print(train.shape)

# Fill the gaps in the three columns handled here:
#   Cabin    -> placeholder string 'NA'
#   Embarked -> 'S'
#   Age      -> mean of the observed ages
train = train.assign(
    Cabin=train['Cabin'].fillna('NA'),
    Embarked=train['Embarked'].fillna('S'),
    Age=train['Age'].fillna(train['Age'].mean()),
)

# Fraction of missing values that remains in every column, largest first.
missing_ratio = train.isna().mean()
print(missing_ratio.sort_values(ascending=False))

# Keep only the model inputs and one-hot encode the non-numeric ones
# (presumably Sex and Embarked -- depends on the dtypes in the CSV).
feature_columns = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
data = pd.get_dummies(train[feature_columns])
print(data.head())
from sklearn. model_selection import train_test_split
# Feature matrix is the encoded frame; the target is the 'Survived' column.
X, y = data, train['Survived']

# Stratify on the target so both parts keep the same class balance;
# the fixed seed makes the partition repeatable.
split_parts = train_test_split(X, y, random_state=0, stratify=y)
X_train, X_test, y_train, y_test = split_parts
print(X_train.shape, X_test.shape)
from sklearn. linear_model import LogisticRegression
from sklearn. ensemble import RandomForestClassifier
# Baseline logistic regression with default settings; fit() returns the
# estimator itself, so construction and training can be chained.
lr = LogisticRegression().fit(X_train, y_train)

# Mean accuracy on the data it was trained on vs. the held-out part.
lr_train_score = lr.score(X_train, y_train)
lr_test_score = lr.score(X_test, y_test)
print("Training set score:{:.2f}".format(lr_train_score))
print("Testing set score:{:.2f}".format(lr_test_score))
# Second logistic regression with C=100 (C is the inverse regularisation
# strength, so this model is penalised far less than the default one).
lr2 = LogisticRegression(C=100).fit(X_train, y_train)

# Mean accuracy on the training part and on the held-out part.
lr2_train_score = lr2.score(X_train, y_train)
lr2_test_score = lr2.score(X_test, y_test)
print("Training set score: {:.2f}".format(lr2_train_score))
print("Testing set score: {:.2f}".format(lr2_test_score))
# Random forest with default hyper-parameters.
# random_state is pinned (same seed as the train/test split) because the
# forest draws bootstrap samples and candidate features at random; without
# a seed the two scores printed below change from run to run.
rfc = RandomForestClassifier(random_state=0)
rfc.fit(X_train, y_train)

# Mean accuracy on the training part and on the held-out part.
print("Training set score: {:.2f}".format(rfc.score(X_train, y_train)))
print("Testing set score: {:.2f}".format(rfc.score(X_test, y_test)))
# Random forest of 100 trees, each limited to depth 5 to curb over-fitting.
# random_state is pinned (same seed as the train/test split) so that the
# randomised tree construction, and hence the printed scores, is repeatable.
rfc2 = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=0)
rfc2.fit(X_train, y_train)

# Mean accuracy on the training part and on the held-out part.
print("Training set score: {:.2f}".format(rfc2.score(X_train, y_train)))
print("Testing set score: {:.2f}".format(rfc2.score(X_test, y_test)))
# Hard class predictions of the baseline logistic regression for the
# training rows. A bare `pred[:10]` expression is silently discarded when
# the file runs as a script, so the preview is printed explicitly.
pred = lr.predict(X_train)
print(pred[:10])

# Per-class probabilities for the same rows; columns follow the order of
# lr.classes_. Printed for the same reason as above.
pred_proba = lr.predict_proba(X_train)
print(pred_proba[:10])