# Titanic classification with auto-sklearn
import autosklearn. classification as autoskcl
import pandas as pd
import numpy as np
import sklearn as sk
import seaborn as sns
import matplotlib. pyplot as plt
from sklearn. model_selection import train_test_split
# Load the Titanic training data.
data = pd.read_csv('data/titanic_train.csv')

# Replace 'Sex' and 'Embarked' with integer category codes.
# pandas encodes missing values as -1, so a row with a missing value in one
# of these columns ends up with the code -1 rather than NaN.
for column in ('Sex', 'Embarked'):
    data[column] = data[column].astype('category').cat.codes

# Drop the columns that are not used as model inputs.
data_c = data.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1)

# The first remaining column is used as the label and every other column as
# a feature (assumes the target is the first column left after the drop --
# TODO confirm against the CSV header).
cols = data_c.columns
features = cols[1:]
labels = cols[0]

X = data_c[features]
y = data_c[labels]

# Split BEFORE computing any statistics: imputing and scaling with values
# derived from the full dataset would leak information about the test rows
# into preprocessing and make the reported accuracy optimistic.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3)

# Fill missing ages with the mean age of the training rows only.
age_fill = {'Age': X_train['Age'].mean()}
X_train = X_train.fillna(age_fill)
X_test = X_test.fillna(age_fill)

# Standardize every feature to zero mean / unit variance using the training
# statistics, and apply the same transform to the test rows.
train_mean = X_train.mean()
train_std = X_train.std()
X_train = (X_train - train_mean) / train_std
X_test = (X_test - train_mean) / train_std

# Fit auto-sklearn with its default settings and evaluate on the held-out rows.
cls = autoskcl.AutoSklearnClassifier()
cls.fit(X_train, y_train)
predictions = cls.predict(X_test)
print(predictions)
print(sk.metrics.accuracy_score(y_test, predictions))