# Load the raw training and test tables.
df_train = pd.read_csv('train.csv')
df_test = pd.read_csv('test.csv')

# Set aside the test identifiers (reused later as the submission index) and
# the training labels before those columns are removed from the frames.
id_test = df_test['id']
y_train = df_train['target']

# 'bin_3' and 'nom_6' are excluded from the feature set in both frames.
unused_features = ['bin_3', 'nom_6']
df_train.drop(columns=['id', 'target'] + unused_features, inplace=True)
df_test.drop(columns=['id'] + unused_features, inplace=True)
# Split the training columns by dtype: object columns versus everything
# else.  The same two column lists are applied to the test frame as well.
obj = df_train.select_dtypes(include='object').columns
con = df_train.select_dtypes(exclude='object').columns

# Replace missing values with a sentinel: -1 for non-object columns and the
# string '-1' for object columns.  The filled column is assigned back rather
# than calling fillna(inplace=True) on a column selection, which pandas warns
# about and which leaves the frame untouched when copy-on-write is enabled.
for frame in (df_train, df_test):
    for col in con:
        frame[col] = frame[col].fillna(-1)
    for col in obj:
        frame[col] = frame[col].fillna('-1')
# Derive two features from 'ord_5': element 0 and element 1 of each value
# (presumably the first two characters of a string -- every value must be
# indexable up to position 1).  The source column is dropped afterwards.
for frame in (df_train, df_test):
    source = frame['ord_5']
    frame['ord_6'] = source.map(lambda value: value[0])
    frame['ord_7'] = source.map(lambda value: value[1])
    frame.drop(columns=['ord_5'], inplace=True)
# From here on 'ord_7' is the only column that gets ordinal-encoded; every
# other remaining column is collected in `con`.
obj = ['ord_7']
con = [col for col in df_train.columns if col not in obj]

# Fit on train and test together so that a category present in only one of
# the two frames is still known to the encoder.  The encoder gets its own
# name instead of rebinding the builtin ``ord``.
ordinal_encoder = OrdinalEncoder()
ordinal_encoder.fit(pd.concat([df_train[obj], df_test[obj]]).values)

# Assign the encoded arrays directly.  Wrapping them in a fresh DataFrame
# would make the assignment align on the row index, which silently produces
# NaN as soon as a frame no longer has the default 0..n-1 index.
df_train[obj] = ordinal_encoder.transform(df_train[obj].values)
df_test[obj] = ordinal_encoder.transform(df_test[obj].values)
# Rescale the columns listed in `obj` with MinMaxScaler (default output
# range [0, 1]).  The min/max are learned from train and test combined so
# both frames are mapped with the same bounds.
scaler = MinMaxScaler()
scaler.fit(pd.concat([df_train[obj], df_test[obj]]).values)

# Assign the scaled arrays directly; going through a new DataFrame would
# align on the row index and only works with the default 0..n-1 index.
df_train[obj] = scaler.transform(df_train[obj].values)
df_test[obj] = scaler.transform(df_test[obj].values)
# One-hot encode every column listed in `con`.  The encoder's default output
# is already a SciPy sparse matrix; the explicit ``sparse=True`` keyword was
# renamed to ``sparse_output`` in scikit-learn 1.2 and removed in 1.4, so
# relying on the default keeps this working on old and new releases alike.
hot = OneHotEncoder()

# Fit on train and test together so categories seen in only one frame do not
# raise an unknown-category error at transform time.
hot.fit(pd.concat([df_train[con], df_test[con]]).values)
train = hot.transform(df_train[con].values)
test = hot.transform(df_test[con].values)

# Only the columns that were NOT one-hot encoded stay in the frames.
df_train.drop(con, axis=1, inplace=True)
df_test.drop(con, axis=1, inplace=True)

# Stack the one-hot block next to the remaining dense columns.
# NOTE: from this point on df_train / df_test are CSR sparse matrices, not
# DataFrames.
df_train = scipy.sparse.hstack(
    [train, scipy.sparse.coo_matrix(df_train.values)]
).tocsr()
df_test = scipy.sparse.hstack(
    [test, scipy.sparse.coo_matrix(df_test.values)]
).tocsr()
# L2-penalised logistic regression; label 1 is weighted 1.32x relative to
# label 0 through class_weight.
model = LogisticRegression(
    penalty='l2',
    C=0.07334286390056426,
    class_weight={0: 1, 1: 1.32},
    fit_intercept=True,
    solver='lbfgs',
    max_iter=2011,
    random_state=42,
    verbose=0,
    n_jobs=-1,
)
model.fit(df_train, y_train)

# Column 1 of predict_proba is the probability of the second class in
# model.classes_ (label 1, assuming the targets are 0/1 as the class_weight
# keys suggest).
positive_proba = model.predict_proba(df_test)[:, 1]

# One 'target' column indexed by the test ids, written with an 'id' header.
mean_pred = pd.DataFrame({'target': positive_proba}, index=id_test)
mean_pred.to_csv('sub.csv', index=True, index_label='id')
# score: 0.78797