import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Plot setup: use the FangSong font for sans-serif text (presumably so that
# CJK labels render — confirm the font is installed on the target machine).
plt.rcParams["font.sans-serif"] = ["FangSong"]
# Turn off the Unicode minus glyph for axis labels.
plt.rcParams["axes.unicode_minus"] = False

import warnings

# Install a blanket "ignore" filter: warnings raised after this point are
# suppressed.
warnings.filterwarnings("ignore")

from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split,KFold,cross_validate,cross_val_score
import lightgbm
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier as RF
# Data import
# Read the training table, using its `id` column as the row index.
train = pd.read_csv('train_done_1.csv', index_col='id')

# Separate the `isDefault` target from the remaining feature columns.
X = train.drop(columns='isDefault')
y = train['isDefault']

# Hold out 1/16 of the rows for evaluation; the seed is fixed so the split
# is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=1/16,
    random_state=2020,
)
from sklearn.metrics import roc_auc_score

# NOTE(review): `lgb`, `X_train1` and `y_train1` are not defined in the
# visible part of this file — confirm they are created before this step
# (it raises NameError otherwise).
lgb.fit(X_train1, y_train1)

# Per-class probability predictions for the held-out rows.
y_pre = lgb.predict_proba(X_test.values)
y_pre  # bare expression: notebook-style echo, no effect in a plain script

# Take column 1 of every prediction row once (presumably the positive-class
# probability — confirm against the model's class order) and reuse it below
# instead of rebuilding it with a Python-level loop each time.
y_pre_pos = np.asarray(y_pre)[:, 1]
y_pre_pos

# ROC AUC of the column-1 scores against the held-out labels; kept in a
# variable so the score stays available after it is displayed.
val_auc = roc_auc_score(y_test.values, y_pre_pos)
val_auc
# Read the test table (indexed by `id`) and predict class probabilities for it.
# NOTE(review): relies on `lgb` having been fitted earlier; it is not defined
# in the visible part of this file.
test_data = pd.read_csv('test_done_1.csv', index_col='id')
test_pre = lgb.predict_proba(test_data.values)

# Assemble the submission table: one `isDefault` value per id, taken from
# column 1 of each prediction row, then write it to disk with `id` as index.
submission = pd.DataFrame({
    'id': test_data.index,
    'isDefault': [row[1] for row in test_pre],
})
submission.set_index('id').to_csv('sub_1.csv')