# Hand-written logistic regression algorithm, implemented as a personal exercise.
# Data source: http://archive.ics.uci.edu/ml/datasets/default+of+credit+card+clients
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
def get_data(): #数据集读取函数
data=pd.read_excel('...\\default of credit card clients.xls', header=0,skiprows=[0],index_col=[0])
x_feature = list(data.columns)
x_feature.remove('default payment next month')
x = data[x_feature]
y = data['default payment next month']
sm = SMOTE(random_state=42) # 处理过采样的方法
X, Y = sm.fit_sample(x, y)
x_train, x_test, y_train, y_test = train_test_split(X, Y,
test_size = 0.2, random_state = 0)
return x_train,