import numpy as np
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)) of *x*, element-wise.

    Uses a numerically stable formulation: ``exp`` is only ever evaluated
    on non-positive arguments, so large-magnitude inputs saturate to 0 or 1
    without triggering floating-point overflow warnings.

    :param x: scalar or array-like of real numbers
    :return: NumPy scalar for scalar input, otherwise an ndarray with the
        same shape as *x*
    """
    x = np.asarray(x)
    # z = exp(-|x|) lies in (0, 1], so it can never overflow.
    z = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)).
    result = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
    # Indexing with () turns a 0-d array into a NumPy scalar and leaves
    # n-d arrays untouched, matching what the naive formula returns.
    return result[()]
def initialize_params(dims):
    """Create zero-initialized parameters for logistic regression.

    :param dims: number of input features
    :return: tuple ``(W, b)`` where ``W`` is a ``(dims, 1)`` array of zeros
        and ``b`` is the scalar bias ``0.0``
    """
    W = np.zeros((dims, 1))
    # Start the bias as a float so its type does not change after the
    # first gradient update.
    b = 0.0
    return W, b
def logistic(X, y, W, b):
    """Run one forward/backward pass of logistic regression.

    :param X: input feature matrix, one sample per row
    :param y: label vector; reshaped internally to the shape of the model
        output, so a 1-D vector and a column vector are both accepted
    :param W: weight coefficients
    :param b: bias parameter
    :return: tuple ``(a, cost, dW, db)`` where
        ``a`` is the model output ``sigmoid(X @ W + b)``,
        ``cost`` is the mean cross-entropy loss,
        ``dW`` is the gradient of the loss with respect to ``W``,
        ``db`` is the gradient of the loss with respect to ``b``
    """
    num_train = X.shape[0]
    # Logistic regression model output.
    a = sigmoid(x=(np.dot(X, W) + b))
    # Align the labels with the output; otherwise a 1-D ``y`` against a
    # column-vector ``a`` would silently broadcast ``a - y`` to (n, n).
    y = np.asarray(y).reshape(a.shape)
    # Cross-entropy loss. Clip the probabilities (for the loss only) so a
    # saturated sigmoid cannot produce log(0) = -inf or 0 * inf = nan.
    eps = 1e-15
    a_safe = np.clip(a, eps, 1 - eps)
    cost = -1 / num_train * np.sum(
        y * np.log(a_safe) + (1 - y) * np.log(1 - a_safe)
    )
    # Gradients use the unclipped output.
    error = a - y
    dW = np.dot(X.T, error) / num_train
    db = np.sum(error) / num_train
    # Squeeze any singleton dimensions out of the loss value.
    cost = np.squeeze(cost)
    return a, cost, dW, db
def logistic_train(X, y, learning_rate, epochs):
    """Train logistic regression with batch gradient descent.

    :param X: input feature matrix, one sample per row
    :param y: label vector (1-D or column vector)
    :param learning_rate: step size of each gradient-descent update
    :param epochs: number of gradient-descent iterations
    :return: tuple ``(cost_list, params, grads)`` where ``cost_list`` holds
        the loss recorded every 100 iterations, ``params`` is
        ``{'W': ..., 'b': ...}`` and ``grads`` is ``{'dW': ..., 'db': ...}``
        from the last iteration
    """
    W, b = initialize_params(dims=X.shape[1])
    # W is a column vector, so keep the labels as a column vector too; this
    # avoids (n, 1) - (n,) broadcasting to (n, n) inside the gradient step.
    y = np.asarray(y).reshape(-1, 1)
    # Defaults so that ``grads`` is well-defined when epochs == 0.
    dW = 0
    db = 0
    cost_list = []
    for i in range(epochs):
        _, cost, dW, db = logistic(X=X, y=y, W=W, b=b)
        # Parameter update.
        W = W - learning_rate * dW
        b = b - learning_rate * db
        # Record and print the loss every 100 iterations.
        if i % 100 == 0:
            cost_list.append(cost)
            print(f'i: {i}, cost: {cost}')
    params = {
        'W': W,
        'b': b,
    }
    grads = {
        'dW': dW,
        'db': db,
    }
    return cost_list, params, grads
def predict(X, params):
    """Predict binary class labels with trained parameters.

    :param X: input feature matrix, one sample per row
    :param params: dict with the weights under ``'W'`` and the bias under
        ``'b'``
    :return: float array with the shape of ``X @ W`` holding ``1`` where
        the predicted probability is greater than 0.5 and ``0`` otherwise
    """
    probabilities = sigmoid(x=(np.dot(X, params['W']) + params['b']))
    # Vectorized threshold instead of a per-element Python loop.
    return np.where(probabilities > 0.5, 1.0, 0.0)
if __name__ == '__main__':
    # Demo: fit the hand-written model and scikit-learn's LogisticRegression
    # on the same synthetic two-feature data set and print both predictions.
    # Import from the public package path; ``_samples_generator`` is a
    # private module.
    from sklearn.datasets import make_classification

    X, labels = make_classification(
        n_samples=100,
        n_features=2,
        n_redundant=0,
        n_informative=2,
        random_state=1,
        n_clusters_per_class=2
    )
    # Fixed seed for the noise generator.
    rng = np.random.RandomState(2)
    # Add uniformly distributed noise to the generated features.
    X += 2 * rng.uniform(size=X.shape)

    # 90% / 10% train/test split.
    offset = int(X.shape[0] * 0.9)
    X_train, y_train = X[:offset], labels[:offset]
    X_test, y_test = X[offset:], labels[offset:]
    y_train = y_train.reshape((-1, 1))
    y_test = y_test.reshape((-1, 1))

    # Method 1: the gradient-descent implementation from this file.
    cost_list, params, grads = logistic_train(X_train, y_train, 0.01, 1000)
    print(params)
    y_pred = predict(X_test, params)
    print(y_pred)

    # Method 2: scikit-learn.
    from sklearn.linear_model import LogisticRegression

    # Fit on the training set; pass 1-D labels so scikit-learn does not
    # emit a DataConversionWarning for the column-vector ``y_train``.
    clf = LogisticRegression(random_state=0).fit(X_train, y_train.ravel())
    # Predict on the test set.
    y_pred = clf.predict(X_test)
    print(y_pred)
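# Source article: "量化交易之机器学习篇 - 实现逻辑回归模型的两种方式"
# (Quantitative Trading, Machine Learning Series - Two Ways to Implement a Logistic Regression Model)
# Scraped page footer: latest recommended article published 2024-07-06 07:15:46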