# Regularized logistic regression implementation (正则化实现)
import numpy as np
import matplotlib.pyplot as plt
# 1. Load the datasets (comma-separated text files).
data_s1 = np.loadtxt('s1.txt', delimiter=',')
data_s2 = np.loadtxt('s2.txt', delimiter=',')
# Split each dataset at the last column: features on the left, labels on the right.
x_s1, y_s1 = np.hsplit(data_s1, [-1])
x_s2, y_s2 = np.hsplit(data_s2, [-1])
# Prepend a column of ones so theta[0] acts as the bias/intercept term.
X_s1 = np.insert(x_s1, 0, 1, axis=1)
X_s2 = np.insert(x_s2, 0, 1, axis=1)
# m: number of training samples, n: number of features incl. the bias column.
m, n = X_s1.shape
def funcZ(X, theta):
    """Linear model: return the pre-sigmoid logits X @ theta."""
    return X.dot(theta)
# 2. 实现Sigmoid函数,并画出该函数
def Sigmoid(z):
    """Logistic sigmoid: map z elementwise into (0, 1) via 1 / (1 + e^-z)."""
    return 1.0 / (1.0 + np.exp(-z))
# 3. 实现逻辑回归的代价函数,实现正则化逻辑回归
def computeCost(h, y, lamda, theta):
    """Regularized logistic-regression cost (cross-entropy + L2 penalty).

    Parameters
    ----------
    h : (m, 1) array of predicted probabilities, strictly in (0, 1)
    y : (m, 1) array of 0/1 labels
    lamda : L2 regularization strength
    theta : (n, 1) parameter vector; theta[0] (the bias) is not penalized

    Returns
    -------
    float : mean cross-entropy plus lamda/(2m) * sum(theta[1:]^2)
    """
    # Fix: derive the sample count from the data instead of reading the
    # module-level global `m`, so the cost is also correct when called on
    # datasets of a different size (e.g. a test set).
    m_samples = y.shape[0]
    cost = -np.mean(y * np.log(h) + (1 - y) * np.log(1 - h))
    reg = lamda / (2 * m_samples) * np.sum(theta[1:] ** 2)
    return cost + reg
# 4. 实现梯度下降函数
def grad(X, h, y, lamda, theta):
    """Gradient of the regularized cost with respect to theta.

    Parameters
    ----------
    X : (m, n) design matrix (first column is the bias ones)
    h : (m, 1) predicted probabilities
    y : (m, 1) labels
    lamda : L2 regularization strength
    theta : (n, 1) parameters; theta[0] is excluded from regularization

    Returns
    -------
    (n, 1) array : (1/m) X^T (h - y) + (lamda/m) * [0, theta[1:]]
    """
    # Fix: derive the sample count from X instead of the module-level global
    # `m`, so the gradient is correct for any dataset size.
    m_samples = X.shape[0]
    d_theta = X.T.dot(h - y) / m_samples
    # Zero the bias slot so theta[0] receives no regularization pull.
    reg = (lamda / m_samples) * np.insert(theta[1:], 0, 0, axis=0)
    return d_theta + reg
# 5. 通过梯度下降计算回归模型,用所得模型对测试集的数据进行预测,并计算准确率
def train(alpha, iters, theta, X, lamda, y):
    """Batch gradient descent for regularized logistic regression.

    Updates `theta` in place for `iters` steps with learning rate `alpha`,
    printing the cost every 200 steps. Returns the final theta and the
    per-step cost history.
    """
    history = []
    for step in range(iters):
        probs = Sigmoid(funcZ(X, theta))
        cost = computeCost(probs, y, lamda, theta)
        theta -= alpha * grad(X, probs, y, lamda, theta)
        if step % 200 == 0:
            print('step:', step, 'cost:', cost)
        history.append(cost)
    return theta, history
if __name__ == '__main__':
    # Hyperparameters: learning rate, iteration count, L2 strength.
    alpha = 0.1
    iters = 10000
    lamda = 4
    # Random init in [-0.5, 0.5); n counts the bias column too.
    theta = np.random.uniform(-0.5, 0.5, [n, 1])
    theta, Jcost = train(alpha, iters, theta, X_s1, lamda, y_s1)
    # 5. Predict on the held-out set s2 and report accuracy.
    z = funcZ(X_s2, theta)
    h = Sigmoid(z)
    print('predict:', h)
    print('label_y:', y_s2)
    # Fix: compare the thresholded predictions directly. The original wrapped
    # the comparison in a list ([h > 0.5]), which added a spurious leading
    # axis before broadcasting.
    print('精确度:', np.mean(y_s2 == (h > 0.5)))
    # 6. Scatter the X2/X3 feature pair colored by true label, then by the
    # predicted probability.
    # NOTE on flattening: ravel() returns a view when possible (mutations hit
    # the source), flatten() always copies, squeeze() only drops length-1 axes.
    plt.scatter(X_s2[:, 2], X_s2[:, 3], c=y_s2.ravel())
    plt.show()
    plt.scatter(X_s2[:, 2], X_s2[:, 3], c=h.ravel())
    plt.show()
    # Labels vs. themselves (reference diagonal) and labels vs. predictions.
    plt.scatter(y_s2, y_s2)
    plt.scatter(y_s2, h)
    plt.show()
# Dataset download for this article:
# Link: https://pan.baidu.com/s/1io3aZH-QRCqnGardXOH5yg  extraction code: 83mu
# Study/discussion QQ group: 521364338 — scan the code to join and receive AI
# learning materials. Credit the source when reposting; infringement will be pursued.