个人自己创建数据,实现分类任务
要求:
只需掌握LR(逻辑回归)学习算法的核心代码即可
要求写出详细的注释说明
注意矩阵维度
参考负责人
#coding=utf-8
#Version:python3.6.0
#Tools:Pycharm 2017.3.2
__date__ = '2019/6/2 21:43'
__author__ = 'ranchunfu'
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Seed NumPy's global random generator so runs are repeatable.
# NOTE: get_data() re-seeds this same global generator with its own argument,
# so the random state that LR.fit later draws its initial weights from is
# determined by the get_data(...) call below rather than by this line.
np.random.seed(1)
# Generate the synthetic two-class dataset.
def get_data(seed):
    """Create a shuffled, reproducible 2-D binary classification dataset.

    Class 0 contributes 300 points with features drawn from N(5, 1) and
    N(4, 1); class 1 contributes 400 points drawn from N(10, 2) and N(8, 2).

    Note: this re-seeds NumPy's *global* random generator with ``seed``.

    Args:
        seed: Value passed to ``np.random.seed``.

    Returns:
        Tuple ``(x, y)`` where ``x`` has shape (700, 2) and ``y`` has shape
        (700,) and holds the integer labels 0 / 1, both in the same shuffled
        order.
    """
    np.random.seed(seed)

    n_neg, n_pos = 300, 400

    # The order of these draws fixes the generated values for a given seed,
    # so it must stay: class-0 feature 1, class-0 feature 2, then class 1.
    neg_f1 = np.random.normal(loc=5.0, scale=1.0, size=n_neg)
    neg_f2 = np.random.normal(loc=4.0, scale=1.0, size=n_neg)
    pos_f1 = np.random.normal(loc=10.0, scale=2.0, size=n_pos)
    pos_f2 = np.random.normal(loc=8.0, scale=2.0, size=n_pos)

    # One row per sample, one column per feature.
    features = np.column_stack((
        np.concatenate((neg_f1, pos_f1), axis=0),
        np.concatenate((neg_f2, pos_f2), axis=0),
    ))
    labels = np.concatenate(([0] * n_neg, [1] * n_pos), axis=0)

    # Shuffle samples and labels with the same permutation.
    order = np.random.permutation(n_neg + n_pos)
    return features[order], labels[order]
# Build the shuffled synthetic dataset once, with a fixed seed so that every
# run works on the same samples.
x, y = get_data(seed=1)
# Split the data into a training set and a test set.
def train_test_split(x, y, train_ratio=0.7):
    """Split samples and labels into leading-train / trailing-test parts.

    No shuffling is done here; the first ``train_ratio`` share of the
    sequence becomes the training set and the remainder the test set.

    Args:
        x: Sliceable sequence of samples (first axis indexes samples).
        y: Sliceable sequence of labels, same length as ``x``.
        train_ratio: Fraction of samples placed in the training set,
            between 0 and 1 inclusive. Defaults to 0.7.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)``.

    Raises:
        ValueError: If ``x`` and ``y`` differ in length or ``train_ratio``
            is outside [0, 1].
    """
    if len(x) != len(y):
        raise ValueError(
            "x and y must contain the same number of samples, "
            "got %d and %d" % (len(x), len(y))
        )
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError("train_ratio must be within [0, 1], got %r" % (train_ratio,))

    # The tiny epsilon absorbs binary floating-point error before truncating:
    # 700 * 0.7 evaluates to 489.99999999999994, which int() alone would cut
    # down to 489 instead of the intended 490.
    split_index = int(len(y) * train_ratio + 1e-9)

    return x[:split_index], y[:split_index], x[split_index:], y[split_index:]
# Data preparation: split, min-max scale, then arrange the arrays in the
# column-per-sample layout used by LR:
#   x_* has shape (2, m) and y_* has shape (1, m), m = number of samples.
x_train, y_train, x_test, y_test = train_test_split(x, y)

# Scale both sets with statistics taken from the TRAINING set only. Scaling
# the test set with its own min/max would put the two sets on different
# scales, so the weights learned on x_train would not apply to x_test.
_feature_min = np.min(x_train, axis=0)
_feature_range = np.max(x_train, axis=0) - _feature_min
# A constant feature has zero range; divide by 1 instead so it maps to 0
# rather than producing NaN/inf.
_feature_range = np.where(_feature_range == 0, 1.0, _feature_range)

x_train = (x_train - _feature_min) / _feature_range
x_test = (x_test - _feature_min) / _feature_range

# Transpose samples into columns and turn the label vectors into row vectors.
x_train = x_train.T
y_train = y_train.reshape(1, -1)
x_test = x_test.T
y_test = y_test.reshape(1, -1)
class LR:
    """Binary logistic regression trained with batch gradient descent.

    Array layout (one column per sample):
        x: shape (n_features, m)
        y: shape (1, m), labels 0 / 1
        w: shape (1, n_features)
        b: shape (1,)

    Typical use: ``fit`` attaches the training data and initialises the
    parameters, ``grad`` runs the optimisation, ``predict`` labels new data.
    """

    def __init__(self, learning_rate=0.1, iter=100):
        """Store the hyper-parameters.

        Args:
            learning_rate: Step size of every gradient-descent update.
            iter: Number of update steps performed by ``grad``. (The name
                shadows the builtin; it is kept for backward compatibility.)
        """
        self.learning_rate = learning_rate
        self.iter = iter

    def _sigmoid(self, Z):
        """Return the element-wise logistic function 1 / (1 + exp(-Z))."""
        # Clipping keeps np.exp from overflowing for extreme inputs; the
        # sigmoid is already saturated far inside this range.
        Z = np.clip(Z, -500, 500)
        return 1 / (1 + np.exp(-Z))

    # Forward propagation
    def _forward(self, x):
        """Return predicted probabilities, shape (1, m), for x of shape (n_features, m)."""
        Z = np.dot(self.w, x) + self.b  # (1, n_features) @ (n_features, m) -> (1, m)
        return self._sigmoid(Z)

    def fit(self, x, y):
        """Attach the training data and initialise the parameters.

        This does not run the optimisation; call ``grad`` afterwards.

        Args:
            x: Training samples, shape (n_features, m).
            y: Training labels (0 / 1), shape (1, m).
        """
        self.x = x
        self.y = y
        # Small random weights, one per feature; drawn from NumPy's global RNG.
        self.w = np.random.randn(1, x.shape[0]) * 0.01
        self.b = np.zeros(1)

    def cost(self):
        """Return the mean binary cross-entropy on the attached training data."""
        A = self._forward(self.x)
        # Keep probabilities strictly inside (0, 1) so np.log never sees 0.
        A = np.clip(A, 1e-15, 1 - 1e-15)
        losses = -(self.y * np.log(A) + (1 - self.y) * np.log(1 - A))
        return np.mean(losses)

    # Backward propagation
    def grad(self):
        """Run ``self.iter`` steps of batch gradient descent on the attached data.

        Updates ``self.w`` and ``self.b`` in place of their previous values.
        """
        # Number of samples of the data given to fit() -- not of any
        # module-level array.
        m = self.x.shape[1]
        for _ in range(self.iter):
            A = self._forward(self.x)          # (1, m)
            dz = A - self.y                    # (1, m)
            dw = np.dot(dz, self.x.T) / m      # (1, m) @ (m, n_features) -> (1, n_features)
            db = np.sum(dz) / m                # scalar
            self.w = self.w - self.learning_rate * dw
            self.b = self.b - self.learning_rate * db

    def predict(self, x):
        """Return 0/1 predictions for x.

        Args:
            x: Samples of shape (n_features, m).

        Returns:
            Integer array of shape (1, m): 1 where the predicted probability
            is strictly greater than 0.5, otherwise 0.
        """
        A = self._forward(x)
        return (A > 0.5).astype(int)
if __name__ == '__main__':
    # Train once, on the training set only.
    lr = LR(learning_rate=0.1, iter=1500)
    lr.fit(x_train, y_train)
    lr.grad()

    # Training accuracy in percent.
    y_train_pred = lr.predict(x_train)
    print(100 - np.mean(np.abs(y_train_pred - y_train)) * 100)

    # Test accuracy in percent. The model must NOT be re-fitted on the test
    # data here, otherwise this number would be a second training accuracy
    # instead of a measure of generalisation.
    y_test_pred = lr.predict(x_test)
    print(100 - np.mean(np.abs(y_test_pred - y_test)) * 100)

    # Visualise the test data together with the decision boundary.
    cValue = ['g', 'b']
    plt.scatter(x_test[0, :], x_test[1, :], c=[cValue[i] for i in np.squeeze(y_test)], marker='o')
    x1 = np.arange(0.1, 1.0, 0.1)
    # The boundary is where w0 * x1 + w1 * x2 + b = 0, i.e.
    # x2 = (-b - w0 * x1) / w1.
    x2 = (-lr.b - lr.w[0, 0] * x1) / lr.w[0, 1]
    plt.plot(x1, x2, 'r')
    plt.show()