#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2018/4/1 9:36
# @Author : HJH
# @Site :
# @File : linear2.py
# @Software: PyCharm
import numpy as np
from sklearn.datasets import load_diabetes
import matplotlib.pyplot as plt
from numpy import mat
from numpy import random
from numpy.linalg import det
class linear(object):
    """Single-output linear regression with two fitting strategies.

    ``train`` fits ``W`` and ``b`` by batch gradient descent on the
    half mean squared error; ``ridge_regression`` computes a closed-form
    ridge solution and stores it in ``weight``.

    Attributes:
        W: weight column vector of shape (num_feature, 1), set by ``train``.
        b: bias of shape (1, 1), set by ``train``.
        weight: ridge solution of shape (num_feature + 1, 1) with the
            intercept in the last row, set by ``ridge_regression``.
    """

    def __init__(self):
        self.W = None
        self.b = None
        self.weight = None

    def loss(self, X, y):
        """Return the half mean squared error and its gradients.

        Args:
            X: sample matrix of shape (num_train, num_feature).
            y: target column vector of shape (num_train, 1).

        Returns:
            Tuple ``(loss, dW, db)``: the scalar loss, the gradient with
            respect to ``W`` and the gradient with respect to ``b``.
        """
        num_train = X.shape[0]
        residual = X.dot(self.W) + self.b - y
        loss = 0.5 * np.sum(np.square(residual)) / num_train
        dW = X.T.dot(residual) / num_train
        db = np.sum(residual) / num_train
        return loss, dW, db

    def train(self, X, y, learn_rate=0.001, iters=10000):
        """Fit ``W`` and ``b`` with batch gradient descent.

        Args:
            X: sample matrix of shape (num_train, num_feature).
            y: target column vector of shape (num_train, 1).
            learn_rate: step size applied to both gradients.
            iters: number of gradient steps.

        Returns:
            List with the loss measured before each step.
        """
        num_feature = X.shape[1]
        self.W = np.random.rand(num_feature, 1)
        # A single shared bias. Allocating one entry per feature made
        # ``X.dot(W) + b`` fail (or mis-broadcast) for num_feature > 1.
        self.b = np.zeros((1, 1))
        loss_list = []
        for i in range(iters):
            loss, dW, db = self.loss(X, y)
            loss_list.append(loss)
            self.W -= learn_rate * dW
            self.b -= learn_rate * db
            if i % 500 == 0:
                print('iters = %d,loss = %f' % (i, loss))
        return loss_list

    def predict(self, X_test):
        """Return ``X_test.dot(W) + b`` using the gradient-descent fit."""
        return X_test.dot(self.W) + self.b

    # Ridge regression: compared with plain least squares it gives up
    # unbiasedness in exchange for better numerical stability, and therefore
    # better computational accuracy.
    def ridge_regression(self, X, y, lam=0.2):
        """Solve ``(A^T A + lam * I) w = A^T y`` where ``A = [X, 1]``.

        A column of ones is appended to ``X`` so the last entry of the
        solution is the intercept. The penalty is applied to every
        coefficient, the intercept included.

        Args:
            X: 2-D sample matrix of shape (m, n).
            y: targets with m rows (a 1-D vector is treated as a column).
            lam: ridge penalty added to the diagonal of ``A^T A``.

        Returns:
            ``np.matrix`` of shape (n + 1, 1), also stored in
            ``self.weight``; ``None`` (after printing a message) when the
            regularised matrix has zero determinant.
        """
        m, n = np.shape(X)
        design = np.hstack((np.asarray(X, dtype=float), np.ones((m, 1))))
        # The identity must match the (n + 1) x (n + 1) Gram matrix. With
        # eye(n) the penalty was broadcast onto every entry for n == 1 and
        # raised a shape error for n > 1.
        gram = design.T.dot(design) + lam * np.eye(n + 1)
        if det(gram) == 0.0:
            print("the det of xTx is zero!")
            return None
        targets = np.asarray(y, dtype=float).reshape(m, -1)
        # Solving the linear system avoids forming an explicit inverse.
        solution = np.linalg.solve(gram, design.T.dot(targets))
        self.weight = np.asmatrix(solution)
        return self.weight
def load_batasets():
    """Load the scikit-learn diabetes data and split it for a one-feature fit.

    Only the first feature column is kept. The last 20 samples become the
    test split; everything before them is the training split.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``. The targets are
        reshaped into column vectors.
    """
    holdout = 20
    # Use the copy of the dataset that ships with scikit-learn.
    dataset = load_diabetes()
    # Author's note on the data: features are reals in roughly (-0.2, 0.2),
    # targets are integers in roughly (25, 346).
    first_feature = dataset.data[:, :1]
    targets = dataset.target
    X_train, X_test = first_feature[:-holdout], first_feature[-holdout:]
    y_train = targets[:-holdout].reshape((-1, 1))
    y_test = targets[-holdout:].reshape((-1, 1))
    return X_train, X_test, y_train, y_test
def plot(X_train,y_train,X_test,y_test,linear,loss_list,ridge_weight):
    """Show both fitted lines with the data, and the training loss curve.

    The upper panel scatters the training points (black), the test points
    (blue) and the model's test predictions (red), then draws the
    gradient-descent line and the ridge line over the training inputs.
    The lower panel plots ``loss_list``.

    Args:
        X_train, y_train: training inputs and targets.
        X_test, y_test: test inputs and targets.
        linear: fitted model exposing ``W``, ``b`` and ``predict``.
        loss_list: sequence of loss values, one per training step.
        ridge_weight: ridge solution; entry 0 is used as the slope and
            entry 1 as the intercept.
    """
    gradient_line = X_train.dot(linear.W) + linear.b
    slope, intercept = ridge_weight[0], ridge_weight[1]
    ridge_line = X_train.dot(slope) + intercept

    plt.figure()

    # Upper panel: data points and both regression lines.
    plt.subplot(211)
    scatter_specs = (
        (X_train, y_train, 'black'),
        (X_test, y_test, 'blue'),
        (X_test, linear.predict(X_test), 'red'),
    )
    for xs, ys, colour in scatter_specs:
        plt.scatter(xs, ys, color=colour)
    plt.plot(X_train, gradient_line, 'r', label='gradient')
    plt.plot(X_train, ridge_line, 'y', label='ridge')
    plt.legend()
    plt.xlabel('X')
    plt.ylabel('y')

    # Lower panel: loss per training step.
    plt.subplot(212)
    plt.plot(loss_list, color='blue')
    plt.xlabel('epochs')
    plt.ylabel('errors')

    plt.show()
if __name__ == '__main__':
    # Fit the single-feature model with gradient descent and with ridge
    # regression, then plot both results together.
    X_train, X_test, y_train, y_test = load_batasets()
    model = linear()
    history = model.train(X_train, y_train)
    ridge_solution = model.ridge_regression(X_train, y_train)
    plot(X_train, y_train, X_test, y_test, model, history, ridge_solution)
# Multi-feature version (多特征):
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2018/4/5 11:04
# @Author : HJH
# @Site :
# @File : temp.py
# @Software: PyCharm
import numpy as np
from sklearn.datasets import load_diabetes
import matplotlib.pyplot as plt
from numpy import mat
from numpy import random
from numpy.linalg import det
class linear(object):
    """Linear model without a separate bias term, trained by gradient descent.

    Predictions are ``X.dot(W)``. Any intercept has to be supplied by the
    caller as an extra constant column in ``X``.
    """

    def __init__(self):
        # Weight column vector; allocated by train().
        self.W = None

    def loss(self, X, y):
        """Return the half mean squared error and its gradient w.r.t. ``W``.

        Args:
            X: sample matrix with one row per sample.
            y: target column vector with one row per sample.

        Returns:
            Tuple ``(loss, dW)``.
        """
        residual = X.dot(self.W) - y
        sample_count = X.shape[0]
        cost = 0.5 * np.sum(np.square(residual)) / sample_count
        grad_W = X.T.dot(residual) / sample_count
        return cost, grad_W

    def train(self, X, y, learn_rate=0.001, iters=10000):
        """Run ``iters`` gradient steps from a random start.

        Progress is printed every 500 steps.

        Returns:
            List with the loss measured before each step.
        """
        self.W = np.random.rand(X.shape[1], 1)
        history = []
        for step in range(iters):
            cost, grad_W = self.loss(X, y)
            history.append(cost)
            self.W -= learn_rate * grad_W
            if not step % 500:
                print('iters = %d,loss = %f' % (step, cost))
        return history

    def predict(self, X_test):
        """Return ``X_test.dot(W)``."""
        return X_test.dot(self.W)
def load_batasets():
    """Load the scikit-learn diabetes data with a constant column appended.

    All feature columns are kept and a column of ones is stacked on the
    right of each split. The last 20 samples become the test split;
    everything before them is the training split.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``. The targets are
        reshaped into column vectors.
    """
    holdout = 20

    def with_ones_column(block):
        # The ones column is built as a np.matrix, as in the original code.
        ones = np.asmatrix(np.ones((block.shape[0], 1)))
        return np.hstack((block, ones))

    # Use the copy of the diabetes dataset that ships with scikit-learn.
    dataset = load_diabetes()
    # Author's note on the data: features are reals in roughly (-0.2, 0.2),
    # targets are integers in roughly (25, 346).
    features = dataset.data
    targets = dataset.target

    X_train = with_ones_column(features[:-holdout, :])
    X_test = with_ones_column(features[-holdout:, :])
    y_train = targets[:-holdout].reshape((-1, 1))
    y_test = targets[-holdout:].reshape((-1, 1))
    return X_train, X_test, y_train, y_test
def plot(X_train,y_train,X_test,y_test,linear,loss_list):
    """Show the fit projected onto the first feature, and the loss curve.

    The upper panel uses only the first column of the inputs. The drawn
    line is built from the first weight as slope and the last weight as
    offset. The lower panel plots ``loss_list``.

    Args:
        X_train, y_train: training inputs and targets.
        X_test, y_test: test inputs and targets.
        linear: fitted model exposing ``W`` and ``predict``.
        loss_list: sequence of loss values, one per training step.
    """
    first_col = X_train[:, :1]
    line = (first_col.dot(linear.W[0]) + linear.W[-1]).T

    plt.figure()

    # Upper panel: first feature against targets and predictions.
    plt.subplot(211)
    plt.scatter(first_col, y_train, color='black')
    test_first_col = X_test[:, :1]
    plt.scatter(test_first_col, y_test[:, :1], color='blue')
    plt.scatter(test_first_col, linear.predict(X_test)[:, :1], color='red')
    plt.plot(first_col, line, 'r', label='gradient')
    plt.legend()
    plt.xlabel('X')
    plt.ylabel('y')

    # Lower panel: loss per training step.
    plt.subplot(212)
    plt.plot(loss_list, color='blue')
    plt.xlabel('epochs')
    plt.ylabel('errors')

    plt.show()
if __name__ == '__main__':
    # Train the multi-feature model with gradient descent and plot the
    # result against the first feature.
    X_train, X_test, y_train, y_test = load_batasets()
    model = linear()
    history = model.train(X_train, y_train)
    plot(X_train, y_train, X_test, y_test, model, history)