实现简单线性回归
import numpy as np
import matplotlib.pyplot as plt
# Five sample points used throughout the simple linear regression demo.
x = np.array([1., 2., 3., 4., 5.])
y = np.array([1., 3., 2., 3., 5.])

plt.scatter(x, y)
plt.axis([0, 6, 0, 6])  # pin the visible coordinate range

# Sample means needed by the closed-form formulas from the previous section.
x_mean, y_mean = np.mean(x), np.mean(y)
这里使用了一个技巧:用 zip(x, y) 把 x 和 y 中的元素逐对配对,从而在一次循环中同时遍历两个数组
# Accumulate the numerator and denominator of the least-squares slope.
num = 0.0
d = 0.0
for x_i, y_i in zip(x, y):
    # The loop body must be indented; without it the script does not parse.
    num += (x_i - x_mean) * (y_i - y_mean)
    d += (x_i - x_mean) ** 2

a = num / d  # slope
b = y_mean - a * x_mean  # intercept
print('a = ', a)
print('b = ', b)
结果
a = 0.8
b = 0.39999999999999947
# Draw the fitted line on top of the sample points.
y_hat = b + a * x
plt.axis([0, 6, 0, 6])  # note: the limits are passed as a single list
plt.scatter(x, y)
plt.plot(x, y_hat, color='r')
下面仍然自己动手封装一个类(注意拼写;用 x_train.ndim 检查输入是否为一维;a_、b_ 这类以下划线结尾的属性表示由 fit 计算得到的结果)
import numpy as np
class Simple_linear_Regression1:
    """Single-feature linear regression fitted by ordinary least squares.

    The model is ``y = a_ * x + b_``. Attributes with a trailing underscore
    are computed by :meth:`fit` and stay ``None`` until it has been called.
    """

    def __init__(self):
        """Create an unfitted Simple Linear Regression model."""
        self.a_ = None  # slope, set by fit()
        self.b_ = None  # intercept, set by fit()

    def fit(self, x_train, y_train):
        """Train the model on ``x_train`` / ``y_train`` and return ``self``.

        Args:
            x_train: 1-D array-like holding the single feature.
            y_train: array-like of targets with the same length as
                ``x_train``.

        Returns:
            This instance, with ``a_`` and ``b_`` set.

        Raises:
            AssertionError: if ``x_train`` is not 1-D or the lengths differ
                (note that asserts are skipped under ``python -O``).
            ValueError: if ``x_train`` is empty or has zero variance, in
                which case the slope is undefined.
        """
        # Accept lists/tuples as well as ndarrays.
        x_train = np.asarray(x_train)
        y_train = np.asarray(y_train)

        assert x_train.ndim == 1, \
            "Simple Linear Regression can only solve single feature training data"
        assert len(x_train) == len(y_train), \
            "the size of x_train must be equal to the size of y_train"
        if x_train.size == 0:
            raise ValueError("x_train must not be empty")

        x_mean = np.mean(x_train)
        y_mean = np.mean(y_train)

        # Vectorised form of sum((x - x_mean) * (y - y_mean)) and
        # sum((x - x_mean) ** 2); replaces the per-element Python loop.
        x_centered = x_train - x_mean
        num = x_centered.dot(y_train - y_mean)
        d = x_centered.dot(x_centered)
        if d == 0:
            # Constant feature (or a single sample): dividing would give nan.
            raise ValueError(
                "x_train has zero variance; the slope is undefined"
            )

        self.a_ = num / d
        self.b_ = y_mean - self.a_ * x_mean
        return self

    def predict(self, x_predict):
        """Return the vector of predictions for the samples in ``x_predict``.

        Args:
            x_predict: 1-D array-like of feature values.

        Returns:
            numpy.ndarray with one prediction per element of ``x_predict``.

        Raises:
            AssertionError: if ``x_predict`` is not 1-D or the model has not
                been fitted yet.
        """
        x_predict = np.asarray(x_predict)
        assert x_predict.ndim == 1, \
            "Simple Linear Regression can only solve single feature training data."
        assert self.a_ is not None and self.b_ is not None, \
            "must fit before predict!"
        # Broadcasting evaluates the line for every sample at once.
        return self.a_ * x_predict + self.b_

    def _predict(self, x_single):
        """Return the prediction for a single sample ``x_single``."""
        return self.a_ * x_single + self.b_

    def __repr__(self):
        return 'Simple_linear_Regression1'
使用一下我们的封装
import numpy as np
import matplotlib.pyplot as plt
from Simple_linear_Regression.SimpleLinearRegression import Simple_linear_Regression1

# Same five sample points as in the hand-written computation above.
x = np.array([1., 2., 3., 4., 5.])
y = np.array([1., 3., 2., 3., 5.])

# Train the wrapped model and show the learned slope and intercept.
reg1 = Simple_linear_Regression1()
reg1.fit(x, y)
print(reg1.a_)
print(reg1.b_)

# Predict for a fresh vector of inputs.
l = np.array([1, 2, 3, 4, 5])
y_hat1 = reg1.predict(l)
print(y_hat1)
0.8
0.39999999999999947
[1.2 2. 2.8 3.6 4.4]
# Overlay the wrapped model's predictions (red line) on the sample points.
plt.scatter(x, y)
plt.plot(x, y_hat1, color='r')