import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.linear_model import LinearRegression
# Synthetic 1-D regression data: 20 evenly spaced inputs shaped as a column.
X = np.linspace(2, 10, 20).reshape(-1, 1)

# Ground truth f(x) = w*x + b with a random integer slope from [1, 6) and a
# random integer intercept from [-5, 5). The slope is drawn first so the
# sequence of random draws matches the one-line formulation.
true_w = np.random.randint(1, 6, size=1)
true_b = np.random.randint(-5, 5, size=1)
y = true_w * X + true_b

# Perturb the targets with Gaussian noise (standard deviation 0.8).
noise = 0.8 * np.random.randn(20, 1)
y = y + noise

plt.scatter(X, y, color='red')
<matplotlib.collections.PathCollection at 0x1d33e60b710>
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-ovyfZRgJ-1636168173635)(output_1_1.png)]
# Reference solution: fit ordinary least squares with scikit-learn.
lr = LinearRegression().fit(X, y)

# The indexing below pulls the single slope and intercept out as scalars.
w, b = lr.coef_[0, 0], lr.intercept_[0]
print(w, b)
0.878707573986 -0.869197396312
# Dense grid slightly wider than the data range, used to draw the fitted line.
x = np.linspace(1, 11, num=50)

# Raw samples, then the line recovered by LinearRegression on top of them.
plt.scatter(X, y)
plt.plot(x, b + w * x, color='green')
[<matplotlib.lines.Line2D at 0x1d3404ed550>]
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-fxQwFoST-1636168173638)(output_3_1.png)]
# Solve the single-variable linear problem (w, b) with gradient descent.
class LinearModel(object):
    """Univariate linear regression f(x) = w*x + b trained by gradient descent.

    The slope ``w`` and intercept ``b`` start from random values and are
    refined by ``fit`` using the mean squared error over all samples.
    """

    def __init__(self):
        # Random starting point for both parameters (standard normal draws).
        self.w = np.random.randn(1)[0]
        self.b = np.random.randn(1)[0]

    def model(self, x):
        """Return the prediction f(x) = w*x + b for input ``x``."""
        # Use the instance's own intercept. A bare ``b`` would read an
        # unrelated module-level variable (or raise NameError), so the
        # intercept being trained would never influence the prediction.
        return self.w * x + self.b

    def loss(self, x, y):
        """Squared error for one sample and its partial derivatives.

        Returns a tuple ``(cost, gradient_w, gradient_b)`` where ``cost`` is
        ``(y - f(x))**2`` and the gradients are its derivatives with respect
        to ``w`` and ``b``.
        """
        residual = y - self.model(x)
        cost = residual ** 2
        # d/dw (y - (w*x + b))^2 = 2*(y - f(x))*(-x); d/db = 2*(y - f(x))*(-1)
        gradient_w = 2 * residual * (-x)
        gradient_b = 2 * residual * (-1)
        return cost, gradient_w, gradient_b

    def gradient_descent(self, gradient_w, gradient_b, learning_rate=0.1):
        """Take one descent step: move ``w`` and ``b`` against their gradients."""
        self.w -= gradient_w * learning_rate
        self.b -= gradient_b * learning_rate

    def fit(self, X, y, max_iter=3000, tol=0.0001, learning_rate=0.002):
        """Train ``w`` and ``b`` on the samples by batch gradient descent.

        Parameters
        ----------
        X, y : 2-D arrays indexed as ``X[i, 0]`` / ``y[i, 0]``
            Inputs and targets, one sample per row.
        max_iter : int
            Iteration cap; the loop stops once the counter exceeds it.
        tol : float
            Training stops when both ``w`` and ``b`` changed by less than
            this amount in the previous step.
        learning_rate : float
            Step size passed to ``gradient_descent``.

        Prints the iteration number and mean loss on every iteration.
        """
        count = 0
        # Infinite sentinels guarantee the first convergence check fails,
        # whatever tolerance the caller picks.
        last_w = np.inf
        last_b = np.inf
        length = len(X)
        while count <= max_iter:
            # Stop once slope and intercept have both settled within tol.
            if (abs(last_w - self.w) < tol) and (abs(last_b - self.b) < tol):
                break
            # Average the per-sample loss and gradients over the data set.
            cost = 0
            gradient_w = 0
            gradient_b = 0
            for i in range(length):
                cost_, gradient_w_, gradient_b_ = self.loss(X[i, 0], y[i, 0])
                cost += cost_ / length
                gradient_w += gradient_w_ / length
                gradient_b += gradient_b_ / length
            print('--------------------------执行次数:%d.损失值:%0.2f'%(count,cost))
            # Remember the current parameters so the next pass can measure
            # how far this update moved them.
            last_w = self.w
            last_b = self.b
            # Update slope and intercept.
            self.gradient_descent(gradient_w, gradient_b, learning_rate)
            count += 1

    def result(self):
        """Return the current parameters as a ``(w, b)`` tuple."""
        return self.w, self.b
# Build the hand-written gradient-descent model and train it on X, y.
lm = LinearModel()
lm.fit(X,y)
--------------------------执行次数:0.损失值:100.26
--------------------------执行次数:1.损失值:69.65
--------------------------执行次数:2.损失值:48.44
--------------------------执行次数:3.损失值:33.74
--------------------------执行次数:4.损失值:23.55
--------------------------执行次数:5.损失值:16.49
--------------------------执行次数:6.损失值:11.60
--------------------------执行次数:7.损失值:8.21
--------------------------执行次数:8.损失值:5.87
--------------------------执行次数:9.损失值:4.24
--------------------------执行次数:10.损失值:3.11
--------------------------执行次数:11.损失值:2.33
--------------------------执行次数:12.损失值:1.79
--------------------------执行次数:13.损失值:1.41
--------------------------执行次数:14.损失值:1.15
--------------------------执行次数:15.损失值:0.97
--------------------------执行次数:16.损失值:0.85
--------------------------执行次数:17.损失值:0.76
--------------------------执行次数:18.损失值:0.70
--------------------------执行次数:19.损失值:0.66
--------------------------执行次数:20.损失值:0.63
--------------------------执行次数:21.损失值:0.61
--------------------------执行次数:22.损失值:0.60
--------------------------执行次数:23.损失值:0.59
--------------------------执行次数:24.损失值:0.58
--------------------------执行次数:25.损失值:0.58
--------------------------执行次数:26.损失值:0.57
--------------------------执行次数:27.损失值:0.57
--------------------------执行次数:28.损失值:0.57
--------------------------执行次数:29.损失值:0.57
--------------------------执行次数:30.损失值:0.57
--------------------------执行次数:31.损失值:0.57
--------------------------执行次数:32.损失值:0.57
--------------------------执行次数:33.损失值:0.57
--------------------------执行次数:34.损失值:0.57
--------------------------执行次数:35.损失值:0.57
--------------------------执行次数:36.损失值:0.57
--------------------------执行次数:37.损失值:0.57
--------------------------执行次数:38.损失值:0.57
--------------------------执行次数:39.损失值:0.57
--------------------------执行次数:40.损失值:0.57
--------------------------执行次数:41.损失值:0.57
--------------------------执行次数:42.损失值:0.57
--------------------------执行次数:43.损失值:0.57
# Display the (w, b) pair learned by the custom model.
lm.result()
(0.87822523127715901, -0.8977846808879012)
# Compare the hand-written gradient-descent fit with sklearn's LinearRegression.
print(w, b)
plt.scatter(X, y, c='red')
# Green: the custom model's line. The coefficients come from the trained model
# rather than numbers copied from an earlier run, because the data set is
# randomly generated and such literals go stale on every re-run.
custom_w, custom_b = lm.result()
plt.plot(x, custom_w * x + custom_b, color='green')
# Blue: the line found by sklearn's LinearRegression.
plt.plot(x, w * x + b, color='blue')
plt.title('自定义的算法拟合曲线',fontproperties = 'KaiTi')
Text(0.5,1,'自定义的算法拟合曲线')
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-WahAJXUV-1636168173641)(output_9_1.png)]