最小二乘法求线性回归
1、读取数据
import numpy as np
import matplotlib.pyplot as plt

# Load the samples from CSV: column 0 holds x, column 1 holds y.
points = np.genfromtxt('data.csv', delimiter=',')
x = points[:, 0]
y = points[:, 1]
# Quick look at the raw data as a scatter plot.
plt.scatter(x, y)
plt.show()
2、定义损失函数
# Turn "find the best-fit line" into "minimize a loss": the smaller the
# mean squared error below, the better the line fits the data.
def cost_func(w, b, points):
    """Return the mean squared error of the line y = w*x + b over `points`.

    `points` is an (M, 2) array-like: column 0 is x, column 1 is y.
    """
    squared_errors = [(py - w * px - b) ** 2 for px, py in points]
    return sum(squared_errors) / len(points)
3、定义最小二乘法核心函数
## 1. Arithmetic mean helper
def average(data):
    """Return the arithmetic mean of `data`.

    Args:
        data: non-empty 1-D sequence of numbers (e.g. a NumPy column).

    Returns:
        sum(data) / len(data).

    Raises:
        ZeroDivisionError: if `data` is empty (same as the original
        loop-based version).
    """
    # The builtin sum() replaces the original hand-rolled index loop.
    return sum(data) / len(data)
# 2. Least-squares fit of a straight line
def fit(points):
    """Fit y = w*x + b to `points` by ordinary least squares.

    Args:
        points: (M, 2) array; column 0 holds x values, column 1 holds y.

    Returns:
        Tuple (w, b): slope and intercept of the best-fit line.

    Raises:
        ZeroDivisionError: if `points` is empty or all x values are equal.
    """
    M = len(points)
    xs = points[:, 0]
    ys = points[:, 1]
    x_bar = sum(xs) / M
    y_bar = sum(ys) / M
    # Closed-form least-squares slope:
    #   w = sum(y_i * (x_i - x_bar)) / (sum(x_i^2) - M * x_bar^2)
    numerator = 0
    sum_x_sq = 0
    for xi, yi in zip(xs, ys):
        numerator += yi * (xi - x_bar)
        sum_x_sq += xi ** 2
    w = numerator / (sum_x_sq - M * x_bar ** 2)
    # Intercept: the original accumulated (y - w*x)/M in a second pass,
    # which is algebraically just y_bar - w * x_bar.
    b = y_bar - w * x_bar
    return w, b
4、结果验证
# Fit the line, report its mean squared error, and plot it over the data.
# (The stray dangling "cost =" fragment from the notebook paste is removed.)
w, b = fit(points)
cost = cost_func(w, b, points)
print(cost)
x = points[:, 0]
y = points[:, 1]
y2 = w * x + b           # predicted y for each sample x
plt.scatter(x, y)        # raw data points
plt.plot(x, y2, 'r')     # fitted line in red
plt.show()
110.25738346621314
梯度下降法求线性回归
1、读取数据
import numpy as np
import matplotlib.pyplot as plt

# Load the data set; each CSV row is one (x, y) sample.
points = np.genfromtxt('data.csv', delimiter=',')
x, y = points[:, 0], points[:, 1]
# Scatter plot of the raw samples.
plt.scatter(x, y)
plt.show()
2、定义损失函数
def compute_cost(w, b, points):
    """Return the mean squared error of the prediction w*x + b on `points`."""
    M = len(points)
    squared_sum = 0
    for row in points:
        px, py = row[0], row[1]
        prediction = w * px + b
        squared_sum += (py - prediction) ** 2
    return squared_sum / M
3、定义模型的超参
# Hyperparameters for gradient descent.
alpha = 0.0001  # learning rate (step size)
init_w = 0      # initial slope
init_b = 0      # initial intercept
num_iter = 10   # number of gradient-descent iterations
4、定义核心梯度下降算法函数
def grad_desc(points, init_w, init_b, alpha, num_iter):
    """Run `num_iter` gradient-descent updates starting from (init_w, init_b).

    Returns the final (w, b) plus the list of cost values recorded before
    each update, so the descent curve can be plotted afterwards.
    """
    w, b = init_w, init_b
    cost_list = []
    for _ in range(num_iter):
        # Record the current cost, then take one step downhill.
        cost_list.append(compute_cost(w, b, points))
        w, b = step(w, b, points, alpha)
    return w, b, cost_list
def step(w, b, points, alpha):
    """Return (w, b) after one gradient-descent update on the MSE cost."""
    M = len(points)
    grad_w_sum = 0
    grad_b_sum = 0
    for row in points:
        px, py = row[0], row[1]
        residual = w * px + b - py
        grad_w_sum += residual * px
        grad_b_sum += residual
    # Gradient of the mean squared error:
    #   d/dw = (2/M) * sum(residual * x),  d/db = (2/M) * sum(residual)
    new_w = w - alpha * (2 * grad_w_sum / M)
    new_b = b - alpha * (2 * grad_b_sum / M)
    return new_w, new_b
5、测试结果
# Run gradient descent, report the results, and plot the fit and the cost curve.
w, b, cost = grad_desc(points, init_w, init_b, alpha, num_iter)
print(f"w-----{w}")
print(f"b-----{b}")
print(f"cost------{cost}")
x = points[:, 0]
y = points[:, 1]
y2 = w * x + b
plt.scatter(x, y)     # raw samples
plt.plot(x, y2, 'r')  # fitted line in red
plt.show()
print(compute_cost(w, b, points))
plt.plot(cost)        # cost recorded at each iteration
plt.show()
112.65585181499746
python库求线性回归
1、读取数据
import numpy as np
import matplotlib.pyplot as plt
# scikit-learn's built-in linear regression model
from sklearn.linear_model import LinearRegression

# Load the samples: column 0 = x, column 1 = y.
points = np.genfromtxt('data.csv', delimiter=',')
x, y = points[:, 0], points[:, 1]
2、定义损失函数
（损失函数 cost_func 与前文相同，定义如下）
# Loss-function view of fitting: the smaller the mean squared error,
# the better the line fits the data.
def cost_func(w, b, points):
    """Return the mean squared error of the line y = w*x + b over `points`."""
    total = 0
    count = 0
    for row in points:
        residual = row[1] - w * row[0] - b
        total += residual ** 2
        count += 1
    return total / count
3、使用系统库求解w,b
# Fit with scikit-learn. Its fit() expects 2-D (n_samples, n_features)
# arrays, so reshape the 1-D columns into single-feature matrices.
lr = LinearRegression()
x_m = x.reshape(-1, 1)
y_m = y.reshape(-1, 1)
lr.fit(x_m, y_m)
# Pull the fitted slope and intercept out of the model.
w = lr.coef_[0]       # slope
b = lr.intercept_[0]  # intercept
4、结果显示
# Plot the sklearn fit over the data and report its mean squared error.
x = points[:, 0]
y = points[:, 1]
y2 = w * x + b
plt.scatter(x, y)     # raw samples
plt.plot(x, y2, 'r')  # fitted line in red
plt.show()
print(cost_func(w, b, points))
[110.25738347]