之后陆续写一些机器学习算法
1. 手写代码实现线性回归
import numpy as np
import matplotlib.pyplot as plt
# Function definitions
def model(a, b, x):
    """Evaluate the straight line ``a * x + b``.

    Args:
        a: Slope of the line.
        b: Intercept of the line.
        x: Point(s) at which to evaluate the line; a scalar or a NumPy
            array (the arithmetic broadcasts element-wise).

    Returns:
        The value ``a * x + b``, with the same shape as ``x``.
    """
    return a * x + b
def cost_function(a, b, x, y):
    """Return half the mean squared residual of the line ``a * x + b``.

    Args:
        a: Slope of the line.
        b: Intercept of the line.
        x: Input samples (NumPy array).
        y: Target values, broadcast-compatible with ``x``.

    Returns:
        ``0.5 / n * sum((y - a * x - b) ** 2)`` where ``n`` is the number of
        residuals.

    Raises:
        ValueError: If there are no samples to average over.
    """
    residuals = np.asarray(y - a * x - b)
    # Take the sample count from the data instead of hard-coding it, so the
    # function is correct for datasets of any size.
    n = residuals.size
    if n == 0:
        raise ValueError("cost_function requires at least one sample")
    return 0.5 / n * np.square(residuals).sum()
def optimize(a, b, x, y, alpha=1e-1):
    """Perform one gradient-descent update of the line parameters.

    Args:
        a: Current slope.
        b: Current intercept.
        x: Input samples (NumPy array).
        y: Target values, broadcast-compatible with ``x``.
        alpha: Step size (learning rate). Defaults to ``1e-1``, the value
            that used to be hard-coded.

    Returns:
        A tuple ``(a, b)`` holding the updated slope and intercept.

    Raises:
        ValueError: If there are no samples to average over.
    """
    y_hat = model(a, b, x)
    error = y_hat - y
    # Take the sample count from the data instead of hard-coding it.
    n = np.size(error)
    if n == 0:
        raise ValueError("optimize requires at least one sample")
    # Averages of error * x and of error: the gradients of the halved mean
    # squared error with respect to a and b.
    da = 1 / n * (error * x).sum()
    db = 1 / n * error.sum()
    a = a - alpha * da
    b = b - alpha * db
    return a, b
# Load the data and set the initial parameter values.
# Programmer salaries, in the order Beijing, Shanghai, Hangzhou, Shenzhen,
# Guangzhou; stored as a 5x1 column and divided by 10000.0.
x = np.array([13854, 12213, 11009, 10655, 9503]).reshape(5, 1) / 10000.0
# Algorithm engineers, in the same city order as above; same layout and
# scaling as x.
y = np.array([21332, 20162, 19138, 18621, 18016]).reshape(5, 1) / 10000.0
# Slope and intercept both start from zero.
a, b = 0, 0
# Main training routine
def iterate(a, b, x, y, times):
    """Run ``times`` gradient-descent updates, then report and plot the fit.

    Args:
        a: Initial slope.
        b: Initial intercept.
        x: Input samples (NumPy array).
        y: Target values, broadcast-compatible with ``x``.
        times: Number of update steps to perform.

    Returns:
        A tuple ``(a, b)`` holding the final slope and intercept.

    Side effects:
        Prints the final ``a``, ``b`` and cost, and draws the data points and
        the fitted line on the current matplotlib figure (without showing it).
    """
    for _ in range(times):
        a, b = optimize(a, b, x, y)

    # NOTE(review): the source's indentation was lost; reporting and plotting
    # are placed after the loop so they run once on the final parameters
    # rather than on every iteration - confirm against the original.
    y_hat = model(a, b, x)
    cost = cost_function(a, b, x, y)
    print(a, b, cost)
    plt.scatter(x, y)
    plt.plot(x, y_hat)
    return a, b
# Run the training: 10000 update steps from the initial a and b, then show
# the figure that iterate() drew.
a, b = iterate(a, b, x, y, 10000)
plt.show()

# Model evaluation: SSR / SST
y_hat = model(a, b, x)
y_bar = np.mean(y)
# Total sum of squares: targets around their mean.
SST = np.sum(np.square(y - y_bar))
# Regression sum of squares: predictions around the target mean.
SSR = np.sum(np.square(y_hat - y_bar))
# Error sum of squares: predictions against the targets.
SSE = np.sum(np.square(y_hat - y))
# Ratio of regression to total sum of squares.
R = SSR / SST
print(R)
2. 调用库实现线性回归
import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
# Same data as in section 1: 5x1 columns divided by 10000.0.
x = [13854, 12213, 11009, 10655, 9503]
x = np.reshape(x, (5, 1)) / 10000.0
y = [21332, 20162, 19138, 18621, 18016]
y = np.reshape(y, (5, 1)) / 10000.0
# Instantiate the library model. (The source had stray editor text pasted
# into the middle of the class name, which left only an undefined name.)
LR = LinearRegression()
# Train the model
LR.fit(x, y)
y_hat = LR.predict(x)
# Plot the data points and the fitted line
plt.scatter(x, y)
plt.plot(x, y_hat)
plt.show()
# Print the model's evaluation score on the training data
print(LR.score(x, y))
3. 正规方程实现线性回归
import numpy as np
import matplotlib.pyplot as plt
def model(a, b, x):
    """Evaluate the straight line ``a * x + b``.

    Args:
        a: Slope of the line.
        b: Intercept of the line.
        x: Point(s) at which to evaluate the line; a scalar or a NumPy
            array (the arithmetic broadcasts element-wise).

    Returns:
        The value ``a * x + b``, with the same shape as ``x``.
    """
    return a * x + b
x = [13854, 12213, 11009, 10655, 9503]
y = [21332, 20162, 19138, 18621, 18016]
# Convert the data into column form: one sample per row.
x = np.reshape(x, (-1, 1))
y = np.reshape(y, (-1, 1))
# Nested-list copies ([[v], [v], ...]) of both columns.
x1 = x.tolist()
y1 = y.tolist()
print(x1)
print(y1)
# Plot the raw data as black squares.
plt.plot(x1, y, 'ks')
# Prepend a constant 1 to every row so the first coefficient acts as the
# intercept.
for row in x1:
    row.insert(0, 1)
# Plain float ndarrays are used instead of np.mat, which was removed in
# NumPy 2.0.
x = np.array(x1, dtype=float)
y = np.array(y1, dtype=float)
# Solve the normal equation (X^T X) theta = X^T y. Solving the linear system
# directly avoids forming the explicit inverse.
end = np.linalg.solve(x.T @ x, x.T @ y)
print(end)
# Nested list [[intercept], [slope]].
end = end.tolist()
x2 = [13854, 12213, 11009, 10655, 9503]
x2 = np.reshape(x2, (-1, 1))
# Pass the slope and intercept as scalars rather than one-element lists.
y_hat = model(end[1][0], end[0][0], x2)
plt.plot(x2, y_hat)
plt.show()