最近在学习机器学习的一些算法,从最开始的线性回归开始。线性回归其实就是用一条直线来模拟你的数据,并且让所产生的误差尽可能地小。
#coding=utf-8
import random
import numpy as np
from matplotlib import pyplot as pp
random.seed(1)  # fix the RNG seed so the synthetic data set is reproducible

n_samples = 10000  # number of synthetic houses

# Synthetic features: house size, distance from the city center, and floor.
house_size = [random.randrange(70, 200) for _ in range(n_samples)]
distance_from_citycenter = [random.randrange(1, 30) for _ in range(n_samples)]
floor = [random.randrange(1, 20) for _ in range(n_samples)]

# Generate the "known" house prices from a linear model plus uniform noise,
# so gradient descent below has true weights to recover.
# NOTE: randrange() requires integer bounds in Python 3 -- the original float
# bounds (5e4, 10e4, 1e6) raise ValueError, so integer literals are used here.
house_price = []
for i in range(n_samples):
    price = (house_size[i] * random.randrange(50000, 100000)
             + distance_from_citycenter[i] * (-1e4)
             + floor[i] * 1e4
             + random.randrange(1, 1000000))
    house_price.append(price)

# Design matrix: each row is [1 (bias term), size, distance, floor].
x = [[1, house_size[i], distance_from_citycenter[i], floor[i]]
     for i in range(n_samples)]
x_matrix = np.array(x)

# Targets as a column vector of shape (n_samples, 1).
y_matrix = np.array(house_price).reshape(-1, 1)

# Parameters (bias + 3 weights) initialised to zero, as a (4, 1) column.
theta_matrix = np.zeros((4, 1))
def cost_function(x, theta, y):
    """Mean-squared-error cost: J(theta) = sum((x @ theta - y)^2) / (2m).

    x is the (m, k) design matrix, theta a (k, 1) parameter column,
    y the (m, 1) target column; returns a scalar cost.
    """
    residual = x.dot(theta) - y
    return np.sum(residual ** 2) / (2 * len(y))
def gradient(x, theta, y):
    """Gradient of the MSE cost w.r.t. theta: (1/m) * x^T (x @ theta - y).

    Returns a column vector with the same shape as theta; used by the
    gradient-descent loop to find the best-fitting parameters.
    """
    residual = x.dot(theta) - y
    return x.T.dot(residual) * (1 / len(y))
# In-place `-=` below requires a float dtype for the parameter vector.
theta_matrix = theta_matrix.astype("float64")

num_iterations = 10000   # number of gradient-descent steps
learning_rate = 0.00001  # step size

# Plain batch gradient descent: step opposite the gradient each iteration.
for step in range(num_iterations):
    theta_matrix -= learning_rate * gradient(x_matrix, theta_matrix, y_matrix)
    # Uncomment to watch the cost shrink every 20 steps:
    # if step % 20 == 0:
    #     print(cost_function(x_matrix, theta_matrix, y_matrix))

print(theta_matrix)