#coding:utf-8
from math import sqrt
import matplotlib.pyplot as plt
# Toy training data: rows of [x, y] observations.
dataset = [[1.2, 1.1], [2.4, 3.5], [4.1, 3.2], [3.4, 2.8], [5, 5.4]]
x = [point[0] for point in dataset]
y = [point[1] for point in dataset]
pre_y = list()  # predicted y values, collected later for plotting
# Arithmetic mean.
def mean(values):
    """Return the average of `values` as a float."""
    total = sum(values)
    return total / float(len(values))
# Unnormalized covariance between x and y (sum of cross-deviations).
def covariance(x, mean_x, y, mean_y):
    """Return sum((x[i]-mean_x)*(y[i]-mean_y)) over all i."""
    total = 0
    for i, xi in enumerate(x):
        total += (xi - mean_x) * (y[i] - mean_y)
    return total
# Unnormalized variance (sum of squared deviations, not divided by n).
def variance(values, mean):
    """Return sum((v - mean)**2) for v in `values`."""
    total = 0
    for v in values:
        total += (v - mean) ** 2
    return total
# Estimate the least-squares regression coefficients.
def coefficients(dataset):
    """Return (w0, w1): intercept and slope of the fitted line.

    NOTE(review): `dataset` is accepted but ignored — the means are taken
    from the module-level x/y lists, so this only fits the global data.
    Kept as-is to preserve behavior.
    """
    x_mean, y_mean = mean(x), mean(y)
    # Closed-form solution: slope = cov(x, y) / var(x)
    w1 = covariance(x, x_mean, y, y_mean) / variance(x, x_mean)
    # Intercept: the fitted line passes through the point of means.
    w0 = y_mean - w1 * x_mean
    return (w0, w1)
# Root-mean-square error between actual and predicted values.
def rmse_metric(actual, predicted):
    """Return sqrt(mean((predicted[i] - actual[i])**2))."""
    total = 0.0
    for i, a in enumerate(actual):
        total += (predicted[i] - a) ** 2
    return sqrt(total / float(len(actual)))
# Fit a simple linear regression on `train` and predict for `test`.
def simple_linear_regression(train, test):
    """Return a list of w1 * x + w0 predictions, one per row of `test`."""
    w0, w1 = coefficients(train)
    # Each test row contributes one prediction from its first column.
    return [w1 * row[0] + w0 for row in test]
# Prepare evaluation data, run the algorithm, and score it with RMSE.
def evaluate_algorithm(dataset, alogrithm):
    """Run `alogrithm(dataset, test_set)` and return the RMSE of its predictions.

    The test set is a copy of `dataset` with the target (last column)
    masked to None so the model cannot peek at it.
    NOTE(review): parameter name `alogrithm` is a typo for `algorithm`;
    kept unchanged so keyword callers are not broken.
    """
    # Copy each row and hide its target value.
    test_set = [list(row)[:-1] + [None] for row in dataset]
    predicted = alogrithm(dataset, test_set)
    for val in predicted:
        pre_y.append(val)  # stash predictions in the module-level list for plotting
        print('%.3f\t' % (val))
    actual = [row[-1] for row in dataset]
    return rmse_metric(actual, predicted)
# Evaluate the model and report its RMSE.
rmse = evaluate_algorithm(dataset, simple_linear_regression)
print('RMSE:%.3f' % (rmse))
# Scatter plot of the raw data together with the fitted regression line.
x = [point[0] for point in dataset]
y = [point[1] for point in dataset]
print(len(pre_y))
plt.axis([0, 6, 0, 6])
plt.plot(x, y, 'bs')      # raw samples as blue squares
plt.plot(x, pre_y, 'ro')  # predictions as red dots
plt.plot(x, pre_y, 'r-')  # line through the predictions
plt.grid()
plt.show()
# 【结果】 (Result)