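根据一元线性回归 y = b0 + b1·x 的最小二乘公式,手动计算截距 b0 和斜率 b1:
$b_1 = \dfrac{n\sum x_i y_i - \sum x_i \sum y_i}{n\sum x_i^2 - \left(\sum x_i\right)^2}$,$b_0 = \dfrac{\sum x_i^2 \sum y_i - \sum x_i \sum x_i y_i}{n\sum x_i^2 - \left(\sum x_i\right)^2}$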
#coding=utf-8
import pandas as pd
from pandas import Series,DataFrame
import random
import numpy as np
import warnings
warnings.filterwarnings("ignore")
# Training data: six x levels (2 through 7), each observed four times,
# with y_train listing the four responses for each level in the same order.
x_train = [level for level in range(2, 8) for _ in range(4)]
y_train = [
    5.6, 4.8, 5.3, 5.7,  # x = 2
    6.2, 5.9, 6.4, 6.1,  # x = 3
    6.2, 6.7, 6.4, 6.7,  # x = 4
    7.1, 7.3, 6.9, 6.9,  # x = 5
    7.2, 7.5, 7.8, 7.8,  # x = 6
    8.9, 9.2, 8.5, 8.7,  # x = 7
]
def GetParam(x, y, out):
    """Fit y = b0 + b1 * x by least squares using the closed-form sums.

    The coefficients are written into ``out`` in place: ``out[0]`` receives
    the intercept b0 and ``out[1]`` receives the slope b1. Nothing is
    returned.

    Args:
        x: Sequence of predictor values.
        y: Sequence of response values, same length as ``x``.
        out: Mutable sequence with at least two slots for the result.

    Raises:
        ValueError: If ``x`` and ``y`` differ in length.
        ZeroDivisionError: If n * sum(x^2) - sum(x)^2 evaluates to zero,
            e.g. for empty input or when every x value is the same.
    """
    n = len(x)
    if len(y) != n:
        # zip() would silently truncate while the plain sums use every
        # element, which would mix inconsistent totals into the formula.
        raise ValueError("x and y must have the same length")

    # All sums are built from the x and y arguments so that any data set
    # passed by the caller is the one actually fitted.
    sum_x = sum(x)
    sum_y = sum(y)
    sum_xx = sum(v * v for v in x)
    sum_xy = sum(u * v for u, v in zip(x, y))

    # Shared denominator of both closed-form expressions.
    denom = n * sum_xx - sum_x * sum_x

    out[0] = (sum_xx * sum_y - sum_x * sum_xy) / denom  # intercept b0
    out[1] = (n * sum_xy - sum_x * sum_y) / denom  # slope b1
# Fit the line on the training data; GetParam fills the two-slot list
# in place with [intercept, slope], which is then printed.
Param = [0] * 2
GetParam(x_train, y_train, Param)
print(Param)
最终打印信息(依次为截距和斜率):
[4.05404761904763, 0.6342857142857112]
使用sklearn验证
from sklearn import datasets, linear_model

# Cross-check the hand-computed coefficients with scikit-learn.
# fit() is given the features as a 2-D array, so the flat x list is
# turned into a single-column matrix (one row per sample).
regr = linear_model.LinearRegression()
t = np.asarray(x_train).reshape(-1, 1)
regr.fit(t, y_train)  # train model
print(regr.intercept_)
print(regr.coef_)
最终结果:
4.05404761905
[ 0.63428571]
可见,手动计算得到的截距和斜率与 sklearn 的结果一致,说明自己的运算正确!