【Task4 Linear Regression(多变量)】(2天)
- 波士顿房产数据(完整数据)
- 实现多变量(手写代码)
import numpy as np

# Design matrix: one row per sample, columns are [bias (always 1), feature 1, feature 2].
# np.matrix is used (instead of the np.mat alias, which NumPy 2.0 removed) so that
# `*` means matrix multiplication and `.I` is the matrix inverse, as in the notes.
x = np.matrix([[1, 10, 1], [1, 20, 1], [1, 30, 1], [1, 30, 2], [1, 70, 3], [1, 70, 2]])

# Targets as a column vector (one row per sample).
y = np.matrix([0.8, 1.2, 2.2, 2.5, 5.5, 5.2]).T

# Initial parameter guess as a column vector [th0, th1, th2]^T.
th = np.matrix([0.1, 0.2, 0.3]).T

# Gradient-descent step size.
lr = 0.0001


def h(th0, th1, th2, x):
    """Return the linear hypothesis th0 + th1*x[1] + th2*x[2] for one sample.

    `x` is a single sample indexed as [bias, feature 1, feature 2]; x[0] is
    not read because the bias weight th0 is added directly.
    """
    return th0 + th1 * x[1] + th2 * x[2]


def y_th0(th, x, y):
    """Return the sum of residuals (x*th - y), the th0 term of the gradient."""
    return np.sum(x * th - y)


def y_th1(th, x, y):
    """Return the residuals weighted by feature column 1, the th1 term of the gradient."""
    # x.T[1] is the row vector holding feature 1 of every sample.
    return np.sum(x.T[1] * (x * th - y))


def y_th2(th, x, y):
    """Return the residuals weighted by feature column 2, the th2 term of the gradient."""
    return np.sum(x.T[2] * (x * th - y))


def gradient_step(th, x, y, lr):
    """Return theta after one simultaneous gradient-descent update.

    All three partial derivatives are evaluated with the *old* theta before
    any component is replaced, exactly as the original loop body did.
    """
    # th[i, 0] extracts the scalar directly; float() on a 1x1 matrix is
    # deprecated in recent NumPy releases.
    th0 = th[0, 0] - lr * y_th0(th, x, y)
    th1 = th[1, 0] - lr * y_th1(th, x, y)
    th2 = th[2, 0] - lr * y_th2(th, x, y)
    return np.matrix([[th0], [th1], [th2]])


def normal_equation(x, y):
    """Return the closed-form least-squares solution (x^T x)^-1 x^T y."""
    return (x.T * x).I * x.T * y


# The demo runs only when the file is executed (script or notebook cell), so
# importing the helpers above does not trigger 100000 iterations.
if __name__ == "__main__":
    print(x)
    print(x[1])
    print(x.T[1])  # take a single feature across all samples
    print(h(1, 1, 2, [1, 10, 1]))
    print(y_th0(th, x, y))
    print(y_th1(th, x, y))
    y_th2(th, x, y)  # notebook-style echo; the value is discarded in a script

    for _ in range(100000):
        th = gradient_step(th, x, y, lr)
    print(th)

    # Closed-form solution for comparison with the iterative result.
    th = normal_equation(x, y)
th
- 数据标准化(手写代码)
# Fit the scaler on the training split. StandardScaler.fit takes the feature
# matrix and an optional, ignored y; the original `x_train.y_train` was an
# attribute access on x_train, presumably a typo for two arguments.
ss=StandardScaler()
ss.fit(x_train, y_train)
- 网格搜索调参
5. 与 `from sklearn.linear_model import LinearRegression` 对比
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import warnings
import sklearn
from sklearn.linear_model import LinearRegression, LassoCV, RidgeCV, ElasticNetCV
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
# ConvergenceWarning is exported from sklearn.exceptions; the private module
# sklearn.linear_model.coordinate_descent no longer exists in current releases.
from sklearn.exceptions import ConvergenceWarning
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn import metrics

# Column labels for the 14 columns: 13 features followed by the target.
# NOTE(review): these are the standard Boston-housing column names; the
# original notes never define `names` -- confirm against the data file.
names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS',
         'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']

# Raw string so the backslashes in the Windows path are kept literally.
path = r'E:\AI10\housing.data'

# NOTE(review): the UCI housing.data file is whitespace-delimited, so a regex
# separator is used; with the default comma separator every row would land in
# a single column. Confirm for this copy of the file.
fd = pd.read_csv(path, header=None, names=names, sep=r'\s+')
fd.head()

# Plain ndarray view of the table; the original used `data` without defining it.
data = fd.values

# First 13 columns are the features, the last column is the target.
x, y = np.split(data, (13,), axis=1)
print(x[0:5])
y = y.ravel()

# The original fits on x_train / y_train without ever creating them.
# NOTE(review): split ratio and seed are not given in the notes; these values
# are chosen only to make the run reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0)

# One display title per pipeline in `models`, in the same order.
titles = ['Ridge']
models = [
    Pipeline([
        ('ss', StandardScaler()),
        ('poly', PolynomialFeatures()),
        ('linear', RidgeCV(alphas=np.logspace(-3, 1, 20)))
    ])
]

# Grid keys follow the Pipeline convention "<step name>__<parameter>".
parameters = {
    "linear__fit_intercept": [True, False]
}

for title, pipeline in zip(titles, models):
    model = GridSearchCV(pipeline, param_grid=parameters, cv=5, n_jobs=1)
    model.fit(x_train, y_train)
    print("%s算法:最优参数:" % title, model.best_params_)
    print("%s算法:R值=%.3f" % (title, model.best_score_))