# 代码实现 (code implementation)
import numpy as np
from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV, ElasticNetCV
from sklearn.preprocessing import PolynomialFeatures
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
# 处理warning
from sklearn.exceptions import ConvergenceWarning
import matplotlib as mpl
import warnings
def xss(y, y_hat):
    """Compute R^2 and the correlation between targets and predictions.

    Both inputs are flattened to 1-D before any arithmetic, so column
    vectors, row vectors and plain sequences are all accepted.

    Side effect: appends the total, residual and explained sums of squares
    (and ``rss + ess``) to the module-level lists ``tss_list``, ``rss_list``,
    ``ess_list`` and ``ess_rss_list``. Those lists are not created here and
    must already exist when this function is called.

    Args:
        y: Observed target values (array-like).
        y_hat: Predicted values (array-like) with the same number of
            elements as ``y``.

    Returns:
        A tuple ``(r2, corr_coef)`` where ``r2`` is ``1 - rss / tss`` and
        ``corr_coef`` is the off-diagonal entry of ``np.corrcoef(y, y_hat)``.

    Note:
        The division by ``tss`` is not guarded; a constant ``y`` gives
        ``tss == 0``.
    """
    # np.asarray is a no-op for ndarrays and lets lists/tuples work as well.
    y = np.asarray(y).ravel()
    y_hat = np.asarray(y_hat).ravel()
    y_mean = np.average(y)

    tss = ((y - y_mean) ** 2).sum()      # total sum of squares
    rss = ((y_hat - y) ** 2).sum()       # residual sum of squares
    ess = ((y_hat - y_mean) ** 2).sum()  # explained sum of squares
    # Equivalent to 1 - mean((y_hat - y) ** 2) / var(y).
    r2 = 1 - rss / tss

    # Record the decomposition so the caller can compare TSS with RSS + ESS.
    tss_list.append(tss)
    rss_list.append(rss)
    ess_list.append(ess)
    ess_rss_list.append(rss + ess)

    corr_coef = np.corrcoef(y, y_hat)[0, 1]
    return r2, corr_coef
if __name__ == "__main__":
# 设置将某类别的警告忽视
warnings.filterwarnings(action='ignore', category=ConvergenceWarning)
# 设置随机数种子,在试验模型中使用,但在工作中很少使用
np.random.seed(0)
# 设置输出样式-参数linewidth设置显示宽度
np.set_printoptions(linewidth=1000)
# 设置9个点
N = 9
# 设置x,0-6等间隔的数加上高斯噪声
x = np.linspace(0, 6, N) + np.random.randn(N)
x = np.sort(x)
# 设置y
y = x**2 - 4*x - 3 + np.random.randn(N)
# 设置成列向量,将行变成列
x.shape = -1, 1
y.shape = -1, 1
models = [Pipeline([
# PolynomialFeatures用来生成关于x的矩阵,其中degree表示多项式的次数,include_bias默认为True表示会包含1
('poly', PolynomialFeatures()),
# fit_intercept = False表示不去计算截距项,避免与上面的include_bias=True重复
('linear', LinearRegression(fit_intercept=False))]),
Pipeline([
('poly', PolynomialFeatures()),
# RidgeCV封装了网格搜索调优,相当于RidgeCV + GridsearchCV,alphas表示正则项
('linear', RidgeCV(alphas=np.logspace(-3, 2, 50), fit_intercept=False))]),
Pipeline([
('poly', PolynomialFeatures()),