1.1. Linear Models — scikit-learn 1.0.2 documentation
Polynomial regression: in effect it expands the feature space with higher-degree feature combinations, then fits a linear model on the expanded features.
>>> from sklearn.preprocessing import PolynomialFeatures
>>> import numpy as np
>>> X = np.arange(6).reshape(3, 2)
>>> X
array([[0, 1],
[2, 3],
[4, 5]])
>>> poly = PolynomialFeatures(degree=2)
>>> poly.fit_transform(X)
array([[ 1., 0., 1., 0., 0., 1.],
[ 1., 2., 3., 4., 6., 9.],
[ 1., 4., 5., 16., 20., 25.]])
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# 文件名: polynomial_regression.py
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
# Fit polynomial regressions of increasing degree to noisy quadratic data and
# plot training/test MSE against the degree to visualise under-/over-fitting.
m = 100
X = 6 * np.random.rand(m, 1) - 3                     # 100 samples uniform on [-3, 3)
y = 0.5 * X ** 2 + X + 2 + np.random.randn(m, 1)     # quadratic target + Gaussian noise

# Simple 80/20 train/test split (rows are already in random order).
X_train, X_test = X[:80], X[80:]
y_train, y_test = y[:80], y[80:]

for degree in range(1, 20):
    poly_features = PolynomialFeatures(degree=degree, include_bias=True)
    X_poly_train = poly_features.fit_transform(X_train)
    # Bug fix: use transform (not fit_transform) on the test set so the
    # feature expansion fitted on the training data is reused, never refitted
    # on test data.
    X_poly_test = poly_features.transform(X_test)

    # include_bias=True already adds the constant (bias) column, so the linear
    # model must not fit a second intercept.
    lin_reg = LinearRegression(fit_intercept=False)
    lin_reg.fit(X_poly_train, y_train)

    y_train_predict = lin_reg.predict(X_poly_train)
    y_test_predict = lin_reg.predict(X_poly_test)

    # Red '+' = training error, blue '*' = test error, one point per degree.
    plt.plot(degree, mean_squared_error(y_train, y_train_predict), 'r+')
    plt.plot(degree, mean_squared_error(y_test, y_test_predict), 'b*')
plt.show()