# -*- coding:utf-8 -*-
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
def runplt(x=1):
    """Create (or select) matplotlib figure number *x* and return the
    configured ``pyplot`` module.

    Every plot in this script shares the same setup: a CJK-capable font,
    a Chinese title, labelled axes, fixed [0, 25] limits and a grid.

    :param x: figure number passed to ``plt.figure`` (default 1).
    :return: the ``matplotlib.pyplot`` module, ready for plotting calls.
    """
    plt.figure(x)
    # Matplotlib's default fonts have no CJK glyphs; switch to SimHei so
    # the Chinese title and labels render instead of empty boxes.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    # SimHei lacks the Unicode minus glyph; fall back to an ASCII '-'.
    plt.rcParams['axes.unicode_minus'] = False
    # --------------------------------------------
    plt.title(r'披萨的尺寸价格关系曲线')  # title: pizza size vs. price
    plt.xlabel(u'尺寸(diameter)')  # x axis
    plt.ylabel(u'价格(cost)')  # y axis
    plt.axis([0, 25, 0, 25])  # fixed tick range for both axes
    plt.grid(True)  # show grid
    return plt
# Figure 1: raw pizza data (diameter vs. price) and a first linear fit.
plt = runplt(1)
X = [[1], [8], [10], [14], [18]]    # diameters, one feature per sample
y = [[7], [9], [13], [17.5], [18]]  # prices
# plt.plot(x, y, format_string, **kwargs)
# x data, y data; format_string controls the curve style and is a colour
# character, a line-style character and a marker character concatenated.
plt.plot(X, y, 'r:.')
# plt.show()
# Create and fit the model.
model = LinearRegression()  # ordinary least-squares linear regression
model.fit(X, y)  # train
# y was 2-D, so predict() returns a (1, 1) array; index twice to get a
# scalar — '%'-formatting a 1-element ndarray is rejected by recent NumPy.
print('预测一张12英寸匹萨价格:$%.2f' % model.predict(np.array([12]).reshape(-1, 1))[0][0])  # predict
# Figure 2: the training data with the fitted regression line drawn
# across the full axis range.
plt = runplt(2)
plt.plot(X, y, 'r:.')
model = LinearRegression()  # fresh single-feature linear regressor
model.fit(X, y)
# Two x values suffice to draw a straight line across the axes.
line_x = [[0], [25]]
line_y = model.predict(line_x)  # predicted prices (note: predict_proba would be class probabilities)
plt.plot(X, y, 'k.')
plt.plot(line_x, line_y, 'g-')
plt.text(10, 21, r'一元线性回归', fontdict={'size': 16, 'color': 'g'})
# plt.show()
# Figure 3: residuals — a vertical segment from each observation down
# (or up) to the fitted regression line.
plt = runplt(3)
yr = model.predict(X)  # fitted values at the training diameters
for idx, x in enumerate(X):
    # Draw the error bar between the true price and the prediction.
    plt.plot([x, x], [y[idx], yr[idx]], 'r-')
plt.text(10, 21, r'预测残差值', fontdict={'size': 16, 'color': 'g'})
# plt.show()
# Mean of the squared residuals (training-set MSE).
print('残差平方和: %.2f' % np.mean((model.predict(X) - y) ** 2))
# Multiple linear regression: two features per pizza — diameter and
# number of toppings.
X = [[6, 2], [8, 1], [10, 0], [14, 2], [18, 0]]
y = [[7], [9], [13], [17.5], [18]]  # redefine the training targets
model = LinearRegression()  # new model for the two-feature data
model.fit(X, y)  # train
X_test = [[8, 2], [9, 0], [11, 2], [16, 2], [12, 0]]
y_test = [[11], [8.5], [15], [18], [11]]
predictions = model.predict(X_test)  # point predictions on the held-out set
print(predictions)
for i, prediction in enumerate(predictions):
    # NOTE(review): the message says "probability" (概率) but these are
    # point predictions from a regressor, not probabilities.
    print('目标为%s的概率(Predicted): %s ' % (y_test[i], prediction))
print('模型拟合水平(R-squared): %.2f' % model.score(X_test, y_test))
# Figure 4: compare a straight-line fit with a quadratic fit on a fresh
# train/test split of the single-feature pizza data.
X_train = [[6], [8], [10], [14], [18]]
y_train = [[7], [9], [13], [17.5], [18]]
X_test = [[6], [8], [11], [16]]
y_test = [[8], [12], [15], [18]]
# Fit the plain linear model and plot it over a dense diameter grid.
regressor = LinearRegression()
regressor.fit(X_train, y_train)
xx = np.linspace(0, 26, 100)  # evenly spaced diameters for the curves
xx_col = xx.reshape(-1, 1)    # as a single-feature column matrix
yy = regressor.predict(xx_col)  # straight-line predictions
plt = runplt(4)
plt.plot(X_train, y_train, 'k.')  # training points
plt.plot(xx, yy, 'b-')            # linear fit
# Quadratic model: expand each diameter d into [1, d, d^2] first.
quadratic_featurizer = PolynomialFeatures(degree=2)
X_train_quadratic = quadratic_featurizer.fit_transform(X_train)
X_test_quadratic = quadratic_featurizer.transform(X_test)
regressor_quadratic = LinearRegression()
regressor_quadratic.fit(X_train_quadratic, y_train)
xx_quadratic = quadratic_featurizer.transform(xx_col)
plt.plot(xx, regressor_quadratic.predict(xx_quadratic), 'r-')  # quadratic fit
plt.text(10, 21, r'1-2次线性回归训练模型绘图', fontdict={'size': 16, 'color': 'g'})
# plt.show()
print(X_train)
print(X_train_quadratic)
print(X_test)
print(X_test_quadratic)
print('一次拟合水平1 r-squared:', regressor.score(X_test, y_test))
print('二次拟合水平2 r-squared:', regressor_quadratic.score(X_test_quadratic, y_test))
# Figure 5: quadratic vs. cubic polynomial regression on the same split.
plt = runplt(5)
plt.plot(X_train, y_train, 'k.')

def _poly_model(degree):
    """Fit LinearRegression on degree-`degree` polynomial features.

    Returns (train design matrix, test design matrix, fitted model,
    design matrix for the dense plotting grid ``xx``).
    """
    featurizer = PolynomialFeatures(degree=degree)
    train_mat = featurizer.fit_transform(X_train)
    test_mat = featurizer.transform(X_test)
    reg = LinearRegression()
    reg.fit(train_mat, y_train)
    grid_mat = featurizer.transform(xx.reshape(xx.shape[0], 1))
    return train_mat, test_mat, reg, grid_mat

X_train_quadratic, X_test_quadratic, regressor_quadratic, xx_quadratic = _poly_model(2)
plt.plot(xx, regressor_quadratic.predict(xx_quadratic), 'r-')
X_train_cubic, X_test_cubic, regressor_cubic, xx_cubic = _poly_model(3)
plt.plot(xx, regressor_cubic.predict(xx_cubic))
plt.text(10, 21, r'2-3元线性回归训练模型绘图', fontdict={'size': 16, 'color': 'g'})
print(X_train_cubic)
print(X_test_cubic)
print('二次拟合水平2 r-squared:', regressor_quadratic.score(X_test_quadratic, y_test))
print('三次拟合水平3 r-squared:', regressor_cubic.score(X_test_cubic, y_test))
# Figure 6: compare the quadratic fit against a degree-7 polynomial,
# which has enough capacity to overfit the five training points.
plt = runplt(6)
plt.plot(X_train, y_train, 'k.')

def _fit_degree(degree):
    """Fit LinearRegression on polynomial features of the given degree;
    return (train design, test design, fitted model, plotting-grid design)."""
    feats = PolynomialFeatures(degree=degree)
    train_design = feats.fit_transform(X_train)
    test_design = feats.transform(X_test)
    fitted = LinearRegression()
    fitted.fit(train_design, y_train)
    return train_design, test_design, fitted, feats.transform(xx.reshape(xx.shape[0], 1))

X_train_quadratic, X_test_quadratic, regressor_quadratic, xx_quadratic = _fit_degree(2)
plt.plot(xx, regressor_quadratic.predict(xx_quadratic), 'r-')
X_train_seventh, X_test_seventh, regressor_seventh, xx_seventh = _fit_degree(7)
plt.plot(xx, regressor_seventh.predict(xx_seventh))
plt.text(10, 21, r'2-7元线性回归训练模型绘图', fontdict={'size': 16, 'color': 'g'})
print('2 r-squared', regressor_quadratic.score(X_test_quadratic, y_test))
print('七次拟合水平7 r-squared', regressor_seventh.score(X_test_seventh, y_test))
# Figure 7: overlay the fits of every polynomial degree from 0 through 9
# to visualise the progression from under- to over-fitting on one axes.
plt = runplt(7)
plt.plot(X_train, y_train, 'ko:')
for x in range(10):
    featurizer = PolynomialFeatures(degree=x)
    X_train_1to10 = featurizer.fit_transform(X_train)
    regressor_1to10 = LinearRegression()
    regressor_1to10.fit(X_train_1to10, y_train)
    xx_1to10 = featurizer.transform(xx.reshape(xx.shape[0], 1))
    # Label each curve with its degree so the legend identifies it.
    plt.plot(xx, regressor_1to10.predict(xx_1to10), label=str(x))
# Anchor the legend just outside the top-right corner of the axes.
plt.legend(loc='best', bbox_to_anchor=(1.1, 1.0))
plt.show()
# sklearn的线性回归和matplotlib的pyplot一起学
# 最新推荐文章于 2023-06-01 16:04:19 发布