1. Simple Linear Regression
Solves regression problems
Simple idea, easy to implement
The foundation of many powerful nonlinear models
Results are highly interpretable
Embodies many important ideas in machine learning
2. The Least Squares Method
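As a quick summary of the derivation (my own notation here: m samples, with x̄ and ȳ denoting the means that appear as x_mean and y_mean in the code below), the model for a single feature and its least-squares solution are:

\[
\hat{y}^{(i)} = a x^{(i)} + b
\]

Least squares chooses a and b to minimize the sum of squared residuals

\[
J(a, b) = \sum_{i=1}^{m} \left( y^{(i)} - a x^{(i)} - b \right)^{2}
\]

Setting the partial derivatives with respect to a and b to zero gives the closed-form solution

\[
a = \frac{\sum_{i=1}^{m} \left( x^{(i)} - \bar{x} \right)\left( y^{(i)} - \bar{y} \right)}{\sum_{i=1}^{m} \left( x^{(i)} - \bar{x} \right)^{2}},
\qquad
b = \bar{y} - a \bar{x}
\]

These two formulas are exactly what a_ and b_ compute in the implementation below.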
3. Implementing Simple Linear Regression
SimpleLinearRegression.py
import numpy as np


class SimpleLinearRegression1:

    def __init__(self):
        """Initialize the Simple Linear Regression model"""
        self.a_ = None
        self.b_ = None

    def fit(self, x_train, y_train):
        """Train the Simple Linear Regression model on the training data x_train, y_train"""
        assert x_train.ndim == 1, \
            "Simple Linear Regressor can only solve single feature training data."
        assert len(x_train) == len(y_train), \
            "the size of x_train must be equal to the size of y_train"

        x_mean = np.mean(x_train)
        y_mean = np.mean(y_train)

        num = 0.0
        d = 0.0
        for x, y in zip(x_train, y_train):
            num += (x - x_mean) * (y - y_mean)
            d += (x - x_mean) ** 2

        self.a_ = num / d
        self.b_ = y_mean - self.a_ * x_mean

        return self

    def predict(self, x_predict):
        """Given a dataset x_predict, return the vector of predictions for x_predict"""
        assert x_predict.ndim == 1, \
            "Simple Linear Regressor can only solve single feature training data."
        assert self.a_ is not None and self.b_ is not None, \
            "must fit before predict!"

        return np.array([self._predict(x) for x in x_predict])

    def _predict(self, x_single):
        """Given a single datum x_single, return its predicted value"""
        return self.a_ * x_single + self.b_

    def __repr__(self):
        return "SimpleLinearRegression1()"
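A minimal usage sketch. The toy arrays x and y and the test point 6.0 are made up for illustration; it assumes the class above is saved as SimpleLinearRegression.py in the working directory.

import numpy as np
from SimpleLinearRegression import SimpleLinearRegression1

# Toy data, for illustration only
x = np.array([1., 2., 3., 4., 5.])
y = np.array([1., 3., 2., 3., 5.])

reg1 = SimpleLinearRegression1()
reg1.fit(x, y)

print(reg1.a_, reg1.b_)               # slope and intercept from least squares: 0.8 and 0.4
print(reg1.predict(np.array([6.])))   # predicted value for x = 6: [5.2]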
4. Vectorization
SimpleLinearRegression.py
class SimpleLinearRegression2:

    def __init__(self):
        """Initialize the Simple Linear Regression model"""
        self.a_ = None
        self.b_ = None

    def fit(self, x_train, y_train):
        """Train the Simple Linear Regression model on the training data x_train, y_train"""
        assert x_train.ndim == 1, \
            "Simple Linear Regressor can only solve single feature training data."
        assert len(x_train) == len(y_train), \
            "the size of x_train must be equal to the size of y_train"

        x_mean = np.mean(x_train)
        y_mean = np.mean(y_train)

        # Vectorized: replace the for loop with two dot products
        self.a_ = (x_train - x_mean).dot(y_train - y_mean) / (x_train - x_mean).dot(x_train - x_mean)
        self.b_ = y_mean - self.a_ * x_mean

        return self

    def predict(self, x_predict):
        """Given a dataset x_predict, return the vector of predictions for x_predict"""
        assert x_predict.ndim == 1, \
            "Simple Linear Regressor can only solve single feature training data."
        assert self.a_ is not None and self.b_ is not None, \
            "must fit before predict!"

        return np.array([self._predict(x) for x in x_predict])

    def _predict(self, x_single):
        """Given a single datum x_single, return its predicted value"""
        return self.a_ * x_single + self.b_

    def __repr__(self):
        return "SimpleLinearRegression2()"
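To see why vectorization matters, here is a rough comparison sketch. The sample size m, the synthetic data, and the use of timeit are my own choices for illustration, not part of the notes.

import numpy as np
from timeit import timeit
from SimpleLinearRegression import SimpleLinearRegression1, SimpleLinearRegression2

# Larger random data to make the speed difference visible
m = 1000000
big_x = np.random.random(size=m)
big_y = big_x * 2.0 + 3.0 + np.random.normal(size=m)

reg1 = SimpleLinearRegression1()
reg2 = SimpleLinearRegression2()

print(timeit(lambda: reg1.fit(big_x, big_y), number=1))  # loop version
print(timeit(lambda: reg2.fit(big_x, big_y), number=1))  # vectorized version, much faster

# Both versions recover (almost) the same parameters
print(np.isclose(reg1.a_, reg2.a_), np.isclose(reg1.b_, reg2.b_))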
5. Metrics for Evaluating Linear Regression: MSE, RMSE, and MAE
metrics.py
import numpy as np
from math import sqrt


def accuracy_score(y_true, y_predict):
    """Compute the classification accuracy between y_true and y_predict"""
    assert len(y_true) == len(y_predict), \
        "the size of y_true must be equal to the size of y_predict"
    return np.sum(y_true == y_predict) / len(y_true)


def mean_squared_error(y_true, y_predict):
    """Compute the MSE between y_true and y_predict"""
    assert len(y_true) == len(y_predict), \
        "the size of y_true must be equal to the size of y_predict"
    return np.sum((y_true - y_predict) ** 2) / len(y_true)


def root_mean_squared_error(y_true, y_predict):
    """Compute the RMSE between y_true and y_predict"""
    return sqrt(mean_squared_error(y_true, y_predict))


def mean_absolute_error(y_true, y_predict):
    """Compute the MAE between y_true and y_predict"""
    return np.sum(np.absolute(y_true - y_predict)) / len(y_true)
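A small sketch of how these functions might be called. The y_test and y_hat arrays are invented for illustration and assume the file above is saved as metrics.py.

import numpy as np
from metrics import mean_squared_error, root_mean_squared_error, mean_absolute_error

# Toy ground truth and predictions, for illustration only
y_test = np.array([3.0, -0.5, 2.0, 7.0])
y_hat = np.array([2.5, 0.0, 2.0, 8.0])

print(mean_squared_error(y_test, y_hat))        # MSE  = 0.375
print(root_mean_squared_error(y_test, y_hat))   # RMSE ≈ 0.612
print(mean_absolute_error(y_test, y_hat))       # MAE  = 0.5

Because the errors are squared before averaging, RMSE weights large errors more heavily than MAE, so the two metrics can rank models differently.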
What I write here is only my own understanding and organization of instructor bobo's lectures, and just my own humble opinion. Instructor bobo's course is produced by 慕课网 (imooc). Everyone is welcome to study it together.