学习回顾之线性回归和多项式

最新推荐文章于 2023-03-22 17:39:30 发布

hola173841439

最新推荐文章于 2023-03-22 17:39:30 发布

阅读量384

点赞数

分类专栏：机器学习文章标签：机器学习

本文链接：https://blog.csdn.net/hola173841439/article/details/109033562

版权

机器学习专栏收录该内容

2 篇文章 0 订阅

订阅专栏

学习回顾

model = LinearRegression()  # ordinary least-squares linear regression model
poly = PolynomialFeatures(orders[index])  # polynomial expansion of degree orders[index]
X = poly.fit_transform(x)    # expand every original feature into polynomial features
print("X:",X)
model.fit(X, y)

y_pred = model.predict(X)

假设degree = 2
现在有（a，b）两个特征，使用 degree=2 的二次多项式，得到的特征为 $(1, a, b, a^2, ab, b^2)$
若只有 x 一个特征，则为 $(1, x, x^2)$
线性回归：
$y=w_1*x_1+w_2*x_2+...+w_n*x_n$
然后用 LinearRegression 做多项式回归时，需要先用 PolynomialFeatures 把 $x$ 变成 $(1, x, x^2, \ldots)$ 这种形式，再把这些项当作线性回归中的 $x_1, x_2, x_3, \ldots, x_n$ 就行了

from os import path
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split

## Part 1
## About PolynomialFeatures
def about_PolynomialFeatures():
    '''
    Print the output of PolynomialFeatures for every parameter combination.

    PolynomialFeatures constructs new features by multiplying features
    together (each feature with itself and with the others). For two
    features a and b, the degree-2 expansion is [1, a, b, a^2, ab, b^2].

    PolynomialFeatures has 3 parameters:
    degree: the degree of the polynomial;
    interaction_only: defaults to False; when True, no feature is combined
        with itself, so the result contains no a^2 or b^2 terms;
    include_bias: defaults to True; when True, the result contains the
        degree-0 term, i.e. a column of all ones.

    For the data:
    [[0 1]
     [2 3]
     [4 5]]
    the transformed matrix is printed for all parameter combinations, e.g.:
    Degree = 2, interaction_only = True, include_bias = True
    [[ 1.  0.  1.  0.]
     [ 1.  2.  3.  6.]
     [ 1.  4.  5. 20.]]
    ...
    '''

    # Three samples with two features each, as in the docstring example.
    # Interaction terms such as a*b only exist with more than one feature.
    X = np.arange(6).reshape(3, 2)
    print(X)
    degree_list = [2, 3]
    interaction_only_list = [True, False]
    include_bias_list = [True, False]
    for degree in degree_list:
        for interaction_only in interaction_only_list:
            for include_bias in include_bias_list:
                print("degree=", degree)
                print("interaction_only=", interaction_only, "include_bias=", include_bias)
                # Pass the boolean itself; wrapping it in a list would never
                # give include_bias its intended value.
                poly = PolynomialFeatures(degree=degree,
                                          interaction_only=interaction_only,
                                          include_bias=include_bias)
                res = poly.fit_transform(X)
                print(res)

    
def error(y, y_pred):
    """Return the residual sum of squares between y_pred and y."""
    residuals = y_pred - y
    squared = residuals ** 2
    return sum(squared)


'''
对Web应用来说，什么时候需要增加部署资源是个决策问题，
如果增加不及时，影响用户体验，如果太早增加，则浪费资金。
假设目前资源能够应对的服务是每小时100,000 个请求，我们需要预测什么时候应该购买新的资源。

假设最近1个月的数据保存在文件web_traffic.tsv中（tsv 表示 tab separated values，即以制表符分隔的值）。
每行数据表示时间和点击数，如果数据不存在，表示为nan.
'''
def load_data():
    """Load the web-traffic samples stored next to this script.

    Reads the tab-separated file ``web_traffic.tsv`` from the directory that
    contains this file, prints the raw array and its shape, and drops every
    row whose second column is NaN.

    Returns:
        A tuple ``(x, y)`` of 1-D arrays: the first column and the second
        column of the file, restricted to rows where the second column is
        not NaN.
    """
    # Resolve against the absolute location of this file: with a bare
    # relative __file__, dirname() is "" and plain concatenation would
    # point at "/web_traffic.tsv" in the filesystem root.
    file_name = path.join(path.dirname(path.abspath(__file__)), "web_traffic.tsv")
    # Call NumPy directly; SciPy's top-level re-exports of NumPy functions
    # (sp.genfromtxt, sp.isnan) are deprecated.
    data = np.genfromtxt(file_name, delimiter="\t")
    print(data)
    print(data.shape)

    # Preprocessing and cleaning the data
    x = data[:, 0]
    y = data[:, 1]
    valid = ~np.isnan(y)  # one mask, applied to both columns
    return x[valid], y[valid]

def show_data(x, y, is_show):
    """Draw a scatter plot of the traffic data on the current figure.

    Args:
        x: values for the horizontal axis (labelled "Time").
        y: values for the vertical axis (labelled "Hits/hour").
        is_show: when truthy, display the figure immediately with
            ``plt.show()``; otherwise leave it open so the caller can add
            more artists before showing it.
    """
    # Visualize the data
    plt.scatter(x, y)
    plt.title("Web traffic over the last month")
    plt.xlabel("Time")
    plt.ylabel("Hits/hour")
    # One tick every 7*24 units on the x axis, labelled week 0 .. week 9.
    weeks = range(10)
    plt.xticks([w*7*24 for w in weeks],
               ['week %i' % w for w in weeks])
    plt.autoscale(tight=True)
    plt.grid()
    if is_show:
        plt.show()

    
## Part 2
## About Simple LinearRegression
def simple_linear_regression(X, y):
    '''
    Fit a simple linear regression with sklearn's LinearRegression.

    Prints the residual sum of squares, R-squared, the intercept and the
    coefficients of the model, then plots the fitted line over the data.
    '''
    # A 1-D X is reshaped into a single-column matrix before fitting.
    features = X.reshape(-1, 1) if X.ndim == 1 else X
    show_data(features, y, False)

    model = LinearRegression()
    model.fit(features, y)
    y_pred = model.predict(features)

    rss = error(y, y_pred)
    r_squared = model.score(features, y)
    # intercept_ is the bias term, coef_ holds the coefficients
    print(rss, r_squared, model.intercept_, model.coef_)

    plt.plot(features, y_pred, color = 'blue', linewidth=3)
    plt.show()

def compare_LinearRegression(X, y):
    '''
    Fit polynomial regressions of degree 1, 2 and 3 with LinearRegression.

    PolynomialFeatures generates the polynomial and interaction features.
    For each degree, prints the residual sum of squares, R-squared, the
    intercept and the coefficients, and plots the fitted curve over the data.
    '''
    # A 1-D X is reshaped into a single-column matrix before expansion.
    x = X.reshape(-1, 1) if X.ndim == 1 else X
    show_data(x, y, False)

    for degree, color in zip([1, 2, 3], ['blue', 'green', 'red']):
        # Expand the original features into polynomial features.
        expanded = PolynomialFeatures(degree).fit_transform(x)
        print("X:", expanded)

        model = LinearRegression()
        model.fit(expanded, y)
        y_pred = model.predict(expanded)

        print("Degree={},".format(degree), error(y, y_pred), model.score(expanded, y), model.intercept_, model.coef_)
        plt.plot(x, y_pred, color=color, linewidth=3)
    plt.show()

   

def polynomial_regression_in_scipy(x, y):
    '''
    Fit polynomials of degree 1, 2 and 3 with numpy's polyfit.

    see:
    https://numpy.org/doc/stable/reference/generated/numpy.polyfit.html
    For each degree, prints the residuals reported by polyfit and the
    polynomial coefficients, and plots the fitted curve over the data.
    '''
    print(x)
    show_data(x, y, False)

    for degree, color in zip((1, 2, 3), ('blue', 'green', 'red')):
        # full=True also returns the fit diagnostics; only the residuals
        # are used here.
        coeffs, residuals, _rank, _singular_values, _rcond = np.polyfit(x, y, degree, full=True)
        y_pred = np.poly1d(coeffs)(x)
        print("Degree={},".format(degree), residuals, coeffs)
        plt.plot(x, y_pred, color=color, linewidth=3)
    plt.show()


# Script entry point: print the PolynomialFeatures demo, load the traffic
# data, then run the sklearn and the numpy polynomial fits in turn.
if __name__ == "__main__":
    about_PolynomialFeatures()

    x, y = load_data()
    
    # show_data(x, y, True)

    # simple_linear_regression(x, y)

    compare_LinearRegression(x, y)

    polynomial_regression_in_scipy(x, y)
    # x = np.arange(1, 17, 1)
    # y = np.array(
    #     [4.00, 6.40, 8.00, 8.80, 9.22, 9.50, 9.70, 9.86, 10.00, 10.20, 10.32, 10.42, 10.50, 10.55, 10.58, 10.60])
    #
    # # first fit, polynomial of degree 3
    # z1 = np.polyfit(x, y, 3)
    # # build the polynomial object
    # p1 = np.poly1d(z1)
    # print(z1)
    # print(p1)

数据集：
链接：https://pan.baidu.com/s/1zWnKrNnO6l0XhbTLTfAQPg
提取码：mojm
复制这段内容后打开百度网盘手机App，操作更方便哦

hola173841439

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
学习回顾之线性回归和多项式

学习回顾：model = LinearRegression()（线性回归）；poly = PolynomialFeatures(orders[index])（定义多项式）；X = poly.fit_transform(x)（将原本数据集的每一种特征转化为多项式的特征）；print("X:",X)；model.fit(X, y)；y_pred = model.predict(X)。假设 degree = 2，现在有（a，b）两个特征，使用 degree=2 的二次多项式则为（1，a，b，a^2，ab，b^2）……
复制链接

扫一扫