学习回顾
# Recap of the core polynomial-regression steps used in compare_LinearRegression below.
model = LinearRegression() # linear regression model
poly = PolynomialFeatures(orders[index]) # polynomial feature generator for the current degree
X = poly.fit_transform(x) # expand every original feature into its polynomial features
print("X:",X)
model.fit(X, y)
y_pred = model.predict(X)
假设degree = 2
现在有(a,b)两个特征,使用degree=2的二次多项式则为 $(1, a, b, a^2, ab, b^2)$
若就x一个特征,则为 $(1, x, x^2)$
线性回归:
$y = w_1 x_1 + w_2 x_2 + \dots + w_n x_n$
然后用LinearRegression求多项式回归时,需要先用多项式把 $x$ 变成 $(1, x, x^2, \dots)$ 这种形式,再用它替换 $x_1, x_2, x_3, \dots, x_n$ 就行了
from os import path
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
## Part 1
## About PolynomialFeatures
def about_PolynomialFeatures():
    """Print the PolynomialFeatures output for every parameter combination.

    PolynomialFeatures builds new features by multiplying features with
    themselves and with each other. For two features ``a`` and ``b`` the
    degree-2 expansion is ``[1, a, b, a^2, ab, b^2]``.

    The three constructor parameters exercised here are:

    * ``degree``: the maximum degree of the generated polynomial terms.
    * ``interaction_only``: defaults to False; when True, no feature is
      combined with itself, so ``a^2`` and ``b^2`` are left out.
    * ``include_bias``: defaults to True; when True the output contains the
      zero-degree term, i.e. a column that is all ones.

    The demo input is::

        [[0 1]
         [2 3]
         [4 5]]

    For example, with ``degree=2, interaction_only=True, include_bias=True``
    the transformed data is::

        [[ 1.  0.  1.  0.]
         [ 1.  2.  3.  6.]
         [ 1.  4.  5. 20.]]
    """
    # Two feature columns are required for interaction terms to exist, and
    # this is the matrix the expected output above was computed from.
    X = np.arange(6).reshape(3, 2)
    print(X)

    degree_list = [2, 3]
    interaction_only_list = [True, False]
    include_bias_list = [True, False]

    for degree in degree_list:
        for interaction_only in interaction_only_list:
            for include_bias in include_bias_list:
                print("degree=", degree)
                print("interaction_only=", interaction_only,
                      "include_bias=", include_bias)
                # Pass the boolean itself; wrapping it in a list would hide
                # the actual value from the transformer.
                poly = PolynomialFeatures(degree=degree,
                                          interaction_only=interaction_only,
                                          include_bias=include_bias)
                res = poly.fit_transform(X)
                print(res)
#residual sum of squares
def error(y, y_pred):
    """Return the residual sum of squares between targets and predictions.

    Both arguments are combined element-wise, so they must support
    subtraction and exponentiation (e.g. NumPy arrays of equal length).
    """
    residuals = y_pred - y
    squared_residuals = residuals ** 2
    return sum(squared_residuals)
'''
对Web应用来说,什么时候需要增加部署资源是个决策问题,
如果增加不及时,影响用户体验,如果太早增加,则浪费资金。
假设目前资源能够应对的服务是每小时100,000 个请求,我们需要预测什么时候应该购买新的资源。
假设最近1个月的数据保存在文件web_traffic.tsv中(tsv because it contains tab separated values).
每行数据表示时间和点击数,如果数据不存在,表示为nan.
'''
def load_data(file_name=None):
    """Load the web-traffic data and drop rows whose hit count is missing.

    Args:
        file_name: Path of a tab-separated file whose first column is the
            time and whose second column is the hit count. Defaults to
            ``web_traffic.tsv`` located next to this script.

    Returns:
        A tuple ``(x, y)`` of 1-D arrays: the time values and the hit counts,
        restricted to the rows where the hit count is not NaN.
    """
    if file_name is None:
        # Resolve against the absolute script location; a bare
        # dirname(__file__) can be empty, which would point the path at the
        # filesystem root.
        script_dir = path.dirname(path.abspath(__file__))
        file_name = path.join(script_dir, "web_traffic.tsv")

    # The NumPy aliases in the top-level scipy namespace (sp.genfromtxt,
    # sp.isnan) are gone from current SciPy, so call NumPy directly.
    data = np.genfromtxt(file_name, delimiter="\t")
    print(data)
    print(data.shape)

    # Preprocessing and cleaning the data.
    x = data[:, 0]
    y = data[:, 1]
    valid = ~np.isnan(y)  # rows that actually have a hit count
    return x[valid], y[valid]
def show_data(x, y, is_show):
    """Draw a scatter plot of the traffic data on the current figure.

    Args:
        x: Time values for the horizontal axis.
        y: Hit counts for the vertical axis.
        is_show: When truthy, display the figure immediately. Pass a falsy
            value to keep the figure open so that callers can draw fitted
            curves on top before showing it themselves.
    """
    plt.scatter(x, y)
    plt.title("Web traffic over the last month")
    plt.xlabel("Time")
    plt.ylabel("Hits/hour")

    # One tick every 7 * 24 x-units, labelled by week number
    # (presumably x is measured in hours -- confirm against the data file).
    weeks = range(10)
    plt.xticks([w * 7 * 24 for w in weeks],
               ['week %i' % w for w in weeks])
    plt.autoscale(tight=True)
    plt.grid()

    if is_show:
        plt.show()
## Part 2
## About Simple LinearRegression
def simple_linear_regression(X, y):
    """Fit a plain linear regression with sklearn and plot the fitted line.

    Prints the residual sum of squares, the R-squared score, the intercept
    and the coefficients of the fitted model, then shows the data together
    with the fitted line.

    Args:
        X: Input values; a 1-D array is reshaped into a single-column matrix.
        y: Target values.
    """
    features = X.reshape(-1, 1) if X.ndim == 1 else X
    show_data(features, y, False)

    model = LinearRegression()
    model.fit(features, y)
    y_pred = model.predict(features)

    rss = error(y, y_pred)
    r_squared = model.score(features, y)
    # intercept_ is the bias term, coef_ holds the fitted coefficients.
    print(rss, r_squared, model.intercept_, model.coef_)

    plt.plot(features, y_pred, color='blue', linewidth=3)
    plt.show()
def compare_LinearRegression(X, y):
    """Fit polynomial regressions of degree 1, 2 and 3 with sklearn.

    PolynomialFeatures generates the polynomial and interaction features,
    which are then fitted with LinearRegression. For every degree the
    residual sum of squares, the R-squared score, the intercept and the
    coefficients are printed, and the fitted curve is drawn over the data.

    Args:
        X: Input values; a 1-D array is reshaped into a single-column matrix.
        y: Target values.
    """
    x = X if X.ndim != 1 else X.reshape(-1, 1)
    show_data(x, y, False)

    for degree, color in zip([1, 2, 3], ['blue', 'green', 'red']):
        # Expand the original features into polynomial features of this degree.
        expanded = PolynomialFeatures(degree).fit_transform(x)
        print("X:", expanded)

        model = LinearRegression()
        model.fit(expanded, y)
        y_pred = model.predict(expanded)

        print("Degree={},".format(degree), error(y, y_pred),
              model.score(expanded, y), model.intercept_, model.coef_)
        plt.plot(x, y_pred, color=color, linewidth=3)

    plt.show()
def polynomial_regression_in_scipy(x, y):
    """Fit polynomials of degree 1, 2 and 3 with numpy's polyfit.

    See https://numpy.org/doc/stable/reference/generated/numpy.polyfit.html

    For every degree the residuals reported by polyfit and the fitted
    coefficients are printed, and the fitted curve is drawn over the data.

    Args:
        x: 1-D array of input values.
        y: 1-D array of target values.
    """
    print(x)
    show_data(x, y, False)

    for degree, color in zip((1, 2, 3), ('blue', 'green', 'red')):
        # full=True makes polyfit return diagnostics after the coefficients;
        # only the residuals are used here.
        coefficients, residuals, *_ = np.polyfit(x, y, degree, full=True)
        fitted = np.poly1d(coefficients)
        print("Degree={},".format(degree), residuals, coefficients)
        plt.plot(x, fitted(x), color=color, linewidth=3)

    plt.show()
if __name__ == "__main__":
    # Part 1: show what PolynomialFeatures generates.
    about_PolynomialFeatures()

    # Part 2: load the cleaned traffic data and compare the polynomial fits.
    hours, hits = load_data()
    # Optional steps, disabled by default:
    # show_data(hours, hits, True)
    # simple_linear_regression(hours, hits)
    for fit_and_plot in (compare_LinearRegression,
                         polynomial_regression_in_scipy):
        fit_and_plot(hours, hits)
# x = np.arange(1, 17, 1)
# y = np.array(
# [4.00, 6.40, 8.00, 8.80, 9.22, 9.50, 9.70, 9.86, 10.00, 10.20, 10.32, 10.42, 10.50, 10.55, 10.58, 10.60])
#
# # 第一个拟合,自由度为3
# z1 = np.polyfit(x, y, 3)
# # 生成多项式对象
# p1 = np.poly1d(z1)
# print(z1)
# print(p1)
数据集:
链接:https://pan.baidu.com/s/1zWnKrNnO6l0XhbTLTfAQPg
提取码:mojm
复制这段内容后打开百度网盘手机App,操作更方便哦