二元泰勒展开式
多元泰勒展开式
海森矩阵与泰勒展开式
定理:
设 $n$ 是一个正整数。如果定义在一个包含 $a$ 的区间上的函数 $f$ 在 $a$ 点处 $n+1$ 次可导,那么对于这个区间上的任意 $x$,都有:
$$f(x) = f(a)+\frac{f^{(1)}(a)}{1!}(x-a)+\frac{f^{(2)}(a)}{2!}(x-a)^2+\cdots+\frac{f^{(n)}(a)}{n!}(x-a)^n+R_n(x)$$
其中多项式称为函数在 $a$ 处的泰勒展开式,剩余的 $R_n(x)$ 是泰勒公式的余项,是 $(x-a)^n$ 的高阶无穷小
凸函数的性质:
- 凸函数的局部极小值就是全局最小值
- 凸函数Hessian 矩阵半正定
- 若 $Q$ 为半正定对称阵,则 $f(x)=x^TQx$ 为凸函数
- 若 $f(x)$ 为凸函数,则 $f(E(x)) \le E(f(x))$(Jensen 不等式),其中 $E(x)$ 是 $x$ 的期望
凸函数举例
指数函数 $f(x)=e^x$
幂函数 $f(x)=x^a$($x>0$,$a\ge 1$ 或 $a\le 0$)
负对数函数 $f(x)=-\log x$
负熵函数 $f(x)=x\ln x$
范数 $f(x)=\|x\|$
最大值函数 $f(x)=\max(x_1,x_2,x_3,\ldots,x_n)$
证明:逐点最大值仍为凸函数
若 $f_1, f_2$ 均为凸函数,定义函数 $f$:
$$f(x)=\max\{f_1(x),f_2(x)\}$$
则函数f为凸函数
若一个函数满足
(1)定义域是凸集
(2) $f(\alpha_1x_1+\alpha_2x_2)\le\alpha_1f(x_1)+\alpha_2f(x_2)$
其中 $\sum\alpha_i=1,\ \alpha_i\ge 0$,那么该函数为凸函数
示例:
$$f\left(\frac{x_1+x_2}{2}\right)\le\frac{f(x_1)+f(x_2)}{2}$$
多项式回归
import os

import numpy as np
import numpy.random as rnd
#画图
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
# Seed NumPy's global random generator so the random data is reproducible.
np.random.seed(42)

# Path components used when saving figures:
# <PROJECT_ROOT_DIR>/images/<MODEL_ID>/<figure name>.png
PROJECT_ROOT_DIR = "."
MODEL_ID = "linear_models"
def save_fig(fig_id, tight_layout=True):
    """Save the current matplotlib figure as a 300-dpi PNG file.

    The image is written to
    ``<PROJECT_ROOT_DIR>/images/<MODEL_ID>/<fig_id>.png``. The target
    directory is created first when it does not exist yet.

    Args:
        fig_id: Base name of the image file, without the ``.png`` extension.
        tight_layout: When true, call ``plt.tight_layout()`` before saving.
    """
    image_dir = os.path.join(PROJECT_ROOT_DIR, "images", MODEL_ID)
    # savefig does not create missing directories, so make sure it exists.
    os.makedirs(image_dir, exist_ok=True)
    path = os.path.join(image_dir, fig_id + ".png")
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format="png", dpi=300)
# Ignore warnings whose message starts with "internal gelsd".
import warnings
warnings.filterwarnings(action = "ignore",message="internal gelsd")
# Generate a noisy quadratic data set: y = 0.5 * x^2 + x + 2 + Gaussian noise.
m = 100
X = 6 * np.random.rand(m, 1) - 3  # features, drawn uniformly from [-3, 3)
y = 0.5 * X**2 + X + 2 + np.random.randn(m, 1)  # labels

# Scatter plot of the raw samples, saved to disk and then displayed.
plt.plot(X, y, "b.")
plt.axis([-3, 3, 0, 10])
plt.xlabel("$x_1$", fontsize=18)
plt.ylabel("$y$", rotation=0, fontsize=18)
save_fig("quadratic_data_plot")
plt.show()
# Fit a linear regression on degree-2 polynomial features of X.
# NOTE(review): the original cell used `poly_features` and `lin_reg` without
# defining or fitting them anywhere earlier in the file. degree=2 is assumed
# because the data is generated from a quadratic -- confirm against the
# intended notebook.
poly_features = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly_features.fit_transform(X)
lin_reg = LinearRegression()
lin_reg.fit(X_poly, y)

# Predict on 100 evenly spaced points covering [-3, 3].
X_new = np.linspace(-3, 3, 100).reshape(100, 1)
X_new_poly = poly_features.transform(X_new)
y_new = lin_reg.predict(X_new_poly)

# Draw the samples and the fitted curve.
plt.plot(X, y, "b.")
plt.plot(X_new, y_new, "r-", linewidth=2, label="Predictions")
plt.xlabel("$x_1$", fontsize=18)
plt.ylabel("$y$", rotation=0, fontsize=18)
plt.legend(loc="upper left", fontsize=14)
# Call plt.axis(); assigning to it would replace the pyplot function with a
# list and make every later plt.axis(...) call fail.
plt.axis([-3, 3, 0, 10])
save_fig("quadratic_predictions_plot")
plt.show()
from sklearn.preprocessing import StandardScaler #从预处理包里导入标准化处理模块
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline #导入流水线作业
# Fit and plot polynomial regressions of degree 300, 2 and 1 on the same data.
for style, width, degree in (("g-", 1, 300), ("b--", 2, 2), ("r-+", 2, 1)):
    polybig_features = PolynomialFeatures(degree=degree, include_bias=False)
    std_scaler = StandardScaler()
    lin_reg = LinearRegression()
    polynomial_regression = Pipeline([
        ("poly_features", polybig_features),  # step 1: polynomial features
        ("std_scaler", std_scaler),  # step 2: standardize the features
        ("lin_reg", lin_reg),  # step 3: linear regression
    ])
    polynomial_regression.fit(X, y)
    y_newbig = polynomial_regression.predict(X_new)
    plt.plot(X_new, y_newbig, style, label=str(degree), linewidth=width)

# Overlay the original samples and finish the figure.
plt.plot(X, y, "b.", linewidth=3)
plt.legend(loc="upper left")
plt.xlabel("$x_1$", fontsize=18)
plt.ylabel("$y$", rotation=0, fontsize=18)
# Call plt.axis(); assigning to it would replace the pyplot function with a
# list and make every later plt.axis(...) call fail.
plt.axis([-3, 3, 0, 10])
save_fig("high_degree_predictions_plot")
plt.show()
from sklearn.metrics import mean_squared_error #导入均方误差的衡量标准
from sklearn.model_selection import train_test_split #从模型选择包里导入数据集切分模块
def plot_learning_curves(model, X, y):
    """Plot training and validation RMSE against the training-set size.

    The data is split into training and validation sets with
    ``test_size=0.2`` and ``random_state=10``. ``model`` is refit on the
    first ``size`` training samples for every ``size`` from 1 up to the whole
    training set. The RMSE on those samples and on the full validation set
    is drawn on the current matplotlib axes.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is
            refit repeatedly and left fitted on the whole training set.
        X: Feature array.
        y: Target array with one entry per row of ``X``.
    """
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, random_state=10)
    # Include len(X_train) itself so the last point uses every training sample.
    sizes = list(range(1, len(X_train) + 1))
    train_errors = []  # MSE on the samples the model was fitted on
    val_errors = []  # MSE on the validation set
    for size in sizes:
        model.fit(X_train[:size], y_train[:size])
        y_train_predict = model.predict(X_train[:size])
        y_val_predict = model.predict(X_val)
        train_errors.append(
            mean_squared_error(y_train[:size], y_train_predict))
        val_errors.append(mean_squared_error(y_val, y_val_predict))
    # Plot against the actual sizes so the x axis matches its label.
    plt.plot(sizes, np.sqrt(train_errors), "r-+", linewidth=2, label="train")
    plt.plot(sizes, np.sqrt(val_errors), "b-", linewidth=3, label="val")
    plt.legend(loc="upper right", fontsize=14)
    plt.xlabel("Training set size", fontsize=14)
    plt.ylabel("RMSE", fontsize=14)
# Draw the learning curves of a plain linear regression on (X, y).
lin_reg = LinearRegression()
plot_learning_curves(lin_reg, X, y)

# Fix the visible range (x: 0-80, y: 0-3), then save and show the figure.
plt.axis([0, 80, 0, 3])
save_fig("underfitting_learning_curves_plot")
plt.show()