1 #局部加权线性回归
2 #交叉验证计算泛化误差最小点
3
4
5 importnumpy6 from matplotlib importpyplot as plt7
8
9 #待拟合不含噪声之目标函数
10 deforiFunc(x):11 y = numpy.exp(-x) * numpy.sin(10*x)12 returny13 #待拟合包含噪声之目标函数
14 def traFunc(x, sigma=0.03):15 y = oriFunc(x) +numpy.random.normal(0, sigma, numpy.array(x).size)16 returny17
18
19 #局部加权线性回归之实现
20 classLW(object):21
22 def __init__(self, xBound=(0, 3), number=500, tauBound=(0.001, 100), epsilon=1.e-3):23 self.__xBound = xBound #采样边界
24 self.__number = number #采样数目
25 self.__tauBound = tauBound #tau之搜索边界
26 self.__epsilon = epsilon #tau之搜索精度
27
28
29 defget_data(self):30 '''
31 根据目标函数生成待拟合数据32 '''
33 X = numpy.linspace(*self.__xBound, self.__number)34 oriY_ = oriFunc(X) #不含误差之响应
35 traY_ = traFunc(X) #包含误差之响应
36
37 self.X = numpy.vstack((X.reshape((1, -1)), numpy.ones((1, X.shape[0]))))38 self.oriY_ = oriY_.reshape((-1, 1))39 self.traY_ = traY_.reshape((-1, 1))40
41 returnself.X, self.oriY_, self.traY_42
43
44 def lw_fitting(self, tau=None):45 if not hasattr(self, "X"):46 self.get_data()47 if tau isNone:48 if hasattr(self, "bestTau"):49 tau =self.bestTau50 else:51 tau =self.get_tau()52
53 xList, yList =list(), list()54 for val in numpy.linspace(*self.__xBound, self.__number * 5):55 x = numpy.array([[val], [1]])56 theta = self.__fitting(x, self.X, self.traY_, tau)57 y =numpy.matmul(theta.T, x)58 xList.append(x[0, 0])59 yList.append(y[0, 0])60
61 resiList = list() #残差计算
62 for idx in range(self.__number):63 x = self.X[:, idx:idx+1]64 theta = self.__fitting(x, self.X, self.traY_, tau)65 y =numpy.matmul(theta.T, x)66 resiList.append(self.traY_[idx, 0] -y[0, 0])67
68 returnxList, yList, self.X[0, :].tolist(), resiList69
70
71 defshow(self):72 '''
73 绘图展示整体拟合情况74 '''
75 xList, yList, sampleXList, sampleResiList =self.lw_fitting()76 y2List =oriFunc(numpy.array(xList))77 fig = plt.figure(figsize=(8, 14))78 ax1 = plt.subplot(2, 1, 1)79 ax2 = plt.subplot(2, 1, 2)80
81 ax1.scatter(self.X[0, :], self.traY_[:, 0], c="green", alpha=0.7, label="samples with noise")82 ax1.plot(xList, y2List, c="red", lw=4, alpha=0.7, label="standard curve")83 ax1.plot(xList, yList, c="black", lw=2, linestyle="--", label="fitting curve")84 ax1.set(xlabel="$x$", ylabel="$y$")85 ax1.legend()86
87 ax2.scatter(sampleXList, sampleResiList, c="blue", s=10)88 ax2.set(xlabel="$x$", ylabel="$\epsilon$", title="residual distribution")89
90 plt.show()91 plt.close()92 fig.tight_layout()93 fig.savefig("lw.png", dpi=300)94
95
96 def __fitting(self, x, X, Y_, tau, epsilon=1.e-9):97 tmpX = X[0:1, :]98 tmpW = (-(tmpX - x[0, 0]) ** 2 / tau ** 2 / 2).reshape(-1)99 W =numpy.diag(numpy.exp(tmpW))100
101 item1 =numpy.matmul(numpy.matmul(X, W), X.T)102 item2 = numpy.linalg.inv(item1 + epsilon *numpy.identity(item1.shape[0]))103 item3 =numpy.matmul(numpy.matmul(X, W), Y_)104
105 theta =numpy.matmul(item2, item3)106
107 returntheta108
109
110 defget_tau(self):111 '''
112 交叉验证返回最优tau113 采用黄金分割法计算最优tau114 '''
115 if not hasattr(self, "X"):116 self.get_data()117
118 lowerBound, upperBound = self.__tauBound
119 lowerTau = self.__calc_lowerTau(lowerBound, upperBound)120 upperTau = self.__calc_upperTau(lowerBound, upperBound)121 lowerErr = self.__calc_generalErr(self.X, self.traY_, lowerTau)122 upperErr = self.__calc_generalErr(self.X, self.traY_, upperTau)123
124 while (upperTau - lowerTau) > self.__epsilon:125 if lowerErr >upperErr:126 lowerBound =lowerTau127 lowerTau =upperTau128 lowerErr =upperErr129 upperTau = self.__calc_upperTau(lowerBound, upperBound)130 upperErr = self.__calc_generalErr(self.X, self.traY_, upperTau)131 else:132 upperBound =upperTau133 upperTau =lowerTau134 upperErr =lowerErr135 lowerTau = self.__calc_lowerTau(lowerBound, upperBound)136 lowerErr = self.__calc_generalErr(self.X, self.traY_, lowerTau)137
138 self.bestTau = (upperTau + lowerTau) / 2
139 returnself.bestTau140
141
142 def __calc_generalErr(self, X, Y_, tau):143 generalErr =0144
145 for idx in range(X.shape[1]):146 tmpx = X[:, idx:idx+1]147 tmpy_ = Y_[idx:idx+1, :]148 tmpX = numpy.hstack((X[:, 0:idx], X[:, idx+1:]))149 tmpY_ = numpy.vstack((Y_[0:idx, :], Y_[idx+1:, :]))150
151 theta = self.__fitting(tmpx, tmpX, tmpY_, tau)152 tmpy =numpy.matmul(theta.T, tmpx)153 generalErr += (tmpy_[0, 0] - tmpy[0, 0]) ** 2
154
155 returngeneralErr156
157
158 def __calc_lowerTau(self, lowerBound, upperBound):159 delta = upperBound -lowerBound160 lowerTau = upperBound - delta * 0.618
161 returnlowerTau162
163
164 def __calc_upperTau(self, lowerBound, upperBound):165 delta = upperBound -lowerBound166 upperTau = lowerBound + delta * 0.618
167 returnupperTau168
169
170
171
172 if __name__ == '__main__':173 obj =LW()174 obj.show()