拟合直线
根据原数据和含有噪声的数据分别绘制原直线和拟合直线
def LSM(x,y):
sumxx,sumx,sumy,sumxy=0,0,0,0
n = len(x)
for i in range(0,n):
sumx+=x[i]
sumy+=y[i]
sumxx+=x[i]*x[i]
sumxy+=x[i]*y[i]
a = (n*sumxy-sumx*sumy)/(n*sumxx-sumx**2)
b = (sumxx*sumy-sumx*sumxy)/(n*sumxx-sumx**2)
return a,b
k,b=2,5
x = np.arange(0,10,1)
y = k*x+b
u,f=0,1
noisy_d = np.random.normal(u,f,len(x))
noisy_y = y + noisy_d
new_a,new_b = LSM(x,noisy_y) # 拟合直线的斜率和截距
fitted_y = new_a*x+new_b # 拟合直线
plt.figure()
plt.plot(x,y,color='b',linestyle='None',marker='o') # 真值点
plt.plot(x,noisy_y,color='g',linestyle='None',marker='d') # 噪声点
plt.plot(x,y,color='r',linewidth=2.5,linestyle='-') # 原直线
plt.plot(x,fitted_y,color='c',linewidth=2.5,linestyle='--') #拟合直线
plt.legend(['normal','noisy_y','normal_l','fitted_1'],loc='upper left',ncol=1)
plt.show()
求取所有点到拟合直线的平均距离
def LSM(x,y):
sumxx,sumx,sumy,sumxy=0,0,0,0
n = len(x)
for i in range(0,n):
sumx+=x[i]
sumy+=y[i]
sumxx+=x[i]*x[i]
sumxy+=x[i]*y[i]
a = (n*sumxy-sumx*sumy)/(n*sumxx-sumx**2)
b = (sumxx*sumy-sumx*sumxy)/(n*sumxx-sumx**2)
return a,b
k,b=2,5
x = np.arange(0,10,1)
y = k*x+b
u,f=0,1
noisy_d = np.random.normal(u,f,len(x))
noisy_y = y + noisy_d
new_a,new_b = LSM(x,noisy_y) # 拟合直线的斜率和截距
fitted_y = new_a*x+new_b # 拟合直线
# 求取所有点到直线的平均距离
def avg_dir(x,y,a,b):
n,sum = len(x),0
for i in range(n):
# 点到直线的方程 |kx0-y0+b|/(k*k+1)^(1/2)
down = np.sqrt(a*a+1)
up = np.abs(a*x[i]-y[i]+b)
one = up/down
sum+=one
return sum/n
avg_number = avg_dir(x,y,new_a,new_b)
print("平均距离:\n",avg_number)
注意:每次执行后的结果不同
噪声值是服从均值=0,方差在1~10之间产生的,绘出真实点到拟合直线的平均距离的变化(折线图)
# 最小二乘法求取截距和斜率
def LSM(x,y):
sumxx,sumx,sumy,sumxy=0,0,0,0
n = len(x)
for i in range(0,n):
sumx+=x[i]
sumy+=y[i]
sumxx+=x[i]*x[i]
sumxy+=x[i]*y[i]
a = (n*sumxy-sumx*sumy)/(n*sumxx-sumx**2)
b = (sumxx*sumy-sumx*sumxy)/(n*sumxx-sumx**2)
return a,b
# 点到直线的平均距离
def avg_noisdir(x,y,a,b):
n,sum = len(x),0
for i in range(n):
up = np.abs(a*x[i]-y[i]+b)
down = np.power((a*a+1),1/2) # 因为sqrt不能对float进行开方,所以选择了power
one = up/down
sum+=one
return sum/n
u=0 # 均值
f=np.linspace(1,10,20,endpoint=True) # 方差,可在此处更改linespace的第三个参数值来改变方差的数量
# 真实点x,y到拟合直线的平均距离
k,b=2,5
x = np.arange(0,10,1)
y = k*x+b
l=[]
for i in f:
# 由于设置的方差不同则产生的噪声值不同使得噪声点的纵坐标不同
noisy_d = np.random.normal(u,i,len(x)) # 生成关于正态分布的数作为噪音值
noisy_y = y + noisy_d # 噪音点的纵坐标
new_a,new_b = LSM(x,noisy_y) # 最小二乘法得到的斜率和截距
fitted_y = new_a*x+new_b # 拟合直线
avg_d=avg_noisdir(x,y,new_a,new_b) # 调用函数得到平均距离
l.append(avg_d)
print(l)
plt.figure() # 画图显示平均距离随着方差的变化而变化的散点图
plt.xlabel('variance')
plt.ylabel('distence')
plt.title('relation')
plt.plot(f,l,color='c',linewidth=2.5,linestyle='-',marker='o')
plt.legend(['line'],loc='upper left',ncol=1)
plt.grid(True)
plt.show()