# -*- coding: utf-8 -*-
import numpy as np
import math
import matplotlib.pyplot as plt
def read(i):
fr=open('quasar_train.csv','r')
arrayline=fr.readlines()
y=arrayline[1].strip().split(',')
n=len(y)
y=map(lambda y: float(y), y)
x=np.zeros((2,n))
x[0]=arrayline[0].strip().split(',')
x[1]=1
return x,y
def linear_regression_theta(x,y): #计算线性回归系数
xx=np.dot(x,np.transpose(x))
xx_inverse=np.linalg.inv(xx)
xx_inverse_x=np.dot(xx_inverse,x)
xx_inverse_x_y=np.dot(xx_inverse_x,np.transpose(y))
theta=xx_inverse_x_y
y1 =[]
for i in range(len(x[0])):
y1.append(x[0][i] * theta[0] + theta[1])
return y1
def weighted_linear_regression(x,y,t):#加权线性回归
y2=[]
for i in range(len(x[0])):
w=np.zeros((len(x[0]),len(x[0])))
for j in range(len(x[0])):
w[j][j]=math.exp((x[0][i] - x[0][j])*(x[0][i] - x[0][j]) / (-2 * t * t))
xwx=np.dot(np.dot(x,w),np.transpose(x))
xwx_inverse=np.linalg.inv(xwx)
xwx_inverse_x=np.dot(xwx_inverse,x)
xwx_inverse_x_w = np.dot(xwx_inverse_x,w)
xwx_inverse_x_w_y=np.dot(xwx_inverse_x_w,np.transpose(y))
theta=xwx_inverse_x_w_y
y2.append(x[0][i] * theta[0] + theta[1])
return y2
def figure(x,y,y1,y2,y3,y4,y5,y6): #画图
plt.figure(1)
plt.xlabel('Wavelength')
plt.ylabel('Flux')
p1 = plt.scatter(x[0],y, marker='.', color='b', label='real value', s=10)
p2 = plt.plot(x[0], y1, color='r', linewidth=1, label='linear regression')
p3 = plt.plot(x[0], y2, color='g', linewidth=2, label='weighted linear regression t=5')
plt.legend(loc='upper right')
plt.show()
plt.figure(2)
plt.xlabel('Wavelength')
plt.ylabel('Flux')
p1 = plt.scatter(x[0], y, marker='.', color='blue', label='real value', s=10)
p4 = plt.plot(x[0], y3, color='grey', linewidth=1.5, label='weighted linear regression t=1')
p5 = plt.plot(x[0], y4, color='black', linewidth=2.5, label='weighted linear regression t=10')
p6 = plt.plot(x[0], y5, color='green', linewidth=2.5, label='weighted linear regression t=100')
p7 = plt.plot(x[0], y6, color='orange', linewidth=1.5, label='weighted linear regression t=1000')
plt.legend(loc='upper right')
plt.show()
def main():
x,y=read(1)
y1=linear_regression_theta(x,y)
y2= weighted_linear_regression(x,y,5)
y3= weighted_linear_regression(x,y,1)
y4= weighted_linear_regression(x,y,10)
y5= weighted_linear_regression(x,y,100)
y6= weighted_linear_regression(x,y,1000)
figure(x,y,y1,y2,y3,y4,y5,y6)
if __name__ == '__main__':
main()