斯坦福机器学习 by 吴恩达 —— 线性回归的 Python 实现

import numpy as np
import matplotlib.pyplot as plt


def load_data(filename):
    """Load a tab-separated numeric data file.

    Each row holds the feature values followed by the target in its
    last column.

    Parameters
    ----------
    filename : str
        Path to a tab-delimited text file of numbers.

    Returns
    -------
    dataset : ndarray of shape (n_samples, n_features), float64
        All columns except the last.
    label : ndarray of shape (n_samples, 1), float64
        The last column, kept as a column vector.
    """
    data = np.loadtxt(fname=filename, delimiter='\t')
    # loadtxt returns a 1-D array for a one-row file; promote it so the
    # column slicing below works uniformly (the original row-by-row
    # rebuild crashed on that case).
    data = np.atleast_2d(data)
    # Slice directly instead of re-appending row by row — loadtxt
    # already yields a float64 2-D array.
    dataset = data[:, :-1].astype(np.float64)
    label = data[:, -1:].astype(np.float64)
    return dataset, label


# Load the example data set: X holds all but the last column (the first
# column is the constant 1 intercept feature), y is the last column as
# an (n, 1) target vector.
X,y=load_data('ex0.txt')


def normal_equation(X_train, y_train):
    """Solve ordinary least squares in closed form.

    Computes w = (X^T X)^+ X^T y using the Moore-Penrose pseudo-inverse,
    so a (minimum-norm) solution exists even when X^T X is singular.
    Note .dot is matrix multiplication — not elementwise * / multiply.

    Parameters
    ----------
    X_train : ndarray of shape (n_samples, n_features)
    y_train : ndarray of shape (n_samples, 1)

    Returns
    -------
    w : ndarray of shape (n_features, 1)
        The fitted weight vector (also printed, as in the original).
    """
    # The original zero-initialisation of w (with the wrong shape,
    # shape[0] instead of shape[1]) was dead code — it was immediately
    # overwritten; it has been removed.
    w = np.linalg.pinv(X_train.T.dot(X_train)).dot(X_train.T).dot(y_train)
    print('w:\n', w)
    return w


# Fit OLS on the whole data set, then visualise the fitted line over a
# scatter of the raw points.
w = normal_equation(X, y)
y_pred = X.dot(w)

plt.scatter(X[:, 1], y[:, 0], marker='x', color='r')
plt.xlabel('value of X')
plt.ylabel('value of Y')
# linestyle '-' draws a continuous solid line.
plt.plot(X[:, 1], y_pred, color='blue', linestyle='-')
plt.show()

# Goodness-of-fit check: correlation between predictions and targets.
# .ravel() flattens the (n, 1) column vectors to 1-D for corrcoef.
print('corrcoef:\n', np.corrcoef(y.ravel(), y_pred.ravel()))


#局部加权线性回归,通过赋予预测点附近其他点权重解决线性回归欠拟合的问题,但是计算量很大
def lwlr(x_point,X_train,y_train,k):
    W=np.eye(X_train.shape[0])
    for i in range(X_train.shape[0]):
        X_matrix=x_point-X_train[i,:]
        W[i,i]=np.exp(X_matrix.dot(X_matrix.T)/(-2.0*k**2))
    w=np.linalg.pinv(X_train.T.dot(W).dot(X_train)).dot(X_train.T).dot(W).dot(y_train)
    return w


def lwlr_test(X_point, X_train, y_train, k):
    """Predict every query point in X_point with locally weighted LR.

    Refits lwlr once per query point (expensive: one weighted
    least-squares solve each).

    Parameters
    ----------
    X_point : ndarray of shape (n_queries, n_features)
        Query points to predict.
    X_train, y_train : training data used to fit each local model.
    k : float
        Gaussian kernel bandwidth passed through to lwlr.

    Returns
    -------
    y_pred : ndarray of shape (n_queries, 1)
    """
    n_queries = X_point.shape[0]
    # Bug fix: size the output (and the loop) by the number of query
    # points, not the number of training samples — the original used
    # X_train.shape[0] and broke whenever the two differed.
    y_pred = np.zeros((n_queries, 1))
    for i in range(n_queries):
        y_pred[i] = X_point[i].dot(lwlr(X_point[i], X_train, y_train, k))
    return y_pred


print("should be 3.12204471")
print(X[0].dot(lwlr(X[0],X,y,1)))
print("should be 3.20175729")
print(X[0].dot(lwlr(X[0],X,y,0.001)))
y1=lwlr_test(X,X,y,1.0)
y2=lwlr_test(X,X,y,0.01)
y3=lwlr_test(X,X,y,0.003)


plt.scatter(X[:,1],y[:,0],marker='.',color='r')
plt.xlabel('value of X')
plt.ylabel('value of Y1')
plt.plot(X[:,1],y1.ravel(),color='blue',linestyle='-')
plt.show()


point_index=X[:,1].argsort(0)
X_sort=X[point_index]
plt.scatter(X[:,1],y[:,0],marker='x',color='r')
plt.xlabel('value of X')
plt.ylabel('value of Y2')
plt.plot(X_sort[:,1],y2[point_index],color='black')
plt.show()


plt.scatter(X[:,1],y[:,0],marker='x',color='r')
plt.xlabel('value of X')
plt.ylabel('value of Y3')
point_index=X[:,1].argsort(0)
X_sort=X[point_index]
plt.plot(X_sort[:,1],y3[point_index],color='black')
plt.show()


#岭回归
def ridge_regression(X_train,y_train,lamb):
    I=np.eye(X_train.shape[1])
    w=np.linalg.pinv(X_train.T.dot(X_train)+lamb*I).dot(X_train.T).dot(y_train)
    return w


def trans_norm(X):
    """Centre each column and scale it by its variance.

    NOTE: following Machine Learning in Action, this divides by the
    *variance*, not the standard deviation — it is not a z-score.

    Returns the transformed array plus the per-column mean and variance
    so the same transform can be reapplied to new data.
    """
    col_mean = X.mean(axis=0)
    col_var = X.var(axis=0)
    return (X - col_mean) / col_var, col_mean, col_var


def test(X_train,y_train):
    """Sweep 30 ridge lambdas and return the weight path.

    Standardises the features (trans_norm) and mean-centres the target,
    then fits ridge_regression for lambda = exp(-10) .. exp(19).

    Returns
    -------
    ws : ndarray of shape (30, n_features)
        Row i holds the weights fitted with lambda = exp(i - 10).
    """
    #axis=0 means the mean is taken column-wise
    #Ridge regression / shrinkage methods need standardised features so
    #every dimension carries equal importance
    X_norm, X_mean, X_var=trans_norm(X_train)
    y_mean=np.mean(y_train,axis=0)
    y_var=np.var(y_train,axis=0)  # computed but unused (the scaled variant below is disabled)
    #y_norm=(y_train-y_mean)/y_var
    y_norm=y_train-y_mean
    num=30
    ws=np.zeros((num,X_train.shape[1]))
    for i in range(num):
        #lambda grows exponentially, from exp(-10) up to exp(19)
        w=ridge_regression(X_norm,y_norm,np.exp(i-10))
        ws[i,:]=w.ravel().T
    return ws


XX,yy=load_data("abalone.txt")
ws=test(XX,yy)
plt.plot([i-10 for i in range(30)],ws)
plt.xlabel("log(lambda)")
plt.ylabel("w")
plt.show()


def rssError(y, y_pred):
    """Residual sum of squares between targets and predictions.

    Uses the builtin sum on purpose: for (n, 1) column-vector inputs it
    iterates over rows and returns a 1-element array, matching the
    original behaviour (np.sum would return a scalar instead).
    """
    residual = y - y_pred
    return sum(residual ** 2)


def stageWise(X_train, y_train, learn_rate, iter):
    """Forward stagewise linear regression (greedy coordinate search).

    Each iteration probes nudging every coefficient down then up by
    learn_rate and keeps the single change with the lowest residual sum
    of squares. Features are standardised and the target mean-centred
    before fitting.

    Parameters
    ----------
    X_train : ndarray of shape (n_samples, n_features)
    y_train : ndarray of shape (n_samples, 1)
    learn_rate : float — step size of each coefficient nudge.
    iter : int — number of greedy iterations.

    Returns
    -------
    ws : ndarray of shape (iter, n_features) — weight history, one row
        per iteration.
    """
    y_norm = y_train - np.mean(y_train, axis=0)
    X_norm, _, _ = trans_norm(X_train)
    n_features = X_train.shape[1]
    ws = np.zeros((iter, n_features))
    w = np.zeros((n_features, 1))
    w_best = w.copy()
    for step in range(iter):
        # Progress trace every 100 iterations.
        if step % 100 == 0:
            print(w.ravel())
        best_error = np.inf
        for j in range(n_features):
            for direction in (-1, 1):
                candidate = w.copy()
                candidate[j] += learn_rate * direction
                prediction = X_norm.dot(candidate.reshape(-1, 1))
                error = rssError(y_norm, prediction)
                # Strict < keeps the first of any tied candidates,
                # matching the original probe order (-1 before +1).
                if error < best_error:
                    best_error = error
                    w_best = candidate
        w = w_best.copy()
        ws[step, :] = w.T
    return ws


# Run 1000 stagewise iterations on the abalone data and plot how each
# coefficient evolves over the iterations.
ws = stageWise(XX, yy, 0.005, 1000)
plt.plot(ws)
plt.xlabel("iter")
plt.ylabel("w")
plt.show()
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值