# I watched Andrew Ng's course on NetEase Open Courses and implemented it myself in Python.
import numpy as np
# Number of latent features: the inner dimension shared by x (rows x R)
# and theta (R x columns).
R = 2
# Step size used when x and theta are moved against their gradients.
alpha = 0.01
# Regularization coefficient applied to the entries of x and theta.
beta = 0.5
def norm_dataset(data_set, bool_set):
    """Mean-normalize the observed entries of ``data_set`` per column, in place.

    For every column ``j`` the mean of the entries flagged in ``bool_set`` is
    computed and subtracted from those entries. Entries that are not flagged
    are overwritten with the placeholder ``-1.0``.

    Args:
        data_set: Rectangular list of rows of numbers; modified in place.
        bool_set: Same layout as ``data_set``; a truthy value marks the
            corresponding entry as observed.

    Returns:
        A list holding one mean per column. A column without any observed
        entry gets the mean ``0.0`` (rather than raising
        ``ZeroDivisionError``), and an empty ``data_set`` yields ``[]``.
    """
    if not data_set:
        return []

    miu = []
    for j in range(len(data_set[0])):
        observed = [row[j] for row, flags in zip(data_set, bool_set) if flags[j]]
        # A column nobody rated has no defined mean; fall back to 0.0 so the
        # caller can still add the mean back later without special-casing.
        mean = sum(observed, 0.0) / len(observed) if observed else 0.0
        miu.append(mean)
        for row, flags in zip(data_set, bool_set):
            row[j] = row[j] * 1.0 - mean if flags[j] else -1.0
    return miu
def calc_loss(x, theta, data_set, bool_set, reg=None):
    """Return the regularized squared-error cost over the observed entries.

    For every entry ``(i, j)`` flagged in ``bool_set`` the cost adds

        0.5 * (x[i] . theta[:, j] - data_set[i][j]) ** 2
        + 0.5 * reg * (|x[i]|^2 + |theta[:, j]|^2)

    The ``0.5`` on the penalty makes this the objective whose gradient is
    ``err * theta + reg * x`` (and ``err * x + reg * theta``), i.e. exactly
    the direction ``new_x_and_theta`` steps along. Without it the value
    tracked during training is not the function being minimized.

    Args:
        x: Array of shape (rows, factors).
        theta: Array of shape (factors, columns).
        data_set: Rectangular list of rows with the target values.
        bool_set: Same layout as ``data_set``; truthy marks an observed entry.
        reg: Regularization coefficient. Defaults to the module-level
            ``beta`` when omitted.

    Returns:
        The cost as a float; ``0.0`` for an empty ``data_set``.
    """
    if reg is None:
        reg = beta
    if not data_set:
        return 0.0

    x = np.asarray(x, dtype=float)
    theta = np.asarray(theta, dtype=float)
    mask = np.asarray(bool_set, dtype=bool)

    # Residuals of unobserved entries are zeroed so they contribute nothing.
    residual = np.where(mask, np.dot(x, theta) - np.asarray(data_set, dtype=float), 0.0)

    # The penalty is charged once per observed entry, so a row of x (column of
    # theta) is weighted by how many observed entries it takes part in.
    x_sq = np.sum(x * x, axis=1)[:, np.newaxis]
    theta_sq = np.sum(theta * theta, axis=0)[np.newaxis, :]
    penalty = np.sum(mask * (x_sq + theta_sq))

    return 0.5 * float(np.sum(residual * residual)) + 0.5 * reg * float(penalty)
def eval(x, theta, data_set, bool_set):
    """Return the unregularized squared-error cost over the observed entries.

    Computes ``0.5 * sum((x . theta - data_set) ** 2)`` restricted to the
    entries flagged in ``bool_set``.

    NOTE: the name shadows the built-in ``eval``; it is kept unchanged so
    existing callers keep working.

    Args:
        x: Array of shape (rows, factors).
        theta: Array of shape (factors, columns).
        data_set: Rectangular list of rows with the target values.
        bool_set: Same layout as ``data_set``; truthy marks an observed entry.

    Returns:
        The cost as a float; ``0.0`` for an empty ``data_set``.
    """
    if not data_set:
        return 0.0

    prediction = np.dot(np.asarray(x, dtype=float), np.asarray(theta, dtype=float))
    residual = prediction - np.asarray(data_set, dtype=float)
    # Unobserved entries hold a placeholder value, so mask them out.
    residual = np.where(np.asarray(bool_set, dtype=bool), residual, 0.0)
    return 0.5 * float(np.sum(residual * residual))
def new_x_and_theta(x, theta, data_set, bool_set, learning_rate=None, reg=None):
    """Apply one simultaneous gradient-descent step to ``x`` and ``theta`` in place.

    With ``err[i, j] = x[i] . theta[:, j] - data_set[i][j]`` for observed
    entries (and 0 elsewhere), the step directions are

        x_partial[i, k]     = sum_j (err[i, j] * theta[k, j] + reg * x[i, k])
        theta_partial[k, j] = sum_i (err[i, j] * x[i, k] + reg * theta[k, j])

    where both sums run over observed entries only. Both directions are
    computed from the current values before either array is modified.

    Args:
        x: NumPy array of shape (rows, factors); updated in place.
        theta: NumPy array of shape (factors, columns); updated in place.
        data_set: Rectangular list of rows with the target values.
        bool_set: Same layout as ``data_set``; truthy marks an observed entry.
        learning_rate: Step size. Defaults to the module-level ``alpha``.
        reg: Regularization coefficient. Defaults to the module-level ``beta``.

    Returns:
        None. An empty ``data_set`` leaves ``x`` and ``theta`` untouched.
    """
    if learning_rate is None:
        learning_rate = alpha
    if reg is None:
        reg = beta
    if not data_set:
        return

    mask = np.asarray(bool_set, dtype=bool)
    residual = np.where(mask, np.dot(x, theta) - np.asarray(data_set, dtype=float), 0.0)

    # The regularization term is added once per observed entry, so it scales
    # with the number of observed entries in the row (for x) or column (theta).
    row_counts = mask.sum(axis=1)[:, np.newaxis]
    col_counts = mask.sum(axis=0)[np.newaxis, :]

    x_partial = np.dot(residual, theta.T) + reg * row_counts * x
    theta_partial = np.dot(x.T, residual) + reg * col_counts * theta

    # Slice assignment keeps the caller's arrays and mutates them in place.
    x[...] = x - learning_rate * x_partial
    theta[...] = theta - learning_rate * theta_partial
if __name__ == '__main__':
    # Demo ratings matrix; -1 marks an entry without a rating.
    # NOTE(review): judging by the names nu/nm, rows are presumably users and
    # columns movies - confirm.
    data_set = [
        [5, 5, 0, 0],
        [5, -1, -1, 0],
        [-1, 4, 0, -1],
        [0, 0, 5, 4],
        [0, 0, 5, -1],
        [-1, -1, -1, -1],
    ]
    # Observation mask derived from the data itself, so it always has the
    # same number of rows as data_set.
    bool_set = [[value != -1 for value in row] for row in data_set]
    nu = len(data_set)
    nm = len(data_set[0])

    x = np.random.random(size=(nu, R))
    theta = np.random.random(size=(R, nm))

    # Normalization rewrites data_set in place and returns the column means.
    miu = norm_dataset(data_set, bool_set)
    print(data_set)
    print(miu)

    # Keep the best parameters seen so far. Seeding with the starting point
    # avoids an arbitrary loss threshold, and evaluating after each step
    # means the very last update is considered as well.
    min_loss = calc_loss(x, theta, data_set, bool_set)
    final_x = x.copy()
    final_theta = theta.copy()
    for _ in range(1000):
        new_x_and_theta(x, theta, data_set, bool_set)
        loss = calc_loss(x, theta, data_set, bool_set)
        if loss < min_loss:
            min_loss = loss
            final_x = x.copy()
            final_theta = theta.copy()

    print(final_x)
    print(final_theta)
    print(eval(final_x, final_theta, data_set, bool_set))

    # Predictions on the original scale: factor product plus the column means.
    result = np.dot(final_x, final_theta) + np.asarray(miu)

    # Undo the normalization on the observed entries so the printed data can
    # be compared with the predictions.
    for row, flags in zip(data_set, bool_set):
        for j, observed in enumerate(flags):
            if observed:
                row[j] = row[j] + miu[j]
    print(result)
    print(data_set)