A course assignment asked us to implement a multilayer perceptron by hand. With some free time over the summer break, I'm writing up a blog post about it.
The multilayer perceptron is built from an input layer, one or more hidden layers, and an output layer.
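Each fully connected layer below computes out = f(W · [x; 1]): the bias is folded into the weight matrix as an extra column, and f is the layer's activation (sigmoid, ReLU, or identity). A minimal single-layer sketch of that computation, with illustrative sizes only:

import numpy as np

# one layer with 3 inputs and 2 outputs on a batch of 4 samples (illustrative sizes)
x = np.random.randn(3, 4)                      # input, shape (in, batch_size)
W = np.random.randn(2, 3 + 1)                  # weights with the bias folded in, shape (out, in+1)
x_aug = np.row_stack((x, np.ones((1, 4))))     # append a row of ones for the bias
net = W.dot(x_aug)                             # pre-activation, shape (out, batch_size)
out = 1 / (1 + np.exp(-net))                   # sigmoid activation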
Linear layer framework and network model:
import numpy as np


class Module:
    def __init__(self, lr=None):
        # three fully connected layers: 7 -> 15 -> 5 -> 1
        self.linear1 = Linear(7, 15, batch_size=20)
        self.linear1.first_layer = True
        self.linear1.add_Motivation('sigmoid')
        self.linear2 = Linear(15, 5, batch_size=20)
        self.linear2.add_Motivation('relu')
        self.linear3 = Linear(5, 1, batch_size=20)
        self.linear3.last_layer = True
        self.lr = 1e-3
        if lr is not None:
            self.lr = lr

    def forward(self, x):
        x = x.copy()
        x = self.linear1.forward(x)
        x = self.linear2.forward(x)
        x = self.linear3.forward(x)
        return x

    def compute_gradient(self, x):
        # x is the label batch; gradients flow from the last layer back to the first
        x = x.copy()
        self.linear3.compute_local_gradient(x)
        x = self.linear3.to_last.copy()
        self.linear2.compute_local_gradient(x)
        x = self.linear2.to_last.copy()
        self.linear1.compute_local_gradient(x)

    def backward(self):
        self.linear3.backward(self.lr)
        self.linear2.backward(self.lr)
        self.linear1.backward(self.lr)
class Linear:
    def __init__(self, in_feature, out_feature, batch_size=None, bias=None):
        self.in_feature = in_feature
        self.out_feature = out_feature
        self.Oin = None    # layer input with a bias row appended, shape (in+1, batch_size)
        self.Oout = None   # activated output, shape (out, batch_size)
        self.Onet = None   # pre-activation, shape (out, batch_size)
        self.bias = True
        self.batch_size = 1
        if batch_size is not None:
            self.batch_size = batch_size
        if bias is not None:
            self.bias = bias
        # the bias is folded into the weight matrix as an extra column
        self.Weights = np.random.randn(self.out_feature, self.in_feature + 1)  # (out, in+1)
        self.local_gradient = None
        self.motivation = None    # activation function name
        self.last_layer = False
        self.to_last = None       # gradient handed back to the previous layer
        self.first_layer = None

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def relu(self, x):
        return np.maximum(0, x)
    def forward(self, x):  # x has shape (in, batch_size)
        self.Oin = x.copy()
        ones = np.ones((1, self.batch_size))
        self.Oin = np.row_stack((self.Oin, ones))  # (in+1, batch_size)
        self.Onet = self.Weights.dot(self.Oin)     # (out, batch_size)
        if self.motivation is None:
            self.Oout = self.Onet.copy()
        elif self.motivation == 'sigmoid':
            self.Oout = self.sigmoid(self.Onet)
        elif self.motivation == 'relu':
            self.Oout = self.relu(self.Onet)
        return self.Oout

    def add_Motivation(self, TT):
        if TT == 'sigmoid':
            self.motivation = 'sigmoid'
        elif TT == 'relu':
            self.motivation = 'relu'
    def compute_local_gradient(self, x):
        if self.last_layer:
            # x is the label batch, shape (1, batch_size); dL/dOout of the squared error is (Oout - y)
            tp = -(x - self.Oout)
            self.local_gradient = tp * self.motivation_back
        else:
            # x is the gradient passed back from the next layer, shape (out, batch_size)
            self.local_gradient = self.motivation_back * x
        # gradient with respect to this layer's input (bias row excluded), handed to the previous layer
        to_l = []
        for i in range(self.batch_size):
            a = []
            for j in range(self.in_feature):
                a.append(self.Weights[:, j] * self.local_gradient[:, i])
            to_l.append(np.sum(a, axis=1))
        to_l = np.array(to_l).T
        self.to_last = to_l.copy()  # (in, batch_size)
    def backward(self, lr):
        # average the weight gradient over the batch and take one gradient-descent step
        g = self.local_gradient.dot(self.Oin.T / self.batch_size)
        self.Weights = self.Weights - g * lr

    @property
    def motivation_back(self):
        # derivative of the activation, evaluated at Onet
        if self.motivation == 'sigmoid':
            s = self.sigmoid(self.Onet)
            return s * (1 - s)
        elif self.motivation == 'relu':
            tp = self.Onet.copy()
            tp[tp > 0] = 1
            tp[tp <= 0] = 0
            return tp
        else:
            return np.ones(np.shape(self.Onet))
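Before training on the real dataset, a minimal shape sanity check of the Module above on random data (the input must be shaped (7, 20) because the batch size is hard-coded to 20):

import numpy as np
from Modul import Module

module = Module(lr=1e-3)
x = np.random.randn(7, 20)     # 7 features, batch of 20
y = np.random.randn(1, 20)     # random targets, just to exercise the backward pass
y_pre = module.forward(x)      # predictions, shape (1, 20)
module.compute_gradient(y)     # backpropagate the squared-error gradient
module.backward()              # one gradient-descent step on every layer
print(np.shape(y_pre))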
Training code:
from Modul import Module
import numpy as np
import pandas as pd
import matplotlib.pyplot as plot


def get_loss(y_pre, label):
    # mean squared error over the batch
    this_loss = (y_pre - label) ** 2
    this_loss = np.sum(this_loss) / y_pre.size
    return this_loss


def converge_state(loss_all, i):
    # relative change between two consecutive loss values
    s = np.abs(loss_all[i - 1] - loss_all[i]) / np.abs(loss_all[i])
    return s
if __name__ == '__main__':
    use_cols = ['quality_of_education', 'alumni_employment', 'quality_of_faculty', 'publications', 'influence',
                'citations', 'patents', 'score']
    datas = pd.read_csv('cwurData.csv', usecols=use_cols)
    data = []
    x = []
    y = None
    iteration = 1200
    lr = 1e-4
    stop_lr = 1e-6
    # collect the seven feature columns first and append the score column last
    for i in datas.columns:
        if i != 'score':
            data.append(datas[i])
    data.append(list(datas['score'].values))
    data = np.array(data)
    data = data.T
    np.random.shuffle(data)   # shuffle the samples (rows)
    data1 = data.T
    print(np.shape(data1))
    x = data1[0:7, :]         # features, shape (7, n_samples)
    y = data1[7]              # labels (score)
    print(y)
    all_loss = []
    module = Module(lr)
    for i in range(100):      # 100 mini-batches of 20 samples each
        train_x = x[:, i * 20:(i + 1) * 20]
        train_y = y[i * 20:(i + 1) * 20]
        train_x = np.array(train_x)
        print("epoch:", i)
        for j in range(iteration):
            y_pre = module.forward(train_x)
            loss = get_loss(y_pre, train_y)
            all_loss.append(loss)
            module.compute_gradient(train_y)
            module.backward()
            if j == 0 or j == iteration - 1:
                print("iteration %d, loss = %3.3f" % (j, loss))
            if len(all_loss) > 1:
                # stop early once the relative loss change falls below the threshold
                s = converge_state(all_loss, len(all_loss) - 1)
                if s < stop_lr:
                    print("iteration %d, loss = %3.3f" % (j, loss))
                    break
    # evaluate on 200 held-out samples
    test_x = x[:, 2000:2200]
    test_y = y[2000:2200]
    test_x = np.array(test_x)
    Y_pre = []
    for i in range(int(len(test_x[0]) / 20)):
        x_now = test_x[:, i * 20:(i + 1) * 20]
        Y = module.forward(x_now)       # shape (1, 20)
        Y_pre.extend(Y.flatten())
    Y_pre = np.array(Y_pre)
    f = plot.figure()
    ax1 = plot.subplot2grid((1, 2), (0, 0))
    ax2 = plot.subplot2grid((1, 2), (0, 1))
    X_plot = np.linspace(0, 200, 200).reshape((1, len(test_x[0])))
    ax1.scatter(X_plot, test_y, s=1, color='r')   # ground truth
    ax1.scatter(X_plot, Y_pre, s=4, color='b')    # predictions
    loss_x = np.linspace(0, len(all_loss), len(all_loss))
    ax2.plot(loss_x, all_loss)
    ax2.set_title("LOSS")
    ax2.set_xlabel("iteration")
    ax2.set_ylabel("loss")
    plot.show()
The training data is a university ranking dataset (cwurData.csv): the seven indicator columns are used as features and the overall score as the target, so the task amounts to multivariate regression of a university's score.
Click here to get the dataset.
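Besides the scatter plot, the quality of the fit on the 200 held-out samples can also be quantified with the same get_loss helper. A minimal sketch, reusing module, test_x, test_y and get_loss from the training script above:

# test MSE over the held-out samples, computed batch by batch
test_losses = []
for i in range(int(len(test_x[0]) / 20)):
    x_now = test_x[:, i * 20:(i + 1) * 20]
    y_now = test_y[i * 20:(i + 1) * 20]
    test_losses.append(get_loss(module.forward(x_now), y_now))
print("test MSE:", np.mean(test_losses))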