A course assignment asked us to implement a multilayer perceptron by hand. With some free time over the summer break, I'm writing up a blog post about it.
The multilayer perceptron is built from an input layer, one or more hidden layers, and an output layer.
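Each fully connected layer below computes out = f(W · [x; 1]): the bias is folded into the weight matrix as an extra column, and f is the layer's activation (sigmoid, ReLU, or identity). A minimal single-layer sketch of that computation, with illustrative sizes only:

import numpy as np

# one layer with 3 inputs and 2 outputs on a batch of 4 samples (illustrative sizes)
x = np.random.randn(3, 4)                      # input, shape (in, batch_size)
W = np.random.randn(2, 3 + 1)                  # weights with the bias folded in, shape (out, in+1)
x_aug = np.row_stack((x, np.ones((1, 4))))     # append a row of ones for the bias
net = W.dot(x_aug)                             # pre-activation, shape (out, batch_size)
out = 1 / (1 + np.exp(-net))                   # sigmoid activation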
Linear layer framework and network model:
import numpy as np


class Module:
    def __init__(self, lr=None):
        # three fully connected layers: 7 -> 15 -> 5 -> 1
        self.linear1 = Linear(7, 15, batch_size=20)
        self.linear1.first_layer = True
        self.linear1.add_Motivation('sigmoid')
        self.linear2 = Linear(15, 5, batch_size=20)
        self.linear2.add_Motivation('relu')
        self.linear3 = Linear(5, 1, batch_size=20)
        self.linear3.last_layer = True
        self.lr = 1e-3
        if lr is not None:
            self.lr = lr

    def forward(self, x):
        x = x.copy()
        x = self.linear1.forward(x)
        x = self.linear2.forward(x)
        x = self.linear3.forward(x)
        return x

    def compute_gradient(self, x):
        # x is the label batch; gradients flow from the last layer back to the first
        x = x.copy()
        self.linear3.compute_local_gradient(x)
        x = self.linear3.to_last.copy()
        self.linear2.compute_local_gradient(x)
        x = self.linear2.to_last.copy()
        self.linear1.compute_local_gradient(x)

    def backward(self):
        self.linear3.backward(self.lr)
        self.linear2.backward(self.lr)
        self.linear1.backward(self.lr)
class Linear:
    def __init__(self, in_feature, out_feature, batch_size=None, bias=None):
        self.in_feature = in_feature
        self.out_feature = out_feature
        self.Oin = None    # layer input with a bias row appended, shape (in+1, batch_size)
        self.Oout = None   # activated output, shape (out, batch_size)
        self.Onet = None   # pre-activation, shape (out, batch_size)
        self.bias = True
        self.batch_size = 1
        if batch_size is not None:
            self.batch_size = batch_size
        if bias is not None:
            self.bias = bias
        # the bias is folded into the weight matrix as an extra column
        self.Weights = np.random.randn(self.out_feature, self.in_feature + 1)  # (out, in+1)
        self.local_gradient = None
        self.motivation = None    # activation function name
        self.last_layer = False
        self.to_last = None       # gradient handed back to the previous layer
        self.first_layer = None

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def relu(self, x):
        return np.maximum(0, x)
    def forward(self, x):  # x has shape (in, batch_size)
        self.Oin = x.copy()
        ones = np.ones((1, self.batch_size))
        self.Oin = np.row_stack((self.Oin, ones))  # (in+1, batch_size)
        self.Onet = self.Weights.dot(self.Oin)     # (out, batch_size)
        if self.motivation is None:
            self.Oout = self.Onet.copy()
        elif self.motivation == 'sigmoid':
            self.Oout = self.sigmoid(self.Onet)
        elif self.motivation == 'relu':
            self.Oout = self.relu(self.Onet)
        return self.Oout

    def add_Motivation(self, TT):
        if TT == 'sigmoid':
            self.motivation = 'sigmoid'
        elif TT == 'relu':
            self.motivation = 'relu'
    def compute_local_gradient(self, x):
        if self.last_layer:
            # x is the label batch, shape (1, batch_size); dL/dOout of the squared error is (Oout - y)
            tp = -(x - self.Oout)
            self.local_gradient = tp * self.motivation_back
        else:
            # x is the gradient passed back from the next layer, shape (out, batch_size)
            self.local_gradient = self.motivation_back * x
        # gradient with respect to this layer's input (bias row excluded), handed to the previous layer
        to_l = []
        for i in range(self.batch_size):
            a = []
            for j in range(self.in_feature):
                a.append(self.Weights[:, j] * self.local_gradient[:, i])
            to_l.append(np.sum(a, axis=1))
        to_l = np.array(to_l).T
        self.to_last = to_l.copy()  # (in, batch_size)
    def backward(self, lr):
        # average the weight gradient over the batch and take one gradient-descent step
        g = self.local_gradient.dot(self.Oin.T / self.batch_size)
        self.Weights = self.Weights - g * lr

    @property
    def motivation_back(self):
        # derivative of the activation, evaluated at Onet
        if self.motivation == 'sigmoid':
            s = self.sigmoid(self.Onet)
            return s * (1 - s)
        elif self.motivation == 'relu':
            tp = self.Onet.copy()
            tp[tp > 0] = 1
            tp[tp <= 0] = 0
            return tp
        else:
            return np.ones(np.shape(self.Onet))
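Before training on the real dataset, a minimal shape sanity check of the Module above on random data (the input must be shaped (7, 20) because the batch size is hard-coded to 20):

import numpy as np
from Modul import Module

module = Module(lr=1e-3)
x = np.random.randn(7, 20)     # 7 features, batch of 20
y = np.random.randn(1, 20)     # random targets, just to exercise the backward pass
y_pre = module.forward(x)      # predictions, shape (1, 20)
module.compute_gradient(y)     # backpropagate the squared-error gradient
module.backward()              # one gradient-descent step on every layer
print(np.shape(y_pre))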
Training code:
from Modul import Module
import numpy as np
import pandas as pd
import matplotlib.pyplot as plot


def get_loss(y_pre, label):
    # mean squared error over the batch
    this_loss = (y_pre - label) ** 2
    this_loss = np.sum(this_loss) / y_pre.size
    return this_loss


def converge_state(loss_all, i):
    # relative change between two consecutive loss values
    s = np.abs(loss_all[i - 1] - loss_all[i]) / np.abs(loss_all[i])
    return s
if __name__ == '__main__':
    use_cols = ['quality_of_education', 'alumni_employment', 'quality_of_faculty', 'publications', 'influence',
                'citations', 'patents', 'score']
    datas = pd.read_csv('cwurData.csv', usecols=use_cols)
    data = []
    x = []
    y = None
    iteration = 1200
    lr = 1e-4
    stop_lr = 1e-6
    # collect the seven feature columns first and append the score column last
    for i in datas.columns:
        if i != 'score':
            data.append(datas[i])
    data.append(list(datas['score'].values))
    data = np.array(data)
    data = data.T
    np.random.shuffle(data)   # shuffle the samples (rows)
    data1 = data.T
    print(np.shape(data1))
    x = data1[0:7, :]         # features, shape (7, n_samples)
    y = data1[7]              # labels (score)
    print(y)
    all_loss = []
    module = Module(lr)
    for i in range(100):      # 100 mini-batches of 20 samples each
        train_x = x[:, i * 20:(i + 1) * 20]
        train_y = y[i * 20:(i + 1) * 20]
        train_x = np.array(train_x)
        print("epoch:", i)
        for j in range(iteration):
            y_pre = module.forward(train_x)
            loss = get_loss(y_pre, train_y)
            all_loss.append(loss)
            module.compute_gradient(train_y)
            module.backward()
            if j == 0 or j == iteration - 1:
                print("iteration %d, loss = %3.3f" % (j, loss))
            if len(all_loss) > 1:
                # stop early once the relative loss change falls below the threshold
                s = converge_state(all_loss, len(all_loss) - 1)
                if s < stop_lr:
                    print("iteration %d, loss = %3.3f" % (j, loss))
                    break
    # evaluate on 200 held-out samples
    test_x = x[:, 2000:2200]
    test_y = y[2000:2200]
    test_x = np.array(test_x)
    Y_pre = []
    for i in range(int(len(test_x[0]) / 20)):
        x_now = test_x[:, i * 20:(i + 1) * 20]
        Y = module.forward(x_now)       # shape (1, 20)
        Y_pre.extend(Y.flatten())
    Y_pre = np.array(Y_pre)
    f = plot.figure()
    ax1 = plot.subplot2grid((1, 2), (0, 0))
    ax2 = plot.subplot2grid((1, 2), (0, 1))
    X_plot = np.linspace(0, 200, 200).reshape((1, len(test_x[0])))
    ax1.scatter(X_plot, test_y, s=1, color='r')   # ground truth
    ax1.scatter(X_plot, Y_pre, s=4, color='b')    # predictions
    loss_x = np.linspace(0, len(all_loss), len(all_loss))
    ax2.plot(loss_x, all_loss)
    ax2.set_title("LOSS")
    ax2.set_xlabel("iteration")
    ax2.set_ylabel("loss")
    plot.show()
The training data is a university ranking dataset (cwurData.csv): the seven indicator columns are used as features and the overall score as the target, so the task amounts to multivariate regression of a university's score.
Click here to get the dataset.
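Besides the scatter plot, the quality of the fit on the 200 held-out samples can also be quantified with the same get_loss helper. A minimal sketch, reusing module, test_x, test_y and get_loss from the training script above:

# test MSE over the held-out samples, computed batch by batch
test_losses = []
for i in range(int(len(test_x[0]) / 20)):
    x_now = test_x[:, i * 20:(i + 1) * 20]
    y_now = test_y[i * 20:(i + 1) * 20]
    test_losses.append(get_loss(module.forward(x_now), y_now))
print("test MSE:", np.mean(test_losses))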