神经网络--3.全连接加偏置b-加激活函数

最新推荐文章于 2022-11-04 17:15:19 发布

huanhuan59

最新推荐文章于 2022-11-04 17:15:19 发布

阅读量894

点赞数 1

分类专栏：神经网络全连接

本文链接：https://blog.csdn.net/huanhuan59/article/details/104484095

版权

神经网络全连接专栏收录该内容

4 篇文章 0 订阅

订阅专栏

1. 代码：加上偏置 b，并加上激活函数 ReLU 和 softmax 函数

以 iris 的三种花为例：其中 x1、x2、x3、x4 分别是花萼长、花萼宽、花瓣长、花瓣宽（一共150个数据，其中前50个数据是第一种品种，中间50个数据是第二种品种，最后50个数据是第三种品种）。分别取每个品种的前35个数据（共105个）混合打乱作为 train，输入网络更新求得 w 和 b，然后用各品种的最后15个数据（共45个）来测试正确率。此程序正确率偏低，有待提高优化，仅供参考。

代码：

# -*- coding: UTF-8 -*-
from math import exp
import numpy as np
import scipy
import pandas
import matplotlib as plt
import sklearn
import sklearn.datasets
import math
import random
from math import exp

# The data set has 3 classes (virginica, versicolor and setosa), 50 samples each.
# Every sample is a 4-dimensional feature vector:
# ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
iris = sklearn.datasets.load_iris()
iris_key = iris.keys()
print(iris_key)
iris_data = iris['data']
# Turn the class ids into a column so they can be stacked next to the features.
iris_target = iris['target'].reshape(-1, 1)
iris_data_target = np.hstack((iris_data, iris_target))  # columns 0-3: features, column 4: class id

# train: rows 0-34, 50-84 and 100-134, i.e. 35 rows taken from each block of 50
iris_data_target_0 = iris_data_target[0:35]
iris_data_target_1 = iris_data_target[50:85]
iris_data_target_2 = iris_data_target[100:135]
iris_data_target_sum = np.vstack((iris_data_target_0, iris_data_target_1, iris_data_target_2))

# Shuffle the rows so the three blocks are interleaved before per-sample updates.
iris_data_target_shuffle = np.random.permutation(iris_data_target_sum)
iris_data = iris_data_target_shuffle[:, 0:4]
iris_target = iris_data_target_shuffle[:, 4]
iris_target_names = iris['target_names']
iris_feature_names = iris['feature_names']

alpha = 0.001  # step size applied to every gradient in the training loop

# Weights of the three fully connected layers (4 -> 5 -> 4 -> 3 units).
# `np.float` was removed in NumPy 1.24; the builtin `float` selects the same
# float64 dtype and works on every NumPy version.
# NOTE(review): every weight starts at the same value (1), so the units of a
# hidden layer start out identical; random initialisation may train better --
# confirm before changing, since it alters the results.
W0 = np.ones((5, 4), dtype=float)
W1 = np.ones((4, 5), dtype=float)
W2 = np.ones((3, 4), dtype=float)
S = np.zeros((3, 1), dtype=float)        # softmax output buffer
dS_dZ3 = np.zeros((3, 1), dtype=float)   # softmax derivative buffer

# Initialise the biases b
b0 = np.ones((5, 1), dtype=float)
b1 = np.ones((4, 1), dtype=float)
b2 = np.ones((3, 1), dtype=float)
data_shape = np.shape(iris_data)
data_len = data_shape[0]  # 105 training rows (3 x 35)
feature_len = data_shape[1]  # 4

# Training: 100 passes over the shuffled training rows, updating the
# parameters after every single sample (plain per-sample gradient descent).
for k in range(100):
    # Visit every training row; the previous hard-coded range(100) skipped
    # the last rows of the 105-row training set.
    for i in range(data_len):
        # ---------------- forward ----------------
        # Layer 0: input -> first hidden layer
        feature_vector = iris_data[i, :].reshape((4, 1))
        Z_1 = np.dot(W0, feature_vector) + b0   # [5,4]*[4,1] + [5,1] ----> [5,1]
        a_1 = np.maximum(Z_1, 0)                # ReLU activation

        # Layer 1: first hidden -> second hidden layer
        Z_2 = np.dot(W1, a_1) + b1              # [4,5]*[5,1] ----> [4,1]
        a_2 = np.maximum(Z_2, 0).reshape((4, 1))  # ReLU activation

        # Layer 2: second hidden layer -> class scores
        Z_3 = (np.dot(W2, a_2) + b2).reshape((3, 1))

        # Layer 3: softmax. Subtracting the largest score leaves the result
        # unchanged mathematically but keeps exp() from overflowing.
        exp_Z3 = np.exp(Z_3 - np.max(Z_3))
        S = exp_Z3 / np.sum(exp_Z3)             # [3,1]

        # One-hot encoding of the true class (class ids are stored as floats).
        target = int(iris_target[i])
        label = np.eye(3, dtype=int)[target]

        # Cross-entropy loss of this sample; kept for inspection only.
        Loss = -np.log(S[target, 0])
        # print("Loss_Origin:", Loss)

        # ---------------- backward ----------------
        # Softmax + cross-entropy: dL/dZ3 = S - one_hot. This is the product
        # (-1 / S[target]) * dS[target]/dZ3 written out, without the division
        # that blows up when S[target] underflows to 0.
        dL_dZ3 = S - label.reshape((3, 1))      # [3,1]

        # Output layer parameters
        dL_dW2 = np.dot(dL_dZ3, a_2.reshape(1, 4))   # [3,1]*[1,4] ----> [3,4]
        dL_db2 = dL_dZ3

        # Second hidden layer. The chain rule needs the transpose of W2;
        # reshape(4, 3) only re-flows the elements and is not a transpose.
        dL_da2 = np.dot(W2.T, dL_dZ3)           # [4,3]*[3,1] ----> [4,1]
        da2_dZ2 = 1 * (a_2 > 0)                 # ReLU derivative: 1 where the unit is active
        dL_dZ2 = dL_da2 * da2_dZ2               # (4,1)*(4,1) ----> (4,1)
        dL_dW1 = np.dot(dL_dZ2, a_1.reshape(1, 5))   # [4,1]*[1,5] ----> [4,5]
        dL_db1 = dL_dZ2

        # First hidden layer (same transpose requirement for W1).
        dL_da1 = np.dot(W1.T, dL_dZ2)           # [5,4]*[4,1] ----> [5,1]
        da1_dZ1 = 1 * (a_1 > 0)                 # ReLU derivative
        dL_dZ1 = dL_da1 * da1_dZ1               # (5,1)*(5,1) ----> (5,1)
        dL_dW0 = np.dot(dL_dZ1, feature_vector.reshape((1, 4)))   # [5,1]*[1,4] ----> [5,4]
        dL_db0 = dL_dZ1

        # ---------------- update ----------------
        # All gradients above were computed from the pre-update parameters.
        W0 = W0 - alpha * dL_dW0
        W1 = W1 - alpha * dL_dW1
        W2 = W2 - alpha * dL_dW2
        b0 = b0 - alpha * dL_db0
        b1 = b1 - alpha * dL_db1
        b2 = b2 - alpha * dL_db2



#############################################
# test: rows 35-49, 85-99 and 135-149, i.e. the rows the training split left out
iris_data_target_test0 = iris_data_target[35:50]
iris_data_target_test1 = iris_data_target[85:100]
iris_data_target_test2 = iris_data_target[135:150]
iris_data_target_test_sum = np.vstack((iris_data_target_test0, iris_data_target_test1, iris_data_target_test2))
#print("iris_data_target_test_sum:", iris_data_target_test_sum)

n = 0           # number of correct predictions
false_nums = 0  # number of wrong predictions
sample_test1 = iris_data_target_test_sum[:, 0:4]  # (45, 4)
sample_test1_target = iris_data_target_test_sum[:, 4]  # (45,)
test_len = sample_test1.shape[0]  # derived from the data instead of a hard-coded 45
for i in range(test_len):
    # Layer 0
    Z_1 = np.dot(W0, sample_test1[i].reshape((4, 1))) + b0  # (5,4)*(4,1)+(5,1) ---> (5,1)
    a_1 = np.maximum(Z_1, 0)  # ReLU activation

    # Layer 1
    Z_2 = np.dot(W1, a_1) + b1  # [4,5]*[5,1] ----> [4,1]
    a_2 = np.maximum(Z_2, 0).reshape((4, 1))  # ReLU activation

    # Layer 2
    Z_3 = (np.dot(W2, a_2) + b2).reshape((3, 1))

    # Layer 3: softmax, shifted by the largest score so exp() cannot overflow
    # (the shift does not change the resulting probabilities).
    exp_Z3 = np.exp(Z_3 - np.max(Z_3))
    S = exp_Z3 / np.sum(exp_Z3)
    s0, s1, s2 = S.ravel()

    # Pick the strictly largest probability; any tie falls through to class 2.
    if s0 > s1 and s0 > s2:
        iris_target_test = 0
    elif s1 > s0 and s1 > s2:
        iris_target_test = 1
    else:
        iris_target_test = 2

    if sample_test1_target[i] == iris_target_test:
        n = n + 1
        print("i=", i, "n=", n, "iris_target_test=", iris_target_test, "sample_test1_target=", sample_test1_target[i])
    else:
        false_nums = false_nums + 1
        print("i=", i, "false_nums=", false_nums)


# Fraction of test rows that were classified correctly.
right = n / test_len
print("right", right)

# print("sample_test: ", sample_test)

huanhuan59

关注

1
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
神经网络--3.全连接加偏置b-加激活函数

1.代码：加上偏置加上激活函数Relu，softmax函数以iris的三种类型花为例：其中x1,x2,x3,x4分别是花萼长，花萼宽，花瓣长，花瓣宽，（一共150个数据，其中前50数据是第一种品种，中间50个数据是第二种花品种，最后50个数据是第三种花品种），分别拿每个品种的35个数据混合打乱作为train，输入进去更新求得w和b，然后用各自的最后15个数据用来测试正确率。此程序正确率偏低，程...
复制链接

扫一扫