神经网络入门 第5章 实现多层神经网络BP算法














// Excerpt: state held by the network (the rest of the class follows in later listings).
public class NeuralNetwork {
// Number of neurons in each layer.
int[] shape;
// Number of layers; equals shape.length.
int layers;
// Weights indexed as [layer][neuron][input].
double[][][] weights;
// Biases indexed as [layer][neuron].
double[][] bias;
// Per-neuron pre-activation values z = w*x + b, indexed as [layer][neuron].
double[][] zs;
// xs[0] holds the network input; every later layer holds its output s(z).
double[][] xs;


    - shape数字表示每层网络神经元个数;

    - shape数组的长度就是layers,表示神经网络的层数; 

    - weights[ ][ ][ ]的三个维度分别表示 [层][神经元][对应输入];

    - bias[ ][ ]两个维度表示[层][神经元];

    - zs[ ][ ]数组用来存放每个神经元z=w*x+b的结果;

    - xs[ ][ ]数组用来存放输入x(第一层)和后面每层的输出s(z)




/**
 * Creates a fully connected network.
 *
 * @param shape number of neurons in each layer; {@code shape[0]} is the input layer
 */
public NeuralNetwork(int... shape) {
    this.shape = shape;
    layers = shape.length;
    weights = new double[layers][][];
    bias = new double[layers][];
    // First layer is the input layer: it computes nothing, so it gets empty arrays.
    weights[0] = new double[0][0];
    bias[0] = new double[0];
    zs = new double[layers][];
    xs = new double[layers][];
    for (int i = 1; i < layers; i++) {
        // One weight per (neuron in this layer, neuron in the previous layer).
        weights[i] = new double[this.shape[i]][this.shape[i - 1]];
        bias[i] = new double[this.shape[i]];
    }

    // Start from random values so that neurons differ from each other
    // and can diverge during training.
    fillRandom(weights);
    fillRandom(bias);
}


    因为第一层是输入层,不需要计算,所以我们把它的weight和bias设置为空数组。最后我们把w和b的初始值设置为随机数。这是因为如果一开始所有的w和b都相同,所有神经元的行为就完全一样,那就很难在训练中产生差别了,毕竟我们需要每个神经元去逼近一个不同的函数。




double[] f(double[] in) {
zs[0] = xs[0] = in;
for (int i = 1; i < layers; i++) {
zs[i] = add(wx(xs[i - 1], weights[i]), bias[i]);
xs[i] = sigmoid(zs[i]);
return xs[layers - 1];

double sigmoid(double d) {
return 1.0 / (1.0 + exp(-d));

double[] sigmoid(double[] d) {
int length = d.length;
double[] v = new double[length];
for (int i = 0; i < length; i++) {
v[i] = sigmoid(d[i]);
return v;

double[] wx(double[] x, double[][] weight) {
int numberOfNeron = weight.length;
double[] wx = new double[numberOfNeron];
for (int i = 0; i < numberOfNeron; i++) {
wx[i] = dot(weight[i], x);//SUM(w*x)
return wx;







    1. 每层有多个神经元。我们把每一个神经元分别计算即可。输出层的每个输出都可以与正确答案比对得出偏差。

    2. 有多个层。最后一层我们很容易通过神经网络的输出和训练数据的结果相减来获取cost值,但是前面的层怎么获取cost呢?答案其实很简单,就像求w的导数一样,我们可以求最后一层的输入x的导数。前面我们讲过如何求函数 y=w*x + b 对w和b的导数。类似的,x的导数等于w。我们把这个x的导数乘以cost就是前一层的cost。这就是传说中的反向传播算法(BP, backpropagation)。在开始反向传播之前, 我们要调用f函数让整个网络计算一遍,以此获得最后一层的cost。这个过程叫做前向传播(Feed Forward)。


void train(double[] in, double[] expect, double rate) {
double[] y = f(in);
double[] cost = sub(expect, y);
double[][][] dw = new double[layers][][];
double[][] db = new double[layers][];
dw[0] = new double[0][0];
db[0] = new double[0];
for (int i = layers - 1; i > 0; i--) {
double[] sp = signmoidPrime(zs[i]);
cost = mul(cost, sp);
dw[i] = dw(xs[i - 1], cost);
db[i] = cost;
cost = dx(weights[i], cost);

weights = add(weights, mul(dw, rate));
bias = add(bias, mul(db, rate));



double[] dx(double[][] w, double[] c) {
int numberOfX = w[0].length;
double[] v = new double[numberOfX];
for (int i = 0; i < numberOfX; i++) {
for (int j = 0; j < c.length; j++) {
v[i] += w[j][i] * c[j];
return v;



double[][] dw(double[] x, double[] c) {
int numberOfNeuron = c.length;
int numberOfIn = x.length;
double[][] dw = new double[numberOfNeuron][numberOfIn];
for (int neuron = 0; neuron < numberOfNeuron; neuron++) {
for (int input = 0; input < numberOfIn; input++) {
dw[neuron][input] = c[neuron] * x[input];
return dw;








package com.luoxq.ann;

import static java.lang.Math.exp;

import java.util.Random;

/**
 * A small fully connected feed-forward neural network with sigmoid activations,
 * trained one sample at a time by backpropagation.
 *
 * <p>Instances keep the intermediate values of the latest forward pass in
 * {@link #zs} and {@link #xs}; {@link #train} relies on them.
 */
public class NeuralNetwork {
    /** Number of neurons in each layer; index 0 is the input layer. */
    int[] shape;
    /** Number of layers; equals {@code shape.length}. */
    int layers;
    /** Weights indexed as [layer][neuron][input]; layer 0 is empty. */
    double[][][] weights;
    /** Biases indexed as [layer][neuron]; layer 0 is empty. */
    double[][] bias;
    /** Pre-activation values z = w*x + b of the latest forward pass. */
    double[][] zs;
    /** xs[0] is the latest input; xs[i] is the output s(z) of layer i. */
    double[][] xs;

    /** Source of the random initial weights and biases. */
    Random rand = new Random();

    /**
     * Creates a network and fills its weights and biases with random values.
     *
     * @param shape number of neurons in each layer; {@code shape[0]} is the input layer
     * @throws IllegalArgumentException if {@code shape} is null or empty
     */
    public NeuralNetwork(int... shape) {
        if (shape == null || shape.length == 0) {
            throw new IllegalArgumentException("shape must list at least one layer");
        }
        // Copy so that later changes to the caller's array cannot desynchronise the network.
        this.shape = shape.clone();
        layers = shape.length;
        weights = new double[layers][][];
        bias = new double[layers][];
        // First layer is the input layer: it computes nothing, so it gets empty arrays.
        weights[0] = new double[0][0];
        bias[0] = new double[0];
        zs = new double[layers][];
        xs = new double[layers][];
        for (int i = 1; i < layers; i++) {
            weights[i] = new double[this.shape[i]][this.shape[i - 1]];
            bias[i] = new double[this.shape[i]];
        }

        // Random start values make the neurons differ so training can separate them.
        fillRandom(weights);
        fillRandom(bias);
    }

    /** Overwrites every element with a Gaussian random value. */
    void fillRandom(double[] d) {
        for (int i = 0; i < d.length; i++) {
            d[i] = rand.nextGaussian();
        }
    }

    /** Overwrites every element of every row with a Gaussian random value. */
    void fillRandom(double[][] d) {
        for (int i = 0; i < d.length; i++) {
            fillRandom(d[i]);
        }
    }

    /** Overwrites every element of every matrix with a Gaussian random value. */
    void fillRandom(double[][][] d) {
        for (int i = 0; i < d.length; i++) {
            fillRandom(d[i]);
        }
    }

    /**
     * Feed-forward pass: evaluates the network for one input vector.
     *
     * @param in values for the input layer; stored by reference, not copied
     * @return the activations of the last layer
     * @throws IllegalArgumentException if {@code in} does not match the input layer size
     */
    double[] f(double[] in) {
        if (in.length != shape[0]) {
            throw new IllegalArgumentException(
                    "expected " + shape[0] + " inputs but got " + in.length);
        }
        // The input layer has no computation; its z and its output are the input itself.
        zs[0] = xs[0] = in;
        for (int i = 1; i < layers; i++) {
            zs[i] = add(wx(xs[i - 1], weights[i]), bias[i]); // z = w*x + b
            xs[i] = sigmoid(zs[i]);                          // output = s(z)
        }
        return xs[layers - 1];
    }

    /** Logistic sigmoid: 1 / (1 + e^-d). */
    double sigmoid(double d) {
        return 1.0 / (1.0 + exp(-d));
    }

    /** Applies the sigmoid to every element and returns the results in a new array. */
    double[] sigmoid(double[] d) {
        int length = d.length;
        double[] v = new double[length];
        for (int i = 0; i < length; i++) {
            v[i] = sigmoid(d[i]);
        }
        return v;
    }

    /**
     * Computes the weighted input sum of every neuron in one layer.
     *
     * @param x      outputs of the previous layer
     * @param weight weights of this layer, indexed as [neuron][input]
     * @return one value SUM(w*x) per neuron
     */
    double[] wx(double[] x, double[][] weight) {
        int numberOfNeuron = weight.length;
        double[] wx = new double[numberOfNeuron];
        for (int i = 0; i < numberOfNeuron; i++) {
            wx[i] = dot(weight[i], x); // SUM(w*x)
        }
        return wx;
    }

    /**
     * Trains the network on one sample using backpropagation.
     *
     * @param in     input vector
     * @param expect expected output for {@code in}
     * @param rate   learning rate applied to the computed adjustments
     * @throws IllegalArgumentException if {@code in} or {@code expect} has the wrong length
     */
    void train(double[] in, double[] expect, double rate) {
        // Feed forward first so that zs and xs hold the values for this sample.
        double[] y = f(in);
        if (expect.length != y.length) {
            throw new IllegalArgumentException(
                    "expected " + y.length + " target values but got " + expect.length);
        }
        double[] cost = sub(expect, y);
        double[][][] dw = new double[layers][][];
        double[][] db = new double[layers][];
        // The input layer has nothing to adjust.
        dw[0] = new double[0][0];
        db[0] = new double[0];
        // Walk from the output layer back towards the input layer.
        for (int i = layers - 1; i > 0; i--) {
            double[] sp = signmoidPrime(zs[i]);
            cost = mul(cost, sp);        // cost with respect to z of this layer
            dw[i] = dw(xs[i - 1], cost); // adjustment for each weight
            db[i] = cost;                // adjustment for each bias
            cost = dx(weights[i], cost); // cost handed to the previous layer
        }

        // cost is (expect - y), so the scaled adjustments are added.
        weights = add(weights, mul(dw, rate));
        bias = add(bias, mul(db, rate));
    }

    /**
     * Element-wise sigmoid derivative. The misspelled name is kept so existing
     * callers continue to compile.
     */
    double[] signmoidPrime(double d[]) {
        int length = d.length;
        double[] v = new double[length];
        for (int i = 0; i < length; i++) {
            v[i] = sigmoidPrime(d[i]);
        }
        return v;
    }

    /** Derivative of the sigmoid at {@code d}: s(d) * (1 - s(d)). */
    double sigmoidPrime(double d) {
        double s = sigmoid(d); // evaluate once instead of twice
        return s * (1 - s);
    }

    /** Element-wise {@code a - b}; the result has the length of {@code a}. */
    double[] sub(double[] a, double[] b) {
        int len = a.length;
        double[] v = new double[len];
        for (int i = 0; i < len; i++) {
            v[i] = a[i] - b[i];
        }
        return v;
    }

    /**
     * Derivative with respect to x: for each input i, sums w[j][i] * c[j]
     * over all neurons j.
     *
     * @param w weights of the layer, indexed as [neuron][input]
     * @param c cost of each neuron in the layer
     * @return cost for each input of the layer
     */
    double[] dx(double[][] w, double[] c) {
        int numberOfX = w[0].length;
        double[] v = new double[numberOfX];
        for (int i = 0; i < numberOfX; i++) {
            for (int j = 0; j < c.length; j++) {
                v[i] += w[j][i] * c[j];
            }
        }
        return v;
    }

    /**
     * Derivative with respect to w: c[neuron] * x[input] for every pair.
     *
     * @param x inputs of the layer
     * @param c cost of each neuron in the layer
     * @return adjustments indexed as [neuron][input]
     */
    double[][] dw(double[] x, double[] c) {
        int numberOfNeuron = c.length;
        int numberOfIn = x.length;
        double[][] dw = new double[numberOfNeuron][numberOfIn];
        for (int neuron = 0; neuron < numberOfNeuron; neuron++) {
            for (int input = 0; input < numberOfIn; input++) {
                dw[neuron][input] = c[neuron] * x[input];
            }
        }
        return dw;
    }

    /** Element-wise product {@code v[i] * x[i]}; the result has the length of {@code v}. */
    double[] mul(double[] v, double[] x) {
        double[] d = new double[v.length];
        for (int i = 0; i < v.length; i++) {
            d[i] = v[i] * x[i];
        }
        return d;
    }

    /** Multiplies every element of a 3-D array by the scalar {@code b}. */
    double[][][] mul(double[][][] a, double b) {
        double[][][] v = new double[a.length][][];
        for (int i = 0; i < a.length; i++) {
            v[i] = mul(a[i], b);
        }
        return v;
    }

    /** Multiplies every element of a 2-D array by the scalar {@code b}. */
    double[][] mul(double[][] a, double b) {
        double[][] v = new double[a.length][];
        for (int i = 0; i < a.length; i++) {
            v[i] = mul(a[i], b);
        }
        return v;
    }

    /** Multiplies every element of a vector by the scalar {@code b}. */
    double[] mul(double[] a, double b) {
        double[] d = new double[a.length];
        for (int i = 0; i < a.length; i++) {
            d[i] = a[i] * b;
        }
        return d;
    }

    /** Element-wise sum of two 3-D arrays of the same layout. */
    double[][][] add(double[][][] a, double[][][] b) {
        double[][][] v = new double[a.length][][];
        for (int i = 0; i < a.length; i++) {
            v[i] = add(a[i], b[i]);
        }
        return v;
    }

    /** Element-wise sum of two 2-D arrays of the same layout. */
    double[][] add(double[][] a, double[][] b) {
        int length = a.length;
        double[][] v = new double[length][];
        for (int i = 0; i < length; i++) {
            v[i] = add(a[i], b[i]);
        }
        return v;
    }

    /** Element-wise sum {@code a[i] + b[i]}; the result has the length of {@code a}. */
    double[] add(double[] a, double[] b) {
        int length = a.length;
        double[] v = new double[length];
        for (int i = 0; i < length; i++) {
            v[i] = a[i] + b[i];
        }
        return v;
    }

    /** Dot product over the first {@code w.length} elements of both vectors. */
    double dot(double[] w, double[] x) {
        double v = 0;
        for (int i = 0; i < w.length; i++) {
            v += w[i] * x[i];
        }
        return v;
    }
}












    下面我们通过运行我们的神经网络代码来看看上面的过程。我们编写一个七段数码管的程序来使用我们的神经网络类。同时给我们的神经网络类加一个 dump方法来输出其w和b。



package com.luoxq.ann;

import java.text.NumberFormat;

public class Test7 {

static double[][] x = {
{1, 1, 1, 0, 1, 1, 1},
{0, 0, 1, 0, 0, 1, 0},
{1, 0, 1, 1, 1, 0, 1},
{1, 0, 1, 1, 0, 1, 1},
{0, 1, 1, 1, 0, 1, 0},
{1, 1, 0, 1, 0, 1, 1},
{1, 1, 0, 1, 1, 1, 1},
{1, 0, 1, 0, 0, 1, 0},
{1, 1, 1, 1, 1, 1, 1},
{1, 1, 1, 1, 0, 1, 1}

static double[][] expect = {
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 1, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 1, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 1, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 1, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 1, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 1, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 1, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 1}

public static void main(String... args) {
NeuralNetwork nn = new NeuralNetwork(7, 10);

System.out.println("Testing: ");
double cost = getCost(nn);
System.out.println("Cost before train: " + cost);
System.out.println("Dump: \n" + nn.dump());
int epochs = 1000;
double rate = 10;

for (int epoch = 0; epoch < epochs; epoch++) {
for (int sample = 0; sample < x.length; sample++)
nn.train(x[sample], expect[sample], rate);
System.out.println("Testing: ");
cost = getCost(nn);
System.out.println("Cost after train: " + cost);
System.out.println("Dump: \n" + nn.dump());

static double getCost(NeuralNetwork nn) {
double cost = 0;
for (int i = 0; i < x.length; i++) {
double[] y = nn.f(x[i]);
System.out.println("output of " + i + ": " + toString(y));
double[] exp = expect[i];
cost += getCost(y, exp);
return cost;

static double getCost(double[] y, double[] exp) {
double cost = 0;
for (int j = 0; j < y.length; j++) {
double diff = Math.abs(y[j] - exp[j]);
cost += diff;
return cost;

static String toString(double[] d) {
NumberFormat nf = NumberFormat.getInstance();
StringBuilder sb = new StringBuilder();
for (double dd : d) {
return sb.toString();







output of 0: 0.03,0.15,0.84,0.27,0.97,0.84,0.06,0.82,0.33,0.88,

output of 1: 0.42,0.50,0.34,0.63,0.94,0.19,0.13,0.97,0.48,0.39,

output of 2: 0.51,0.69,0.88,0.09,0.97,0.68,0.39,0.82,0.38,0.92,

output of 3: 0.20,0.48,0.68,0.77,0.85,0.27,0.66,0.93,0.35,0.70,

output of 4: 0.16,0.73,0.53,0.83,0.80,0.23,0.08,0.92,0.11,0.84,

output of 5: 0.02,0.15,0.86,0.85,0.60,0.61,0.32,0.69,0.13,0.97,

output of 6: 0.05,0.29,0.94,0.57,0.94,0.87,0.23,0.59,0.10,0.99,

output of 7: 0.14,0.34,0.40,0.44,0.92,0.09,0.41,0.93,0.34,0.35,

output of 8: 0.08,0.51,0.88,0.48,0.96,0.77,0.18,0.78,0.14,0.95,

output of 9: 0.03,0.31,0.76,0.80,0.68,0.43,0.25,0.85,0.18,0.88,

Cost before train: 51.194078898042235



Neuron_0{weights: -1.47,-2.26,0.53,0.94,1.20,-0.22,-0.51,,bias:-0.6408124820167941}

Neuron_1{weights: -0.67,-0.75,0.93,1.74,0.85,-0.03,-1.15,,bias:-0.8842719020941838}

Neuron_2{weights: 0.28,0.42,-0.64,0.37,0.88,-0.34,0.76,,bias:0.31442792363574007}

Neuron_3{weights: -0.76,0.17,-0.37,0.91,-1.43,2.03,0.51,,bias:-1.1351774514157895}

Neuron_4{weights: -0.36,-0.98,0.34,-0.43,2.33,0.76,-0.27,,bias:1.711100711025143}

Neuron_5{weights: -0.82,0.73,-0.73,-0.44,1.47,-0.31,1.73,,bias:-0.4357646167318964}

Neuron_6{weights: 1.58,-1.74,-0.32,1.28,-0.45,0.64,-0.26,,bias:-2.265955204876941}

Neuron_7{weights: -0.86,-0.83,0.92,-0.21,-0.43,0.59,0.15,,bias:1.9655563173752257}

Neuron_8{weights: -0.56,-0.89,0.37,-1.10,-0.27,-0.42,1.12,,bias:-0.03699667305814675}

Neuron_9{weights: -0.16,1.19,-1.49,0.89,0.89,-0.66,0.55,,bias:1.6994223062815141}




output of 0: 0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.02,0.00,

output of 1: 0.00,0.99,0.00,0.01,0.01,0.00,0.00,0.01,0.00,0.00,

output of 2: 0.00,0.00,0.99,0.01,0.00,0.00,0.00,0.00,0.02,0.00,

output of 3: 0.00,0.00,0.00,0.99,0.00,0.00,0.00,0.01,0.00,0.01,

output of 4: 0.00,0.01,0.00,0.00,0.99,0.00,0.00,0.00,0.02,0.01,

output of 5: 0.00,0.00,0.00,0.01,0.00,0.99,0.01,0.00,0.00,0.01,

output of 6: 0.00,0.00,0.00,0.00,0.00,0.01,0.99,0.00,0.02,0.00,

output of 7: 0.00,0.01,0.00,0.01,0.00,0.00,0.00,0.99,0.00,0.00,

output of 8: 0.00,0.00,0.01,0.00,0.00,0.00,0.00,0.00,0.97,0.00,

output of 9: 0.00,0.00,0.00,0.01,0.00,0.01,0.00,0.00,0.00,0.98,

Cost after train: 0.4031423588581169



Neuron_0{weights: -2.03,-2.29,-1.44,0.38,0.66,-1.65,-1.06,,bias:-2.604111172261378}

Neuron_1{weights: -8.93,-4.98,2.19,-4.30,0.12,1.76,-3.36,,bias:0.3819736542834827}

Neuron_2{weights: -0.24,-3.06,0.33,1.09,3.72,-7.14,0.25,,bias:-0.2331994457380536}

Neuron_3{weights: -0.22,-9.42,-0.22,3.63,-7.17,1.86,5.71,,bias:-6.500570025353018}

Neuron_4{weights: -5.42,4.92,-0.58,4.43,-0.16,-1.89,-5.23,,bias:-2.255547223858806}

Neuron_5{weights: -0.52,1.54,-8.94,1.27,-8.94,-0.15,2.45,,bias:-0.30233681526958145}

Neuron_6{weights: 0.40,-1.18,-11.00,0.61,9.12,0.17,-0.97,,bias:-3.605913007762964}

Neuron_7{weights: 8.87,-3.42,-1.88,-4.26,-2.53,-1.71,-5.16,,bias:-0.8965156667897005}

Neuron_8{weights: -3.51,4.48,7.53,7.61,12.85,3.15,-1.70,,bias:-26.873685962813397}

Neuron_9{weights: 3.77,8.24,8.17,-1.33,-10.65,-9.58,4.56,,bias:-9.942385633642232}











