



隐含层中每一个值 $H_j$ 都是由输入层的数据进行线性运算与非线性运算的结合得到的。
前向传播(Forward Propagation): 在前向传播中,神经网络将输入数据通过一系列的权重和激活函数的计算,逐层向前传递,最终生成预测结果。具体步骤如下:

  • 将输入数据传递给第一层(输入层),每个输入与对应的神经元相连接。
  • 对于每一层,计算该层的加权和,加权和等于前一层的输出与权重的乘积之和,并加上偏置项。
  • 对加权和进行激活函数的计算,例如Sigmoid、ReLU等,得到该层的输出。本例使用 Sigmoid 函数。
  • 将该层的输出作为下一层的输入,继续进行加权和和激活函数的计算,直到达到输出层,输出最终的预测结果。


反向传播(Backward Propagation): 在反向传播过程中,通过计算损失函数的梯度,将误差从输出层传递回输入层,以便调整网络中的权重和偏置项。具体步骤如下:

  • 计算输出层的预测误差,即预测结果与真实标签之间的差异,本例使用均方误差作为损失函数。然后通过链式法则,将输出层的误差传递回前一层,计算前一层的误差。
  • 更新网络中的权重和偏置项,以最小化损失函数。这可以通过梯度下降等优化算法来实现,其中每个权重和偏置项的更新方向与其对应的梯度成反方向。
    更新公式为 $p \leftarrow p - \eta \frac{\partial E}{\partial p}$,其中 $p$ 是待更新变量,$\eta$ 是学习率。




package bp;

import weka.core.Instances;

import java.io.FileReader;
import java.util.Arrays;
import java.util.Random;
public abstract class GeneralAnn {

     * The whole dataset.
    Instances dataset;

     * Number of layers. It is counted according to nodes instead of edges.
    int numLayers;

     * The number of nodes for each layer, e.g., [3, 4, 6, 2] means that there
     * are 3 input nodes (conditional attributes), 2 hidden layers with 4 and 6
     * nodes, respectively, and 2 class values (binary classification).
    int[] layerNumNodes;

     * Momentum coefficient.
    public double mobp;

     * Learning rate.
    public double learningRate;

     * For random number generation.
    Random random = new Random();

     * The first constructor.
     *  @param paraFilename
     *            The arff filename.
     * @param paraLayerNumNodes
     *            The number of nodes for each layer (may be different).
     * @param paraLearningRate
 *            Learning rate.
     * @param paraMobp
    public  GeneralAnn(String paraFilename, int[] paraLayerNumNodes, double paraLearningRate,
                           double paraMobp) {
        // Step 1. Read data.
        try {
            FileReader tempReader = new FileReader(paraFilename);
            dataset = new Instances(tempReader);
                // The last attribute is the decision class.
            dataset.setClassIndex(dataset.numAttributes() - 1);
        } catch (Exception ee) {
            System.out.println("Error occurred while trying to read \'" + paraFilename
                    + "\' in GeneralAnn constructor.\r\n" + ee);
        } // Of try

        // Step 2. Accept parameters.
        layerNumNodes = paraLayerNumNodes;
        numLayers = layerNumNodes.length;
        // Adjust if necessary.
        layerNumNodes[0] = dataset.numAttributes() - 1;
        layerNumNodes[numLayers - 1] = dataset.numClasses();
        learningRate = paraLearningRate;
        mobp = paraMobp;
    }//Of the first constructor

     * Forward prediction.
     * @param paraInput
     *            The input data of one instance.
     * @return The data at the output end.
    public abstract double[] forward(double[] paraInput);

     * Back propagation.
     * @param paraTarget
     *            For 3-class data, it is [0, 0, 1], [0, 1, 0] or [1, 0, 0].
    public abstract void backPropagation(double[] paraTarget);

     * Train using the dataset.
    public void train() {
        double[] tempInput = new double[dataset.numAttributes() - 1];
        double[] tempTarget = new double[dataset.numClasses()];
        for (int i = 0; i < dataset.numInstances(); i++) {
            // Fill the data.
            for (int j = 0; j < tempInput.length; j++) {
                tempInput[j] = dataset.instance(i).value(j);
            } // Of for j

            // Fill the class label.
            Arrays.fill(tempTarget, 0);
            tempTarget[(int) dataset.instance(i).classValue()] = 1;

            // Train with this instance.
        } // Of for i
    }// Of train

     * Get the index corresponding to the max value of the array.
     * @return the index.
    public static int argmax(double[] paraArray) {
        int resultIndex = -1;
        double tempMax = -1e10;
        for (int i = 0; i < paraArray.length; i++) {
            if (tempMax < paraArray[i]) {
                tempMax = paraArray[i];
                resultIndex = i;
            } // Of if
        } // Of for i

        return resultIndex;
    }// Of argmax

     * Test using the dataset.
     * @return The precision.
    public double test() {
        double[] tempInput = new double[dataset.numAttributes() - 1];
        double tempNumCorrect = 0;
        double[] tempPrediction;
        int tempPredictedClass = -1;

        for (int i = 0; i < dataset.numInstances(); i++) {
            // Fill the data.
            for (int j = 0; j < tempInput.length; j++) {
                tempInput[j] = dataset.instance(i).value(j);
            } // Of for j

            // Train with this instance.
            tempPrediction = forward(tempInput);
            //System.out.println("prediction: " + Arrays.toString(tempPrediction));
            tempPredictedClass = argmax(tempPrediction);
            if (tempPredictedClass == (int) dataset.instance(i).classValue()) {
            } // Of if
        } // Of for i

        System.out.println("Correct: " + tempNumCorrect + " out of " + dataset.numInstances());

        return tempNumCorrect / dataset.numInstances();
    }// Of test

}//Of class GeneralAnn



package bp;

 * Back-propagation neural networks. The code comes from
 * /s?__biz=MjM5MjAwODM4MA==&mid=402665740&idx=1&sn=18d84d72934e59ca8bcd828782172667
 * @author 彭渊 revised by

public class SimpleAnn extends GeneralAnn{

     * The value of each node that changes during the forward process. The first
     * dimension stands for the layer, and the second stands for the node.
    public double[][] layerNodeValues;

     * The error on each node that changes during the back-propagation process.
     * The first dimension stands for the layer, and the second stands for the
     * node.
    public double[][] layerNodeErrors;

     * The weights of edges. The first dimension stands for the layer, the
     * second stands for the node index of the layer, and the third dimension
     * stands for the node index of the next layer.
    public double[][][] edgeWeights;

     * The change of edge weights. It has the same size as edgeWeights.
    public double[][][] edgeWeightsDelta;

     * The first constructor.
     * @param paraFilename
     *            The arff filename.
     * @param paraLayerNumNodes
     *            The number of nodes for each layer (may be different).
     * @param paraLearningRate
     *            Learning rate.
     * @param paraMobp
     *            Momentum coefficient.
    public SimpleAnn(String paraFilename, int[] paraLayerNumNodes, double paraLearningRate,
                     double paraMobp) {
        super(paraFilename, paraLayerNumNodes, paraLearningRate, paraMobp);

        // Step 1. Across layer initialization.
        layerNodeValues = new double[numLayers][];
        layerNodeErrors = new double[numLayers][];
        edgeWeights = new double[numLayers - 1][][];
        edgeWeightsDelta = new double[numLayers - 1][][];

        // Step 2. Inner layer initialization.
        for (int l = 0; l < numLayers; l++) {
            layerNodeValues[l] = new double[layerNumNodes[l]];
            layerNodeErrors[l] = new double[layerNumNodes[l]];

            // One less layer because each edge crosses two layers.
            if (l + 1 == numLayers) {
            } // of if

            // In layerNumNodes[l] + 1, the last one is reserved for the offset.
            edgeWeights[l] = new double[layerNumNodes[l] + 1][layerNumNodes[l + 1]];
            edgeWeightsDelta[l] = new double[layerNumNodes[l] + 1][layerNumNodes[l + 1]];
            for (int j = 0; j < layerNumNodes[l] + 1; j++) {
                for (int i = 0; i < layerNumNodes[l + 1]; i++) {
                    // Initialize weights.
                    edgeWeights[l][j][i] = random.nextDouble();
                } // Of for i
            } // Of for j
        } // Of for l
    }// Of the constructor

     * Forward prediction.
     * @param paraInput
     *            The input data of one instance.
     * @return The data at the output end.
    public double[] forward(double[] paraInput) {
        // Initialize the input layer.
        for (int i = 0; i < layerNodeValues[0].length; i++) {
            layerNodeValues[0][i] = paraInput[i];
        } // Of for i

        // Calculate the node values of each layer.
        double z;
        for (int l = 1; l < numLayers; l++) {
            for (int j = 0; j < layerNodeValues[l].length; j++) {
                // Initialize according to the offset, which is always +1
                z = edgeWeights[l - 1][layerNodeValues[l - 1].length][j];
                // Weighted sum on all edges for this node.
                for (int i = 0; i < layerNodeValues[l - 1].length; i++) {
                    z += edgeWeights[l - 1][i][j] * layerNodeValues[l - 1][i];
                } // Of for i

                // Sigmoid activation.
                // This line should be changed for other activation functions.
                layerNodeValues[l][j] = 1 / (1 + Math.exp(-z));
            } // Of for j
        } // Of for l

        return layerNodeValues[numLayers - 1];
    }// Of forward

     * Back propagation and change the edge weights.
     * @param paraTarget
     *            For 3-class data, it is [0, 0, 1], [0, 1, 0] or [1, 0, 0].
    public void backPropagation(double[] paraTarget) {
        // Step 1. Initialize the output layer error.
        int l = numLayers - 1;
        for (int j = 0; j < layerNodeErrors[l].length; j++) {
            layerNodeErrors[l][j] = layerNodeValues[l][j] * (1 - layerNodeValues[l][j])
                    * (paraTarget[j] - layerNodeValues[l][j]);
        } // Of for j

        // Step 2. Back-propagation even for l == 0
        while (l > 0) {
            // Layer l, for each node.
            for (int j = 0; j < layerNumNodes[l]; j++) {
                double z = 0.0;
                // For each node of the next layer.
                for (int i = 0; i < layerNumNodes[l + 1]; i++) {
                    if (l > 0) {
                        z += layerNodeErrors[l + 1][i] * edgeWeights[l][j][i];
                    } // Of if

                    // Weight adjusting.
                    edgeWeightsDelta[l][j][i] = mobp * edgeWeightsDelta[l][j][i]
                            + learningRate * layerNodeErrors[l + 1][i] * layerNodeValues[l][j];
                    edgeWeights[l][j][i] += edgeWeightsDelta[l][j][i];
                    if (j == layerNumNodes[l] - 1) {
                        // Weight adjusting for the offset part.
                        edgeWeightsDelta[l][j + 1][i] = mobp * edgeWeightsDelta[l][j + 1][i]
                                + learningRate * layerNodeErrors[l + 1][i];
                        edgeWeights[l][j + 1][i] += edgeWeightsDelta[l][j + 1][i];
                    } // Of if
                } // Of for i

                // Record the error according to the differential of Sigmoid.
                // This line should be changed for other activation functions.
                layerNodeErrors[l][j] = layerNodeValues[l][j] * (1 - layerNodeValues[l][j]) * z;
            } // Of for j
        } // Of while
    }// Of backPropagation

     * Test the algorithm.
    public static void main(String[] args) {
        int[] tempLayerNodes = { 4, 8, 8, 3 };
        SimpleAnn tempNetwork = new SimpleAnn("C:\\Users\\hp\\Desktop\\deepLearning\\src\\main\\java\\resources\\iris.arff", tempLayerNodes, 0.01,

        for (int round = 0; round < 5000; round++) {
        } // Of for n

        double tempAccuracy = tempNetwork.test();
        System.out.println("The accuracy is: " + tempAccuracy);
    }// Of main
}// Of class SimpleAnn

  • 1
  • 6
    觉得还不错? 一键收藏
  • 0
Java神经网络是一种使用Java编程语言构建和实现神经网络模型。Encog是一个流行的Java机器学习框架,它提供了丰富的神经网络算法和工具,用于构建、训练和应用神经网络模型。通过Encog,你可以使用Java编写代码来定义神经网络的结构、选择适当的激活函数、设置权重和偏置值,并进行前向传播和反向传播的训练过程。 神经网络训练的过程包括以下几个主要步骤: 1. 初始化神经网络:根据问题的需求,初始化神经网络的结构和参数,包括输入层、隐藏层和输出层的神经元数量、连接权重和偏置值等。 2. 前向传播:将输入数据通过神经网络的连接权重和激活函数进行计算,得到输出结果。 3. 计算误差:将神经网络的输出结果与预期结果进行比较,计算误差值。 4. 反向传播:根据误差值,通过链式法则和梯度下降算法,更新神经网络的连接权重和偏置值,以减小误差。 5. 重复训练:反复进行前向传播和反向传播的训练过程,直到达到预设的训练次数或误差收敛的条件。 Encog提供了多种神经网络训练的方式,其中之一是传播训练,也称为反向传播算法。传播训练是一种基于梯度下降的监督学习方法,通过不断调整连接权重和偏置值,使神经网络的输出结果逼近预期结果。 在Java中,可以使用序列化机制来保存和加载训练好的神经网络模型。Java序列化是一种将对象转换为字节流的机制,可以将Encog的神经网络对象写入磁盘文件或传输到其他系统中进行使用。


  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助




当前余额3.43前往充值 >
领取后你会自动成为博主和红包主的粉丝 规则
钱包余额 0


