/*
 * 代码来自闵老师“日撸 Java 三百行(71-80 天)”:
 * 日撸 Java 三百行(71-80 天,BP 神经网络)——闵帆的博客-CSDN博客
 * 实现了一个抽象类,包含两个抽象方法 forward 和 backPropagation。
 * argmax 方法用于找出当前数组最大值的下标,供输出层确定最终标签。
 * 方法 train 用来训练网络,包含反向传播环节。
 * test 用于测试,计算分类精度,不进行反向传播。
 */
package machinelearning.ann;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Random;
import weka.core.Instances;
/**
*
* General ANN. Two methods are abstract: forward and backPropagation.
*
* @author WX873
*
*/
public abstract class GeneralAnn {

    /**
     * The whole dataset (loaded from an ARFF file in the constructor).
     */
    Instances dataset;

    /**
     * Number of layers. It is counted according to nodes instead of edges.
     */
    int numLayers;

    /**
     * The number of nodes for each layer, e.g., [3, 4, 6, 2] means that there
     * are 3 input nodes (conditional attributes), 2 hidden layers with 4 and 6
     * nodes, respectively, and 2 class values (binary classification).
     */
    int[] layerNumNodes;

    /**
     * Momentum coefficient.
     */
    public double mobp;

    /**
     * Learning rate.
     */
    public double learningRate;

    /**
     * For random number generation.
     */
    Random random = new Random();

    /**
     * ********************************************************************
     * The first constructor.
     *
     * @param paraFilename      The arff filename.
     * @param paraLayerNumNodes The number of nodes for each layer (may be
     *                          different). The first and last entries are
     *                          overwritten to match the dataset; the caller's
     *                          array is not modified.
     * @param paraLearningRate  Learning rate.
     * @param paraMobp          Momentum coefficient.
     * @throws IllegalArgumentException If the data file cannot be read or
     *                                  parsed.
     * ********************************************************************
     */
    public GeneralAnn(String paraFilename, int[] paraLayerNumNodes, double paraLearningRate, double paraMobp) {
        // Step 1. Read data. Fail fast: the original printed the error and
        // continued, which guaranteed a NullPointerException on the very next
        // access to dataset. Try-with-resources also closes the reader on the
        // error path, and the charset is pinned to UTF-8 instead of the
        // platform default.
        try (InputStreamReader tempReader = new InputStreamReader(
                new FileInputStream(paraFilename), StandardCharsets.UTF_8)) {
            dataset = new Instances(tempReader);
            // The last attribute is the decision class.
            dataset.setClassIndex(dataset.numAttributes() - 1);
        } catch (Exception e) {
            throw new IllegalArgumentException("Error occurred while trying to read '" + paraFilename
                    + "' in GeneralAnn constructor.", e);
        } // Of try

        // Step 2. Accept parameters. Clone so that the adjustments below do
        // not mutate the caller's array.
        layerNumNodes = paraLayerNumNodes.clone();
        numLayers = layerNumNodes.length;
        // Adjust if necessary: the first layer must have one node per
        // conditional attribute, and the last layer one node per class.
        layerNumNodes[0] = dataset.numAttributes() - 1;
        layerNumNodes[numLayers - 1] = dataset.numClasses();
        learningRate = paraLearningRate;
        mobp = paraMobp;
    } // Of the first constructor

    /**
     * ******************************************
     * Forward prediction.
     *
     * @param paraInput The input data of one instance.
     * @return The data at the output end.
     * ******************************************
     */
    public abstract double[] forward(double[] paraInput);

    /**
     * ***********************************************
     * Back propagation.
     *
     * @param paraTarget For 3-class data, it is [0, 0, 1], [0, 1, 0] or [1, 0, 0].
     * ***********************************************
     */
    public abstract void backPropagation(double[] paraTarget);

    /**
     * ************************************************
     * Train using the dataset: one forward pass followed by one back
     * propagation per instance.
     * ************************************************
     */
    public void train() {
        double[] tempInput = new double[dataset.numAttributes() - 1];
        double[] tempTarget = new double[dataset.numClasses()];
        for (int i = 0; i < dataset.numInstances(); i++) {
            // Fill the data.
            for (int j = 0; j < tempInput.length; j++) {
                tempInput[j] = dataset.instance(i).value(j);
            } // Of for j

            // Fill the class label: one-hot, i.e., 1 at the label's position
            // and 0 elsewhere.
            Arrays.fill(tempTarget, 0);
            tempTarget[(int) dataset.instance(i).classValue()] = 1;

            // Train with this instance.
            forward(tempInput);
            backPropagation(tempTarget);
        } // Of for i
    } // Of train

    /**
     * **************************************************
     * Get the index corresponding to the max value of the array. Ties are
     * broken toward the smallest index. Used by the output layer to decide
     * the predicted label.
     *
     * @param paraArray The array to scan; must be non-empty.
     * @return The index of the maximum value.
     * @throws IllegalArgumentException If the array is null or empty.
     * **************************************************
     */
    public static int argmax(double[] paraArray) {
        if ((paraArray == null) || (paraArray.length == 0)) {
            // The original returned -1 here (via a -1e10 sentinel), which
            // caused out-of-bounds errors downstream.
            throw new IllegalArgumentException("argmax of an empty array is undefined.");
        } // Of if

        // Seed from the first element instead of a sentinel, so arrays whose
        // values are all below -1e10 are still handled correctly.
        int resultIndex = 0;
        double tempMax = paraArray[0];
        for (int i = 1; i < paraArray.length; i++) {
            // Strict '<' keeps the first index on ties, matching the
            // original's behavior.
            if (tempMax < paraArray[i]) {
                tempMax = paraArray[i];
                resultIndex = i;
            } // Of if
        } // Of for i
        return resultIndex;
    } // Of argmax

    /**
     * *****************************************************
     * Test using the dataset. Runs forward prediction only; no back
     * propagation takes place.
     *
     * @return The classification accuracy on the dataset.
     * *****************************************************
     */
    public double test() {
        double[] tempInput = new double[dataset.numAttributes() - 1];
        double tempNumCorrect = 0;
        double[] tempPrediction;
        int tempPredictedClass = -1;
        for (int i = 0; i < dataset.numInstances(); i++) {
            // Fill the data.
            for (int j = 0; j < tempInput.length; j++) {
                tempInput[j] = dataset.instance(i).value(j);
            } // Of for j

            // Predict with this instance.
            tempPrediction = forward(tempInput);
            tempPredictedClass = argmax(tempPrediction);
            if (tempPredictedClass == (int) dataset.instance(i).classValue()) {
                tempNumCorrect++;
            } // Of if
        } // Of for i

        System.out.println("Correct: " + tempNumCorrect + " out of " + dataset.numInstances());
        return tempNumCorrect / dataset.numInstances();
    } // Of test
} // Of GeneralAnn