71-80天

谨言慎行324

于 2021-07-31 00:04:01 发布

阅读量132

点赞数

本文链接：https://blog.csdn.net/qq_58459200/article/details/119258057

版权

第 71 天: BP神经网络基础类 (数据读取与基本结构)

代码如下：

package days71_80;

import java.io.FileReader;
import java.util.Arrays;
import java.util.Random;

import weka.core.Instances;


	/**
	 * The whole dataset.
	 */
	Instances dataset;

	/**
	 * Number of layers. It is counted according to nodes instead of edges.
	 */
	int numLayers;

	int[] layerNumNodes;

	/**
	 * Momentum coefficient.
	 */
	public double mobp;

	/**
	 * Learning rate.
	 */
	public double learningRate;

	/**
	 * For random number generation.
	 */
	Random random = new Random();

	public GeneralAnn(String paraFilename, int[] paraLayerNumNodes, double paraLearningRate,
			double paraMobp) {
		// Step 1. Read data.
		try {
			FileReader tempReader = new FileReader(paraFilename);
			dataset = new Instances(tempReader);
			// The last attribute is the decision class.
			dataset.setClassIndex(dataset.numAttributes() - 1);
			tempReader.close();
		} catch (Exception ee) {
			System.out.println("Error occurred while trying to read \'" + paraFilename
					+ "\' in GeneralAnn constructor.\r\n" + ee);
			System.exit(0);
		} // Of try

		// Step 2. Accept parameters.
		layerNumNodes = paraLayerNumNodes;
		numLayers = layerNumNodes.length;
		// Adjust if necessary.
		layerNumNodes[0] = dataset.numAttributes() - 1;
		layerNumNodes[numLayers - 1] = dataset.numClasses();
		learningRate = paraLearningRate;
		mobp = paraMobp;	
	}//Of the first constructor	
	
	
	public abstract double[] forward(double[] paraInput);

	
	public abstract void backPropagation(double[] paraTarget);

	/**
	 ********************
	 * Train using the dataset.
	 ********************
	 */
	public void train() {
		double[] tempInput = new double[dataset.numAttributes() - 1];
		double[] tempTarget = new double[dataset.numClasses()];
		for (int i = 0; i < dataset.numInstances(); i++) {
			// Fill the data.
			for (int j = 0; j < tempInput.length; j++) {
				tempInput[j] = dataset.instance(i).value(j);
			} // Of for j

			// Fill the class label.
			Arrays.fill(tempTarget, 0);
			tempTarget[(int) dataset.instance(i).classValue()] = 1;

			// Train with this instance.
			forward(tempInput);
			backPropagation(tempTarget);
		} // Of for i
	}// Of train


	public static int argmax(double[] paraArray) {
		int resultIndex = -1;
		double tempMax = -1e10;
		for (int i = 0; i < paraArray.length; i++) {
			if (tempMax < paraArray[i]) {
				tempMax = paraArray[i];
				resultIndex = i;
			} // Of if
		} // Of for i

		return resultIndex;
	}// Of argmax

	
	public double test() {
		double[] tempInput = new double[dataset.numAttributes() - 1];

		double tempNumCorrect = 0;
		double[] tempPrediction;
		int tempPredictedClass = -1;

		for (int i = 0; i < dataset.numInstances(); i++) {
			// Fill the data.
			for (int j = 0; j < tempInput.length; j++) {
				tempInput[j] = dataset.instance(i).value(j);
			} // Of for j

			// Train with this instance.
			tempPrediction = forward(tempInput);
			//System.out.println("prediction: " + Arrays.toString(tempPrediction));
			tempPredictedClass = argmax(tempPrediction);
			if (tempPredictedClass == (int) dataset.instance(i).classValue()) {
				tempNumCorrect++;
			} // Of if
		} // Of for i

		System.out.println("Correct: " + tempNumCorrect + " out of " + dataset.numInstances());

		return tempNumCorrect / dataset.numInstances();
	}// Of test
}//Of class GeneralAnn

第 72 天: 固定激活函数的BP神经网络 (1. 网络结构理解)

网络结构和数据通过几个数组确定. 需要结合程序的运行来理解它们.

1.layerNumNodes 表示网络基本结构. 如: [3, 4, 6, 2] 表示:
a) 输入端口有 3 个，即数据有 3 个条件属性. 如果与实际数据不符, 代码会自动纠正, 见 GeneralAnn.java 81 行.
b) 输出端口有 2 个, 即数据的决策类别数为 2. 如果与实际数据不符, 代码会自动纠正, 见 GeneralAnn.java 82 行. 对于分类问题, 数据是哪个类别, 对应于输出值最大的端口.
c) 有两个中间层, 分别为 4 个和 6 个节点.

2.layerNodeValues 表示各网络节点的值. 如上例, 网络的节点有 4 层, 即 layerNodeValues.length 为 4. 总结点数为 3 + 4 + 6 + 2 − 15 3 + 4 + 6 + 2 - 153+4+6+2−15 个, 即 layerNodeValues[0].length = 3, layerNodeValues[1].length = 4, layerNodeValues[2].length = 6, layerNodeValues[3].length = 2. Java 支持这种不规则的矩阵 (不同行的列数不同), 因为二维矩阵被当作一维向量的一维向量.
3.ayerNodeErrors 表示各网络节点上的误差. 该数组大小于 layerNodeValues 一致.

4.edgeWeights 表示各条边的权重. 由于两层之间的边为多对多关系 (二维数组), 多个层的边就成了三维数组. 例如, 上面例子的第 0 层就应该有 ( 3 + 1 ) × 4 = 16 (3+1) \times 4 = 16(3+1)×4=16 条边, 这里 + 1 +1+1 表示有偏移量 offset. 总共的层数为 4 − 1 = 3 4 - 1 = 34−1=3, 即边的层数要比节点层数少 1. 这也是写程序过程中非常容易出错的地方.
5.edgeWeightsDelta 与 edgeWeights 具有相同大小, 它辅助后者进行调整.
下面才是核心代码.

package days71_80;


public class SimpleAnn extends GeneralAnn{

	/**
	 * The value of each node that changes during the forward process. The first
	 * dimension stands for the layer, and the second stands for the node.
	 */
	public double[][] layerNodeValues;

	/**
	 * The error on each node that changes during the back-propagation process.
	 * The first dimension stands for the layer, and the second stands for the
	 * node.
	 */
	public double[][] layerNodeErrors;

	/**
	 * The weights of edges. The first dimension stands for the layer, the
	 * second stands for the node index of the layer, and the third dimension
	 * stands for the node index of the next layer.
	 */
	public double[][][] edgeWeights;

	/**
	 * The change of edge weights. It has the same size as edgeWeights.
	 */
	public double[][][] edgeWeightsDelta;


	public SimpleAnn(String paraFilename, int[] paraLayerNumNodes, double paraLearningRate,
			double paraMobp) {
		super(paraFilename, paraLayerNumNodes, paraLearningRate, paraMobp);

		// Step 1. Across layer initialization.
		layerNodeValues = new double[numLayers][];
		layerNodeErrors = new double[numLayers][];
		edgeWeights = new double[numLayers - 1][][];
		edgeWeightsDelta = new double[numLayers - 1][][];

		// Step 2. Inner layer initialization.
		for (int l = 0; l < numLayers; l++) {
			layerNodeValues[l] = new double[layerNumNodes[l]];
			layerNodeErrors[l] = new double[layerNumNodes[l]];

			// One less layer because each edge crosses two layers.
			if (l + 1 == numLayers) {
				break;
			} // of if

			// In layerNumNodes[l] + 1, the last one is reserved for the offset.
			edgeWeights[l] = new double[layerNumNodes[l] + 1][layerNumNodes[l + 1]];
			edgeWeightsDelta[l] = new double[layerNumNodes[l] + 1][layerNumNodes[l + 1]];
			for (int j = 0; j < layerNumNodes[l] + 1; j++) {
				for (int i = 0; i < layerNumNodes[l + 1]; i++) {
					// Initialize weights.
					edgeWeights[l][j][i] = random.nextDouble();
				} // Of for i
			} // Of for j
		} // Of for l
	}// Of the constructor

	
	public double[] forward(double[] paraInput) {
		// Initialize the input layer.
		for (int i = 0; i < layerNodeValues[0].length; i++) {
			layerNodeValues[0][i] = paraInput[i];
		} // Of for i

		// Calculate the node values of each layer.
		double z;
		for (int l = 1; l < numLayers; l++) {
			for (int j = 0; j < layerNodeValues[l].length; j++) {
				// Initialize according to the offset, which is always +1
				z = edgeWeights[l - 1][layerNodeValues[l - 1].length][j];
				// Weighted sum on all edges for this node.
				for (int i = 0; i < layerNodeValues[l - 1].length; i++) {
					z += edgeWeights[l - 1][i][j] * layerNodeValues[l - 1][i];
				} // Of for i

				// Sigmoid activation.
				// This line should be changed for other activation functions.
				layerNodeValues[l][j] = 1 / (1 + Math.exp(-z));
			} // Of for j
		} // Of for l

		return layerNodeValues[numLayers - 1];
	}// Of forward


	public void backPropagation(double[] paraTarget) {
		// Step 1. Initialize the output layer error.
		int l = numLayers - 1;
		for (int j = 0; j < layerNodeErrors[l].length; j++) {
			layerNodeErrors[l][j] = layerNodeValues[l][j] * (1 - layerNodeValues[l][j])
					* (paraTarget[j] - layerNodeValues[l][j]);
		} // Of for j

		// Step 2. Back-propagation even for l == 0
		while (l > 0) {
			l--;
			// Layer l, for each node.
			for (int j = 0; j < layerNumNodes[l]; j++) {
				double z = 0.0;
				// For each node of the next layer.
				for (int i = 0; i < layerNumNodes[l + 1]; i++) {
					if (l > 0) {
						z += layerNodeErrors[l + 1][i] * edgeWeights[l][j][i];
					} // Of if

					// Weight adjusting.
					edgeWeightsDelta[l][j][i] = mobp * edgeWeightsDelta[l][j][i]
							+ learningRate * layerNodeErrors[l + 1][i] * layerNodeValues[l][j];
					edgeWeights[l][j][i] += edgeWeightsDelta[l][j][i];
					if (j == layerNumNodes[l] - 1) {
						// Weight adjusting for the offset part.
						edgeWeightsDelta[l][j + 1][i] = mobp * edgeWeightsDelta[l][j + 1][i]
								+ learningRate * layerNodeErrors[l + 1][i];
						edgeWeights[l][j + 1][i] += edgeWeightsDelta[l][j + 1][i];
					} // Of if
				} // Of for i

				// Record the error according to the differential of Sigmoid.
				// This line should be changed for other activation functions.
				layerNodeErrors[l][j] = layerNodeValues[l][j] * (1 - layerNodeValues[l][j]) * z;
			} // Of for j
		} // Of while
	}// Of backPropagation


	public static void main(String[] args) {
		int[] tempLayerNodes = { 4, 8, 8, 3 };
		SimpleAnn tempNetwork = new SimpleAnn("D:/data/iris.arff", tempLayerNodes, 0.01,
				0.6);

		for (int round = 0; round < 5000; round++) {
			tempNetwork.train();
		} // Of for n

		double tempAccuray = tempNetwork.test();
		System.out.println("The accuracy is: " + tempAccuray);
	}// Of main
}// Of class SimpleAnn

第 73 天: 固定激活函数的BP神经网络 (2. 训练与测试过程理解)

1.Forward 就是利用当前网络对一条数据进行预测的过程.

2.BackPropagation 就是根据误差进行网络权重调节的过程.

3.训练的时候需要前向与后向, 测试的时候只需要前向.

4.这里只实现了 sigmoid 激活函数, 反向传播时的导数与正向传播时的激活函数相对应. 如果要换激活函数, 需要两个地方同时换.

神经网络这块还没有很好的理解，目前在看教程

第 74 天: 通用BP神经网络 (1. 集中管理激活函数)

激活与求导是一个, 前者用于 forward, 后者用于 back-propagation.
有很多的激活函数, 它们的设计有相应准则, 如分段可导.
查资料补充几个未实现的激活函数.
进一步测试.

代码如下:

package days71_80;



public class Activator {
	/**
	 * Arc tan.
	 */
	public final char ARC_TAN = 'a';

	/**
	 * Elu.
	 */
	public final char ELU = 'e';

	/**
	 * Gelu.
	 */
	public final char GELU = 'g';

	/**
	 * Hard logistic.
	 */
	public final char HARD_LOGISTIC = 'h';

	/**
	 * Identity.
	 */
	public final char IDENTITY = 'i';

	/**
	 * Leaky relu, also known as parametric relu.
	 */
	public final char LEAKY_RELU = 'l';

	/**
	 * Relu.
	 */
	public final char RELU = 'r';

	/**
	 * Soft sign.
	 */
	public final char SOFT_SIGN = 'o';

	/**
	 * Sigmoid.
	 */
	public final char SIGMOID = 's';

	/**
	 * Tanh.
	 */
	public final char TANH = 't';

	/**
	 * Soft plus.
	 */
	public final char SOFT_PLUS = 'u';

	/**
	 * Swish.
	 */
	public final char SWISH = 'w';

	/**
	 * The activator.
	 */
	private char activator;

	/**
	 * Alpha for elu.
	 */
	double alpha;

	/**
	 * Beta for leaky relu.
	 */
	double beta;

	/**
	 * Gamma for leaky relu.
	 */
	double gamma;


	public Activator(char paraActivator) {
		activator = paraActivator;
	}// Of the first constructor

	/**
	 *********************
	 * Setter.
	 *********************
	 */
	public void setActivator(char paraActivator) {
		activator = paraActivator;
	}// Of setActivator

	/**
	 *********************
	 * Getter.
	 *********************
	 */
	public char getActivator() {
		return activator;
	}// Of getActivator

	/**
	 *********************
	 * Setter.
	 *********************
	 */
	void setAlpha(double paraAlpha) {
		alpha = paraAlpha;
	}// Of setAlpha

	/**
	 *********************
	 * Setter.
	 *********************
	 */
	void setBeta(double paraBeta) {
		beta = paraBeta;
	}// Of setBeta

	/**
	 *********************
	 * Setter.
	 *********************
	 */
	void setGamma(double paraGamma) {
		gamma = paraGamma;
	}// Of setGamma

	/**
	 *********************
	 * Activate according to the activation function.
	 *********************
	 */
	public double activate(double paraValue) {
		double resultValue = 0;
		switch (activator) {
		case ARC_TAN:
			resultValue = Math.atan(paraValue);
			break;
		case ELU:
			if (paraValue >= 0) {
				resultValue = paraValue;
			} else {
				resultValue = alpha * (Math.exp(paraValue) - 1);
			} // Of if
			break;
		// case GELU:
		// resultValue = ?;
		// break;
		// case HARD_LOGISTIC:
		// resultValue = ?;
		// break;
		case IDENTITY:
			resultValue = paraValue;
			break;
		case LEAKY_RELU:
			if (paraValue >= 0) {
				resultValue = paraValue;
			} else {
				resultValue = alpha * paraValue;
			} // Of if
			break;
		case SOFT_SIGN:
			if (paraValue >= 0) {
				resultValue = paraValue / (1 + paraValue);
			} else {
				resultValue = paraValue / (1 - paraValue);
			} // Of if
			break;
		case SOFT_PLUS:
			resultValue = Math.log(1 + Math.exp(paraValue));
			break;
		case RELU:
			if (paraValue >= 0) {
				resultValue = paraValue;
			} else {
				resultValue = 0;
			} // Of if
			break;
		case SIGMOID:
			resultValue = 1 / (1 + Math.exp(-paraValue));
			break;
		case TANH:
			resultValue = 2 / (1 + Math.exp(-2 * paraValue)) - 1;
			break;
		// case SWISH:
		// resultValue = ?;
		// break;
		default:
			System.out.println("Unsupported activator: " + activator);
			System.exit(0);
		}// Of switch

		return resultValue;
	}// Of activate

	public double derive(double paraValue, double paraActivatedValue) {
		double resultValue = 0;
		switch (activator) {
		case ARC_TAN:
			resultValue = 1 / (paraValue * paraValue + 1);
			break;
		case ELU:
			if (paraValue >= 0) {
				resultValue = 1;
			} else {
				resultValue = alpha * (Math.exp(paraValue) - 1) + alpha;
			} // Of if
			break;
		// case GELU:
		// resultValue = ?;
		// break;
		// case HARD_LOGISTIC:
		// resultValue = ?;
		// break;
		case IDENTITY:
			resultValue = 1;
			break;
		case LEAKY_RELU:
			if (paraValue >= 0) {
				resultValue = 1;
			} else {
				resultValue = alpha;
			} // Of if
			break;
		case SOFT_SIGN:
			if (paraValue >= 0) {
				resultValue = 1 / (1 + paraValue) / (1 + paraValue);
			} else {
				resultValue = 1 / (1 - paraValue) / (1 - paraValue);
			} // Of if
			break;
		case SOFT_PLUS:
			resultValue = 1 / (1 + Math.exp(-paraValue));
			break;
		case RELU: // Updated
			if (paraValue >= 0) {
				resultValue = 1;
			} else {
				resultValue = 0;
			} // Of if
			break;
		case SIGMOID: // Updated
			resultValue = paraActivatedValue * (1 - paraActivatedValue);
			break;
		case TANH: // Updated
			resultValue = 1 - paraActivatedValue * paraActivatedValue;
			break;
		// case SWISH:
		// resultValue = ?;
		// break;
		default:
			System.out.println("Unsupported activator: " + activator);
			System.exit(0);
		}// Of switch

		return resultValue;
	}// Of derive

	/**
	 *********************
	 * Overrides the method claimed in Object.
	 *********************
	 */
	public String toString() {
		String resultString = "Activator with function '" + activator + "'";
		resultString += "\r\n alpha = " + alpha + ", beta = " + beta + ", gamma = " + gamma;

		return resultString;
	}// Of toString

	/**
	 ********************
	 * Test the class.
	 ********************
	 */
	public static void main(String[] args) {
		Activator tempActivator = new Activator('s');
		double tempValue = 0.6;
		double tempNewValue;
		tempNewValue = tempActivator.activate(tempValue);
		System.out.println("After activation: " + tempNewValue);

		tempNewValue = tempActivator.derive(tempValue, tempNewValue);
		System.out.println("After derive: " + tempNewValue);
	}// Of main
}// Of class Activator

运行结果：

谨言慎行324

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
71-80天

第 71 天: BP神经网络基础类 (数据读取与基本结构)代码如下：package days71_80;import java.io.FileReader;import java.util.Arrays;import java.util.Random;import weka.core.Instances; /** * The whole dataset. */ Instances dataset; /** * Number of layers. It is c.
复制链接

扫一扫