学习来源:日撸 Java 三百行(81-90天,CNN 卷积神经网络)_闵帆的博客——CSDN博客
数学操作
一、CNN网络结构
上图为CNN的一个例子,convolution为卷积操作;pooling为池化;fully connected为全连接。
1. 输入层
在处理图像的CNN中,输入层一般代表了一张图片的像素矩阵。
2. 卷积层
卷积层是CNN最重要的部分。与传统的全连接层不同,卷积层中每个节点的输入是上一层神经网络的一小块。以下为卷积层结构示意图:
如上图所示,Input X为输入,Kernel W为卷积核,Output Y为卷积后的结果。卷积操作是从输入X中依次取一个2x2的子矩阵,与卷积核W的对应元素相乘后求和,得到输出Y的一个元素:$Y_{i,j} = \sum_{a=0}^{1}\sum_{b=0}^{1} X_{i+a,\,j+b}\,W_{a,b}$
3. 池化层
卷积操作过后,每个卷积核都会对应得到一张特征图。尽管数据量比卷积前的图片少了很多,但还是过于庞大,因此通过池化操作来进一步减少数据量。
池化分为两种:最大池化和平均池化。顾名思义,最大池化就是取最大值,平均池化就是取平均值。以最大池化为例:
如上图所示,对于左边的红色2x2矩阵,取最大值6作为代表;对绿色的2x2矩阵取最大值8作为代表,以此类推得到右边池化后的2x2矩阵。
4. 全连接层
全连接层一般包含输出层。在经过多轮卷积和池化处理后,在CNN的最后一般会由全连接层来给出最后的分类结果。使用softmax可以得到当前样例属于不同种类的概率。
二、常用数学操作
1. interface Operator定义了一个算子,主要目的是在对矩阵的每个元素进行一些运算时可以简化代码,增加代码的复用性。以one_value为例,可以对矩阵的每个元素进行 $1 - x$ 操作,得到运算后的矩阵。
2. interface OperatorOnTwo与上一个算子类似,不同之处是支持两个操作数。因此可以简化矩阵的基本运算代码。
3. rot180可以将矩阵旋转180度,通过两次翻转(一次水平翻转、一次垂直翻转)实现。
4. convnValid是卷积操作,convnFull为其逆向操作。
5. scaleMatrix为平均值池化,kronecker是池化的逆向操作。
代码如下:
package JavaDay27;
import java.io.Serializable;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
/**
* Math operations.
*
* Adopted from cnn-master.
*
* @author Ke-Xiong Wang.
*/
public class MathUtils {
/**
 * A unary operator on a single {@code double} value.
 *
 * The interface extends {@link Serializable}, so every implementation is
 * serializable as well.
 */
public interface Operator extends Serializable {
    /**
     * Transform one value.
     *
     * @param value
     *            the input value.
     * @return the transformed value.
     */
    double process(double value);
}// Of interface Operator
/**
 * Operator that maps a value {@code v} to {@code 1 - v}.
 */
public static final Operator one_value = new Operator() {
    private static final long serialVersionUID = 3752139491940330714L;

    @Override
    public double process(double value) {
        final double complement = 1 - value;
        return complement;
    }
};
/**
 * The sigmoid operator: maps a value {@code v} to {@code 1 / (1 + e^(-v))}.
 */
public static final Operator sigmoid = new Operator() {
    private static final long serialVersionUID = -1952718905019847589L;

    @Override
    public double process(double value) {
        // Math.exp is the library exponential; raising the rounded constant
        // Math.E to a power is a hand-rolled substitute for it.
        return 1 / (1 + Math.exp(-value));
    }// Of process
};
/**
 * A binary operator on two {@code double} operands.
 *
 * Declared public because public members of the enclosing class expose this
 * type in their signatures; a package-private type could not be named by
 * callers in other packages. The interface extends {@link Serializable}, so
 * every implementation is serializable as well.
 */
public interface OperatorOnTwo extends Serializable {
    /**
     * Combine two operands into one value.
     *
     * @param a
     *            the first operand.
     * @param b
     *            the second operand.
     * @return the combined value.
     */
    public double process(double a, double b);
}// Of interface OperatorOnTwo
/**
 * Binary operator returning the sum of its two operands.
 */
public static final OperatorOnTwo plus = new OperatorOnTwo() {
    private static final long serialVersionUID = -6298144029766839945L;

    @Override
    public double process(double a, double b) {
        final double total = a + b;
        return total;
    }
};
/**
 * Binary operator returning the product of its two operands.
 *
 * Declared {@code final} so the shared constant cannot be reassigned.
 */
public static final OperatorOnTwo multiply = new OperatorOnTwo() {
    private static final long serialVersionUID = -7053767821858820698L;

    @Override
    public double process(double a, double b) {
        return a * b;
    }// Of process
};
/**
 * Binary operator returning the first operand minus the second.
 *
 * Declared {@code final} so the shared constant cannot be reassigned.
 */
public static final OperatorOnTwo minus = new OperatorOnTwo() {
    private static final long serialVersionUID = 7346065545555093912L;

    @Override
    public double process(double a, double b) {
        return a - b;
    }// Of process
};
/**
 * Print a matrix to standard output.
 *
 * Each row is printed on its own line in {@link Arrays#toString(double[])}
 * form with every {@code ", "} separator replaced by a tab. One empty line
 * follows the last row.
 *
 * @param matrix
 *            the matrix to print.
 */
public static void printMatrix(double[][] matrix) {
    for (double[] row : matrix) {
        System.out.println(Arrays.toString(row).replace(", ", "\t"));
    }
    System.out.println();
}// Of printMatrix
/**
 * Rotate a matrix by 180 degrees.
 *
 * The input is not modified. The returned matrix holds, at position
 * (rows - 1 - i, cols - 1 - j), the input element (i, j); this is the same
 * result as mirroring every row and then reversing the order of the rows.
 * The column count is taken from the first row.
 *
 * @param matrix
 *            the matrix to rotate.
 * @return a new matrix with the rotated values.
 */
public static double[][] rot180(double[][] matrix) {
    final int rows = matrix.length;
    final int cols = matrix[0].length;
    final double[][] rotated = new double[rows][cols];
    for (int r = 0; r < rows; r++) {
        final double[] target = rotated[rows - 1 - r];
        for (int c = 0; c < cols; c++) {
            target[cols - 1 - c] = matrix[r][c];
        }
    }
    return rotated;
}// Of rot180
/**
 * Shared pseudo-random generator, created with the fixed seed 2. Declared
 * final because it is never reassigned.
 */
private static final Random myRandom = new Random(2);
/**
 * Generate a matrix of pseudo-random values.
 *
 * Every element is computed as {@code (nextDouble() - 0.05) / 10} from the
 * shared generator, drawn row by row.
 *
 * @param x
 *            the number of rows.
 * @param y
 *            the number of columns.
 * @param b
 *            not used by this method.
 * @return the generated {@code x} by {@code y} matrix.
 */
public static double[][] randomMatrix(int x, int y, boolean b) {
    final double[][] matrix = new double[x][y];
    for (double[] row : matrix) {
        for (int col = 0; col < y; col++) {
            row[col] = (myRandom.nextDouble() - 0.05) / 10;
        }
    }
    return matrix;
}// Of randomMatrix
/**
 * Create an array of the given length in which every element is zero.
 *
 * Despite its name, this method draws no random numbers.
 * NOTE(review): the original source carried a commented-out random
 * initialisation ({@code myRandom.nextDouble() / 10 - 0.05}); the all-zero
 * result is kept here because that is what the code actually returned.
 * Confirm with the callers whether zeros are intended.
 *
 * @param len
 *            the length of the array.
 * @return a new array of {@code len} zeros.
 */
public static double[] randomArray(int len) {
    // A freshly allocated double array is already filled with zeros.
    return new double[len];
}// Of randomArray
/**
 * Pick {@code batchSize} distinct indices from {@code [0, size)}.
 *
 * Indices are drawn from the shared generator and collected in a
 * {@link HashSet} until enough distinct values are present. The returned
 * array follows the iteration order of that set, not the order in which the
 * indices were drawn.
 *
 * @param size
 *            the exclusive upper bound of the indices.
 * @param batchSize
 *            the number of distinct indices to return.
 * @return an array of {@code batchSize} distinct indices.
 * @throws IllegalArgumentException
 *             if {@code batchSize} is greater than {@code size}.
 */
public static int[] randomPerm(int size, int batchSize) {
    // Only "size" distinct values exist, so a larger request could never be
    // satisfied and the collection loop below would never terminate.
    if (batchSize > size) {
        throw new IllegalArgumentException(
                "batchSize (" + batchSize + ") must not exceed size (" + size + ")");
    }

    Set<Integer> set = new HashSet<Integer>();
    while (set.size() < batchSize) {
        set.add(myRandom.nextInt(size));
    }

    int[] randPerm = new int[batchSize];
    int i = 0;
    for (Integer value : set) {
        randPerm[i++] = value;
    }
    return randPerm;
}// Of randomPerm
/**
 * Copy a matrix element by element into newly allocated storage, so that
 * changes to the copy do not affect the original.
 *
 * The column count is taken from the first row and used for every row.
 *
 * @param matrix
 *            the matrix to copy.
 * @return a new matrix holding the same values.
 */
public static double[][] cloneMatrix(final double[][] matrix) {
    final int rows = matrix.length;
    final int cols = matrix[0].length;
    final double[][] copy = new double[rows][cols];
    for (int r = 0; r < rows; r++) {
        final double[] target = copy[r];
        for (int c = 0; c < cols; c++) {
            target[c] = matrix[r][c];
        }
    }
    return copy;
}// Of cloneMatrix
/**
 * Apply a unary operator to every element of a matrix, in place.
 *
 * The column count is taken from the first row and used for every row.
 *
 * @param ma
 *            the matrix to transform; its elements are overwritten.
 * @param operator
 *            the operator applied to each element.
 * @return the same array instance {@code ma}, after the update.
 */
public static double[][] matrixOp(final double[][] ma, Operator operator) {
    final int rows = ma.length;
    final int cols = ma[0].length;
    for (int r = 0; r < rows; r++) {
        final double[] row = ma[r];
        for (int c = 0; c < cols; c++) {
            row[c] = operator.process(row[c]);
        }
    }
    return ma;
}// Of matrixOp
/**
 * Combine two matrices element by element.
 *
 * For each position, the element of {@code ma} is optionally transformed by
 * {@code operatorA} and the element of {@code mb} by {@code operatorB}; the
 * two values are then combined by {@code operator} and the result is stored
 * back into {@code mb}.
 *
 * @param ma
 *            the matrix supplying the first operand.
 * @param mb
 *            the matrix supplying the second operand; it receives the
 *            results.
 * @param operatorA
 *            applied to each element of {@code ma} first, or {@code null} to
 *            use the element as is.
 * @param operatorB
 *            applied to each element of {@code mb} first, or {@code null} to
 *            use the element as is.
 * @param operator
 *            the binary operator combining the two values.
 * @return the same array instance {@code mb}, after the update.
 * @throws RuntimeException
 *             if the row counts differ, or the column counts of the first
 *             rows differ.
 */
public static double[][] matrixOp(final double[][] ma, final double[][] mb,
        final Operator operatorA, final Operator operatorB, OperatorOnTwo operator) {
    final int rows = ma.length;
    final int cols = ma[0].length;
    final boolean sameShape = rows == mb.length && cols == mb[0].length;
    if (!sameShape) {
        throw new RuntimeException("ma.length:" + ma.length + " mb.length:" + mb.length);
    }

    for (int r = 0; r < rows; r++) {
        for (int c = 0; c < cols; c++) {
            final double left = (operatorA == null) ? ma[r][c] : operatorA.process(ma[r][c]);
            final double right = (operatorB == null) ? mb[r][c] : operatorB.process(mb[r][c]);
            mb[r][c] = operator.process(left, right);
        }
    }
    return mb;
}// Of matrixOp
/**
 * Enlarge a matrix by repeating every element over a rectangular patch.
 *
 * The result has {@code rows * scale.width} rows and
 * {@code cols * scale.height} columns; output element (r, c) equals input
 * element ({@code r / scale.width}, {@code c / scale.height}). Note that
 * {@code scale.width} is the factor for the first (row) index and
 * {@code scale.height} the factor for the second (column) index.
 *
 * @param matrix
 *            the matrix to enlarge; it is not modified.
 * @param scale
 *            the repetition factors.
 * @return a new, enlarged matrix.
 */
public static double[][] kronecker(final double[][] matrix, final Size scale) {
    final int rows = matrix.length;
    final int cols = matrix[0].length;
    final double[][] expanded = new double[rows * scale.width][cols * scale.height];
    for (int r = 0; r < expanded.length; r++) {
        final double[] target = expanded[r];
        for (int c = 0; c < target.length; c++) {
            target[c] = matrix[r / scale.width][c / scale.height];
        }
    }
    return expanded;
}// Of kronecker
/**
 * Shrink a matrix by averaging non-overlapping rectangular patches.
 *
 * Each output element is the mean of a patch of {@code scale.width} rows by
 * {@code scale.height} columns of the input. Note that {@code scale.width}
 * divides the first (row) index and {@code scale.height} the second (column)
 * index.
 *
 * @param matrix
 *            the matrix to shrink; it is not modified.
 * @param scale
 *            the patch size.
 * @return a new matrix with one averaged value per patch.
 * @throws RuntimeException
 *             if the matrix dimensions are not exact multiples of the patch
 *             size.
 */
public static double[][] scaleMatrix(final double[][] matrix, final Size scale) {
    final int rows = matrix.length;
    final int cols = matrix[0].length;
    final int outRows = rows / scale.width;
    final int outCols = cols / scale.height;
    final double[][] pooled = new double[outRows][outCols];
    final boolean dividesEvenly = outRows * scale.width == rows
            && outCols * scale.height == cols;
    if (!dividesEvenly) {
        throw new RuntimeException("scale matrix");
    }

    final int cellCount = scale.width * scale.height;
    for (int r = 0; r < outRows; r++) {
        final int top = r * scale.width;
        for (int c = 0; c < outCols; c++) {
            final int left = c * scale.height;
            double total = 0.0;
            for (int dr = 0; dr < scale.width; dr++) {
                for (int dc = 0; dc < scale.height; dc++) {
                    total += matrix[top + dr][left + dc];
                }
            }
            pooled[r][c] = total / cellCount;
        }
    }
    return pooled;
}// Of scaleMatrix
/**
 * Full convolution, producing a result larger than the input.
 *
 * The input is surrounded by a border of zeros, {@code kernel.length - 1}
 * rows on the top and bottom and {@code kernel[0].length - 1} columns on the
 * left and right, and the padded matrix is then passed to
 * {@code convnValid} together with the kernel.
 *
 * @param matrix
 *            the input matrix; it is not modified.
 * @param kernel
 *            the kernel.
 * @return the result of {@code convnValid} on the zero-padded input.
 */
public static double[][] convnFull(double[][] matrix, final double[][] kernel) {
    final int rows = matrix.length;
    final int cols = matrix[0].length;
    final int rowPad = kernel.length - 1;
    final int colPad = kernel[0].length - 1;
    final double[][] padded = new double[rows + 2 * rowPad][cols + 2 * colPad];
    for (int r = 0; r < rows; r++) {
        final double[] target = padded[r + rowPad];
        for (int c = 0; c < cols; c++) {
            target[c + colPad] = matrix[r][c];
        }
    }
    return convnValid(padded, kernel);
}// Of convnFull
/**
 * Valid convolution: slide the kernel over every position where it fits
 * entirely inside the matrix.
 *
 * Output element (i, j) is the sum over the kernel of
 * {@code matrix[i + a][j + b] * kernel[a][b]}. The kernel is applied as
 * given; it is not rotated. The result has
 * {@code rows - kernelRows + 1} rows and {@code cols - kernelCols + 1}
 * columns.
 *
 * @param matrix
 *            the input matrix; it is not modified.
 * @param kernel
 *            the kernel; it is not modified.
 * @return a new matrix with one value per kernel position.
 */
public static double[][] convnValid(final double[][] matrix, double[][] kernel) {
    final int rows = matrix.length;
    final int cols = matrix[0].length;
    final int kernelRows = kernel.length;
    final int kernelCols = kernel[0].length;
    final int outCols = cols - kernelCols + 1;
    final int outRows = rows - kernelRows + 1;
    final double[][] result = new double[outRows][outCols];
    for (int top = 0; top < outRows; top++) {
        final double[] resultRow = result[top];
        for (int left = 0; left < outCols; left++) {
            double acc = 0.0;
            for (int kr = 0; kr < kernelRows; kr++) {
                final double[] window = matrix[top + kr];
                final double[] weights = kernel[kr];
                for (int kc = 0; kc < kernelCols; kc++) {
                    acc += window[left + kc] * weights[kc];
                }
            }
            resultRow[left] = acc;
        }
    }
    return result;
}// Of convnValid
/**
 * Valid convolution on one map of a 4-D tensor, summed over the first index.
 *
 * Both arguments are indexed as {@code [d][map][row][col]}. For every
 * {@code d}, the 2-D slice {@code matrix[d][mapNoX]} is combined with
 * {@code kernel[d][mapNoY]} as in a valid convolution without kernel
 * rotation, and the contributions of all {@code d} are added up into a single
 * 2-D result. The slice sizes are taken from the entries at {@code d == 0}.
 * NOTE(review): the first index presumably enumerates the records of a
 * batch; confirm against the callers.
 *
 * @param matrix
 *            the input tensor; it is not modified.
 * @param mapNoX
 *            the map index used in {@code matrix}.
 * @param kernel
 *            the kernel tensor; it is not modified.
 * @param mapNoY
 *            the map index used in {@code kernel}.
 * @return a new 2-D matrix with the summed result.
 * @throws RuntimeException
 *             if the first dimensions of the two tensors differ.
 */
public static double[][] convnValid(final double[][][][] matrix, int mapNoX,
        double[][][][] kernel, int mapNoY) {
    final int depth = matrix.length;
    final int rows = matrix[0][mapNoX].length;
    final int cols = matrix[0][mapNoX][0].length;
    final int kernelDepth = kernel.length;
    final int kernelRows = kernel[0][mapNoY].length;
    final int kernelCols = kernel[0][mapNoY][0].length;
    final int outRows = rows - kernelRows + 1;
    final int outCols = cols - kernelCols + 1;
    if (depth != kernelDepth) {
        throw new RuntimeException("length");
    }

    // The depths are equal here, so the kernel fits at exactly one position
    // along the first index and the result is a single 2-D slice.
    final double[][] result = new double[outRows][outCols];
    for (int r = 0; r < outRows; r++) {
        for (int c = 0; c < outCols; c++) {
            double acc = 0.0;
            for (int d = 0; d < kernelDepth; d++) {
                for (int kr = 0; kr < kernelRows; kr++) {
                    for (int kc = 0; kc < kernelCols; kc++) {
                        acc += matrix[d][mapNoX][r + kr][c + kc]
                                * kernel[d][mapNoY][kr][kc];
                    }
                }
            }
            result[r][c] = acc;
        }
    }
    return result;
}// Of convnValid
/**
 * The sigmoid function {@code 1 / (1 + e^(-x))}.
 *
 * @param x
 *            the input value.
 * @return the sigmoid of {@code x}.
 */
public static double sigmod(double x) {
    // Math.exp is the library exponential; raising the rounded constant
    // Math.E to a power is a hand-rolled substitute for it.
    return 1 / (1 + Math.exp(-x));
}// Of sigmod
/**
 * Add up all values of a matrix.
 *
 * The column count is taken from the first row and used for every row.
 *
 * @param error
 *            the matrix to sum; it is not modified.
 * @return the sum of the elements.
 */
public static double sum(double[][] error) {
    final int rows = error.length;
    final int cols = error[0].length;
    double total = 0.0;
    for (int r = 0; r < rows; r++) {
        final double[] row = error[r];
        for (int c = 0; c < cols; c++) {
            total += row[c];
        }
    }
    return total;
}// Of sum
/**
 * Add up one map of a 4-D tensor over its first index.
 *
 * The tensor is indexed as {@code [d][map][row][col]}. Result element
 * (row, col) is the sum of {@code errors[d][j][row][col]} over all
 * {@code d}. The result size is taken from {@code errors[0][j]}.
 *
 * @param errors
 *            the tensor to sum; it is not modified.
 * @param j
 *            the map index to sum.
 * @return a new 2-D matrix with the summed values.
 */
public static double[][] sum(double[][][][] errors, int j) {
    final int rows = errors[0][j].length;
    final int cols = errors[0][j][0].length;
    final double[][] totals = new double[rows][cols];
    for (int r = 0; r < rows; r++) {
        final double[] totalsRow = totals[r];
        for (int c = 0; c < cols; c++) {
            double acc = 0;
            int d = 0;
            while (d < errors.length) {
                acc += errors[d][j][r][c];
                d++;
            }
            totalsRow[c] = acc;
        }
    }
    return totals;
}// Of sum
/**
 * Find the position of the largest value in an array.
 *
 * The search starts with the first element and moves on only when a
 * strictly greater value is found, so among equal maxima the first position
 * is returned.
 *
 * @param out
 *            the values to search; must contain at least one element.
 * @return the index of the largest value.
 */
public static int getMaxIndex(double[] out) {
    int bestIndex = 0;
    double bestValue = out[0];
    int candidate = 1;
    while (candidate < out.length) {
        final double value = out[candidate];
        if (value > bestValue) {
            bestValue = value;
            bestIndex = candidate;
        }
        candidate++;
    }
    return bestIndex;
}// Of getMaxIndex
}// Of MathUtils