原博文:minfanphd
任务计划
第81天:数据集读取与存储
- 使用了ArrayList,该类对于数据操作比较方便,但会稍微影响效率。
- 对读入的数据进行处理,通过字符分割切分数据。
- 主要作用就是数据集的读取和存储,主要完成的编码就是对数据文件的处理,格式化,存储。
package MachineLearning.cnn;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
* @description:数据集
* @learner: Qing Zhang
* @time: 08
*/
public class Dataset {

	// All instances, stored in a list for convenient appending and random access.
	private List<Instance> instances;

	// The index of the label column; -1 means the data carries no label.
	private int labelIndex;

	// The maximum label value seen so far (labels are assumed to start from 0).
	private double maxLabel = -1;

	/**
	 * @Description: The first constructor. Creates an empty, unlabeled dataset.
	 * @Param: []
	 * @return:
	 */
	public Dataset() {
		labelIndex = -1;
		instances = new ArrayList<Instance>();
	}

	/**
	 * @Description: The second constructor. Reads a dataset from a text file.
	 * @Param: [paraFilename: the data file name,
	 *          paraSplitSign: the column separator, often ",",
	 *          paraLabelIndex: the label column index, often the last column]
	 * @return:
	 */
	public Dataset(String paraFilename, String paraSplitSign, int paraLabelIndex) {
		instances = new ArrayList<Instance>();
		labelIndex = paraLabelIndex;

		// try-with-resources guarantees the reader is closed even when a parse
		// or I/O error is thrown (the original leaked it on exceptions).
		// NOTE(review): FileReader uses the platform default charset; fine for
		// purely numeric data -- confirm if files may contain other text.
		try (BufferedReader tempReader = new BufferedReader(
				new FileReader(new File(paraFilename)))) {
			String tempLine;
			while ((tempLine = tempReader.readLine()) != null) {
				// Skip blank lines. String.split never returns a zero-length
				// array for a blank line (it yields [""]), so the original
				// "length == 0" guard could not catch them and parseDouble
				// crashed with NumberFormatException.
				if (tempLine.trim().isEmpty()) {
					continue;
				}

				String[] tempDatum = tempLine.split(paraSplitSign);
				double[] tempData = new double[tempDatum.length];
				for (int i = 0; i < tempDatum.length; i++) {
					tempData[i] = Double.parseDouble(tempDatum[i]);
				}
				append(new Instance(tempData));
			}
		} catch (IOException e) {
			e.printStackTrace();
			System.out.println("Unable to load " + paraFilename);
			System.exit(0);
		}
	}

	/**
	 * @Description: Append one instance.
	 * @Param: [paraInstance]
	 * @return: void
	 */
	public void append(Instance paraInstance) {
		instances.add(paraInstance);
	}

	/**
	 * @Description: Append one instance given its attributes and its label.
	 * @Param: [paraAttributes, paraLabel]
	 * @return: void
	 */
	public void append(double[] paraAttributes, Double paraLabel) {
		instances.add(new Instance(paraAttributes, paraLabel));
	}

	/**
	 * @Description: Get an instance by its index.
	 * @Param: [paraIndex]
	 * @return: the instance at the given position
	 */
	public Instance getInstance(int paraIndex) {
		return instances.get(paraIndex);
	}

	/**
	 * @Description: Get the number of instances.
	 * @Param: []
	 * @return: int
	 */
	public int size() {
		return instances.size();
	}

	/**
	 * @Description: Get the attribute vector of the instance at the given index.
	 * @Param: [paraIndex]
	 * @return: double[]
	 */
	public double[] getAttributes(int paraIndex) {
		return instances.get(paraIndex).getAttributes();
	}

	/**
	 * @Description: Get the label of the instance at the given index
	 *               (null when the dataset has no label column).
	 * @Param: [paraIndex]
	 * @return: java.lang.Double
	 */
	public Double getLabel(int paraIndex) {
		return instances.get(paraIndex).getLabel();
	}

	/**
	 * @Description: A simple manual test: read a file and print the first instance.
	 * @Param: [args]
	 * @return: void
	 */
	public static void main(String args[]) {
		Dataset tempData = new Dataset("F:\\研究生\\研0\\学习\\Java_Study\\data_set\\train.format", ",", 784);
		Instance tempInstance = tempData.getInstance(0);
		System.out.println("The first instance is: " + tempInstance);
	}

	/**
	 ***********************
	 * One instance: an attribute vector plus an optional label.
	 ***********************
	 */
	public class Instance {
		// The condition attributes.
		private double[] attributes;
		// The label; null when the enclosing dataset is unlabeled.
		private Double label;

		/**
		 * @Description: The first constructor.
		 * @Param: [paraAttrs, paraLabel]
		 * @return:
		 */
		private Instance(double[] paraAttrs, Double paraLabel) {
			attributes = paraAttrs;
			label = paraLabel;
		}

		/**
		 * @Description: The second constructor. Splits the label (if any)
		 *               out of one raw data row.
		 * @Param: [paraData]
		 * @return:
		 */
		public Instance(double[] paraData) {
			if (labelIndex == -1) {
				// No label: the whole row is the attribute vector.
				attributes = paraData;
			} else {
				label = paraData[labelIndex];
				if (label > maxLabel) {
					// A new largest label.
					maxLabel = label;
				}

				// NOTE(review): only the first or the last column is supported
				// as the label; any other labelIndex silently drops the last
				// column instead -- confirm this matches caller expectations.
				if (labelIndex == 0) {
					// The first column is the label.
					attributes = Arrays.copyOfRange(paraData, 1, paraData.length);
				} else {
					// The last column is the label.
					attributes = Arrays.copyOfRange(paraData, 0, paraData.length - 1);
				}
			}
		}

		/**
		 * @Description: Get the attribute vector.
		 * @Param: []
		 * @return: double[]
		 */
		public double[] getAttributes() {
			return attributes;
		}

		/**
		 * @Description: Get the label, or null when the dataset is unlabeled.
		 * @Param: []
		 * @return: java.lang.Double
		 */
		public Double getLabel() {
			if (labelIndex == -1)
				return null;
			return label;
		}

		/**
		 * @Description: For display: "[attributes], label".
		 */
		public String toString() {
			return Arrays.toString(attributes) + ", " + label;
		}
	}
}
第82天:数学操作
package MachineLearning.cnn;
import java.io.Serializable;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
/**
* @description: 数学操作
* @learner: Qing Zhang
* @time: 08
*/
public class MathUtils {

	/**
	 * @Description: An operator applied to a single value. Extends Serializable
	 *               so operator instances can be persisted with a model.
	 * @Param:
	 * @return:
	 */
	public interface Operator extends Serializable {
		public double process(double value);
	}

	/**
	 * @Description: The (1 - value) operator, used for the sigmoid derivative
	 *               s * (1 - s) during back-propagation.
	 * @Param:
	 * @return:
	 */
	public static final Operator one_value = new Operator() {
		private static final long serialVersionUID = 3752139491940330714L;

		@Override
		public double process(double value) {
			return 1 - value;
		}
	};

	/**
	 * @Description: The sigmoid operator: 1 / (1 + e^(-value)).
	 * @Param:
	 * @return:
	 */
	public static final Operator sigmoid = new Operator() {
		private static final long serialVersionUID = -1952718905019847589L;

		@Override
		public double process(double value) {
			return 1 / (1 + Math.pow(Math.E, -value));
		}
	};

	/**
	 * @Description: An operator combining two values into one.
	 * @Param:
	 * @return:
	 */
	interface OperatorOnTwo extends Serializable {
		public double process(double a, double b);
	}

	/**
	 * @Description: Addition: a + b.
	 * @Param:
	 * @return:
	 */
	public static final OperatorOnTwo plus = new OperatorOnTwo() {
		private static final long serialVersionUID = -6298144029766839945L;

		@Override
		public double process(double a, double b) {
			return a + b;
		}
	};

	/**
	 * @Description: Multiplication: a * b.
	 * @Param:
	 * @return:
	 */
	public static OperatorOnTwo multiply = new OperatorOnTwo() {
		private static final long serialVersionUID = -7053767821858820698L;

		@Override
		public double process(double a, double b) {
			return a * b;
		}
	};

	/**
	 * @Description: Subtraction: a - b.
	 * @Param:
	 * @return:
	 */
	public static OperatorOnTwo minus = new OperatorOnTwo() {
		private static final long serialVersionUID = 7346065545555093912L;

		@Override
		public double process(double a, double b) {
			return a - b;
		}
	};

	/**
	 * @Description: Print a matrix to stdout, one tab-separated row per line.
	 * @Param: [matrix]
	 * @return: void
	 */
	public static void printMatrix(double[][] matrix) {
		for (int i = 0; i < matrix.length; i++) {
			String line = Arrays.toString(matrix[i]);
			line = line.replaceAll(", ", "\t");
			System.out.println(line);
		}
		System.out.println();
	}

	/**
	 * @Description: Rotate a matrix by 180 degrees. The input is cloned first,
	 *               so the argument is never modified.
	 * @Param: [matrix]
	 * @return: double[][]
	 */
	public static double[][] rot180(double[][] matrix) {
		matrix = cloneMatrix(matrix);
		int m = matrix.length;
		int n = matrix[0].length;
		// First mirror each row: swap column j with column (n - 1 - j).
		for (int i = 0; i < m; i++) {
			for (int j = 0; j < n / 2; j++) {
				double tmp = matrix[i][j];
				matrix[i][j] = matrix[i][n - 1 - j];
				matrix[i][n - 1 - j] = tmp;
			}
		}
		// Then mirror each column: swap row i with row (m - 1 - i).
		for (int j = 0; j < n; j++) {
			for (int i = 0; i < m / 2; i++) {
				double tmp = matrix[i][j];
				matrix[i][j] = matrix[m - 1 - i][j];
				matrix[m - 1 - i][j] = tmp;
			}
		}
		return matrix;
	}

	// A shared generator with a fixed seed, so runs are reproducible.
	private static Random myRandom = new Random(2);

	/**
	 * @Description: Generate a random matrix of the given size.
	 *               Each value lies in [-0.005, 0.095) since nextDouble()
	 *               returns [0, 1). The boolean parameter b is unused.
	 * @Param: [x, y, b]
	 * @return: double[][]
	 */
	public static double[][] randomMatrix(int x, int y, boolean b) {
		double[][] matrix = new double[x][y];
		for (int i = 0; i < x; i++) {
			for (int j = 0; j < y; j++) {
				matrix[i][j] = (myRandom.nextDouble() - 0.05) / 10;
			}
		}
		return matrix;
	}

	/**
	 * @Description: Generate an array of the given length. Currently every
	 *               element is 0: the random initialization is commented out,
	 *               so despite the name this returns an all-zero array
	 *               (used for bias initialization).
	 * @Param: [len]
	 * @return: double[]
	 */
	public static double[] randomArray(int len) {
		double[] data = new double[len];
		for (int i = 0; i < len; i++) {
			//data[i] = myRandom.nextDouble() / 10 - 0.05;
			data[i] = 0;
		}
		return data;
	}

	/**
	 * @Description: Draw batchSize distinct random indices from [0, size).
	 *               Loops forever if batchSize > size -- callers must ensure
	 *               batchSize <= size.
	 * @Param: [size, batchSize]
	 * @return: int[]
	 */
	public static int[] randomPerm(int size, int batchSize) {
		Set<Integer> set = new HashSet<Integer>();
		while (set.size() < batchSize) {
			set.add(myRandom.nextInt(size));
		}
		int[] randPerm = new int[batchSize];
		int i = 0;
		for (Integer value : set)
			randPerm[i++] = value;
		return randPerm;
	}

	/**
	 * @Description: Deep-copy a matrix so the caller does not share storage
	 *               with the original.
	 * @Param: [matrix]
	 * @return: double[][]
	 */
	public static double[][] cloneMatrix(final double[][] matrix) {
		final int m = matrix.length;
		int n = matrix[0].length;
		final double[][] outMatrix = new double[m][n];
		for (int i = 0; i < m; i++) {
			for (int j = 0; j < n; j++) {
				outMatrix[i][j] = matrix[i][j];
			}
		}
		return outMatrix;
	}

	/**
	 * @Description: Apply a single-operand operator element-wise.
	 *               NOTE: mutates ma in place and returns it.
	 * @Param: [ma, operator]
	 * @return: double[][]
	 */
	public static double[][] matrixOp(final double[][] ma, Operator operator) {
		final int m = ma.length;
		int n = ma[0].length;
		for (int i = 0; i < m; i++) {
			for (int j = 0; j < n; j++) {
				ma[i][j] = operator.process(ma[i][j]);
			}
		}
		return ma;
	}

	/**
	 * @Description: Element-wise combine two matrices: optionally transform
	 *               each operand (null operator = identity), then combine.
	 *               NOTE: the result is written into mb (mutated in place)
	 *               and mb is returned; ma is left untouched.
	 * @Param: [ma, mb, operatorA, operatorB, operator]
	 * @return: double[][]
	 */
	public static double[][] matrixOp(final double[][] ma, final double[][] mb,
			final Operator operatorA, final Operator operatorB, OperatorOnTwo operator) {
		final int m = ma.length;
		int n = ma[0].length;
		if (m != mb.length || n != mb[0].length)
			throw new RuntimeException("ma.length:" + ma.length + " mb.length:" + mb.length);

		for (int i = 0; i < m; i++) {
			for (int j = 0; j < n; j++) {
				double a = ma[i][j];
				if (operatorA != null)
					a = operatorA.process(a);
				double b = mb[i][j];
				if (operatorB != null)
					b = operatorB.process(b);
				mb[i][j] = operator.process(a, b);
			}
		}
		return mb;
	}

	/**
	 * @Description: Kronecker-style expansion: each cell is replicated into a
	 *               (scale.width x scale.height) block. The inverse direction
	 *               of pooling, used in back-propagation.
	 * @Param: [matrix, scale]
	 * @return: double[][]
	 */
	public static double[][] kronecker(final double[][] matrix, final Size scale) {
		final int m = matrix.length;
		int n = matrix[0].length;
		final double[][] outMatrix = new double[m * scale.width][n * scale.height];

		for (int i = 0; i < m; i++) {
			for (int j = 0; j < n; j++) {
				for (int ki = i * scale.width; ki < (i + 1) * scale.width; ki++) {
					for (int kj = j * scale.height; kj < (j + 1) * scale.height; kj++) {
						outMatrix[ki][kj] = matrix[i][j];
					}
				}
			}
		}
		return outMatrix;
	}

	/**
	 * @Description: Shrink a matrix by mean pooling: each non-overlapping
	 *               (scale.width x scale.height) window is averaged to one
	 *               cell. The dimensions must divide evenly.
	 * @Param: [matrix, scale]
	 * @return: double[][]
	 */
	public static double[][] scaleMatrix(final double[][] matrix, final Size scale) {
		int m = matrix.length;
		int n = matrix[0].length;
		final int sm = m / scale.width;
		final int sn = n / scale.height;
		final double[][] outMatrix = new double[sm][sn];
		if (sm * scale.width != m || sn * scale.height != n)
			throw new RuntimeException("scale matrix");
		final int size = scale.width * scale.height;
		for (int i = 0; i < sm; i++) {
			for (int j = 0; j < sn; j++) {
				double sum = 0.0;
				for (int si = i * scale.width; si < (i + 1) * scale.width; si++) {
					for (int sj = j * scale.height; sj < (j + 1) * scale.height; sj++) {
						sum += matrix[si][sj];
					}
				}
				outMatrix[i][j] = sum / size;
			}
		}
		return outMatrix;
	}

	/**
	 * @Description: "Full" convolution producing a LARGER output: the input is
	 *               zero-padded by (kernel - 1) on each side and then a valid
	 *               convolution is applied. Used for back-propagation.
	 * @Param: [matrix, kernel]
	 * @return: double[][]
	 */
	public static double[][] convnFull(double[][] matrix, final double[][] kernel) {
		int m = matrix.length;
		int n = matrix[0].length;
		final int km = kernel.length;
		final int kn = kernel[0].length;
		// Embed the input into a zero-padded matrix.
		final double[][] extendMatrix = new double[m + 2 * (km - 1)][n + 2 * (kn - 1)];
		for (int i = 0; i < m; i++) {
			for (int j = 0; j < n; j++) {
				extendMatrix[i + km - 1][j + kn - 1] = matrix[i][j];
			}
		}
		return convnValid(extendMatrix, kernel);
	}

	/**
	 * @Description: "Valid" convolution: slide the kernel over the matrix and
	 *               sum the element-wise products; the output shrinks to
	 *               (m - km + 1) x (n - kn + 1). Used for forward propagation.
	 *               Note the kernel is NOT rotated (cross-correlation).
	 * @Param: [matrix, kernel]
	 * @return: double[][]
	 */
	public static double[][] convnValid(final double[][] matrix, double[][] kernel) {
		// kernel = rot180(kernel);
		int m = matrix.length;
		int n = matrix[0].length;
		final int km = kernel.length;
		final int kn = kernel[0].length;
		int kns = n - kn + 1;
		final int kms = m - km + 1;
		final double[][] outMatrix = new double[kms][kns];

		for (int i = 0; i < kms; i++) {
			for (int j = 0; j < kns; j++) {
				double sum = 0.0;
				for (int ki = 0; ki < km; ki++) {
					for (int kj = 0; kj < kn; kj++)
						sum += matrix[i + ki][j + kj] * kernel[ki][kj];
				}
				outMatrix[i][j] = sum;
			}
		}
		return outMatrix;
	}

	/**
	 * @Description: Valid convolution on 4D tensors, fixing map indices mapNoX
	 *               (for the data) and mapNoY (for the kernel). Only the first
	 *               slice of the 3D result is returned.
	 * @Param: [matrix, mapNoX, kernel, mapNoY]
	 * @return: double[][]
	 */
	public static double[][] convnValid(final double[][][][] matrix, int mapNoX,
			double[][][][] kernel, int mapNoY) {
		int m = matrix.length;
		int n = matrix[0][mapNoX].length;
		int h = matrix[0][mapNoX][0].length;
		int km = kernel.length;
		int kn = kernel[0][mapNoY].length;
		int kh = kernel[0][mapNoY][0].length;
		int kms = m - km + 1;
		int kns = n - kn + 1;
		int khs = h - kh + 1;
		if (matrix.length != kernel.length)
			throw new RuntimeException("length");
		final double[][][] outMatrix = new double[kms][kns][khs];
		for (int i = 0; i < kms; i++) {
			for (int j = 0; j < kns; j++)
				for (int k = 0; k < khs; k++) {
					double sum = 0.0;
					for (int ki = 0; ki < km; ki++) {
						for (int kj = 0; kj < kn; kj++)
							for (int kk = 0; kk < kh; kk++) {
								sum += matrix[i + ki][mapNoX][j + kj][k + kk]
										* kernel[ki][mapNoY][kj][kk];
							}
					}
					outMatrix[i][j][k] = sum;
				}
		}
		return outMatrix[0];
	}

	/**
	 * @Description: The sigmoid function: 1 / (1 + e^(-x)).
	 *               (Kept with the original's "sigmod" spelling.)
	 * @Param: [x]
	 * @return: double
	 */
	public static double sigmod(double x) {
		return 1 / (1 + Math.pow(Math.E, -x));
	}

	/**
	 * @Description: Sum all cells of a matrix.
	 * @Param: [error]
	 * @return: double
	 */
	public static double sum(double[][] error) {
		int m = error.length;
		int n = error[0].length;
		double sum = 0.0;
		for (int i = 0; i < m; i++) {
			for (int j = 0; j < n; j++) {
				sum += error[i][j];
			}
		}
		return sum;
	}

	/**
	 * @Description: Ad hoc sum: collapse the batch dimension of a 4D error
	 *               tensor for map j, yielding one 2D matrix.
	 * @Param: [errors, j]
	 * @return: double[][]
	 */
	public static double[][] sum(double[][][][] errors, int j) {
		int m = errors[0][j].length;
		int n = errors[0][j][0].length;
		double[][] result = new double[m][n];
		for (int mi = 0; mi < m; mi++) {
			for (int nj = 0; nj < n; nj++) {
				double sum = 0;
				for (int i = 0; i < errors.length; i++)
					sum += errors[i][j][mi][nj];
				result[mi][nj] = sum;
			}
		}
		return result;
	}

	/**
	 * @Description: Index of the maximum value (argmax); the first maximum
	 *               wins on ties. Used to pick the predicted class.
	 * @Param: [out]
	 * @return: int
	 */
	public static int getMaxIndex(double[] out) {
		double max = out[0];
		int index = 0;
		for (int i = 1; i < out.length; i++)
			if (out[i] > max) {
				max = out[i];
				index = i;
			}
		return index;
	}

	/**
	 * @Description: Unit test for rot180 with a fixed 2x2 matrix.
	 * @Param: []
	 * @return: void
	 */
	private static void rot180UnitTest(){
		double[][] matrix = {{1,2},{3,4}};
		double[][] rotMatrix = rot180(matrix);
		System.out.println(Arrays.deepToString(rotMatrix));
	}

	/**
	 * @Description: Entry point: runs the rot180 unit test.
	 * @Param: [args]
	 * @return: void
	 */
	public static void main(String[] args) {
		rot180UnitTest();
	}
}
第83天:数学操作 (续)
这里设置了一个二维数组去测试rot180
/**
* @Description: 测试rot180方法
* @Param: []
* @return: void
*/
private static void rot180UnitTest(){
double[][] matrix = {{1,2},{3,4}};
double[][] rotMatrix = rot180(matrix);
System.out.println(Arrays.deepToString(rotMatrix));
}
public static void main(String[] args) {
rot180UnitTest();
}
第84天:网络结构与参数
主要理解多维数组的维度的意义。
package MachineLearning.cnn;
/**
* @description:Cnn网络层
* @learner: Qing Zhang
* @time: 08
*/
public class CnnLayer {
	// The layer type (INPUT, CONVOLUTION, SAMPLING or OUTPUT).
	LayerTypeEnum type;
	// The number of output maps.
	int outMapNum;
	// The size of each output map.
	Size mapSize;
	// The kernel size (convolution and output layers only).
	Size kernelSize;
	// The scale (pooling window) size (sampling layers only).
	Size scaleSize;
	// The number of classes; -1 for non-output layers.
	int classNum = -1;
	// The kernels. Dimensions: [front map][out map][width][height].
	private double[][][][] kernel;
	// The biases, one per output map.
	private double[] bias;
	// The output maps. Dimensions:
	// [batchSize][outMapNum][mapSize.width][mapSize.height].
	private double[][][][] outMaps;
	// The errors, with the same dimensions as outMaps.
	private double[][][][] errors;
	// The index of the current record within the batch. Static, so EVERY
	// layer in the process advances through the batch in lock-step.
	private static int recordInBatch = 0;

	/**
	 * @Description: The first constructor.
	 * @Param: [
	 *   paraType: the layer type,
	 *   paraNum:
	 *     When the type is CONVOLUTION, it is the out map number. When
	 *     the type is OUTPUT, it is the class number. Otherwise unused.
	 *   paraSize:
	 *     When the type is INPUT, it is the map size; when the type is
	 *     CONVOLUTION, it is the kernel size; when the type is SAMPLING,
	 *     it is the scale size. Unused for OUTPUT.
	 * ]
	 * @return:
	 */
	public CnnLayer(LayerTypeEnum paraType, int paraNum, Size paraSize) {
		type = paraType;
		switch (type) {
		case INPUT:
			outMapNum = 1;
			mapSize = paraSize; // No deep copy.
			break;
		case CONVOLUTION:
			outMapNum = paraNum;
			kernelSize = paraSize;
			break;
		case SAMPLING:
			scaleSize = paraSize;
			break;
		case OUTPUT:
			classNum = paraNum;
			mapSize = new Size(1, 1);
			outMapNum = classNum;
			break;
		default:
			System.out.println("Internal error occurred in AbstractLayer.java constructor.");
		}
	}

	/**
	 * @Description: Initialize the kernels with small random matrices, one per
	 *               (front map, out map) pair.
	 * @Param: [paraFrontMapNum]
	 * @return: void
	 */
	public void initKernel(int paraFrontMapNum) {
		kernel = new double[paraFrontMapNum][outMapNum][][];
		for (int i = 0; i < paraFrontMapNum; i++) {
			for (int j = 0; j < outMapNum; j++) {
				kernel[i][j] = MathUtils.randomMatrix(kernelSize.width, kernelSize.height, true);
			}
		}
	}

	/**
	 * @Description: Initialize the output layer's kernels. Sets the kernel
	 *               size first, then delegates to initKernel(int).
	 * @Param: [paraFrontMapNum, paraSize]
	 * @return: void
	 */
	public void initOutputKernel(int paraFrontMapNum, Size paraSize) {
		kernelSize = paraSize;
		initKernel(paraFrontMapNum);
	}

	/**
	 ***********************
	 * Initialize the bias. No parameter. "int frontMapNum" is claimed however
	 * not used.
	 ***********************
	 */
	/**
	 * @Description: Initialize the biases, one per output map.
	 *               (randomArray currently returns all zeros.)
	 * @Param: []
	 * @return: void
	 */
	public void initBias() {
		bias = MathUtils.randomArray(outMapNum);
	}

	/**
	 * @Description: Allocate the error storage for a whole batch.
	 * @Param: [paraBatchSize]
	 * @return: void
	 */
	public void initErrors(int paraBatchSize) {
		errors = new double[paraBatchSize][outMapNum][mapSize.width][mapSize.height];
	}

	/**
	 * @Description: Allocate the output map storage for a whole batch.
	 * @Param: [paraBatchSize]
	 * @return: void
	 */
	public void initOutMaps(int paraBatchSize) {
		outMaps = new double[paraBatchSize][outMapNum][mapSize.width][mapSize.height];
	}

	/**
	 * @Description: Prepare for a new batch: rewind the shared record index.
	 * @Param: []
	 * @return: void
	 */
	public static void prepareForNewBatch() {
		recordInBatch = 0;
	}

	/**
	 * @Description: Prepare for a new record: advance the shared record index.
	 * @Param: []
	 * @return: void
	 */
	public static void prepareForNewRecord() {
		recordInBatch++;
	}

	/**
	 * @Description: Set one cell of an output map for the current record.
	 * @Param: [paraMapNo, paraX, paraY, paraValue]
	 * @return: void
	 */
	public void setMapValue(int paraMapNo, int paraX, int paraY, double paraValue) {
		outMaps[recordInBatch][paraMapNo][paraX][paraY] = paraValue;
	}

	/**
	 * @Description: Set a whole output map for the current record.
	 *               NOTE: stores the reference, no copy.
	 * @Param: [paraMapNo, paraOutMatrix]
	 * @return: void
	 */
	public void setMapValue(int paraMapNo, double[][] paraOutMatrix) {
		outMaps[recordInBatch][paraMapNo] = paraOutMatrix;
	}

	/**
	 * @Description: Get the map size.
	 * @Param: []
	 * @return: MachineLearning.cnn.Size
	 */
	public Size getMapSize() {
		return mapSize;
	}

	/**
	 * @Description: Set the map size.
	 * @Param: [paraMapSize]
	 * @return: void
	 */
	public void setMapSize(Size paraMapSize) {
		mapSize = paraMapSize;
	}

	/**
	 * @Description: Get the layer type.
	 * @Param: []
	 * @return: MachineLearning.cnn.LayerTypeEnum
	 */
	public LayerTypeEnum getType() {
		return type;
	}

	/**
	 * @Description: Get the number of output maps.
	 * @Param: []
	 * @return: int
	 */
	public int getOutMapNum() {
		return outMapNum;
	}

	/**
	 * @Description: Set the number of output maps.
	 * @Param: [paraOutMapNum]
	 * @return: void
	 */
	public void setOutMapNum(int paraOutMapNum) {
		outMapNum = paraOutMapNum;
	}

	/**
	 * @Description: Get the kernel size.
	 * @Param: []
	 * @return: MachineLearning.cnn.Size
	 */
	public Size getKernelSize() {
		return kernelSize;
	}

	/**
	 * @Description: Get the scale (pooling window) size.
	 * @Param: []
	 * @return: MachineLearning.cnn.Size
	 */
	public Size getScaleSize() {
		return scaleSize;
	}

	/**
	 * @Description: Get one output map of the CURRENT record.
	 * @Param: [paraIndex]
	 * @return: double[][]
	 */
	public double[][] getMap(int paraIndex) {
		return outMaps[recordInBatch][paraIndex];
	}

	/**
	 * @Description: Get one kernel.
	 * @Param: [paraFrontMap, paraOutMap]
	 * @return: double[][]
	 */
	public double[][] getKernel(int paraFrontMap, int paraOutMap) {
		return kernel[paraFrontMap][paraOutMap];
	}

	/**
	 * @Description: Set one error cell for the current record.
	 * @Param: [paraMapNo, paraMapX, paraMapY, paraValue]
	 * @return: void
	 */
	public void setError(int paraMapNo, int paraMapX, int paraMapY, double paraValue) {
		errors[recordInBatch][paraMapNo][paraMapX][paraMapY] = paraValue;
	}

	/**
	 * @Description: Set one error matrix for the current record.
	 *               NOTE: stores the reference, no copy.
	 * @Param: [paraMapNo, paraMatrix]
	 * @return: void
	 */
	public void setError(int paraMapNo, double[][] paraMatrix) {
		errors[recordInBatch][paraMapNo] = paraMatrix;
	}

	/**
	 * @Description: Get one error matrix of the current record.
	 * @Param: [paraMapNo]
	 * @return: double[][]
	 */
	public double[][] getError(int paraMapNo) {
		return errors[recordInBatch][paraMapNo];
	}

	/**
	 * @Description: Get the whole error tensor (all records, all maps).
	 * @Param: []
	 * @return: double[][][][]
	 */
	public double[][][][] getErrors() {
		return errors;
	}

	/**
	 * @Description: Set one kernel. NOTE: stores the reference, no copy.
	 * @Param: [paraLastMapNo, paraMapNo, paraKernel]
	 * @return: void
	 */
	public void setKernel(int paraLastMapNo, int paraMapNo, double[][] paraKernel) {
		kernel[paraLastMapNo][paraMapNo] = paraKernel;
	}

	/**
	 * @Description: Get the bias of one output map.
	 * @Param: [paraMapNo]
	 * @return: double
	 */
	public double getBias(int paraMapNo) {
		return bias[paraMapNo];
	}

	/**
	 * @Description: Set the bias of one output map.
	 * @Param: [paraMapNo, paraValue]
	 * @return: void
	 */
	public void setBias(int paraMapNo, double paraValue) {
		bias[paraMapNo] = paraValue;
	}// Of setBias

	/**
	 * @Description: Get the whole output map tensor (all records, all maps).
	 * @Param: []
	 * @return: double[][][][]
	 */
	public double[][][][] getMaps() {
		return outMaps;
	}

	/**
	 * @Description: Get one error matrix of a SPECIFIC record.
	 * @Param: [paraRecordId, paraMapNo]
	 * @return: double[][]
	 */
	public double[][] getError(int paraRecordId, int paraMapNo) {
		return errors[paraRecordId][paraMapNo];
	}

	/**
	 * @Description: Get one output map of a SPECIFIC record.
	 * @Param: [paraRecordId, paraMapNo]
	 * @return: double[][]
	 */
	public double[][] getMap(int paraRecordId, int paraMapNo) {
		return outMaps[paraRecordId][paraMapNo];
	}

	/**
	 * @Description: Get the number of classes (-1 for non-output layers).
	 * @Param: []
	 * @return: int
	 */
	public int getClassNum() {
		return classNum;
	}

	/**
	 * @Description: Get the whole kernel tensor.
	 * @Param: []
	 * @return: double[][][][]
	 */
	public double[][][][] getKernel() {
		return kernel;
	}
}
多层管理:
package MachineLearning.cnn;
import java.util.ArrayList;
import java.util.List;
/**
* @description:多层管理
* @learner: Qing Zhang
* @time: 08
*/
public class LayerBuilder {
	// The layers of the network, in front-to-back order.
	private List<CnnLayer> layers;

	/**
	 * @Description: The first constructor. Creates an empty layer list.
	 * @Param: []
	 * @return:
	 */
	public LayerBuilder() {
		layers = new ArrayList<CnnLayer>();
	}

	/**
	 * @Description: The second constructor. Starts with one layer.
	 * @Param: [paraLayer]
	 * @return:
	 */
	public LayerBuilder(CnnLayer paraLayer) {
		this();
		layers.add(paraLayer);
	}

	/**
	 * @Description: Append a layer to the back of the network.
	 * @Param: [paraLayer]
	 * @return: void
	 */
	public void addLayer(CnnLayer paraLayer) {
		layers.add(paraLayer);
	}

	/**
	 * @Description: Get the layer at the given index.
	 *               Fix: the original only guarded the upper bound, so a
	 *               negative index escaped as a bare IndexOutOfBoundsException
	 *               instead of the descriptive message below.
	 * @Param: [paraIndex]
	 * @return: MachineLearning.cnn.CnnLayer
	 */
	public CnnLayer getLayer(int paraIndex) throws RuntimeException {
		if (paraIndex < 0 || paraIndex >= layers.size()) {
			throw new RuntimeException("CnnLayer " + paraIndex + " is out of range: "
					+ layers.size() + ".");
		}
		return layers.get(paraIndex);
	}

	/**
	 * @Description: Get the last (output) layer.
	 * @Param: []
	 * @return: MachineLearning.cnn.CnnLayer
	 */
	public CnnLayer getOutputLayer() {
		return layers.get(layers.size() - 1);
	}

	/**
	 * @Description: Get the number of layers.
	 * @Param: []
	 * @return: int
	 */
	public int getNumLayers() {
		return layers.size();
	}
}
第85天:网络构建 (1. 代码抄写)
- initOperators 又初始化若干的算子. 注意到它们与已经初始化的成员变量有关. 这种灵活的方式 (interface) 还是值得学习的.
- ALPHA 和 LAMBDA 是超参数, 可以自己设置.
- setup 进行整个网络的初始化.
- forward 和 backPropagation 与 ANN 同理, 但运算不同了.
- 一批数据进行 forward 和 backPropagation 后, 才进行一次 updateParameters.
package MachineLearning.cnn;
import java.util.Arrays;
import MachineLearning.cnn.Dataset.Instance;
import MachineLearning.cnn.MathUtils.Operator;
/**
* @description:Cnn
* @learner: Qing Zhang
* @time: 08
*/
public class FullCnn {
	// The step size used when updating kernels and biases; decayed during
	// training once precision exceeds 0.96 (see train).
	private static double ALPHA = 0.85;
	// A constant; with 0 the multiplyLambda operator becomes the identity.
	public static double LAMBDA = 0;
	// Manages the stack of layers.
	// NOTE(review): static, so constructing a second FullCnn overwrites the
	// first instance's network -- confirm single-instance usage.
	private static LayerBuilder layerBuilder;
	// The number of instances processed per mini-batch.
	private int batchSize;
	// Operator dividing a value by batchSize (gradient averaging).
	private Operator divideBatchSize;
	// Operator multiplying a value by ALPHA.
	private Operator multiplyAlpha;
	// Operator multiplying a value by (1 - LAMBDA * ALPHA).
	private Operator multiplyLambda;
	/**
	 * @Description: The first constructor. Wires the layers, allocates all
	 *               storage (setup) and builds the update operators.
	 * @Param: [paraLayerBuilder: the layer manager, paraBatchSize: instances per mini-batch]
	 * @return:
	 */
	public FullCnn(LayerBuilder paraLayerBuilder, int paraBatchSize) {
		// NOTE(review): layerBuilder is a static field; see the field comment.
		layerBuilder = paraLayerBuilder;
		batchSize = paraBatchSize;
		setup();
		initOperators();
	}
	/**
	 * @Description: Initialize the three operators with anonymous classes.
	 *               They capture batchSize / ALPHA / LAMBDA from the
	 *               enclosing scope, so later changes to ALPHA take effect.
	 * @Param: []
	 * @return: void
	 */
	private void initOperators() {
		// Divide by the batch size (used to average kernel gradients).
		divideBatchSize = new Operator() {
			private static final long serialVersionUID = 7424011281732651055L;

			@Override
			public double process(double value) {
				return value / batchSize;
			}// Of process
		};

		// Multiply by the learning rate ALPHA.
		multiplyAlpha = new Operator() {
			private static final long serialVersionUID = 5761368499808006552L;

			@Override
			public double process(double value) {
				return value * ALPHA;
			}// Of process
		};

		// Multiply by (1 - LAMBDA * ALPHA); the identity while LAMBDA == 0,
		// otherwise weight decay on the old kernel.
		multiplyLambda = new Operator() {
			private static final long serialVersionUID = 4499087728362870577L;

			@Override
			public double process(double value) {
				return value * (1 - LAMBDA * ALPHA);
			}// Of process
		};
	}
	/**
	 * @Description: Initialize the whole network: every layer's map size,
	 *               kernels, biases, errors and output maps are derived from
	 *               the layer in front of it.
	 * @Param: []
	 * @return: void
	 */
	public void setup() {
		CnnLayer tempInputLayer = layerBuilder.getLayer(0);
		tempInputLayer.initOutMaps(batchSize);

		for (int i = 1; i < layerBuilder.getNumLayers(); i++) {
			CnnLayer tempLayer = layerBuilder.getLayer(i);
			CnnLayer tempFrontLayer = layerBuilder.getLayer(i - 1);
			int tempFrontMapNum = tempFrontLayer.getOutMapNum();
			switch (tempLayer.getType()) {
			case INPUT:
				// Should not be input. Maybe an error should be thrown out.
				break;
			case CONVOLUTION:
				// A valid convolution shrinks the map by (kernel - 1) per dimension.
				tempLayer.setMapSize(
						tempFrontLayer.getMapSize().subtract(tempLayer.getKernelSize(), 1));
				tempLayer.initKernel(tempFrontMapNum);
				tempLayer.initBias();
				tempLayer.initErrors(batchSize);
				tempLayer.initOutMaps(batchSize);
				break;
			case SAMPLING:
				// Pooling keeps the map count and divides the map size by the scale.
				tempLayer.setOutMapNum(tempFrontMapNum);
				tempLayer.setMapSize(tempFrontLayer.getMapSize().divide(tempLayer.getScaleSize()));
				tempLayer.initErrors(batchSize);
				tempLayer.initOutMaps(batchSize);
				break;
			case OUTPUT:
				// The output layer's kernel covers the whole front map,
				// producing 1x1 maps -- one per class.
				tempLayer.initOutputKernel(tempFrontMapNum, tempFrontLayer.getMapSize());
				tempLayer.initBias();
				tempLayer.initErrors(batchSize);
				tempLayer.initOutMaps(batchSize);
				break;
			}
		}
	}
/**
* @Description: 前向计算
* @Param: [instance]
* @return: void
*/
private void forward(Instance instance) {
setInputLayerOutput(instance);
for (int l = 1; l < layerBuilder.getNumLayers(); l++) {
CnnLayer tempCurrentLayer = layerBuilder.getLayer(l);
CnnLayer tempLastLayer = layerBuilder.getLayer(l - 1);
switch (tempCurrentLayer.getType()) {
case CONVOLUTION:
setConvolutionOutput(tempCurrentLayer, tempLastLayer);
break;
case SAMPLING:
setSampOutput(tempCurrentLayer, tempLastLayer);
break;
case OUTPUT:
setConvolutionOutput(tempCurrentLayer, tempLastLayer);
break;
default:
break;
}
}
}
/**
* @Description: 设置在层输出。给定一条记录,将其值复制到输入映射中
* @Param: [paraRecord]
* @return: void
*/
private void setInputLayerOutput(Instance paraRecord) {
CnnLayer tempInputLayer = layerBuilder.getLayer(0);
Size tempMapSize = tempInputLayer.getMapSize();
double[] tempAttributes = paraRecord.getAttributes();
if (tempAttributes.length != tempMapSize.width * tempMapSize.height)
throw new RuntimeException("input record does not match the map size.");
for (int i = 0; i < tempMapSize.width; i++) {
for (int j = 0; j < tempMapSize.height; j++) {
tempInputLayer.setMapValue(0, i, j, tempAttributes[tempMapSize.height * i + j]);
}
}
}
	/**
	 * @Description: Compute a convolution (or output) layer's maps from the
	 *               previous layer's maps: sum the valid convolutions over
	 *               all front maps, then apply sigmoid(sum + bias).
	 * @Param: [paraLayer, paraLastLayer]
	 * @return: void
	 */
	private void setConvolutionOutput(final CnnLayer paraLayer, final CnnLayer paraLastLayer) {
		// int mapNum = paraLayer.getOutMapNum();
		final int lastMapNum = paraLastLayer.getOutMapNum();

		// Attention: paraLayer.getOutMapNum() may not be correct.
		for (int j = 0; j < paraLayer.getOutMapNum(); j++) {
			// Accumulate the convolution of every front map with its kernel.
			double[][] tempSumMatrix = null;
			for (int i = 0; i < lastMapNum; i++) {
				double[][] lastMap = paraLastLayer.getMap(i);
				double[][] kernel = paraLayer.getKernel(i, j);
				if (tempSumMatrix == null) {
					// On the first map.
					tempSumMatrix = MathUtils.convnValid(lastMap, kernel);
				} else {
					// Sum up convolution maps
					tempSumMatrix = MathUtils.matrixOp(MathUtils.convnValid(lastMap, kernel),
							tempSumMatrix, null, null, MathUtils.plus);
				}
			}

			// Activation: sigmoid(value + bias), applied element-wise.
			final double bias = paraLayer.getBias(j);
			tempSumMatrix = MathUtils.matrixOp(tempSumMatrix, new Operator() {
				private static final long serialVersionUID = 2469461972825890810L;

				@Override
				public double process(double value) {
					return MathUtils.sigmod(value + bias);
				}
			});

			paraLayer.setMapValue(j, tempSumMatrix);
		}
	}
/**
* @Description: 根据上一层的输出计算卷积输出。
* @Param: [paraLayer, paraLastLayer]
* @return: void
*/
private void setSampOutput(final CnnLayer paraLayer, final CnnLayer paraLastLayer) {
// int tempLastMapNum = paraLastLayer.getOutMapNum();
// Attention: paraLayer.outMapNum may not be right.
for (int i = 0; i < paraLayer.outMapNum; i++) {
double[][] lastMap = paraLastLayer.getMap(i);
Size scaleSize = paraLayer.getScaleSize();
double[][] sampMatrix = MathUtils.scaleMatrix(lastMap, scaleSize);
paraLayer.setMapValue(i, sampMatrix);
}
}
	/**
	 * @Description: Train the network for the given number of rounds over the
	 *               dataset, one random mini-batch at a time.
	 * @Param: [paraDataset, paraRounds]
	 * @return: void
	 */
	public void train(Dataset paraDataset, int paraRounds) {
		for (int t = 0; t < paraRounds; t++) {
			System.out.println("Iteration: " + t);
			// The number of batches per round (called "epochs" here).
			int tempNumEpochs = paraDataset.size() / batchSize;
			if (paraDataset.size() % batchSize != 0)
				tempNumEpochs++;
			// logger.info("Iteration {}, epochsNum: {}", t, epochsNum);
			double tempNumCorrect = 0;
			int tempCount = 0;
			for (int i = 0; i < tempNumEpochs; i++) {
				// Draw batchSize distinct random indices from the dataset.
				int[] tempRandomPerm = MathUtils.randomPerm(paraDataset.size(), batchSize);
				CnnLayer.prepareForNewBatch();

				for (int index : tempRandomPerm) {
					boolean isRight = train(paraDataset.getInstance(index));
					if (isRight)
						tempNumCorrect++;
					tempCount++;
					CnnLayer.prepareForNewRecord();
				}

				// Parameters are updated once per batch, after all of its
				// records have been forward/backward propagated.
				updateParameters();
				if (i % 50 == 0) {
					System.out.print("..");
					if (i + 50 > tempNumEpochs)
						System.out.println();
				}
			}
			double p = 1.0 * tempNumCorrect / tempCount;
			// Decay the learning rate once training precision is high enough.
			if (t % 10 == 1 && p > 0.96) {
				ALPHA = 0.001 + ALPHA * 0.9;
				// logger.info("Set alpha = {}", ALPHA);
			}
			System.out.println("Training precision: " + p);
			// logger.info("Precision: {}/{}={}.", right, count, p);
		}
	}
/**
* @Description: 用一条记录训练cnn
* @Param: [paraRecord]
* @return: boolean
*/
private boolean train(Instance paraRecord) {
forward(paraRecord);
boolean result = backPropagation(paraRecord);
return result;
}
/**
* @Description: 反向传播
* @Param: [paraRecord]
* @return: boolean
*/
private boolean backPropagation(Instance paraRecord) {
boolean result = setOutputLayerErrors(paraRecord);
setHiddenLayerErrors();
return result;
}
/**
* @Description: 更新参数
* @Param: []
* @return: void
*/
private void updateParameters() {
for (int l = 1; l < layerBuilder.getNumLayers(); l++) {
CnnLayer layer = layerBuilder.getLayer(l);
CnnLayer lastLayer = layerBuilder.getLayer(l - 1);
switch (layer.getType()) {
case CONVOLUTION:
case OUTPUT:
updateKernels(layer, lastLayer);
updateBias(layer, lastLayer);
break;
default:
break;
}
}
}
	/**
	 * @Description: Update the biases of a convolution/output layer from the
	 *               errors accumulated over the current batch.
	 * @Param: [paraLayer, paraLastLayer: unused, kept for signature symmetry]
	 * @return: void
	 */
	private void updateBias(final CnnLayer paraLayer, CnnLayer paraLastLayer) {
		final double[][][][] errors = paraLayer.getErrors();
		// int mapNum = paraLayer.getOutMapNum();
		// Attention: getOutMapNum() may not be correct.
		for (int j = 0; j < paraLayer.getOutMapNum(); j++) {
			// Sum map j's errors over the batch, then average over the batch.
			double[][] error = MathUtils.sum(errors, j);
			double deltaBias = MathUtils.sum(error) / batchSize;
			// NOTE(review): the delta is ADDED, implying errors are stored
			// with the sign of the desired change -- confirm against
			// setOutputLayerErrors.
			double bias = paraLayer.getBias(j) + ALPHA * deltaBias;
			paraLayer.setBias(j, bias);
		}
	}
	/**
	 * @Description: Update the kernels of a convolution/output layer using the
	 *               errors accumulated over the current batch.
	 * @Param: [paraLayer, paraLastLayer]
	 * @return: void
	 */
	private void updateKernels(final CnnLayer paraLayer, final CnnLayer paraLastLayer) {
		// int mapNum = paraLayer.getOutMapNum();
		int tempLastMapNum = paraLastLayer.getOutMapNum();
		// Attention: getOutMapNum() may not be right
		for (int j = 0; j < paraLayer.getOutMapNum(); j++) {
			for (int i = 0; i < tempLastMapNum; i++) {
				// Accumulate the kernel gradient over every record of the batch.
				double[][] tempDeltaKernel = null;
				for (int r = 0; r < batchSize; r++) {
					double[][] error = paraLayer.getError(r, j);
					if (tempDeltaKernel == null)
						// First record: gradient = conv(front map, error).
						tempDeltaKernel = MathUtils.convnValid(paraLastLayer.getMap(r, i), error);
					else {
						// Later records: add their contribution.
						tempDeltaKernel = MathUtils.matrixOp(
								MathUtils.convnValid(paraLastLayer.getMap(r, i), error),
								tempDeltaKernel, null, null, MathUtils.plus);
					}
				}

				// Average over the batch.
				tempDeltaKernel = MathUtils.matrixOp(tempDeltaKernel, divideBatchSize);

				// Sanity check; abort the program on exploding gradients.
				// NOTE(review): rangeCheck is defined elsewhere in this class.
				if (!rangeCheck(tempDeltaKernel, -10, 10)) {
					System.exit(0);
				}

				// kernel = kernel * (1 - LAMBDA * ALPHA) + ALPHA * delta.
				double[][] kernel = paraLayer.getKernel(i, j);
				tempDeltaKernel = MathUtils.matrixOp(kernel, tempDeltaKernel, multiplyLambda,
						multiplyAlpha, MathUtils.plus);
				paraLayer.setKernel(i, j, tempDeltaKernel);
			}
		}
	}
/**
 * @Description: Back-propagate errors through all hidden layers, from the
 *               layer just before the output layer down to (but excluding)
 *               the input layer. Dispatches per layer type; other types
 *               (e.g. INPUT) are skipped.
 * @Param: []
 * @return: void
 */
private void setHiddenLayerErrors() {
	// Walk backwards: numLayers - 2 is the last hidden layer, 0 is the input.
	for (int tempIndex = layerBuilder.getNumLayers() - 2; tempIndex > 0; tempIndex--) {
		CnnLayer tempCurrent = layerBuilder.getLayer(tempIndex);
		CnnLayer tempNext = layerBuilder.getLayer(tempIndex + 1);
		if (tempCurrent.getType() == LayerTypeEnum.SAMPLING) {
			setSamplingErrors(tempCurrent, tempNext);
		} else if (tempCurrent.getType() == LayerTypeEnum.CONVOLUTION) {
			setConvolutionErrors(tempCurrent, tempNext);
		} // Of if; other layer types carry no error to set.
	} // Of for tempIndex
}
/**
 * @Description: Set the errors of a sampling layer. Each map's error is the
 *               sum, over all maps of the next (convolution) layer, of the
 *               full convolution of the next layer's error with the
 *               180-degree-rotated kernel connecting the two maps.
 * @Param: [paraLayer: the sampling layer, paraNextLayer: the following layer]
 * @return: void
 */
private void setSamplingErrors(final CnnLayer paraLayer, final CnnLayer paraNextLayer) {
	int tempMapNum = paraLayer.getOutMapNum();
	int tempNextMapNum = paraNextLayer.getOutMapNum();
	for (int i = 0; i < tempMapNum; i++) {
		// Accumulate contributions from every map of the next layer.
		double[][] tempErrorSum = null;
		for (int j = 0; j < tempNextMapNum; j++) {
			double[][] tempNextError = paraNextLayer.getError(j);
			double[][] tempKernel = paraNextLayer.getKernel(i, j);
			double[][] tempContribution = MathUtils.convnFull(tempNextError,
					MathUtils.rot180(tempKernel));
			tempErrorSum = (tempErrorSum == null) ? tempContribution
					: MathUtils.matrixOp(tempContribution, tempErrorSum, null, null,
							MathUtils.plus);
		} // Of for j
		paraLayer.setError(i, tempErrorSum);
		// Debug guard: report (but do not abort on) out-of-range errors.
		if (!rangeCheck(tempErrorSum, -2, 2)) {
			System.out.println(
					"setSampErrors, error out of range.\r\n" + Arrays.deepToString(tempErrorSum));
		} // Of if
	} // Of for i
}
/**
 * @Description: Set the errors of a convolution layer (the original comment
 *               said "sampling layer" by mistake). The next layer is a
 *               sampling layer, so the error is up-sampled with a Kronecker
 *               product and multiplied elementwise by the sigmoid derivative
 *               map * (1 - map) of this layer's output.
 * @Param: [paraLayer: the convolution layer, paraNextLayer: the following layer]
 * @return: void
 */
private void setConvolutionErrors(final CnnLayer paraLayer, final CnnLayer paraNextLayer) {
	int tempMapNum = paraLayer.getOutMapNum();
	for (int mapIndex = 0; mapIndex < tempMapNum; mapIndex++) {
		Size tempScaleSize = paraNextLayer.getScaleSize();
		double[][] tempNextErrors = paraNextLayer.getError(mapIndex);
		double[][] tempOutMap = paraLayer.getMap(mapIndex);
		// Sigmoid derivative: map * (1 - map), computed elementwise.
		double[][] tempNewErrors = MathUtils.matrixOp(tempOutMap,
				MathUtils.cloneMatrix(tempOutMap), null, MathUtils.one_value,
				MathUtils.multiply);
		// Up-sample the next layer's error and multiply elementwise.
		tempNewErrors = MathUtils.matrixOp(tempNewErrors,
				MathUtils.kronecker(tempNextErrors, tempScaleSize), null, null,
				MathUtils.multiply);
		paraLayer.setError(mapIndex, tempNewErrors);
		// Debug guards: abort when errors blow up.
		if (!rangeCheck(tempNextErrors, -10, 10)) {
			System.out.println("setConvErrors, nextError out of range:\r\n"
					+ Arrays.deepToString(tempNextErrors));
			System.out.println("the new errors are:\r\n" + Arrays.deepToString(tempNewErrors));
			System.exit(0);
		} // Of if
		if (!rangeCheck(tempNewErrors, -10, 10)) {
			System.out.println("setConvErrors, error out of range.");
			System.exit(0);
		} // Of if
	} // Of for mapIndex
}
/**
 * @Description: Set the errors of the output layer (the original comment
 *               said "sampling layer" by mistake). Each output map is 1*1;
 *               its error is the sigmoid-derivative-weighted difference
 *               between the one-hot target and the prediction.
 * @Param: [paraRecord: the current training instance]
 * @return: boolean, true iff the prediction (arg-max output) matches the label
 */
private boolean setOutputLayerErrors(Instance paraRecord) {
	CnnLayer tempLayer = layerBuilder.getOutputLayer();
	int tempNum = tempLayer.getOutMapNum();
	// Collect the 1*1 output maps into a prediction vector.
	double[] tempPredictions = new double[tempNum];
	for (int i = 0; i < tempNum; i++) {
		tempPredictions[i] = tempLayer.getMap(i)[0][0];
	} // Of for i
	// One-hot encode the true label.
	int tempTrueLabel = paraRecord.getLabel().intValue();
	double[] tempOneHot = new double[tempNum];
	tempOneHot[tempTrueLabel] = 1;
	// Error = out * (1 - out) * (target - out), the sigmoid-derivative form.
	for (int i = 0; i < tempNum; i++) {
		double tempOut = tempPredictions[i];
		tempLayer.setError(i, 0, 0, tempOut * (1 - tempOut) * (tempOneHot[i] - tempOut));
	} // Of for i
	return tempTrueLabel == MathUtils.getMaxIndex(tempPredictions);
}
/**
 * @Description: Set up the network for a given batch size: compute each
 *               layer's map size from its predecessor and allocate kernels,
 *               biases, error storage and output maps as required by the
 *               layer type.
 * @Param: [paraBatchSize: the mini-batch size used for allocation]
 * @return: void
 */
public void setup(int paraBatchSize) {
	// The input layer only needs its output maps allocated.
	layerBuilder.getLayer(0).initOutMaps(paraBatchSize);
	for (int tempIndex = 1; tempIndex < layerBuilder.getNumLayers(); tempIndex++) {
		CnnLayer tempCurrent = layerBuilder.getLayer(tempIndex);
		CnnLayer tempPrevious = layerBuilder.getLayer(tempIndex - 1);
		int tempPreviousMapNum = tempPrevious.getOutMapNum();
		if (tempCurrent.getType() == LayerTypeEnum.CONVOLUTION) {
			// Valid convolution shrinks the map: out = in - kernel + 1.
			tempCurrent.setMapSize(
					tempPrevious.getMapSize().subtract(tempCurrent.getKernelSize(), 1));
			tempCurrent.initKernel(tempPreviousMapNum);
			tempCurrent.initBias();
			tempCurrent.initErrors(paraBatchSize);
			tempCurrent.initOutMaps(paraBatchSize);
		} else if (tempCurrent.getType() == LayerTypeEnum.SAMPLING) {
			// Sampling keeps the map count and divides the map size by the scale.
			tempCurrent.setOutMapNum(tempPreviousMapNum);
			tempCurrent.setMapSize(
					tempPrevious.getMapSize().divide(tempCurrent.getScaleSize()));
			tempCurrent.initErrors(paraBatchSize);
			tempCurrent.initOutMaps(paraBatchSize);
		} else if (tempCurrent.getType() == LayerTypeEnum.OUTPUT) {
			// The output layer is fully connected via map-sized kernels.
			tempCurrent.initOutputKernel(tempPreviousMapNum, tempPrevious.getMapSize());
			tempCurrent.initBias();
			tempCurrent.initErrors(paraBatchSize);
			tempCurrent.initOutMaps(paraBatchSize);
		} // Of if; INPUT needs nothing here.
	} // Of for tempIndex
}
/**
 * @Description: Predict the labels of a whole dataset and print the
 *               resulting accuracy against the stored labels.
 * @Param: [paraDataset: the dataset to classify]
 * @return: int[], the predicted label (arg-max output) of each instance
 */
public int[] predict(Dataset paraDataset) {
	System.out.println("Predicting ... ");
	CnnLayer.prepareForNewBatch();
	int tempSize = paraDataset.size();
	int[] resultPredictions = new int[tempSize];
	double tempCorrect = 0.0;
	for (int tempIndex = 0; tempIndex < tempSize; tempIndex++) {
		Instance tempInstance = paraDataset.getInstance(tempIndex);
		// Forward pass fills the output layer's 1*1 maps.
		forward(tempInstance);
		CnnLayer tempOutputLayer = layerBuilder.getOutputLayer();
		int tempMapNum = tempOutputLayer.getOutMapNum();
		double[] tempOutputs = new double[tempMapNum];
		for (int m = 0; m < tempMapNum; m++) {
			tempOutputs[m] = tempOutputLayer.getMap(m)[0][0];
		} // Of for m
		resultPredictions[tempIndex] = MathUtils.getMaxIndex(tempOutputs);
		if (resultPredictions[tempIndex] == tempInstance.getLabel().intValue()) {
			tempCorrect++;
		} // Of if
	} // Of for tempIndex
	System.out.println("Accuracy: " + tempCorrect / paraDataset.size());
	return resultPredictions;
}
/**
 * @Description: Range check, used only for debugging. Verifies that every
 *               element of the matrix lies within the closed interval
 *               [paraLowerBound, paraUpperBound]. A null matrix is treated
 *               as in range (some callers accumulate errors lazily and may
 *               pass null when the next layer has no maps).
 * @Param: [paraMatrix: the matrix to check (may be null),
 *          paraLowerBound: inclusive lower bound,
 *          paraUpperBound: inclusive upper bound]
 * @return: boolean, true iff all elements are within the bounds
 */
public boolean rangeCheck(double[][] paraMatrix, double paraLowerBound, double paraUpperBound) {
	// Guard: callers may pass a never-initialized accumulator.
	if (paraMatrix == null) {
		return true;
	} // Of if
	for (int i = 0; i < paraMatrix.length; i++) {
		// Use each row's own length; the original indexed paraMatrix[0].length
		// for every row, which breaks on ragged arrays.
		for (int j = 0; j < paraMatrix[i].length; j++) {
			if ((paraMatrix[i][j] < paraLowerBound) || (paraMatrix[i][j] > paraUpperBound)) {
				System.out.println("" + paraMatrix[i][j] + " out of range (" + paraLowerBound
						+ ", " + paraUpperBound + ")\r\n");
				return false;
			} // Of if
		} // Of for j
	} // Of for i
	return true;
}
/**
 * @Description: Entry point. Builds a LeNet-like CNN for 28*28 digit images
 *               and trains it. The training-file path may be supplied as
 *               the first command-line argument; otherwise the original
 *               hard-coded path is used for backward compatibility.
 * @Param: [args: optional, args[0] = path to the training file]
 * @return: void
 */
public static void main(String[] args) {
	LayerBuilder builder = new LayerBuilder();
	// Input layer, the maps are 28*28
	builder.addLayer(new CnnLayer(LayerTypeEnum.INPUT, -1, new Size(28, 28)));
	// Convolution output has size 24*24, 24=28+1-5
	builder.addLayer(new CnnLayer(LayerTypeEnum.CONVOLUTION, 6, new Size(5, 5)));
	// Sampling output has size 12*12, 12=24/2
	builder.addLayer(new CnnLayer(LayerTypeEnum.SAMPLING, -1, new Size(2, 2)));
	// Convolution output has size 8*8, 8=12+1-5
	builder.addLayer(new CnnLayer(LayerTypeEnum.CONVOLUTION, 12, new Size(5, 5)));
	// Sampling output has size 4*4, 4=8/2
	builder.addLayer(new CnnLayer(LayerTypeEnum.SAMPLING, -1, new Size(2, 2)));
	// Output layer, digits 0 - 9.
	builder.addLayer(new CnnLayer(LayerTypeEnum.OUTPUT, 10, null));
	// Construct the full CNN with batch size 10.
	FullCnn tempCnn = new FullCnn(builder, 10);
	// Generalized: take the dataset path from the command line when given.
	String tempFilename = (args.length > 0) ? args[0]
			: "F:\\研究生\\研0\\学习\\Java_Study\\data_set\\train.format";
	// 784 = 28 * 28, the label sits in the last column.
	Dataset tempTrainingSet = new Dataset(tempFilename, ",", 784);
	// Train the model for 10 rounds.
	tempCnn.train(tempTrainingSet, 10);
	// tempCnn.predict(tempTrainingSet);
}
}
第86天:网络构建 (2. 代码理解)
CNN 的结构与 ANN 其实是一样的.
- 根据 main 里面的设置, 各层节点数依次是 1, 6, 6, 12, 12, 10. 这里的每个节点存储一个矩阵.
- 这里的 6 和 12 为超参数, 可以自己设置.
- 卷积层中, 每条边对应一个卷积核. 池化层不需要参数.
- 第一层只有 1 个节点, 是因为它为 binary 的. 如果是彩色图 (RGB), 通道就有三层, 则第一层应该有 3 个节点.
- 最后一层有 10 个节点, 因为 0-9 共 10 种可能的数字. 它们均为 1*1 的矩阵, 分类时通过比较哪个值最大, 确定是哪一类, 这和 ANN 也是一致的.
主要理解里面的一些术语的概念:
卷积: 用一个较小的卷积核在输入矩阵上滑动, 对每个位置做加权求和得到特征图, 用于提取局部特征; 本例中 valid 卷积使输出尺寸变为 "输入 - 核 + 1"。
池化 (采样): 在特征图的局部区域内取平均值或最大值, 按比例缩小特征图尺寸, 以降低计算量并增强平移不变性; 本例中 2×2 池化使尺寸减半。
第87天:实验
- 可以看出, 虽然网络不大, 但是效果不错. 用测试集效果也相当.
- 自己另外找一个数据集, 构建网络, 进行实验并给出结果.
这里后面有待补充
第88天:总结 (01 - 10 天: 基本语法)
其实就是对 java 语言的一些特性以及相应的程序设计进行了一定的梳理,通过这些知识点可以方便对后面进阶的操作进行理解。
第89天:总结 (11 - 50 天: 数据结构)
数据结构是程序的组成部分之一, 程序 = 数据结构 + 算法, 因此了解并熟练应用数据结构是程序员必备的技能, 这直接关系到程序的性能。
第90天:总结 (51 - 87 天: 机器学习)
这一阶段主要了解了相应的机器学习算法用程序代码去实现的过程,这一过程有助于理解算法的思想原理,而不是仅仅作为一个调包侠,要知其所以然。但是经过了解发现机器学习的相应算法了解起来还是比较轻松的,但是到了深度学习这一块,对神经网络的理解感觉不是很透彻,特别是反向传播那一块,不是特别理解误差函数的具体作用,只是明白它应该是为了不断的循环找到最优解。