Java Learning: 300 Lines of Code a Day (Days 81-90, CNN: Convolutional Neural Networks)

Original post: minfanphd

Day 81: Reading and Storing the Dataset

  1. ArrayList is used; it is convenient for data manipulation, though at a slight cost in efficiency.
  2. The input data are processed by splitting each line on a separator character.
  3. The main job here is reading and storing the dataset, i.e., processing, formatting, and storing the data file.
package MachineLearning.cnn;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * @description: Dataset
 * @learner: Qing Zhang
 * @time: 08
 */
public class Dataset {

    // All instances are stored in a list.
    private List<Instance> instances;


    // The index of the label column.
    private int labelIndex;

    // The maximum label value (labels start from 0).
    private double maxLabel = -1;

    /**
    * @Description: The first constructor.
    * @Param: []
    * @return:
    */
    public Dataset() {
        labelIndex = -1;
        instances = new ArrayList<Instance>();
    }

    
    /**
    * @Description: The second constructor.
    * @Param: [paraFilename: the filename; paraSplitSign: the separator, often ','; paraLabelIndex: the label index, often the last column]
    * @return:
    */
    public Dataset(String paraFilename, String paraSplitSign, int paraLabelIndex) {
        instances = new ArrayList<Instance>();
        labelIndex = paraLabelIndex;

        File tempFile = new File(paraFilename);
        try {
            BufferedReader tempReader = new BufferedReader(new FileReader(tempFile));
            String tempLine;
            while ((tempLine = tempReader.readLine()) != null) {
                // Skip blank lines; Double.parseDouble would throw on an empty string.
                if (tempLine.trim().isEmpty()) {
                    continue;
                }
                String[] tempDatum = tempLine.split(paraSplitSign);

                double[] tempData = new double[tempDatum.length];
                for (int i = 0; i < tempDatum.length; i++)
                    tempData[i] = Double.parseDouble(tempDatum[i]);
                Instance tempInstance = new Instance(tempData);
                append(tempInstance);
            } 
            tempReader.close();
        } catch (IOException e) {
            e.printStackTrace();
            System.out.println("Unable to load " + paraFilename);
            System.exit(0);
        }
    }

    /** 
    * @Description: Add an instance.
    * @Param: [paraInstance]
    * @return: void
    */
    public void append(Instance paraInstance) {
        instances.add(paraInstance);
    }


    /**
    * @Description: Add an instance given its attributes and a Double label.
    * @Param: [paraAttributes, paraLabel]
    * @return: void
    */
    public void append(double[] paraAttributes, Double paraLabel) {
        instances.add(new Instance(paraAttributes, paraLabel));
    }

    /**
    * @Description: Get an instance by its index.
    * @Param: [paraIndex]
    * @return: MachineLearning.cnn.Dataset.Instance
    */
    public Instance getInstance(int paraIndex) {
        return instances.get(paraIndex);
    }

    /**
    * @Description: Get the number of instances.
    * @Param: []
    * @return: int
    */
    public int size() {
        return instances.size();
    }

    /**
    * @Description: Get the attributes of the instance at the given index.
    * @Param: [paraIndex]
    * @return: double[]
    */
    public double[] getAttributes(int paraIndex) {
        return instances.get(paraIndex).getAttributes();
    }

    /**
    * @Description: Get the label of the instance at the given index.
    * @Param: [paraIndex]
    * @return: java.lang.Double
    */
    public Double getLabel(int paraIndex) {
        return instances.get(paraIndex).getLabel();
    }


    public static void main(String[] args) {
        Dataset tempData = new Dataset("F:\\研究生\\研0\\学习\\Java_Study\\data_set\\train.format", ",", 784);
        Instance tempInstance = tempData.getInstance(0);
        System.out.println("The first instance is: " + tempInstance);
    }

    /**
     ***********************
     * An instance (sample) class.
     ***********************
     */
    public class Instance {


        // The condition attributes.
        private double[] attributes;

        // The label.
        private Double label;

        /**
        * @Description: The first constructor.
        * @Param: [paraAttrs, paraLabel]
        * @return:
        */
        private Instance(double[] paraAttrs, Double paraLabel) {
            attributes = paraAttrs;
            label = paraLabel;
        }

        /**
        * @Description: The second constructor: parse the label (if any) out of a raw data row.
        * @Param: [paraData]
        * @return:
        */
        public Instance(double[] paraData) {
            if (labelIndex == -1)
                // No label.
                attributes = paraData;
            else {
                label = paraData[labelIndex];
                if (label > maxLabel) {
                    // It is a new (larger) label.
                    maxLabel = label;
                }

                if (labelIndex == 0) {
                    // The first column is the label.
                    attributes = Arrays.copyOfRange(paraData, 1, paraData.length);
                } else {
                    // The last column is the label.
                    attributes = Arrays.copyOfRange(paraData, 0, paraData.length - 1);
                }
            }
        }

        /**
        * @Description: Get the attributes.
        * @Param: []
        * @return: double[]
        */
        public double[] getAttributes() {
            return attributes;
        }

        /**
        * @Description: Get the label, or null if the dataset has no label column.
        * @Param: []
        * @return: java.lang.Double
        */
        public Double getLabel() {
            if (labelIndex == -1)
                return null;
            return label;
        }


        @Override
        public String toString() {
            return Arrays.toString(attributes) + ", " + label;
        }
    }
}

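A note on the data format: with labelIndex = 784 as in main, each line of train.format is expected to contain 785 comma-separated values, i.e., the 784 pixels of a 28x28 image followed by the digit label in the last column (matching the 28x28 input layer built on Day 85).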

Day 82: Math Operations

package MachineLearning.cnn;

import java.io.Serializable;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;

/**
 * @description: Math operations
 * @learner: Qing Zhang
 * @time: 08
 */
public class MathUtils {

    /**
    * @Description: An interface so that different operations can be plugged in as needed.
    * @Param:
    * @return:
    */
    public interface Operator extends Serializable {
        public double process(double value);
    }


    /**
    * @Description: The operator computing 1 - value.
    * @Param:
    * @return:
    */
    public static final Operator one_value = new Operator() {
        private static final long serialVersionUID = 3752139491940330714L;

        @Override
        public double process(double value) {
            return 1 - value;
        }
    };

    /**
    * @Description: The sigmoid operator.
    * @Param:
    * @return:
    */
    public static final Operator sigmoid = new Operator() {
        private static final long serialVersionUID = -1952718905019847589L;

        @Override
        public double process(double value) {
            return 1 / (1 + Math.pow(Math.E, -value));
        }
    };

    /**
    * @Description: An interface for operations on two values.
    * @Param:
    * @return:
    */
    interface OperatorOnTwo extends Serializable {
        public double process(double a, double b);
    }

    /**
    * @Description: +
    * @Param:
    * @return:
    */
    public static final OperatorOnTwo plus = new OperatorOnTwo() {
        private static final long serialVersionUID = -6298144029766839945L;

        @Override
        public double process(double a, double b) {
            return a + b;
        }
    };

    /**
    * @Description: *
    * @Param:
    * @return:
    */
    public static OperatorOnTwo multiply = new OperatorOnTwo() {

        private static final long serialVersionUID = -7053767821858820698L;

        @Override
        public double process(double a, double b) {
            return a * b;
        }
    };

    /**
    * @Description: -
    * @Param:
    * @return:
    */
    public static OperatorOnTwo minus = new OperatorOnTwo() {

        private static final long serialVersionUID = 7346065545555093912L;

        @Override
        public double process(double a, double b) {
            return a - b;
        }
    };

    /**
    * @Description: Print a matrix.
    * @Param: [matrix]
    * @return: void
    */
    public static void printMatrix(double[][] matrix) {
        for (int i = 0; i < matrix.length; i++) {
            String line = Arrays.toString(matrix[i]);
            line = line.replaceAll(", ", "\t");
            System.out.println(line);
        }
        System.out.println();
    }

    /**
    * @Description: Rotate the matrix by 180 degrees.
    * @Param: [matrix]
    * @return: double[][]
    */
    public static double[][] rot180(double[][] matrix) {
        matrix = cloneMatrix(matrix);
        int m = matrix.length;
        int n = matrix[0].length;
        // First swap the first half of the columns with the second half symmetrically.
        for (int i = 0; i < m; i++) {
            for (int j = 0; j < n / 2; j++) {
                double tmp = matrix[i][j];
                matrix[i][j] = matrix[i][n - 1 - j];
                matrix[i][n - 1 - j] = tmp;
            }
        }
        // Then swap the first half of the rows with the second half symmetrically.
        for (int j = 0; j < n; j++) {
            for (int i = 0; i < m / 2; i++) {
                double tmp = matrix[i][j];
                matrix[i][j] = matrix[m - 1 - i][j];
                matrix[m - 1 - i][j] = tmp;
            }
        }
        return matrix;
    }


    private static Random myRandom = new Random(2);


    /**
    * @Description: Generate a random matrix with the given size.
     * Each value lies in [-0.005, 0.095). The parameter b is not used.
    * @Param: [x, y, b]
    * @return: double[][]
    */
    public static double[][] randomMatrix(int x, int y, boolean b) {
        double[][] matrix = new double[x][y];

        for (int i = 0; i < x; i++) {
            for (int j = 0; j < y; j++) {
                matrix[i][j] = (myRandom.nextDouble() - 0.05) / 10;
            }
        }
        return matrix;
    }

    
    /** 
    * @Description: Generate an array of the given length. The random
     * initialization in [-0.005, 0.095) is commented out, so all values are 0.
    * @Param: [len]
    * @return: double[]
    */
    public static double[] randomArray(int len) {
        double[] data = new double[len];
        for (int i = 0; i < len; i++) {
            //data[i] = myRandom.nextDouble() / 10 - 0.05;
            data[i] = 0;
        }
        return data;
    }

    
    /** 
    * @Description: Generate batchSize distinct random indices in [0, size).
    * @Param: [size, batchSize]
    * @return: int[]
    */
    public static int[] randomPerm(int size, int batchSize) {
        Set<Integer> set = new HashSet<Integer>();
        while (set.size() < batchSize) {
            set.add(myRandom.nextInt(size));
        }
        int[] randPerm = new int[batchSize];
        int i = 0;
        for (Integer value : set)
            randPerm[i++] = value;
        return randPerm;
    }

    
    /** 
    * @Description: Clone a matrix, to avoid sharing the underlying reference.
    * @Param: [matrix]
    * @return: double[][]
    */
    public static double[][] cloneMatrix(final double[][] matrix) {
        final int m = matrix.length;
        int n = matrix[0].length;
        final double[][] outMatrix = new double[m][n];

        for (int i = 0; i < m; i++) {
            for (int j = 0; j < n; j++) {
                outMatrix[i][j] = matrix[i][j];
            }
        } 
        return outMatrix;
    }

    
    /** 
    * @Description: Apply the given operator to every element of a matrix (in place).
    * @Param: [ma, operator]
    * @return: double[][]
    */
    public static double[][] matrixOp(final double[][] ma, Operator operator) {
        final int m = ma.length;
        int n = ma[0].length;
        for (int i = 0; i < m; i++) {
            for (int j = 0; j < n; j++) {
                ma[i][j] = operator.process(ma[i][j]);
            } 
        } 
        return ma;
    }


    /** 
    * @Description: Apply the given two-operand operator element-wise to two matrices; operatorA and operatorB optionally preprocess each operand. The result is stored in mb.
    * @Param: [ma, mb, operatorA, operatorB, operator]
    * @return: double[][]
    */
    public static double[][] matrixOp(final double[][] ma, final double[][] mb,
                                      final Operator operatorA, final Operator operatorB, OperatorOnTwo operator) {
        final int m = ma.length;
        int n = ma[0].length;
        if (m != mb.length || n != mb[0].length)
            throw new RuntimeException("ma.length:" + ma.length + "  mb.length:" + mb.length);

        for (int i = 0; i < m; i++) {
            for (int j = 0; j < n; j++) {
                double a = ma[i][j];
                if (operatorA != null)
                    a = operatorA.process(a);
                double b = mb[i][j];
                if (operatorB != null)
                    b = operatorB.process(b);
                mb[i][j] = operator.process(a, b);
            }
        }
        return mb;
    }


    /**
    * @Description: Expand the matrix, filling each enlarged block with the original value.
     * The inverse operation of pooling.
    * @Param: [matrix, scale]
    * @return: double[][]
    */
    public static double[][] kronecker(final double[][] matrix, final Size scale) {
        final int m = matrix.length;
        int n = matrix[0].length;
        final double[][] outMatrix = new double[m * scale.width][n * scale.height];

        for (int i = 0; i < m; i++) {
            for (int j = 0; j < n; j++) {
                for (int ki = i * scale.width; ki < (i + 1) * scale.width; ki++) {
                    for (int kj = j * scale.height; kj < (j + 1) * scale.height; kj++) {
                        outMatrix[ki][kj] = matrix[i][j];
                    }
                }
            }
        }
        return outMatrix;
    }

    /**
    * @Description: Scale down the matrix: mean pooling.
    * @Param: [matrix, scale]
    * @return: double[][]
    */
    public static double[][] scaleMatrix(final double[][] matrix, final Size scale) {
        int m = matrix.length;
        int n = matrix[0].length;
        final int sm = m / scale.width;
        final int sn = n / scale.height;
        final double[][] outMatrix = new double[sm][sn];
        if (sm * scale.width != m || sn * scale.height != n)
            throw new RuntimeException("scale matrix");
        final int size = scale.width * scale.height;
        for (int i = 0; i < sm; i++) {
            for (int j = 0; j < sn; j++) {
                double sum = 0.0;
                for (int si = i * scale.width; si < (i + 1) * scale.width; si++) {
                    for (int sj = j * scale.height; sj < (j + 1) * scale.height; sj++) {
                        sum += matrix[si][sj];
                    }
                }
                outMatrix[i][j] = sum / size;
            }
        }
        return outMatrix;
    }


    /** 
    * @Description: Full convolution, producing a larger output. Used in back-propagation.
    * @Param: [matrix, kernel]
    * @return: double[][]
    */
    public static double[][] convnFull(double[][] matrix, final double[][] kernel) {
        int m = matrix.length;
        int n = matrix[0].length;
        final int km = kernel.length;
        final int kn = kernel[0].length;
        final double[][] extendMatrix = new double[m + 2 * (km - 1)][n + 2 * (kn - 1)];
        for (int i = 0; i < m; i++) {
            for (int j = 0; j < n; j++) {
                extendMatrix[i + km - 1][j + kn - 1] = matrix[i][j];
            }
        }
        return convnValid(extendMatrix, kernel);
    }


    /** 
    * @Description: Valid convolution: slide the kernel over the matrix and sum the products. Used in forward propagation.
    * @Param: [matrix, kernel]
    * @return: double[][]
    */
    public static double[][] convnValid(final double[][] matrix, double[][] kernel) {
        // kernel = rot180(kernel);
        int m = matrix.length;
        int n = matrix[0].length;
        final int km = kernel.length;
        final int kn = kernel[0].length;
        int kns = n - kn + 1;
        final int kms = m - km + 1;
        final double[][] outMatrix = new double[kms][kns];

        for (int i = 0; i < kms; i++) {
            for (int j = 0; j < kns; j++) {
                double sum = 0.0;
                for (int ki = 0; ki < km; ki++) {
                    for (int kj = 0; kj < kn; kj++)
                        sum += matrix[i + ki][j + kj] * kernel[ki][kj];
                }
                outMatrix[i][j] = sum;

            }
        }
        return outMatrix;
    }
    
    /** 
    * @Description: Valid convolution on tensors.
    * @Param: [matrix, mapNoX, kernel, mapNoY]
    * @return: double[][]
    */
    public static double[][] convnValid(final double[][][][] matrix, int mapNoX,
                                        double[][][][] kernel, int mapNoY) {
        int m = matrix.length;
        int n = matrix[0][mapNoX].length;
        int h = matrix[0][mapNoX][0].length;
        int km = kernel.length;
        int kn = kernel[0][mapNoY].length;
        int kh = kernel[0][mapNoY][0].length;
        int kms = m - km + 1;
        int kns = n - kn + 1;
        int khs = h - kh + 1;
        if (matrix.length != kernel.length)
            throw new RuntimeException("length");
        final double[][][] outMatrix = new double[kms][kns][khs];
        for (int i = 0; i < kms; i++) {
            for (int j = 0; j < kns; j++)
                for (int k = 0; k < khs; k++) {
                    double sum = 0.0;
                    for (int ki = 0; ki < km; ki++) {
                        for (int kj = 0; kj < kn; kj++)
                            for (int kk = 0; kk < kh; kk++) {
                                sum += matrix[i + ki][mapNoX][j + kj][k + kk]
                                        * kernel[ki][mapNoY][kj][kk];
                            }
                    }
                    outMatrix[i][j][k] = sum;
                }
        }
        return outMatrix[0];
    }

    /**
    * @Description: The scalar sigmoid function.
    * @Param: [x]
    * @return: double
    */
    public static double sigmod(double x) {
        return 1 / (1 + Math.pow(Math.E, -x));
    }


    /**
    * @Description: Sum all values of a matrix.
    * @Param: [error]
    * @return: double
    */
    public static double sum(double[][] error) {
        int m = error.length;
        int n = error[0].length;
        double sum = 0.0;
        for (int i = 0; i < m; i++) {
            for (int j = 0; j < n; j++) {
                sum += error[i][j];
            }
        }
        return sum;
    }

    /**
    * @Description: Ad hoc sum.
    * @Param: [errors, j]
    * @return: double[][]
    */
    public static double[][] sum(double[][][][] errors, int j) {
        int m = errors[0][j].length;
        int n = errors[0][j][0].length;
        double[][] result = new double[m][n];
        for (int mi = 0; mi < m; mi++) {
            for (int nj = 0; nj < n; nj++) {
                double sum = 0;
                for (int i = 0; i < errors.length; i++)
                    sum += errors[i][j][mi][nj];
                result[mi][nj] = sum;
            }
        }
        return result;
    }


    /** 
    * @Description: Get the index of the maximum value, i.e., the predicted class.
    * @Param: [out]
    * @return: int
    */
    public static int getMaxIndex(double[] out) {
        double max = out[0];
        int index = 0;
        for (int i = 1; i < out.length; i++)
            if (out[i] > max) {
                max = out[i];
                index = i;
            }
        return index;
    }


    /**
    * @Description: Unit test for the rot180 method.
    * @Param: []
    * @return: void
    */
    private static void rot180UnitTest(){
        double[][] matrix = {{1,2},{3,4}};
        double[][] rotMatrix = rot180(matrix);
        System.out.println(Arrays.deepToString(rotMatrix));
    }

    public static void main(String[] args) {
        rot180UnitTest();
    }
}
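An aside before Day 83: the kronecker method above undoes what scaleMatrix does, which is exactly how the errors of a sampling layer are expanded during back-propagation later. A minimal sketch (a hypothetical demo class, assuming the MathUtils class above and the project's Size class with its Size(int, int) constructor, as used in Day 85's main):

package MachineLearning.cnn;

import java.util.Arrays;

public class KroneckerDemo {
    public static void main(String[] args) {
        // A 2x2 matrix, e.g., the result of 2x2 mean pooling on a 4x4 map.
        double[][] pooled = {{1, 2}, {3, 4}};
        // Each value is replicated into a 2x2 block, giving a 4x4 matrix:
        // [[1.0, 1.0, 2.0, 2.0], [1.0, 1.0, 2.0, 2.0],
        //  [3.0, 3.0, 4.0, 4.0], [3.0, 3.0, 4.0, 4.0]]
        double[][] expanded = MathUtils.kronecker(pooled, new Size(2, 2));
        System.out.println(Arrays.deepToString(expanded));
    }
}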

Day 83: Math Operations (Continued)

Here a small two-dimensional array is set up to test rot180:

/**
    * @Description: Unit test for the rot180 method.
    * @Param: []
    * @return: void
    */
    private static void rot180UnitTest(){
        double[][] matrix = {{1,2},{3,4}};
        double[][] rotMatrix = rot180(matrix);
        System.out.println(Arrays.deepToString(rotMatrix));
    }



    public static void main(String[] args) {
        rot180UnitTest();
    }
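Since rot180 reverses both the rows and the columns, the program prints [[4.0, 3.0], [2.0, 1.0]].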

Day 84: Network Structure and Parameters

The main point is to understand what each dimension of the multi-dimensional arrays means, as the sketch below illustrates.
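As a concrete illustration, this standalone sketch (a hypothetical demo class, not part of the original project) allocates the two four-dimensional arrays used by CnnLayer below and prints what each index means:

public class DimensionDemo {
    public static void main(String[] args) {
        int batchSize = 10, frontMapNum = 6, outMapNum = 12;
        int kernelWidth = 5, kernelHeight = 5, mapWidth = 8, mapHeight = 8;

        // Kernels: one kernelWidth x kernelHeight matrix per (front map, out map) pair.
        double[][][][] kernel = new double[frontMapNum][outMapNum][kernelWidth][kernelHeight];
        // Output maps: one mapWidth x mapHeight matrix per (record in batch, out map) pair.
        double[][][][] outMaps = new double[batchSize][outMapNum][mapWidth][mapHeight];

        System.out.println("kernel[frontMap][outMap][w][h]: " + kernel.length + " x "
                + kernel[0].length + " matrices of " + kernel[0][0].length + " x "
                + kernel[0][0][0].length);
        System.out.println("outMaps[record][outMap][w][h]: " + outMaps.length + " x "
                + outMaps[0].length + " matrices of " + outMaps[0][0].length + " x "
                + outMaps[0][0][0].length);
    }
}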

package MachineLearning.cnn;

/**
 * @description: A CNN layer
 * @learner: Qing Zhang
 * @time: 08
 */
public class CnnLayer {

    // The type of this layer.
    LayerTypeEnum type;


    // The number of output maps.
    int outMapNum;


    // The size of the output maps.
    Size mapSize;

    // The kernel size.
    Size kernelSize;

    // The scale (pooling) size.
    Size scaleSize;


    // The number of classes (only used by the output layer).
    int classNum = -1;


    // The kernels. Dimensions: [front map][out map][width][height].
    private double[][][][] kernel;


    // The biases, one per output map.
    private double[] bias;


    // The output maps. Dimensions:
    //[batchSize][outMapNum][mapSize.width][mapSize.height].
    private double[][][][] outMaps;


    // The errors (deltas) for back-propagation.
    private double[][][][] errors;

    // The index of the current record within the batch.
    private static int recordInBatch = 0;


    /**
    * @Description: The first constructor.
    * @Param: [
     * paraType: the layer type.
     * paraNum:
     *            When the type is CONVOLUTION, it is the out map number; when
     *            the type is OUTPUT, it is the class number.
     * paraSize:
     *            When the type is INPUT, it is the map size; when the type is
     *            CONVOLUTION, it is the kernel size; when the type is SAMPLING,
     *            it is the scale size.
     * ]
    * @return:
    */
    public CnnLayer(LayerTypeEnum paraType, int paraNum, Size paraSize) {
        type = paraType;
        switch (type) {
            case INPUT:
                outMapNum = 1;
                mapSize = paraSize; // No deep copy.
                break;
            case CONVOLUTION:
                outMapNum = paraNum;
                kernelSize = paraSize;
                break;
            case SAMPLING:
                scaleSize = paraSize;
                break;
            case OUTPUT:
                classNum = paraNum;
                mapSize = new Size(1, 1);
                outMapNum = classNum;
                break;
            default:
                System.out.println("Internal error occurred in AbstractLayer.java constructor.");
        }
    }

    
    /** 
    * @Description: Initialize the kernels.
    * @Param: [paraFrontMapNum]
    * @return: void
    */
    public void initKernel(int paraFrontMapNum) {
        kernel = new double[paraFrontMapNum][outMapNum][][];
        for (int i = 0; i < paraFrontMapNum; i++) {
            for (int j = 0; j < outMapNum; j++) {
                kernel[i][j] = MathUtils.randomMatrix(kernelSize.width, kernelSize.height, true);
            }
        } 
    }


    /** 
    * @Description: Initialize the output layer's kernels. The code was modified to call initKernel(int).
    * @Param: [paraFrontMapNum, paraSize]
    * @return: void
    */
    public void initOutputKernel(int paraFrontMapNum, Size paraSize) {
        kernelSize = paraSize;
        initKernel(paraFrontMapNum);
    }

    /**
    * @Description: Initialize the bias. No parameters. "int frontMapNum" was
     * declared in an earlier version but never used.
    * @Param: []
    * @return: void
    */
    public void initBias() {
        bias = MathUtils.randomArray(outMapNum);
    }

    
    /** 
    * @Description: Initialize the error tensor.
    * @Param: [paraBatchSize]
    * @return: void
    */
    public void initErrors(int paraBatchSize) {
        errors = new double[paraBatchSize][outMapNum][mapSize.width][mapSize.height];
    }

    
    /** 
    * @Description: Initialize the output maps.
    * @Param: [paraBatchSize]
    * @return: void
    */
    public void initOutMaps(int paraBatchSize) {
        outMaps = new double[paraBatchSize][outMapNum][mapSize.width][mapSize.height];
    }

    
    /** 
    * @Description: Prepare for a new batch.
    * @Param: []
    * @return: void
    */
    public static void prepareForNewBatch() {
        recordInBatch = 0;
    }


    /** 
    * @Description: Prepare for a new record.
    * @Param: []
    * @return: void
    */
    public static void prepareForNewRecord() {
        recordInBatch++;
    }


    /**
    * @Description: Set one value of an output map.
    * @Param: [paraMapNo, paraX, paraY, paraValue]
    * @return: void
    */
    public void setMapValue(int paraMapNo, int paraX, int paraY, double paraValue) {
        outMaps[recordInBatch][paraMapNo][paraX][paraY] = paraValue;
    }


    /**
    * @Description: Set a whole map.
    * @Param: [paraMapNo, paraOutMatrix]
    * @return: void
    */
    public void setMapValue(int paraMapNo, double[][] paraOutMatrix) {
        outMaps[recordInBatch][paraMapNo] = paraOutMatrix;
    }

    /**
    * @Description: Get the map size.
    * @Param: []
    * @return: MachineLearning.cnn.Size
    */
    public Size getMapSize() {
        return mapSize;
    }


    /**
    * @Description: Set the map size.
    * @Param: [paraMapSize]
    * @return: void
    */
    public void setMapSize(Size paraMapSize) {
        mapSize = paraMapSize;
    }

    /**
    * @Description: Get the layer type.
    * @Param: []
    * @return: MachineLearning.cnn.LayerTypeEnum
    */
    public LayerTypeEnum getType() {
        return type;
    }

    /**
    * @Description: Get the number of output maps.
    * @Param: []
    * @return: int
    */
    public int getOutMapNum() {
        return outMapNum;
    }

    /**
    * @Description: Set the number of output maps.
    * @Param: [paraOutMapNum]
    * @return: void
    */
    public void setOutMapNum(int paraOutMapNum) {
        outMapNum = paraOutMapNum;
    }

    /**
    * @Description: Get the kernel size.
    * @Param: []
    * @return: MachineLearning.cnn.Size
    */
    public Size getKernelSize() {
        return kernelSize;
    }

    /**
    * @Description: Get the scale size.
    * @Param: []
    * @return: MachineLearning.cnn.Size
    */
    public Size getScaleSize() {
        return scaleSize;
    }

    /**
    * @Description: Get one map of the current record.
    * @Param: [paraIndex]
    * @return: double[][]
    */
    public double[][] getMap(int paraIndex) {
        return outMaps[recordInBatch][paraIndex];
    }


    /**
    * @Description: Get one kernel.
    * @Param: [paraFrontMap, paraOutMap]
    * @return: double[][]
    */
    public double[][] getKernel(int paraFrontMap, int paraOutMap) {
        return kernel[paraFrontMap][paraOutMap];
    }

    /**
    * @Description: Set one error value.
    * @Param: [paraMapNo, paraMapX, paraMapY, paraValue]
    * @return: void
    */
    public void setError(int paraMapNo, int paraMapX, int paraMapY, double paraValue) {
        errors[recordInBatch][paraMapNo][paraMapX][paraMapY] = paraValue;
    }


    /**
    * @Description: Set one error matrix.
    * @Param: [paraMapNo, paraMatrix]
    * @return: void
    */
    public void setError(int paraMapNo, double[][] paraMatrix) {
        errors[recordInBatch][paraMapNo] = paraMatrix;
    }

    /**
    * @Description: Get one error matrix of the current record.
    * @Param: [paraMapNo]
    * @return: double[][]
    */
    public double[][] getError(int paraMapNo) {
        return errors[recordInBatch][paraMapNo];
    }


    /**
    * @Description: Get the whole error tensor.
    * @Param: []
    * @return: double[][][][]
    */
    public double[][][][] getErrors() {
        return errors;
    }

    /**
    * @Description: Set one kernel.
    * @Param: [paraLastMapNo, paraMapNo, paraKernel]
    * @return: void
    */
    public void setKernel(int paraLastMapNo, int paraMapNo, double[][] paraKernel) {
        kernel[paraLastMapNo][paraMapNo] = paraKernel;
    }

    /**
    * @Description: Get the bias of one map.
    * @Param: [paraMapNo]
    * @return: double
    */
    public double getBias(int paraMapNo) {
        return bias[paraMapNo];
    }


    /**
    * @Description: Set the bias of one map.
    * @Param: [paraMapNo, paraValue]
    * @return: void
    */
    public void setBias(int paraMapNo, double paraValue) {
        bias[paraMapNo] = paraValue;
    }

    /**
    * @Description: Get all output maps.
    * @Param: []
    * @return: double[][][][]
    */
    public double[][][][] getMaps() {
        return outMaps;
    }

    /**
    * @Description: Get one error matrix of a given record.
    * @Param: [paraRecordId, paraMapNo]
    * @return: double[][]
    */
    public double[][] getError(int paraRecordId, int paraMapNo) {
        return errors[paraRecordId][paraMapNo];
    }

    /**
    * @Description: Get one map of a given record.
    * @Param: [paraRecordId, paraMapNo]
    * @return: double[][]
    */
    public double[][] getMap(int paraRecordId, int paraMapNo) {
        return outMaps[paraRecordId][paraMapNo];
    }


    /**
    * @Description: Get the number of classes.
    * @Param: []
    * @return: int
    */
    public int getClassNum() {
        return classNum;
    }


    /**
    * @Description: Get the whole kernel tensor.
    * @Param: []
    * @return: double[][][][]
    */
    public double[][][][] getKernel() {
        return kernel;
    }
}

Managing multiple layers:

package MachineLearning.cnn;

import java.util.ArrayList;
import java.util.List;
/**
 * @description: Manage multiple layers
 * @learner: Qing Zhang
 * @time: 08
 */
public class LayerBuilder {
    
    // The layers.
    private List<CnnLayer> layers;

    /** 
    * @Description: The first constructor.
    * @Param: []
    * @return: 
    */
    public LayerBuilder() {
        layers = new ArrayList<CnnLayer>();
    }

    /** 
    * @Description: The second constructor.
    * @Param: [paraLayer]
    * @return: 
    */
    public LayerBuilder(CnnLayer paraLayer) {
        this();
        layers.add(paraLayer);
    }

    /** 
    * @Description: Add a new layer.
    * @Param: [paraLayer]
    * @return: void
    */
    public void addLayer(CnnLayer paraLayer) {
        layers.add(paraLayer);
    }

    
    /** 
    * @Description: Get the layer at the given index.
    * @Param: [paraIndex]
    * @return: MachineLearning.cnn.CnnLayer
    */
    public CnnLayer getLayer(int paraIndex) throws RuntimeException{
        if (paraIndex >= layers.size()) {
            throw new RuntimeException("CnnLayer " + paraIndex + " is out of range: "
                    + layers.size() + ".");
        }

        return layers.get(paraIndex);
    }

    /** 
    * @Description: Get the output layer.
    * @Param: []
    * @return: MachineLearning.cnn.CnnLayer
    */
    public CnnLayer getOutputLayer() {
        return layers.get(layers.size() - 1);
    }

    
    /** 
    * @Description: Get the number of layers.
    * @Param: []
    * @return: int
    */
    public int getNumLayers() {
        return layers.size();
    }
}

Day 85: Network Construction (1. Code Transcription)

  1. initOperators initializes several more operators. Note that they depend on member variables that have already been initialized. This flexible mechanism (interfaces) is worth learning; see the sketch after this list.
  2. ALPHA and LAMBDA are hyperparameters that you can set yourself.
  3. setup initializes the whole network.
  4. forward and backPropagation work as in an ANN, but the operations differ.
  5. updateParameters is invoked once only after a whole batch has gone through forward and backPropagation.
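To illustrate item 1, here is a minimal sketch (a hypothetical demo class in the same package, assuming the MathUtils class from Day 82) of how an anonymous Operator plugs into MathUtils.matrixOp, exactly the way multiplyAlpha is built below:

package MachineLearning.cnn;

import java.util.Arrays;

public class OperatorDemo {
    public static void main(String[] args) {
        double[][] tempMatrix = {{0.0, 1.0}, {2.0, 3.0}};
        final double tempAlpha = 0.85;
        // matrixOp applies the operator to every element in place and returns the matrix.
        double[][] tempScaled = MathUtils.matrixOp(tempMatrix, new MathUtils.Operator() {
            private static final long serialVersionUID = 1L;

            @Override
            public double process(double value) {
                return value * tempAlpha;
            }
        });
        // Prints [[0.0, 0.85], [1.7, 2.55]].
        System.out.println(Arrays.deepToString(tempScaled));
    }
}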
package MachineLearning.cnn;

import java.util.Arrays;
import MachineLearning.cnn.Dataset.Instance;
import MachineLearning.cnn.MathUtils.Operator;

/**
 * @description: The full CNN
 * @learner: Qing Zhang
 * @time: 08
 */
public class FullCnn {

    // The learning rate.
    private static double ALPHA = 0.85;

    // A constant; used as a weight-decay coefficient in multiplyLambda.
    public static double LAMBDA = 0;

    // The layer builder managing all layers.
    private static LayerBuilder layerBuilder;

    // The number of instances in one batch.
    private int batchSize;

    // Divide the given value by batchSize.
    private Operator divideBatchSize;

    // Multiply the given value by ALPHA.
    private Operator multiplyAlpha;

    // Multiply the given value by (1 - LAMBDA * ALPHA).
    private Operator multiplyLambda;

    /** 
    * @Description: The first constructor.
    * @Param: [paraLayerBuilder, paraBatchSize]
    * @return: 
    */
    public FullCnn(LayerBuilder paraLayerBuilder, int paraBatchSize) {
        layerBuilder = paraLayerBuilder;
        batchSize = paraBatchSize;
        setup();
        initOperators();
    }

    
    /** 
    * @Description: Initialize the operators using anonymous classes.
    * @Param: []
    * @return: void
    */
    private void initOperators() {
        divideBatchSize = new Operator() {
            private static final long serialVersionUID = 7424011281732651055L;

            @Override
            public double process(double value) {
                return value / batchSize;
            }// Of process
        };

        multiplyAlpha = new Operator() {
            private static final long serialVersionUID = 5761368499808006552L;

            @Override
            public double process(double value) {
                return value * ALPHA;
            }// Of process
        };

        multiplyLambda = new Operator() {
            private static final long serialVersionUID = 4499087728362870577L;

            @Override
            public double process(double value) {
                return value * (1 - LAMBDA * ALPHA);
            }// Of process
        };
    }


    /** 
    * @Description: Set up, i.e., initialize, the network according to the layer builder.
    * @Param: []
    * @return: void
    */
    public void setup() {
        CnnLayer tempInputLayer = layerBuilder.getLayer(0);
        tempInputLayer.initOutMaps(batchSize);

        for (int i = 1; i < layerBuilder.getNumLayers(); i++) {
            CnnLayer tempLayer = layerBuilder.getLayer(i);
            CnnLayer tempFrontLayer = layerBuilder.getLayer(i - 1);
            int tempFrontMapNum = tempFrontLayer.getOutMapNum();
            switch (tempLayer.getType()) {
                case INPUT:
                    // Should not be INPUT here. Maybe an error should be thrown.
                    break;
                case CONVOLUTION:
                    tempLayer.setMapSize(
                            tempFrontLayer.getMapSize().subtract(tempLayer.getKernelSize(), 1));
                    tempLayer.initKernel(tempFrontMapNum);
                    tempLayer.initBias();
                    tempLayer.initErrors(batchSize);
                    tempLayer.initOutMaps(batchSize);
                    break;
                case SAMPLING:
                    tempLayer.setOutMapNum(tempFrontMapNum);
                    tempLayer.setMapSize(tempFrontLayer.getMapSize().divide(tempLayer.getScaleSize()));
                    tempLayer.initErrors(batchSize);
                    tempLayer.initOutMaps(batchSize);
                    break;
                case OUTPUT:
                    tempLayer.initOutputKernel(tempFrontMapNum, tempFrontLayer.getMapSize());
                    tempLayer.initBias();
                    tempLayer.initErrors(batchSize);
                    tempLayer.initOutMaps(batchSize);
                    break;
            }
        } 
    }

    /** 
    * @Description: Forward computation.
    * @Param: [instance]
    * @return: void
    */
    private void forward(Instance instance) {
        setInputLayerOutput(instance);
        for (int l = 1; l < layerBuilder.getNumLayers(); l++) {
            CnnLayer tempCurrentLayer = layerBuilder.getLayer(l);
            CnnLayer tempLastLayer = layerBuilder.getLayer(l - 1);
            switch (tempCurrentLayer.getType()) {
                case CONVOLUTION:
                    setConvolutionOutput(tempCurrentLayer, tempLastLayer);
                    break;
                case SAMPLING:
                    setSampOutput(tempCurrentLayer, tempLastLayer);
                    break;
                case OUTPUT:
                    setConvolutionOutput(tempCurrentLayer, tempLastLayer);
                    break;
                default:
                    break;
            }
        } 
    }


    /** 
    * @Description: Set the input layer's output: given a record, copy its values into the input map.
    * @Param: [paraRecord]
    * @return: void
    */
    private void setInputLayerOutput(Instance paraRecord) {
        CnnLayer tempInputLayer = layerBuilder.getLayer(0);
        Size tempMapSize = tempInputLayer.getMapSize();
        double[] tempAttributes = paraRecord.getAttributes();
        if (tempAttributes.length != tempMapSize.width * tempMapSize.height)
            throw new RuntimeException("input record does not match the map size.");

        for (int i = 0; i < tempMapSize.width; i++) {
            for (int j = 0; j < tempMapSize.height; j++) {
                tempInputLayer.setMapValue(0, i, j, tempAttributes[tempMapSize.height * i + j]);
            }
        }
    }

   
    /** 
    * @Description: Compute the convolution output according to the output of the last layer.
    * @Param: [paraLayer, paraLastLayer]
    * @return: void
    */
    private void setConvolutionOutput(final CnnLayer paraLayer, final CnnLayer paraLastLayer) {
        // int mapNum = paraLayer.getOutMapNum();
        final int lastMapNum = paraLastLayer.getOutMapNum();

        // Attention: paraLayer.getOutMapNum() may not be correct.
        for (int j = 0; j < paraLayer.getOutMapNum(); j++) {
            double[][] tempSumMatrix = null;
            for (int i = 0; i < lastMapNum; i++) {
                double[][] lastMap = paraLastLayer.getMap(i);
                double[][] kernel = paraLayer.getKernel(i, j);
                if (tempSumMatrix == null) {
                    // On the first map.
                    tempSumMatrix = MathUtils.convnValid(lastMap, kernel);
                } else {
                    // Sum up convolution maps
                    tempSumMatrix = MathUtils.matrixOp(MathUtils.convnValid(lastMap, kernel),
                            tempSumMatrix, null, null, MathUtils.plus);
                } 
            }

            // Activation.
            final double bias = paraLayer.getBias(j);
            tempSumMatrix = MathUtils.matrixOp(tempSumMatrix, new Operator() {
                private static final long serialVersionUID = 2469461972825890810L;

                @Override
                public double process(double value) {
                    return MathUtils.sigmod(value + bias);
                }

            });

            paraLayer.setMapValue(j, tempSumMatrix);
        }
    }


    /** 
    * @Description: Compute the sampling (mean pooling) output according to the output of the last layer.
    * @Param: [paraLayer, paraLastLayer]
    * @return: void
    */
    private void setSampOutput(final CnnLayer paraLayer, final CnnLayer paraLastLayer) {
        // int tempLastMapNum = paraLastLayer.getOutMapNum();

        // Attention: paraLayer.outMapNum may not be right.
        for (int i = 0; i < paraLayer.outMapNum; i++) {
            double[][] lastMap = paraLastLayer.getMap(i);
            Size scaleSize = paraLayer.getScaleSize();
            double[][] sampMatrix = MathUtils.scaleMatrix(lastMap, scaleSize);
            paraLayer.setMapValue(i, sampMatrix);
        }
    }

    /**
    * @Description: Train the CNN.
    * @Param: [paraDataset, paraRounds]
    * @return: void
    */
    public void train(Dataset paraDataset, int paraRounds) {
        for (int t = 0; t < paraRounds; t++) {
            System.out.println("Iteration: " + t);
            // Despite its name, this is the number of batches in one round.
            int tempNumEpochs = paraDataset.size() / batchSize;
            if (paraDataset.size() % batchSize != 0)
                tempNumEpochs++;
            // logger.info("Iteration {}, epochsNum: {}", t, epochsNum);
            double tempNumCorrect = 0;
            int tempCount = 0;
            for (int i = 0; i < tempNumEpochs; i++) {
                int[] tempRandomPerm = MathUtils.randomPerm(paraDataset.size(), batchSize);
                CnnLayer.prepareForNewBatch();

                for (int index : tempRandomPerm) {
                    boolean isRight = train(paraDataset.getInstance(index));
                    if (isRight)
                        tempNumCorrect++;
                    tempCount++;
                    CnnLayer.prepareForNewRecord();
                }

                updateParameters();
                if (i % 50 == 0) {
                    System.out.print("..");
                    if (i + 50 > tempNumEpochs)
                        System.out.println();
                }
            }
            double p = 1.0 * tempNumCorrect / tempCount;
            if (t % 10 == 1 && p > 0.96) {
                ALPHA = 0.001 + ALPHA * 0.9;
                // logger.info("设置 alpha = {}", ALPHA);
            }
            System.out.println("Training precision: " + p);
            // logger.info("计算精度: {}/{}={}.", right, count, p);
        }
    }


    /**
    * @Description: Train the CNN with one record.
    * @Param: [paraRecord]
    * @return: boolean
    */
    private boolean train(Instance paraRecord) {
        forward(paraRecord);
        boolean result = backPropagation(paraRecord);
        return result;
    }

    /**
    * @Description: Back-propagation.
    * @Param: [paraRecord]
    * @return: boolean
    */
    private boolean backPropagation(Instance paraRecord) {
        boolean result = setOutputLayerErrors(paraRecord);
        setHiddenLayerErrors();
        return result;
    }

    /** 
    * @Description: Update the parameters.
    * @Param: []
    * @return: void
    */
    private void updateParameters() {
        for (int l = 1; l < layerBuilder.getNumLayers(); l++) {
            CnnLayer layer = layerBuilder.getLayer(l);
            CnnLayer lastLayer = layerBuilder.getLayer(l - 1);
            switch (layer.getType()) {
                case CONVOLUTION:
                case OUTPUT:
                    updateKernels(layer, lastLayer);
                    updateBias(layer, lastLayer);
                    break;
                default:
                    break;
            }
        } 
    }

    
    /** 
    * @Description: Update the biases.
    * @Param: [paraLayer, paraLastLayer]
    * @return: void
    */
    private void updateBias(final CnnLayer paraLayer, CnnLayer paraLastLayer) {
        final double[][][][] errors = paraLayer.getErrors();
        // int mapNum = paraLayer.getOutMapNum();

        // Attention: getOutMapNum() may not be correct.
        for (int j = 0; j < paraLayer.getOutMapNum(); j++) {
            double[][] error = MathUtils.sum(errors, j);
            double deltaBias = MathUtils.sum(error) / batchSize;
            double bias = paraLayer.getBias(j) + ALPHA * deltaBias;
            paraLayer.setBias(j, bias);
        } 
    }

    /** 
    * @Description: Update the kernels.
    * @Param: [paraLayer, paraLastLayer]
    * @return: void
    */
    private void updateKernels(final CnnLayer paraLayer, final CnnLayer paraLastLayer) {
        // int mapNum = paraLayer.getOutMapNum();
        int tempLastMapNum = paraLastLayer.getOutMapNum();

        // Attention: getOutMapNum() may not be right
        for (int j = 0; j < paraLayer.getOutMapNum(); j++) {
            for (int i = 0; i < tempLastMapNum; i++) {
                double[][] tempDeltaKernel = null;
                for (int r = 0; r < batchSize; r++) {
                    double[][] error = paraLayer.getError(r, j);
                    if (tempDeltaKernel == null)
                        tempDeltaKernel = MathUtils.convnValid(paraLastLayer.getMap(r, i), error);
                    else {
                        tempDeltaKernel = MathUtils.matrixOp(
                                MathUtils.convnValid(paraLastLayer.getMap(r, i), error),
                                tempDeltaKernel, null, null, MathUtils.plus);
                    }
                }

                tempDeltaKernel = MathUtils.matrixOp(tempDeltaKernel, divideBatchSize);
                if (!rangeCheck(tempDeltaKernel, -10, 10)) {
                    System.exit(0);
                } 
                double[][] kernel = paraLayer.getKernel(i, j);
                tempDeltaKernel = MathUtils.matrixOp(kernel, tempDeltaKernel, multiplyLambda,
                        multiplyAlpha, MathUtils.plus);
                paraLayer.setKernel(i, j, tempDeltaKernel);
            } 
        } 
    }


    /** 
    * @Description: Set the errors of all hidden layers.
    * @Param: []
    * @return: void
    */
    private void setHiddenLayerErrors() {
        // System.out.println("setHiddenLayerErrors");
        for (int l = layerBuilder.getNumLayers() - 2; l > 0; l--) {
            CnnLayer layer = layerBuilder.getLayer(l);
            CnnLayer nextLayer = layerBuilder.getLayer(l + 1);
            // System.out.println("layertype = " + layer.getType());
            switch (layer.getType()) {
                case SAMPLING:
                    setSamplingErrors(layer, nextLayer);
                    break;
                case CONVOLUTION:
                    setConvolutionErrors(layer, nextLayer);
                    break;
                default:
                    break;
            }
        }
    }


    /**
    * @Description: Set the errors of the sampling layers.
    * @Param: [paraLayer, paraNextLayer]
    * @return: void
    */
    private void setSamplingErrors(final CnnLayer paraLayer, final CnnLayer paraNextLayer) {
        // int mapNum = layer.getOutMapNum();
        int tempNextMapNum = paraNextLayer.getOutMapNum();
        // Attention: getOutMapNum() may not be correct
        for (int i = 0; i < paraLayer.getOutMapNum(); i++) {
            double[][] sum = null;
            for (int j = 0; j < tempNextMapNum; j++) {
                double[][] nextError = paraNextLayer.getError(j);
                double[][] kernel = paraNextLayer.getKernel(i, j);
                if (sum == null) {
                    sum = MathUtils.convnFull(nextError, MathUtils.rot180(kernel));
                } else {
                    sum = MathUtils.matrixOp(
                            MathUtils.convnFull(nextError, MathUtils.rot180(kernel)), sum, null,
                            null, MathUtils.plus);
                }
            }
            paraLayer.setError(i, sum);

            if (!rangeCheck(sum, -2, 2)) {
                System.out.println(
                        "setSampErrors, error out of range.\r\n" + Arrays.deepToString(sum));
            }
        }
    }


    /**
    * @Description: Set the errors of the convolution layers.
    * @Param: [paraLayer, paraNextLayer]
    * @return: void
    */
    private void setConvolutionErrors(final CnnLayer paraLayer, final CnnLayer paraNextLayer) {
        // System.out.println("setConvErrors");
        for (int m = 0; m < paraLayer.getOutMapNum(); m++) {
            Size tempScale = paraNextLayer.getScaleSize();
            double[][] tempNextLayerErrors = paraNextLayer.getError(m);
            double[][] tempMap = paraLayer.getMap(m);
            double[][] tempOutMatrix = MathUtils.matrixOp(tempMap, MathUtils.cloneMatrix(tempMap),
                    null, MathUtils.one_value, MathUtils.multiply);
            tempOutMatrix = MathUtils.matrixOp(tempOutMatrix,
                    MathUtils.kronecker(tempNextLayerErrors, tempScale), null, null,
                    MathUtils.multiply);
            paraLayer.setError(m, tempOutMatrix);

            // System.out.println("range check nextError");
            if (!rangeCheck(tempNextLayerErrors, -10, 10)) {
                System.out.println("setConvErrors, nextError out of range:\r\n"
                        + Arrays.deepToString(tempNextLayerErrors));
                System.out.println("the new errors are:\r\n" + Arrays.deepToString(tempOutMatrix));

                System.exit(0);
            }

            if (!rangeCheck(tempOutMatrix, -10, 10)) {
                System.out.println("setConvErrors, error out of range.");
                System.exit(0);
            }
        }
    }

    /**
    * @Description: Set the errors of the output layer. The return value indicates whether the prediction is correct.
    * @Param: [paraRecord]
    * @return: boolean
    */
    private boolean setOutputLayerErrors(Instance paraRecord) {
        CnnLayer tempOutputLayer = layerBuilder.getOutputLayer();
        int tempMapNum = tempOutputLayer.getOutMapNum();

        double[] tempTarget = new double[tempMapNum];
        double[] tempOutMaps = new double[tempMapNum];
        for (int m = 0; m < tempMapNum; m++) {
            double[][] outmap = tempOutputLayer.getMap(m);
            tempOutMaps[m] = outmap[0][0];
        }

        int tempLabel = paraRecord.getLabel().intValue();
        tempTarget[tempLabel] = 1;
        // Log.i(record.getLable() + "outmaps:" +
        // Util.fomart(outmaps)
        // + Arrays.toString(target));
        for (int m = 0; m < tempMapNum; m++) {
            tempOutputLayer.setError(m, 0, 0,
                    tempOutMaps[m] * (1 - tempOutMaps[m]) * (tempTarget[m] - tempOutMaps[m]));
        }

        return tempLabel == MathUtils.getMaxIndex(tempOutMaps);
    }


    /**
    * @Description: Set up the network with the given batch size.
    * @Param: [paraBatchSize]
    * @return: void
    */
    public void setup(int paraBatchSize) {
        CnnLayer tempInputLayer = layerBuilder.getLayer(0);
        tempInputLayer.initOutMaps(paraBatchSize);

        for (int i = 1; i < layerBuilder.getNumLayers(); i++) {
            CnnLayer tempLayer = layerBuilder.getLayer(i);
            CnnLayer tempLastLayer = layerBuilder.getLayer(i - 1);
            int tempLastMapNum = tempLastLayer.getOutMapNum();
            switch (tempLayer.getType()) {
                case INPUT:
                    break;
                case CONVOLUTION:
                    tempLayer.setMapSize(
                            tempLastLayer.getMapSize().subtract(tempLayer.getKernelSize(), 1));
                    tempLayer.initKernel(tempLastMapNum);
                    tempLayer.initBias();
                    tempLayer.initErrors(paraBatchSize);
                    tempLayer.initOutMaps(paraBatchSize);
                    break;
                case SAMPLING:
                    tempLayer.setOutMapNum(tempLastMapNum);
                    tempLayer.setMapSize(tempLastLayer.getMapSize().divide(tempLayer.getScaleSize()));
                    tempLayer.initErrors(paraBatchSize);
                    tempLayer.initOutMaps(paraBatchSize);
                    break;
                case OUTPUT:
                    tempLayer.initOutputKernel(tempLastMapNum, tempLastLayer.getMapSize());
                    tempLayer.initBias();
                    tempLayer.initErrors(paraBatchSize);
                    tempLayer.initOutMaps(paraBatchSize);
                    break;
            }
        }
    }

    /**
    * @Description: Predict labels for the given dataset.
    * @Param: [paraDataset]
    * @return: int[]
    */
    public int[] predict(Dataset paraDataset) {
        System.out.println("Predicting ... ");
        CnnLayer.prepareForNewBatch();

        int[] resultPredictions = new int[paraDataset.size()];
        double tempCorrect = 0.0;

        Instance tempRecord;
        for (int i = 0; i < paraDataset.size(); i++) {
            tempRecord = paraDataset.getInstance(i);
            forward(tempRecord);
            CnnLayer outputLayer = layerBuilder.getOutputLayer();

            int tempMapNum = outputLayer.getOutMapNum();
            double[] tempOut = new double[tempMapNum];
            for (int m = 0; m < tempMapNum; m++) {
                double[][] outmap = outputLayer.getMap(m);
                tempOut[m] = outmap[0][0];
            }

            resultPredictions[i] = MathUtils.getMaxIndex(tempOut);
            if (resultPredictions[i] == tempRecord.getLabel().intValue()) {
                tempCorrect++;
            }
        }

        System.out.println("Accuracy: " + tempCorrect / paraDataset.size());
        return resultPredictions;
    }


    /**
    * @Description: Range check, only for debugging.
    * @Param: [paraMatrix, paraLowerBound, paraUpperBound]
    * @return: boolean
    */
    public boolean rangeCheck(double[][] paraMatrix, double paraLowerBound, double paraUpperBound) {
        for (int i = 0; i < paraMatrix.length; i++) {
            for (int j = 0; j < paraMatrix[0].length; j++) {
                if ((paraMatrix[i][j] < paraLowerBound) || (paraMatrix[i][j] > paraUpperBound)) {
                    System.out.println("" + paraMatrix[i][j] + " out of range (" + paraLowerBound
                            + ", " + paraUpperBound + ")\r\n");
                    return false;
                }
            }
        }

        return true;
    }


    public static void main(String[] args) {
        LayerBuilder builder = new LayerBuilder();
        // Input layer, the maps are 28*28
        builder.addLayer(new CnnLayer(LayerTypeEnum.INPUT, -1, new Size(28, 28)));
        // Convolution output has size 24*24, 24=28+1-5
        builder.addLayer(new CnnLayer(LayerTypeEnum.CONVOLUTION, 6, new Size(5, 5)));
        // Sampling output has size 12*12, 12=24/2
        builder.addLayer(new CnnLayer(LayerTypeEnum.SAMPLING, -1, new Size(2, 2)));
        // Convolution output has size 8*8, 8=12+1-5
        builder.addLayer(new CnnLayer(LayerTypeEnum.CONVOLUTION, 12, new Size(5, 5)));
        // Sampling output has size 4*4, 4=8/2
        builder.addLayer(new CnnLayer(LayerTypeEnum.SAMPLING, -1, new Size(2, 2)));
        // output layer, digits 0 - 9.
        builder.addLayer(new CnnLayer(LayerTypeEnum.OUTPUT, 10, null));
        // Construct the full CNN.
        FullCnn tempCnn = new FullCnn(builder, 10);

        Dataset tempTrainingSet = new Dataset("F:\\研究生\\研0\\学习\\Java_Study\\data_set\\train.format", ",", 784);

        // Train the model.
        tempCnn.train(tempTrainingSet, 10);
        // tempCnn.predict(tempTrainingSet);
    }
}


Day 86: Network Construction (2. Code Understanding)

The structure of a CNN is actually the same as that of an ANN.

  1. According to the settings in main, the numbers of nodes in the layers are 1, 6, 6, 12, 12, 10. Each node here stores a matrix.
  2. The 6 and 12 are hyperparameters that you can set yourself.
  3. In a convolution layer, each edge corresponds to one kernel. Pooling layers need no parameters.
  4. The first layer has only 1 node because the image is binary. For a color (RGB) image there are three channels, so the first layer should have 3 nodes.
  5. The last layer has 10 nodes because the digits 0-9 give 10 possible classes. They are all 1*1 matrices; classification picks the class whose value is largest, which is also consistent with an ANN.

The main task is to understand the terminology, chiefly convolution and pooling (the original post illustrates each with a figure); the sketch below runs both operations.
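Since the figures are not reproduced here, the following sketch (a hypothetical demo class, assuming the MathUtils and Size classes above) demonstrates both operations on a small 4x4 map:

package MachineLearning.cnn;

import java.util.Arrays;

public class ConvPoolDemo {
    public static void main(String[] args) {
        double[][] tempInput = {
                {1, 2, 3, 4},
                {5, 6, 7, 8},
                {9, 10, 11, 12},
                {13, 14, 15, 16}};

        // Valid convolution with a 3x3 all-ones kernel: each output value is the
        // sum of a 3x3 window, and the output shrinks to 2x2 (4 - 3 + 1 = 2).
        double[][] tempKernel = {{1, 1, 1}, {1, 1, 1}, {1, 1, 1}};
        // Prints [[54.0, 63.0], [90.0, 99.0]].
        System.out.println(Arrays.deepToString(MathUtils.convnValid(tempInput, tempKernel)));

        // Mean pooling with a 2x2 scale: each output value is the average of a
        // 2x2 block, and the output shrinks to 2x2 (4 / 2 = 2).
        // Prints [[3.5, 5.5], [11.5, 13.5]].
        System.out.println(Arrays.deepToString(MathUtils.scaleMatrix(tempInput, new Size(2, 2))));
    }
}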

Day 87: Experiments

  1. As can be seen, although the network is not large, the results are quite good. Results on the test set are comparable.
  2. Find another dataset on your own, build a network, run the experiment, and report the results.
    (To be filled in later.)

Day 88: Summary (Days 01 - 10: Basic Syntax)

This stage was essentially a review of some features of the Java language and the corresponding program designs; these knowledge points make the more advanced material later easier to understand.

Day 89: Summary (Days 11 - 50: Data Structures)

Data structures are one of the components of a program: program = data structures + algorithms. Understanding data structures and applying them skillfully is therefore an essential skill for a programmer, as it directly affects program performance.

Day 90: Summary (Days 51 - 87: Machine Learning)

This stage mainly covered implementing machine learning algorithms in code. The process helps in understanding the ideas and principles behind the algorithms, rather than just being a library caller: you should know the why, not only the how. The classical machine learning algorithms turned out to be fairly easy to follow, but in the deep learning part my grasp of neural networks does not yet feel thorough, especially back-propagation: I do not fully understand the specific role of the error function, only that it should drive the repeated iterations toward an optimal solution.
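One note on that point of confusion: for the squared error E = (t - o)^2 / 2 with a sigmoid output o, the chain rule gives the output-layer delta (t - o) * o * (1 - o), since the derivative of the sigmoid is o * (1 - o). This is exactly the expression tempOutMaps[m] * (1 - tempOutMaps[m]) * (tempTarget[m] - tempOutMaps[m]) in setOutputLayerErrors, so the error function tells each weight in which direction, and how strongly, to move.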
