图像识别大致可以分为这么几步:
1.对训练图片进行预处理
2.对预处理的图片进行Hog特征提取
3.使用SVM分类器对特征进行分类形成模型
4.根据模型对测试图片进行预测
以下步骤均通过OpenCV实现(若直接使用OpenCV提取HOG特征,可跳过预处理阶段,直接看HOG特征提取阶段):
预处理阶段:
1.图像的灰度化处理
// Convert the image to grayscale in place (the same Mat is passed as source and destination).
// NOTE(review): COLOR_RGB2GRAY assumes RGB channel order; if imageMat was loaded with
// Imgcodecs.imread (which yields BGR), COLOR_BGR2GRAY would be the matching code - confirm.
Imgproc.cvtColor(imageMat, imageMat, Imgproc.COLOR_RGB2GRAY);
2.灰度化的图片进行gamma校正
// Gamma-correct the image in place.
// Assumes imageMat is single-channel 8-bit (one byte per pixel), as the buffer size implies,
// and that `gamma` is defined by the surrounding code - TODO confirm.
int width = imageMat.cols();
int height = imageMat.rows();
byte[] data = new byte[width * height];
imageMat.get(0, 0, data);
// Pixels with the same value always map to the same result, so compute the 256 outcomes once.
byte[] gammaTable = new byte[256];
for (int level = 0; level < gammaTable.length; level++) {
    // Normalise to (0, 1).
    float normalised = (level + 0.5F) / 256;
    // Apply the gamma pre-compensation.
    float corrected = (float) Math.pow(normalised, gamma);
    // Map back to the 0..255 range; the cast truncates toward zero.
    gammaTable[level] = (byte) (corrected * 256 - 0.5F);
}
for (int pixel = 0; pixel < data.length; pixel++) {
    // & 0xff reads the signed Java byte as an unsigned 0..255 value.
    data[pixel] = gammaTable[data[pixel] & 0xff];
}
imageMat.put(0, 0, data);
3.计算图像梯度
// Compute the gradient magnitude and direction of every pixel of imageMat and store them,
// in row-major order (index = row * width + col), in a PictureGradient.
Mat gradientImage = new Mat(imageMat.rows(), imageMat.cols(), imageMat.type());
Mat gradientImageX = new Mat();
Mat gradientImageY = new Mat();
// Both derivatives must be taken with the same kernel size, scale and delta. Previously the
// X derivative used ksize=1 with delta=1 while Y used ksize=3 with delta=0: the delta added a
// constant bias to every X value and the different kernels scaled the two axes differently,
// which skewed both the magnitude and the direction computed below.
int sobelKernelSize = 3;
double sobelScale = 1;
double sobelDelta = 0;
Imgproc.Sobel(imageMat, gradientImageX, CvType.CV_64F, 1, 0, sobelKernelSize, sobelScale, sobelDelta);
Imgproc.Sobel(imageMat, gradientImageY, CvType.CV_64F, 0, 1, sobelKernelSize, sobelScale, sobelDelta);
int width = imageMat.cols();
int height = imageMat.rows();
int index = 0;
double[] dataGradient = new double[width * height];
double[] dataGradientX = new double[width * height];
double[] dataGradientY = new double[width * height];
// The derivative images are CV_64F, so they can be copied straight into double arrays.
gradientImageX.get(0, 0, dataGradientX);
gradientImageY.get(0, 0, dataGradientY);
// Per the original author's note, the PictureGradient constructor already initialises its
// size and direction lists.
PictureGradient pictureGradient = new PictureGradient();
pictureGradient.setHeight(height);
pictureGradient.setWidth(width);
for (int row = 0; row < height; row++) {
    for (int col = 0; col < width; col++) {
        index = row * width + col;
        double gradientX = dataGradientX[index];
        double gradientY = dataGradientY[index];
        // Magnitude: Euclidean length of the (x, y) derivative vector.
        dataGradient[index] = Math.sqrt(gradientX * gradientX + gradientY * gradientY);
        // Direction in degrees, in the range [-180, 180].
        double gradientDirection = Math.atan2(gradientY, gradientX) * (180.0 / Math.PI);
        pictureGradient.getDirection().add(gradientDirection);
        pictureGradient.getSize().add(dataGradient[index]);
    }
}
Hog特征提取阶段:
以下为Hog特征形成的实现过程(若用openCV可直接提取Hog特征,可以跳过这段代码)
// Number of orientation bins in each cell's gradient histogram.
static int bin = 9;
// Cells per block along each axis (a block is blockSection x blockSection cells).
static int blockSection = 2;
// Pixels per cell along each axis (a cell is cellSection x cellSection pixels).
static int cellSection = 8;
/**
 * Builds a HOG descriptor from precomputed per-pixel gradients.
 *
 * <p>The image is tiled into square blocks of {@code blockSection x blockSection} cells, each
 * cell being {@code cellSection x cellSection} pixels. For every cell a {@code bin}-entry
 * histogram is accumulated: each pixel adds its gradient magnitude to the bin selected by
 * {@link #directionClassification(double)}. The histograms of one block are concatenated
 * (cell rows first, then cell columns) and divided by their L2 norm plus a small constant.
 * Blocks are emitted row by row, left to right.
 *
 * <p>Pixels are addressed as {@code row * width + col}, i.e. the image width is the row
 * stride, so every cell is a true 2-D window of the image. Both the overlapping and the
 * non-overlapping mode return a descriptor.
 *
 * @param imageMat        image the gradients belong to; only its width and height are read
 * @param pictureGradient per-pixel gradient directions (degrees) and magnitudes, both stored
 *                        in row-major order
 * @param overlapFlag     {@code true} to step blocks by one cell so neighbouring blocks
 *                        overlap; {@code false} to step by a whole block
 * @return the concatenated, block-normalised histograms; an empty array when the image is
 *         smaller than one block in either dimension
 * @throws IllegalArgumentException if the gradient lists hold fewer than width * height entries
 */
public static float[] acquiredHogDescriptor(Mat imageMat, PictureGradient pictureGradient, boolean overlapFlag) {
    int width = imageMat.width();
    int height = imageMat.height();
    // Side length of one block in pixels.
    int blockwidth = blockSection * cellSection;
    // Number of whole, non-overlapping blocks that fit along each axis.
    int xBlockSection = width / blockwidth;
    int yBlockSection = height / blockwidth;
    if (xBlockSection == 0 || yBlockSection == 0) {
        // Not even one block fits; previously this produced a negative block count.
        return new float[0];
    }

    // Gradient direction and magnitude of every pixel of the image.
    List<Double> direction = pictureGradient.getDirection();
    List<Double> size = pictureGradient.getSize();
    int pixelCount = width * height;
    if (direction.size() < pixelCount || size.size() < pixelCount) {
        throw new IllegalArgumentException("PictureGradient holds " + direction.size() + " directions and "
                + size.size() + " magnitudes, but the image has " + pixelCount + " pixels");
    }

    // Distance in pixels between the origins of neighbouring blocks.
    int blockStride = blockwidth;
    if (overlapFlag) {
        // Overlapping blocks advance one cell at a time. With blockSection == 2 this is the
        // same count as before (2 * blocks - 1); the form below also stays inside the image
        // for larger blocks.
        blockStride = cellSection;
        xBlockSection = (xBlockSection - 1) * blockSection + 1;
        yBlockSection = (yBlockSection - 1) * blockSection + 1;
    }

    int blockLength = blockSection * blockSection * bin;
    float[] hogDescriptor = new float[xBlockSection * yBlockSection * blockLength];
    int currentDescriptorIndex = 0;

    // Walk the blocks row by row.
    for (int n = 0; n < yBlockSection; n++) {
        int blockTop = n * blockStride;
        for (int m = 0; m < xBlockSection; m++) {
            int blockLeft = m * blockStride;
            // Histograms of all cells of this block, laid out cell after cell.
            double[] blockFeatures = new double[blockLength];

            // Walk the cells of the block.
            for (int i = 0; i < blockSection; i++) {
                for (int j = 0; j < blockSection; j++) {
                    int cellTop = blockTop + i * cellSection;
                    int cellLeft = blockLeft + j * cellSection;
                    int cellOffset = (i * blockSection + j) * bin;
                    // Walk the pixels of the cell and vote into the orientation histogram.
                    for (int z = 0; z < cellSection; z++) {
                        for (int x = 0; x < cellSection; x++) {
                            int indexPixel = (cellTop + z) * width + cellLeft + x;
                            int directionSection = directionClassification(direction.get(indexPixel));
                            blockFeatures[cellOffset + directionSection] += size.get(indexPixel);
                        }
                    }
                }
            }

            // L2-normalise the block. The sum is kept in double so no precision is lost.
            double distanceSquare = 0;
            for (double b : blockFeatures) {
                distanceSquare += b * b;
            }
            double distance = Math.sqrt(distanceSquare);
            for (double b : blockFeatures) {
                // The small constant keeps the division defined for an all-zero block.
                hogDescriptor[currentDescriptorIndex] = (float) (b / (distance + 0.01F));
                currentDescriptorIndex++;
            }
        }
    }
    return hogDescriptor;
}
/**
 * Maps a gradient direction to the index of its orientation bin.
 *
 * <p>Orientation is treated as unsigned: a direction and the direction 180 degrees opposite
 * describe the same edge orientation and land in the same bin. The angle is therefore folded
 * into [0, 180) by taking it modulo 180 (so -45 becomes 135, not 45), and that half-circle is
 * split into {@code bin} equal intervals.
 *
 * @param direction gradient direction in degrees; any finite value is accepted
 * @return the bin index, in the range [0, bin - 1]
 * @throws NumberFormatException if {@code direction} is NaN or infinite (the same exception
 *                               type the earlier BigDecimal-based implementation raised)
 */
public static int directionClassification(double direction) {
    if (Double.isNaN(direction) || Double.isInfinite(direction)) {
        throw new NumberFormatException("Gradient direction must be finite: " + direction);
    }
    // Java's % keeps the sign of the dividend, so negative remainders are shifted up by 180.
    double unsignedAngle = direction % 180.0;
    if (unsignedAngle < 0) {
        unsignedAngle += 180.0;
    }
    // Adding 180 to a tiny negative remainder can round to exactly 180, which is the same
    // orientation as 0.
    if (unsignedAngle >= 180.0) {
        unsignedAngle = 0.0;
    }
    // Bin on the exact angle: rounding to whole degrees first would shift every bin boundary,
    // and an integer bin width would be wrong whenever bin does not divide 180.
    int directionSection = (int) (unsignedAngle * bin / 180.0);
    // Guard against floating-point rounding at the very top of the range.
    return Math.min(directionSection, bin - 1);
}
openCV提取Hog特征
/**
 * Extracts the HOG descriptor of an image with OpenCV's built-in {@code HOGDescriptor}.
 *
 * <p>The descriptor is configured with a window of
 * {@code Constant.PICTURE_SIZE_WIDTH x Constant.PICTURE_SIZE_HEIGHT}, 16x16 blocks, an 8x8
 * block stride, 8x8 cells and 9 orientation bins.
 *
 * @param imageMat image to describe
 * @return the descriptor values computed by OpenCV
 */
public static float[] openCVGetHog(Mat imageMat) {
    Size windowSize = new Size(Constant.PICTURE_SIZE_WIDTH, Constant.PICTURE_SIZE_HEIGHT);
    Size blockSize = new Size(16, 16);
    Size blockStride = new Size(8, 8);
    Size cellSize = new Size(8, 8);
    int orientationBins = 9;
    HOGDescriptor hog = new HOGDescriptor(windowSize, blockSize, blockStride, cellSize, orientationBins);

    MatOfFloat descriptorsOfMat = new MatOfFloat();
    hog.compute(imageMat, descriptorsOfMat);
    return descriptorsOfMat.toArray();
}
SVM训练模型阶段:
// Assemble the training matrices (one row per sample) and train a linear C-SVC.
// The row count comes from the samples actually supplied: sizing by a separate constant left
// uninitialised rows (and labels) in the training set when fewer samples were given, and
// wrote past the last row when more were given.
int sampleCount = descriptor.size();
int featureDim = (int) Constant.PICTURE_FEATURE_DIM;
Mat data_mat = new Mat(sampleCount, featureDim, CvType.CV_32FC1);
// Labels are stored as 32-bit integers.
Mat res_mat = new Mat(sampleCount, 1, CvType.CV_32S);
for (int i = 0; i < sampleCount; i++) {
    if (descriptor.get(i).length != featureDim) {
        // A row of the wrong length would either leave part of the row unset or spill into
        // the next sample's row.
        throw new IllegalArgumentException("Sample " + i + " has " + descriptor.get(i).length
                + " features, expected " + featureDim);
    }
    // Write the whole feature vector with one call instead of one call per element.
    data_mat.put(i, 0, descriptor.get(i));
    res_mat.put(i, 0, img_cag.get(i));
}
svm = SVM.create();
svm.setType(SVM.C_SVC);
svm.setKernel(SVM.LINEAR);
svm.setTermCriteria(new TermCriteria(TermCriteria.MAX_ITER, Constant.ITERATION_NUM, 1e-6));
svm.train(data_mat, ROW_SAMPLE, res_mat);
上面的res_mat(图片的标签)的Mat类型一定要设置为CvType.CV_32S,因为标签必须是精确的int型整数
SVM设置的参数这里不详细的去说
测试图片预测阶段:
// Classify one sample with the trained model and keep the result in flag.
// NOTE(review): presumably mat is a single CV_32F row with the same number of features as the
// training rows - confirm against the caller.
flag = svm.predict(mat);
结果:训练图片1907张,测试图片100张,识别正确率可达74%