1、英特尔MKL
面对计算量庞大的线性代数计算,英特尔为其CPU开发了MKL(Math Kernel Library)。Intel MKL能够充分利用CPU指令集和并行计算优势,较大提高矩阵计算速度。
具体参考:https://software.intel.com/content/www/us/en/develop/articles/intel-math-kernel-library-documentation.html
2、概述
本实验采用英特尔MKL与OpenMP完成模糊C均值图像分割。Intel MKL没有为矩阵逐元素乘积开发API。若要使用MKL计算矩阵逐元素乘积,步骤如下:
1、取出两个矩阵的相同一行(列),得到两个行向量A和B。
2、将A转置为列向量,得到
A
T
A^T
AT
3、计算
A
T
B
A^TB
ATB,得到一个方阵。
4、取出方阵的对角线元素,即为行向量A和B的逐元素乘积。
然而,第四步仍然需要较多次循环,不如用循环直接完成逐元素相乘。所以本文采用OpenMP多线程循环计算矩阵逐元素乘积。
3、实验
采用纯循环实现的FCM:
采用Intel MKL+OpenMP多线程
可以看到计算效率提高了5倍左右,结果没有变化,具有一定提升。
4、循环版FCM代码
#include <opencv2\opencv.hpp>
#include <stdio.h>
#include <iostream>
#include <time.h>
using namespace cv;
using namespace std;
int main()
{
Mat input = imread("F:\\C++语言程序\\模糊聚类\\Images\\118035.jpg", 0);
const int rows = input.rows;
const int cols = input.cols;
namedWindow("输入");
imshow("输入", input);
// 输入参数
int m = 2, K = 3, max_iter = 100;
double error = 0.01;
// 为输入图像开辟内存空间,用二维数组表示
double** input_ptr = (double**)malloc(rows * sizeof(double*));
for (int i = 0; i < rows; i++)
{
input_ptr[i] = (double*)malloc(cols * sizeof(double));
}
for (int i = 0; i < rows; i++)
for (int j = 0; j < cols; j++)
input_ptr[i][j] = (double)input.at<uchar>(i, j);
初始化隶属度数组U,用三维数组表示
// 开辟U的内存
double*** U = (double***)malloc(rows * sizeof(double**));
for (int i = 0; i < rows; i++)
{
U[i] = (double**)malloc(cols * sizeof(double*));
for (int j = 0; j < cols; j++)
U[i][j] = (double*)malloc(K * sizeof(double));
}
// 随机初始化U
srand((int)time(0));
for (int i = 0; i < rows; i++)
for (int j = 0; j < cols; j++)
for (int l = 0; l < K; l++)
U[i][j][l] = (double)rand() / RAND_MAX;
// 令每一个像素的U之和为1
for (int i = 0; i < rows; i++)
for (int j = 0; j < cols; j++)
{
double U_sum = 0;
for (int l = 0; l < K; l++)
U_sum += U[i][j][l];
for (int l = 0; l < K; l++)
U[i][j][l] /= U_sum;
}
// 开辟聚类中心center的内存
double* center = (double*)malloc(K * sizeof(double));
// 开辟目标函数J的内存
double* J = (double*)calloc(max_iter, sizeof(double));
// 聚类
clock_t time_start = clock();
int iter;
for (iter = 0; iter < max_iter; iter++)
{
// 更新聚类中心
for (int l = 0; l < K; l++)
{
double center_up, center_down;
center_up = center_down = 0;
for (int i = 0; i < rows; i++)
for (int j = 0; j < cols; j++)
{
center_up += input_ptr[i][j] * pow(U[i][j][l], m);
center_down += pow(U[i][j][l], m);
}
center[l] = center_up / center_down;
}
// 更新隶属度
for (int i = 0; i < rows; i++)
for (int j = 0; j < cols; j++)
{
double U_down = 0;
for (int l = 0; l < K; l++)
U_down = U_down + 1 / pow(input_ptr[i][j] - center[l], 2 / (m - 1));
for (int l = 0; l < K; l++)
U[i][j][l] = 1 / (pow(input_ptr[i][j] - center[l], 2 / (m - 1)) * U_down);
}
// 更新目标函数
for (int i = 0; i < rows; i++)
for (int j = 0; j < cols; j++)
for (int l = 0; l < K; l++)
J[iter] += pow(U[i][j][l], m) * pow(input_ptr[i][j] - center[l], 2);
printf("第%d次迭代,J = %.6f\n", iter + 1, J[iter]);
// 迭代条件判定
if (abs(J[iter] - J[iter - 1]) <= error && iter >= 1)
{
cout << "目标函数已收敛\n";
break;
}
else if (iter == max_iter - 1 && iter >= 1)
{
cout << "目标函数未收敛,达到最大迭代次数\n";
break;
}
}
clock_t time_end = clock();
printf("聚类耗时%.3f秒\n", (double)(time_end - time_start) / CLOCKS_PER_SEC);
printf("单次迭代耗时%.3f秒\n", (double)(time_end - time_start) / CLOCKS_PER_SEC / ((double)iter + 1));
cout << "聚类中心分别为\n";
for (int l = 0; l < K; l++)
cout << center[l] << "\n";
// 计算隶属度划分系数和划分熵
double Vpc, Vpe;
Vpc = Vpe = 0;
for (int i = 0; i < rows; i++)
for (int j = 0; j < cols; j++)
for (int l = 0; l < K; l++)
{
Vpc += U[i][j][l] * U[i][j][l];
Vpe += U[i][j][l] * log(U[i][j][l]);
}
Vpc = Vpc / (rows * cols) * 100;
Vpe = -Vpe / (rows * cols) * 100;
cout << "隶属度划分系数Vpc = " << Vpc << "%" << endl;
cout << "隶属度划分熵Vpe = " << Vpe << "%" << endl;
// 标签数组
int** labels = (int**)calloc(rows, sizeof(int*));
for (int i = 0; i < rows; i++)
labels[i] = (int*)calloc(cols, sizeof(int));
for (int i = 0; i < rows; i++)
for (int j = 0; j < cols; j++)
{
double U_max = 0;
for (int l = 0; l < K; l++)
if (U_max <= U[i][j][l])
{
U_max = U[i][j][l];
labels[i][j] = l + 1;
}
}
// 标签数组着色
Mat result = Mat(rows, cols, CV_8U);
uchar* colors = new uchar[K];
RNG rng(time(0));
for (int i = 0; i < K; i++)
*(colors + i) = (uchar)(center[i]);
for (int i = 0; i < rows; i++)
for (int j = 0; j < cols; j++)
for (int l = 1; l <= K; l++)
if (labels[i][j] == l)
result.at<uchar>(i, j) = *(colors + l - 1);
imshow("聚类结果", result);
waitKey(0);
return 0;
}
5、Intel MKL+OpenMP版FCM
#include <stdio.h>
#include <iostream>
#include <mkl.h>
#include <opencv2/opencv.hpp>
#include <omp.h>
using namespace std;
using namespace cv;
void power(double** A, double** result, int& rows, int& cols, double& p);
void power(double** A, int& rows, int& cols, double& p);
void equal(double* A, double* result, int& N);
void equal(double& number, double* result, int& N);
void print(double** A, int& rows, int& cols);
void print(double* A, int& N);
void reshape(short* A, short** result, int& N, int& row_num, int& col_num);
void vectorProduct(double* A, double* B, double* result, int& N)
{
#pragma omp parallel for
for (int i = 0; i < N; i++)
result[i] = A[i] * B[i];
}
void power(double** A, double** result, int& rows, int& cols, double& p)
{
#pragma omp parallel for collapse(2)
for (int i = 0; i < rows; i++)
for (int j = 0; j < cols; j++)
result[i][j] = pow(A[i][j], p);
}
void power(double** A, int& rows, int& cols, double& p)
{
#pragma omp parallel for collapse(2)
for (int i = 0; i < rows; i++)
for (int j = 0; j < cols; j++)
A[i][j] = pow(A[i][j], p);
}
void equal(double* A, double* result, int& N)
{
#pragma omp parallel for
for (int i = 0; i < N; i++)
result[i] = A[i];
}
void equal(double& number, double* result, int& N)
{
#pragma omp parallel for
for (int i = 0; i < N; i++)
result[i] = number;
}
void print(double** A, int& rows, int& cols)
{
int rows_dhow = rows;
int cols_dhow = cols;
if (rows > 10)
rows_dhow = 10;
if (cols > 10)
cols_dhow = 10;
cout << '\t';
for (int j = 0; j < cols_dhow; j++)
cout << j + 1 << '\t';
cout << endl;
for (int i = 0; i < rows_dhow; i++)
{
cout << i + 1 << '\t';
for (int j = 0; j < cols_dhow; j++)
printf("%.2f\t", A[i][j]);
cout << endl;
}
}
void print(double* A, int& N)
{
int cols_dhow = N;
if (N > 10)
cols_dhow = 10;
for (int j = 0; j < cols_dhow; j++)
cout << j + 1 << '\t';
cout << endl;
for (int j = 0; j < cols_dhow; j++)
printf("%.2f\t", A[j]);
cout << endl;
}
void reshape(short* A, short** result, int& N, int& row_num, int& col_num)
{
if (row_num * col_num != N)
{
cout << "元素数量发生改变,函数退出\n";
return;
}
for (int i = 0; i < row_num; i++)
for (int j = 0; j < col_num; j++)
result[i][j] = A[i * col_num + j];
}
int main()
{
int K = 3, max_iter = 100;
double m = 2, error = 0.01;
Mat f = imread("F:\\C++语言程序\\模糊聚类\\Images\\118035.jpg", 0);
imshow("原始图像", f);
int rows = f.rows;
int cols = f.cols;
int N = rows * cols;
// 将图像一维化
double* input = new double[N];
for (int i = 0; i < rows; i++)
for (int j = 0; j < cols; j++)
input[i * cols + j] = (double)f.at<uchar>(i, j);
f.release();
double* input_temp = new double[N];
// 初始化聚类隶属度U
double** U = new double*[K];
double** U_m = new double*[K];
double* U_down = new double[N];
for (int i = 0; i < K; i++)
{
U[i] = new double[N];
U_m[i] = new double[N];
}
srand((int)time(0));
for (int i = 0; i < K; i++)
for (int j = 0; j < N; j++)
U[i][j] = (double)rand() / RAND_MAX;
for (int i = 0; i < N; i++)
{
double U_dum = 0;
for (int j = 0; j < K; j++)
U_dum += *(*(U + j) + i);
for (int j = 0; j < K; j++)
*(*(U + j) + i) /= U_dum;
}
power(U, U_m, K, N, m);
// 初始化聚类中心center
double* center = new double[K];
// 初始化距离
double** d = new double* [K];
double** d_rec = new double* [K];
for (int i = 0; i < K; i++)
{
d[i] = new double[N];
d_rec[i] = new double[N];
}
// 初始化目标函数值
double J = 0, J_old = 0;
double time_start = dsecnd();
// 聚类
int iter;
for (iter = 0; iter < max_iter; iter++)
{
for (int i = 0; i < K; i++)
{
// 更新聚类中心
*(center + i) = cblas_ddot(N, U_m[i], 1, input, 1) / cblas_dasum(N, U_m[i], 1);
equal(*(center + i), *(d + i), N); // d[:][i] = center[i]
equal(input, input_temp, N); // input_temp = input
// 更新距离
cblas_drot(N, input_temp, 1, *(d + i), 1, -1, -1); // d[i][:] = input_temp - d[i][:]
}
equal(input, input_temp, N);
// 距离平方,得到像素与聚类中心的欧氏距离
power(d, K, N, m);
// 计算隶属度
double p = -1;
power(d, d_rec, K, N, p);
double init = 0;
equal(init, U_down, N);
for (int i = 0; i < K; i++)
cblas_drot(N, U_down, 1, d_rec[i], 1, 1, 1); // U_down = U_down + d_rec[i]
for (int i = 0; i < K; i++)
vectorProduct(d[i], U_down, U[i], N); // U[i][:] = d[i][:] .* U_down[:]
power(U, K, N, p);
power(U, U_m, K, N, m);
// 计算目标函数
double J_temp;
J = 0;
for (int i = 0; i < K; i++)
{
vectorProduct(U_m[i], d[i], d[i], N);
J_temp = cblas_dasum(N, d[i], 1);
J += J_temp;
}
printf("第%d次聚类, J = %.5f\n", iter + 1, J);
// 迭代条件判定
if (iter >= 1)
{
if (abs(J - J_old) <= error)
{
cout << "目标函数已收敛\n";
break;
}
else if (iter == max_iter - 1)
{
cout << "目标函数未收敛,达到最大迭代次数\n";
break;
}
}
J_old = J;
}
double time_end = dsecnd();
printf("聚类耗时%.3f秒\n", (time_end - time_start));
printf("单次迭代耗时%.3f秒\n", (time_end - time_start) / ((double)iter + 1));
// 清理聚类过程中的二维临时数组
for (int i = 0; i < K; i++)
{
delete[]U_m[i];
U_m[i] = NULL;
delete[]d[i];
d[i] = NULL;
delete[]d_rec[i];
d_rec[i] = NULL;
}
delete[]U_m;
U_m = NULL;
delete[]d;
d = NULL;
delete[]d_rec;
d_rec = NULL;
// 清理聚类过程中的一维临时数组
delete[]input_temp;
input_temp = NULL;
delete[]input;
input = NULL;
delete[]U_down;
U_down = NULL;
cout << "聚类中心分别为\n";
print(center, K);
// 计算隶属度划分系数和划分熵
double Vpc, Vpe;
Vpc = Vpe = 0;
for (int i = 0; i < K; i++)
for (int j = 0; j < N; j++)
{
Vpc += U[i][j] * U[i][j];
Vpe += U[i][j] * log(U[i][j]);
}
Vpc = Vpc / N * 100;
Vpe = -Vpe / N * 100;
cout << "隶属度划分系数Vpc = " << Vpc << "%" << endl;
cout << "隶属度划分熵Vpe = " << Vpe << "%" << endl;
// 标签数组
short* labels = new short[N];
for (int i = 0; i < N; i++)
{
double U_max = 0;
for (int j = 0; j < K; j++)
{
if (U_max <= U[j][i])
{
U_max = U[j][i];
labels[i] = (short)j;
}
}
}
for (int i = 0; i < K; i++)
{
delete[]U[i];
U[i] = NULL;
}
delete[]U;
U = NULL;
short** labels_reshape = new short* [rows];
for (int i = 0; i < rows; i++)
labels_reshape[i] = new short[cols];
reshape(labels, labels_reshape, N, rows, cols);
delete[]labels;
labels = NULL;
// 标签数组着色
Mat result = Mat(rows, cols, CV_8U);
uchar* colors = new uchar[K];
RNG rng(time(0));
for (int i = 0; i < K; i++)
*(colors + i) = (uchar)(center[i]);
delete[]center;
for (int i = 0; i < rows; i++)
for (int j = 0; j < cols; j++)
for (int l = 0; l < K; l++)
if (labels_reshape[i][j] == (short)l)
result.at<uchar>(i, j) = *(colors + l);
imshow("聚类结果", result);
waitKey(0);
return 0;
}