#define USE_MNIST_LOADER
#define MNIST_DOUBLE
#include "mnist.h"
#include <math.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <omp.h>
#include "mpi.h"
/* Number of training samples actually used for classification. */
int nTt = 5000;
/* Number of test samples that get classified. */
int nTe = 400;
/* Number of samples the training buffer is allocated for. */
int AllTrain = 60005;
/* Number of samples the test buffer is allocated for. */
int AllTest = 1005;
/*
 * One candidate neighbour of a query digit: how far away a training sample
 * is, together with that sample's class label.
 */
typedef struct
{
    float distance; /* distance between the query digit and a training digit */
    int label;      /* class label of that training digit */
} Distance;
/*
 * Euclidean distance between two 28x28 digit images.
 *
 * Both images are taken by value (the signature is kept for existing
 * callers). The squared differences are accumulated in a double: the
 * previous int accumulator truncated the running sum after every addition,
 * which silently discarded any fractional contribution.
 *
 * Returns the square root of the summed squared pixel differences.
 */
float calDistance(mnist_data digit1, mnist_data digit2)
{
    double squareSum = 0.0;

    for (int i = 0; i < 28; i++)
    {
        for (int j = 0; j < 28; j++)
        {
            /* Convert each pixel first so the subtraction is done in double
               whatever the underlying pixel type is. */
            const double diff = (double)digit1.data[i][j] - (double)digit2.data[i][j];
            squareSum += diff * diff;
        }
    }
    return (float)sqrt(squareSum);
}
/*
 * Swap the two entries of `in` found at positions index1 and index2.
 * Passing the same index twice leaves the array unchanged.
 */
void exchange(Distance *in, int index1, int index2)
{
    const Distance first = in[index1];
    const Distance second = in[index2];

    in[index1] = second;
    in[index2] = first;
}
/*
 * Selection sort: orders the first `length` entries of `in` by ascending
 * distance, in place. On each pass the smallest remaining entry (the first
 * one found when several are equal) is swapped into the current position.
 */
void selectSort(Distance *in, int length)
{
    int pos = 0;

    while (pos < length - 1)
    {
        int smallest = pos;

        for (int probe = pos + 1; probe < length; ++probe)
        {
            if (in[probe].distance < in[smallest].distance)
            {
                smallest = probe;
            }
        }
        exchange(in, pos, smallest);
        ++pos;
    }
}
/*
 * Predict the class of one digit with a K-nearest-neighbour vote.
 *
 * K     - number of nearest training samples that vote (clamped to nt).
 * in    - the digit to classify.
 * train - training samples; the first nt entries are read.
 * nt    - number of usable training samples in `train`.
 *
 * Returns the winning label. Neighbours vote in order of increasing
 * distance and a label only takes the lead when its count strictly exceeds
 * the current best, so on a tie the label that reached that count first
 * wins. Returns 0 when there is nothing to vote on (K <= 0 or nt <= 0) and
 * -1 when the scratch buffer cannot be allocated.
 */
int prediction(int K, mnist_data in, mnist_data *train, int nt)
{
    if (K <= 0 || nt <= 0)
    {
        return 0;
    }

    Distance *distance = malloc((size_t)nt * sizeof *distance);
    if (distance == NULL)
    {
        return -1;
    }

    /* Distance from the query digit to every training sample. */
    #pragma omp parallel for
    for (int it = 0; it < nt; it++)
    {
        distance[it].distance = calDistance(in, train[it]);
        distance[it].label = train[it].label;
    }

    /*
     * Only the K closest samples vote, so run just K selection passes.
     * Each pass fixes one more position, which gives exactly the prefix a
     * full selection sort would produce, at O(K * nt) instead of O(nt^2).
     */
    const int neighbours = (K < nt) ? K : nt;
    for (int i = 0; i < neighbours; i++)
    {
        int nearest = i;
        for (int j = i + 1; j < nt; j++)
        {
            if (distance[j].distance < distance[nearest].distance)
            {
                nearest = j;
            }
        }
        exchange(distance, i, nearest);
    }

    /* Majority vote among the selected neighbours. */
    int num[10] = { 0 };
    int predict = 0;
    int maxsize = -1;
    for (int i = 0; i < neighbours; i++)
    {
        const int label = distance[i].label;
        if (label < 0 || label >= 10)
        {
            continue; /* a corrupt label must not index past num[] */
        }
        num[label]++;
        if (maxsize < num[label])
        {
            maxsize = num[label];
            predict = label;
        }
    }

    free(distance);
    return predict;
}
void knn_classifiy(int K,int argc, char** argv)
/*用测试数据集进行测试*/
{
MPI_Init(&argc,&argv);
int myrank,SIZE;
time_t start, finish; //定义第一次调用CPU时钟单位的实际,可以理解为定义一个计数器
double Total_time; //定义一个double类型的变量,用于存储时间单位
start = time(NULL); //获取进入要测试执行时间代码段之前的CPU时间占用值
MPI_Comm_size(MPI_COMM_WORLD, &SIZE);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
mnist_data *Dtrain = (mnist_data*)malloc(AllTrain * sizeof(mnist_data));
mnist_data *Dtest = (mnist_data*)malloc(AllTest * sizeof(mnist_data));
int testCount[10] = { 0 };
int trainCount[10] = { 0 };
int groupSize = nTe/SIZE;
int startNumber, endNumber ;
int CountResult[10]={0};
MPI_Datatype Dis,oldtypes[2]={MPI_DOUBLE,MPI_UNSIGNED};
int blocklength[2]={28*28,1};
MPI_Aint displament[2];
displament[0]=0;
displament[1]=28*28*sizeof(double)+sizeof(double);
MPI_Type_create_struct(2, blocklength, displament, oldtypes, &Dis);
MPI_Type_commit(&Dis);
/*average; because the caculate account cost almost the same for each progress*/
if(nTe%SIZE>myrank)
{
startNumber=groupSize*myrank+myrank;
endNumber=startNumber+groupSize;
}
else
{
startNumber=groupSize*myrank+nTe%SIZE;
endNumber=startNumber+groupSize-1;
}
if(myrank==0)
{
printf(".knn_classifiy.\n");
int ret;
int i, j;
char* imageTrainFile ="train-images.idx3-ubyte";
char* lableTrainFile ="train-labels.idx1-ubyte";
/*读入训练数据*/
//int trainLabels=(int*)malloc(nTt*sizeof(int));
unsigned int Cnt;
if (ret = mnist_load(imageTrainFile, lableTrainFile, &Dtrain, &Cnt))
{
printf("An error occured: %d\n", ret);
}
else
{
printf("load successfully!\n");
//mnist = *m;
printf("image count: %d, %d\n", Cnt, sizeof(Dtrain->data));
for (i = 0; i <nTt; i++) {
//printf("\n%d", Dtrain[i].label);
for (j = 0; j < 28; j++) {
// printf("\n");
for (int k = 0; k < 28; k++) {
if (Dtrain[i].data[j][k] == 0)Dtrain[i].data[j][k] = 0;
else Dtrain[i].data[j][k] = 1;
//printf("%d ", (int)Dtrain[i].data[j][k]);
}
}
//printf("\n ")
}
}
printf("..load training digits.\n");
for (i = 0; i < nTt; i++)
{
//loadDigit(&Dtrain[i], fp, &trainLabels[i]);
trainCount[Dtrain[i].label] ++;
}
//fclose(fp);
printf("..Done.\n");
/*读入测试数据*/
char* imageTestFile = "t10k-images.idx3-ubyte";
char* lableTestFile = "t10k-labels.idx1-ubyte";
//int testLabels=(int *)malloc(nTe*sizeof(int));
unsigned int cnt = 0;
//= (Digit*)malloc(ntest*sizeof(Digit));
//fp = fopen(testingFile,"r");
printf("..load testing digits.\n");
if (ret = mnist_load(imageTestFile, lableTestFile, &Dtest, &cnt))
{
printf("An error occured: %d\n", ret);
}
else
{
printf("load successfully!\n");
//mnist = *m;
printf("image count: %d, %d\n", cnt, sizeof(Dtrain->data));
/*显示数字digit,现在没有什么必要,读到minist数组就好*/
for (i = 0; i < nTe; i++) {
for (j = 0; j < 28; j++) {
//printf("\n");
for (int k = 0; k < 28; k++) {
if (Dtest[i].data[j][k] == 0)Dtest[i].data[j][k] = 0;
else Dtest[i].data[j][k] = 1;
//printf("%d", (int)Dtest[i].data[j][k]);
}
}
}
}
for (i = 0; i < nTe; i++)
{
//loadDigit(&Dtest[i], fp, &testLabels[i]);
testCount[Dtest[i].label] ++;
}
//fclose(fp);
printf("..Done.\n");
/*读数完成后将训练数据和测试数据广播到所有的进程*/
for(int i=1;i<SIZE;i++){
int s1;
MPI_Pack_size( 2*nTt, Dis, MPI_COMM_WORLD, &s1 );
int bufsize = 3 * MPI_BSEND_OVERHEAD + s1,bsize;
char *buf,*bbuf;
buf = (char *)malloc( bufsize );
MPI_Buffer_attach( buf, bufsize );
MPI_Bsend( Dtrain, nTt, Dis, i, i, MPI_COMM_WORLD );
MPI_Bsend( Dtest, nTe, Dis, i, i, MPI_COMM_WORLD );
MPI_Buffer_detach( &bbuf, &bsize );
//第二个i是tag
}
// MPI_Bcast(Dtrain,AllTrain,Dis,0,MPI_COMM_WORLD);
//MPI_Bcast(Dtest,AllTest,Dis,0,MPI_COMM_WORLD);
}
else{
MPI_Recv( Dtrain, nTt, Dis, 0, myrank, MPI_COMM_WORLD, MPI_STATUS_IGNORE );
MPI_Recv( Dtest, nTe, Dis, 0, myrank, MPI_COMM_WORLD, MPI_STATUS_IGNORE );
}
/*求测试数据与训练数据之间的距离*/
MPI_Barrier(MPI_COMM_WORLD);
printf("%d ..Cal Distance begin.\n",myrank);
//Distance Distance2Train=(Distance*)malloc(nTe*sizeof(Distance));
int CorrectCount[10] = { 0 };
int itrain, itest, predict;
printf("loading:");
for (itest = startNumber; itest < endNumber; itest++)
{
predict = prediction(K, Dtest[itest], Dtrain, nTt);
/*给预测准确的进行计数*/
if (predict == Dtest[itest].label)
{
CorrectCount[predict] ++;
}
}
MPI_Barrier(MPI_COMM_WORLD);
//send the count result to process 0
//这句无效???MPI_Reduce(CorrectCount,CountResult,10,MPI_INT,MPI_SUM,0,MPI_COMM_WORLD);
//printf("%%100 \n");
if(myrank!=0){
int s1;
MPI_Pack_size( 20, MPI_INT, MPI_COMM_WORLD, &s1 );
int bufsize = 3 * MPI_BSEND_OVERHEAD + s1,bsize;
char *buf,*bbuf;
buf = (char *)malloc( bufsize );
MPI_Buffer_attach( buf, bufsize );
MPI_Bsend( CorrectCount, 10, MPI_INT, 0, myrank, MPI_COMM_WORLD );
MPI_Buffer_detach( &bbuf, &bsize );
}
else{
for(int i=1;i<SIZE;i++){
int temp[10]={0};
MPI_Recv( temp, 10, MPI_INT, i, i, MPI_COMM_WORLD, MPI_STATUS_IGNORE );
for(int j=0;j<10;j++){
CorrectCount[j]+=temp[j];
}
}
}
/*输出测试数据的准确率*/
if(myrank==0)
{
printf("\n\tCorrect ratio: \n\n");
for (int i = 0; i < 10; i++)
{
printf("%d: ( %2d / %2d ) = %.2f%%\n",
i,
CorrectCount[i],
testCount[i],
(float)(CorrectCount[i] * 1.0 / testCount[i] * 100));
}
int Totalnum = 0;
int Totalcorrect=0;
for(int i=0;i<10;i++){
Totalnum += testCount[i];
Totalcorrect += CorrectCount[i];
}
printf("Total Correct ratio: %.2f%%\n",(float)(Totalcorrect * 1.0 / Totalnum * 100));
}
finish = time(NULL); //单位换算,换算成毫秒
Total_time=finish-start;
if(myrank==0)
printf("total time is: %lfs\n",Total_time);
MPI_Finalize();
}
/*
 * Program entry point: evaluates the classifier on the known test data with
 * K = 2 neighbours. MPI start-up and shutdown happen inside knn_classifiy.
 */
int main(int argc, char** argv)
{
    const int K = 2;

    knn_classifiy(K, argc, argv);
    return 0;
}
/*
 * Parallel KNN digit recognition.
 * (Source page footer: latest recommended article published 2021-03-10 23:25:36.)
 */