/* 并行KNN数字识别 — Parallel KNN handwritten-digit recognition (MPI + OpenMP) */

#define USE_MNIST_LOADER
#define MNIST_DOUBLE
#include "mnist.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <omp.h>
#include <time.h>
#include "mpi.h"

/* Number of training images actually used when classifying. */
int nTt = 5000;
/* Number of test images that are classified and scored. */
int nTe = 400;
/* Element count used when pre-allocating the training-image buffer. */
int AllTrain = 60005;
/* Element count used when pre-allocating the test-image buffer. */
int AllTest = 1005;


/*
 * One candidate neighbour of a query image: how far away the training
 * sample is, and which digit class that training sample belongs to.
 */
typedef struct {
	float distance;	/* distance between the query and the training sample */
	int label;	/* class label of the training sample */
} Distance;

/*
 * Euclidean distance between two 28x28 images.
 *
 * digit1, digit2: the images to compare; only their `data` grids are read.
 * Returns the square root of the sum of squared per-pixel differences.
 *
 * The running sum is kept in a double. The previous version accumulated
 * into an int, which truncated the sum after every pixel and therefore
 * produced wrong distances for any non-integer pixel values.
 */
float calDistance(mnist_data digit1, mnist_data digit2)
{
	double squareSum = 0.0;

	for (int i = 0; i < 28; i++) {
		for (int j = 0; j < 28; j++) {
			/* Plain multiply instead of pow(x, 2.0): exact and cheaper. */
			double diff = digit1.data[i][j] - digit2.data[i][j];
			squareSum += diff * diff;
		}
	}
	return (float)sqrt(squareSum);
}

/*
 * Swap two entries of a Distance array in place.
 *
 * in:     array holding the entries
 * index1: position of the first entry
 * index2: position of the second entry (may equal index1)
 */
void exchange(Distance *in, int index1, int index2)
{
	Distance *first = &in[index1];
	Distance *second = &in[index2];
	Distance saved = *first;

	*first = *second;
	*second = saved;
}

/*
 * Selection sort: orders `in` by ascending `distance`.
 *
 * in:     array to sort in place
 * length: number of entries in the array
 *
 * Each pass finds the smallest remaining entry and swaps it into the
 * next output slot.
 */
void selectSort(Distance *in, int length)
{
	int slot = 0;

	while (slot < length - 1) {
		int smallest = slot;
		int probe = slot + 1;

		while (probe < length) {
			if (in[probe].distance < in[smallest].distance) {
				smallest = probe;
			}
			probe++;
		}
		exchange(in, slot, smallest);
		slot++;
	}
}

/*
 * Classify one image by majority vote among its K nearest training images.
 *
 * K:     number of neighbours that vote (clamped to nt)
 * in:    the image to classify
 * train: array of training images
 * nt:    number of entries of `train` to search
 *
 * Returns the winning label (0..9). When several labels reach the same
 * vote count, the one that reached it first while walking from the
 * nearest neighbour outwards wins. Returns -1 if the arguments are
 * unusable, if memory cannot be allocated, or if no neighbour carried a
 * label in 0..9.
 */
int prediction(int K, mnist_data in, mnist_data *train, int nt)
{
	if (K <= 0 || nt <= 0 || train == NULL) {
		return -1;
	}

	Distance *distance = malloc((size_t)nt * sizeof *distance);
	if (distance == NULL) {
		fprintf(stderr, "prediction: out of memory for %d distances\n", nt);
		return -1;
	}

	/* Distance from the query to every training image; iterations are independent. */
	#pragma omp parallel for
	for (int it = 0; it < nt; it++) {
		distance[it].distance = calDistance(in, train[it]);
		distance[it].label = train[it].label;
	}

	/*
	 * Only the K smallest distances are needed, so run just the first K
	 * passes of a selection sort. Those leading K slots end up with exactly
	 * the entries a full selection sort would place there.
	 */
	int top = (K < nt) ? K : nt;
	for (int i = 0; i < top; i++) {
		int min = i;
		for (int j = i + 1; j < nt; j++) {
			if (distance[j].distance < distance[min].distance) {
				min = j;
			}
		}
		Distance tmp = distance[i];
		distance[i] = distance[min];
		distance[min] = tmp;
	}

	/* Majority vote over the nearest neighbours. */
	int num[10] = { 0 };
	int maxsize = 0;
	int predict = -1;
	for (int i = 0; i < top; i++) {
		int label = distance[i].label;
		if (label < 0 || label >= 10) {
			continue;	/* never index the tally with an invalid label */
		}
		num[label]++;
		if (maxsize < num[label]) {
			maxsize = num[label];
			predict = label;
		}
	}

	free(distance);
	return predict;
}

void knn_classifiy(int K,int argc, char** argv)
/*用测试数据集进行测试*/
{
	MPI_Init(&argc,&argv);
	int myrank,SIZE;
    time_t start, finish;     //定义第一次调用CPU时钟单位的实际,可以理解为定义一个计数器
	double Total_time;        //定义一个double类型的变量,用于存储时间单位
	start = time(NULL);        //获取进入要测试执行时间代码段之前的CPU时间占用值
	MPI_Comm_size(MPI_COMM_WORLD, &SIZE);
	MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

	mnist_data *Dtrain = (mnist_data*)malloc(AllTrain * sizeof(mnist_data));
	mnist_data *Dtest = (mnist_data*)malloc(AllTest * sizeof(mnist_data));
	int testCount[10] = { 0 };
	int trainCount[10] = { 0 };
	int groupSize = nTe/SIZE;
	int startNumber, endNumber ;
	int CountResult[10]={0};

	MPI_Datatype Dis,oldtypes[2]={MPI_DOUBLE,MPI_UNSIGNED};
	int blocklength[2]={28*28,1};
	MPI_Aint displament[2];
    displament[0]=0;
    displament[1]=28*28*sizeof(double)+sizeof(double);
    MPI_Type_create_struct(2, blocklength, displament, oldtypes, &Dis);
    MPI_Type_commit(&Dis);
	/*average; because the caculate account cost almost the same for each progress*/
	if(nTe%SIZE>myrank)
	{
		startNumber=groupSize*myrank+myrank;
		endNumber=startNumber+groupSize;
	}
	else
	{
		startNumber=groupSize*myrank+nTe%SIZE;
		endNumber=startNumber+groupSize-1;
	}
	if(myrank==0)
	{
		printf(".knn_classifiy.\n");
		int ret;
		int i, j;

		char* imageTrainFile ="train-images.idx3-ubyte";
		char* lableTrainFile ="train-labels.idx1-ubyte";
		/*读入训练数据*/
		//int trainLabels=(int*)malloc(nTt*sizeof(int));
		unsigned int Cnt;
		if (ret = mnist_load(imageTrainFile, lableTrainFile, &Dtrain, &Cnt))
		{
			printf("An error occured: %d\n", ret);
		}
		else
		{
			printf("load successfully!\n");
			//mnist = *m;
			printf("image count: %d, %d\n", Cnt, sizeof(Dtrain->data));
			for (i = 0; i <nTt; i++) {
				//printf("\n%d", Dtrain[i].label);
				for (j = 0; j < 28; j++) {
				//	printf("\n");
					for (int k = 0; k < 28; k++) {
						if (Dtrain[i].data[j][k] == 0)Dtrain[i].data[j][k] = 0;
						else Dtrain[i].data[j][k] = 1;
						//printf("%d ", (int)Dtrain[i].data[j][k]);
					}
				}
				//printf("\n ")
			}
		}
		printf("..load training digits.\n");
		for (i = 0; i < nTt; i++)
		{
			//loadDigit(&Dtrain[i], fp, &trainLabels[i]);
			trainCount[Dtrain[i].label] ++;
		}
		//fclose(fp);
		printf("..Done.\n");
		/*读入测试数据*/
		char* imageTestFile = "t10k-images.idx3-ubyte";
		char* lableTestFile = "t10k-labels.idx1-ubyte";
		//int testLabels=(int *)malloc(nTe*sizeof(int));

		unsigned int cnt = 0;
		//= (Digit*)malloc(ntest*sizeof(Digit));
		//fp = fopen(testingFile,"r");
		printf("..load testing digits.\n");
		if (ret = mnist_load(imageTestFile, lableTestFile, &Dtest, &cnt))
		{
			printf("An error occured: %d\n", ret);
		}
		else
		{
			printf("load successfully!\n");
			//mnist = *m;
			printf("image count: %d, %d\n", cnt, sizeof(Dtrain->data));
			/*显示数字digit,现在没有什么必要,读到minist数组就好*/
			for (i = 0; i < nTe; i++) {
				for (j = 0; j < 28; j++) {
					//printf("\n");
					for (int k = 0; k < 28; k++) {
						if (Dtest[i].data[j][k] == 0)Dtest[i].data[j][k] = 0;
						else Dtest[i].data[j][k] = 1;
						//printf("%d", (int)Dtest[i].data[j][k]);
					}
				}
			}
		}
		for (i = 0; i < nTe; i++)
		{
			//loadDigit(&Dtest[i], fp, &testLabels[i]);
			testCount[Dtest[i].label] ++;
		}
		//fclose(fp);
		printf("..Done.\n");

		/*读数完成后将训练数据和测试数据广播到所有的进程*/
        for(int i=1;i<SIZE;i++){
            int s1;
            MPI_Pack_size( 2*nTt, Dis, MPI_COMM_WORLD, &s1 );
            int bufsize = 3 * MPI_BSEND_OVERHEAD + s1,bsize;
            char *buf,*bbuf;
            buf = (char *)malloc( bufsize );
            MPI_Buffer_attach( buf, bufsize );
            MPI_Bsend( Dtrain, nTt, Dis, i, i, MPI_COMM_WORLD );
            MPI_Bsend( Dtest, nTe, Dis, i, i, MPI_COMM_WORLD );
            MPI_Buffer_detach( &bbuf, &bsize );
            //第二个i是tag
        }
       // MPI_Bcast(Dtrain,AllTrain,Dis,0,MPI_COMM_WORLD);
        //MPI_Bcast(Dtest,AllTest,Dis,0,MPI_COMM_WORLD);
	}
	else{
            MPI_Recv( Dtrain, nTt, Dis, 0, myrank, MPI_COMM_WORLD, MPI_STATUS_IGNORE );
            MPI_Recv( Dtest, nTe, Dis, 0, myrank, MPI_COMM_WORLD, MPI_STATUS_IGNORE );
	}
	/*求测试数据与训练数据之间的距离*/

    MPI_Barrier(MPI_COMM_WORLD);
	printf("%d ..Cal Distance begin.\n",myrank);
	//Distance Distance2Train=(Distance*)malloc(nTe*sizeof(Distance));
	int CorrectCount[10] = { 0 };
	int itrain, itest, predict;
	printf("loading:");
	for (itest = startNumber; itest < endNumber; itest++)
	{
		predict = prediction(K, Dtest[itest], Dtrain, nTt);
		/*给预测准确的进行计数*/
		if (predict == Dtest[itest].label)
		{
			CorrectCount[predict] ++;
		}
	}
	MPI_Barrier(MPI_COMM_WORLD);
	//send the count result to process 0
	//这句无效???MPI_Reduce(CorrectCount,CountResult,10,MPI_INT,MPI_SUM,0,MPI_COMM_WORLD);
	//printf("%%100 \n");
	if(myrank!=0){
        int s1;
        MPI_Pack_size( 20, MPI_INT, MPI_COMM_WORLD, &s1 );
        int bufsize = 3 * MPI_BSEND_OVERHEAD + s1,bsize;
        char *buf,*bbuf;
        buf = (char *)malloc( bufsize );
        MPI_Buffer_attach( buf, bufsize );
        MPI_Bsend( CorrectCount, 10, MPI_INT, 0, myrank, MPI_COMM_WORLD );
        MPI_Buffer_detach( &bbuf, &bsize );
	}
	else{
        for(int i=1;i<SIZE;i++){
            int temp[10]={0};
            MPI_Recv( temp, 10, MPI_INT, i, i, MPI_COMM_WORLD, MPI_STATUS_IGNORE );
            for(int j=0;j<10;j++){
                CorrectCount[j]+=temp[j];
            }
        }
	}

	/*输出测试数据的准确率*/
	if(myrank==0)
	{
		printf("\n\tCorrect ratio:   \n\n");
		for (int i = 0; i < 10; i++)
		{
			printf("%d:  (  %2d / %2d ) =  %.2f%%\n",
				i,
				CorrectCount[i],
				testCount[i],
				(float)(CorrectCount[i] * 1.0 / testCount[i] * 100));
		}
		int Totalnum = 0;
		int Totalcorrect=0;
		for(int i=0;i<10;i++){
            Totalnum += testCount[i];
            Totalcorrect += CorrectCount[i];
		}
		printf("Total Correct ratio:   %.2f%%\n",(float)(Totalcorrect * 1.0 / Totalnum * 100));
	}
		finish = time(NULL);   //单位换算,换算成毫秒
        Total_time=finish-start;
        if(myrank==0)
        printf("total time is: %lfs\n",Total_time);
		MPI_Finalize();
}

/*
 * Program entry point: runs the classifier over the known test data and
 * reports the prediction accuracy. knn_classifiy() starts and shuts down
 * MPI itself, so nothing else is set up here.
 */
int main(int argc, char** argv)
{
	/* Number of nearest neighbours that vote on each prediction. */
	int K = 2;

	knn_classifiy(K,argc,argv);
	return 0;
}

  • 0
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值