1. vi src/cuda/Makefile
enable verbose print
38 DEFINES += -DVERBOSE_PRINT
添加printf,打印MPI_Send/MPI_Recv的通信模式
Idea: 在每个MPI_Send / MPI_Recv之前,打印sender/receiver的MPI rank(先用MPI_Group_translate_ranks把comm内的local rank转换成MPI_COMM_WORLD中的global rank,再打印)
./src/pgesv/HPL_spreadN.c
159 MPI_Group g1, basegroup;
160 int ranks[1], ranks_out[1];
161 char hostname[50];
203 if( ierr == MPI_SUCCESS ) {
204 // MPI_Comm_rank(comm,&rank);
205 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
206 gethostname(hostname, 50);
207 ranks[0]=IPMAP[npm1-partner];
208 MPI_Comm_group( comm, &g1 );
209 MPI_Comm_group( MPI_COMM_WORLD, &basegroup );
210 MPI_Group_translate_ranks(g1, 1, ranks, basegroup, ranks_out);
211 printf("<HPL_spreadN> [%s]rank[%d] receives from left rank[%d]\n", hostname, rank, ranks_out[0]);
212 ierr = MPI_Recv( Mptr( U, ibuf, 0, LDU ), 1, type,
213 IPMAP[npm1-partner], Cmsgid, comm,
214 &status );
215 }
228 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
229 gethostname(hostname, 50);
230 ranks[0]=IPMAP[npm1-partner];
231 MPI_Comm_group( comm, &g1 );
232 MPI_Comm_group( MPI_COMM_WORLD, &basegroup );
233 MPI_Group_translate_ranks(g1, 1, ranks, basegroup, ranks_out);
234 printf("<HPL_spreadN> [%s]rank[%d] sends to left rank[%d]\n", hostname, rank, ranks_out[0]);
286 if( ierr == MPI_SUCCESS ) {
288 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
289 gethostname(hostname, 50);
290 ranks[0]=IPMAP[SRCDIST+partner];
291 MPI_Comm_group( comm, &g1 );
292 MPI_Comm_group( MPI_COMM_WORLD, &basegroup );
293 MPI_Group_translate_ranks(g1, 1, ranks, basegroup, ranks_out);
294 printf("<HPL_spreadN> [%s]rank[%d] receives from right rank[%d]\n", hostname, rank, ranks_out[0]);
311 if( ierr == MPI_SUCCESS ) {
313 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
314 gethostname(hostname, 50);
315 ranks[0]=IPMAP[SRCDIST+partner];
316 MPI_Comm_group( comm, &g1 );
317 MPI_Comm_group( MPI_COMM_WORLD, &basegroup );
318 MPI_Group_translate_ranks(g1, 1, ranks, basegroup, ranks_out);
319 printf("<HPL_spreadN> [%s]rank[%d] sends to right rank[%d]\n", hostname, rank, ranks_out[0]);
322 ierr = MPI_Send( Mptr( U, ibuf, 0, LDU ), 1, type, IPMAP[SRCDIST+partner], Cmsgid, comm );
./src/pgesv/HPL_rollN.c
声明变量
MPI_Group g1, basegroup;
int ranks[1], ranks_out[1];
char hostname[50];
用MPI_Comm_group获取comm对应的local group g1和MPI_COMM_WORLD对应的global group basegroup
comm = PANEL->grid->col_comm;
MPI_Comm_group( comm, &g1 );
MPI_Comm_group( MPI_COMM_WORLD, &basegroup );
在MPI_Send/MPI_Irecv之前计算local rank(partner)相对应的global rank。注意:调用MPI_Group_translate_ranks之前必须先设置ranks[0]=partner(下面的片段中省略了这一步),否则ranks_out[0]没有意义
if( ierr == MPI_SUCCESS ){
MPI_Comm_rank(comm,&rank);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
gethostname(hostname, 50);
MPI_Group_translate_ranks(g1, 1, ranks, basegroup, ranks_out);
printf("<HPL_rollN-Irecv> [%s]rank[%d] receives from rank[%d]\n", hostname, rank, ranks_out[0]);
ierr = MPI_Irecv( Mptr( U, ibufR, 0, LDU ), 1, type[I_RECV], partner, Cmsgid, comm, &request );
}
if( ierr == MPI_SUCCESS ){
MPI_Comm_rank(comm,&rank);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
gethostname(hostname, 50);
MPI_Group_translate_ranks(g1, 1, ranks, basegroup, ranks_out);
printf("<HPL_rollN-Send> [%s]rank[%d] sends to rank[%d]\n", hostname, rank, ranks_out[0]); //print global rank
// printf("<HPL_rollN-Irecv> [%s]rank[%d] sends to rank[%d]\n", hostname, rank, partner); //only print local rank
ierr = MPI_Send( Mptr( U, ibufS, 0, LDU ), 1, type[I_SEND],
partner, Cmsgid, comm );
}
./src/comm/HPL_1ring.c
MPI_Group g1, basegroup;
int ranks[1], ranks_out[1], glo_rank,loc_rank; // glo_rank is important: previously the value was stored directly into the local variable rank, which caused computation errors
char hostname[50];
153 if( rank == root )
154 {
155 gethostname(hostname, 50);
156 MPI_Comm_group( comm, &g1 );
157 MPI_Comm_rank(MPI_COMM_WORLD, &glo_rank);
158 ranks[0]=MModAdd1(rank, size);
159 MPI_Comm_group( MPI_COMM_WORLD, &basegroup );
160 MPI_Group_translate_ranks(g1, 1, ranks, basegroup, ranks_out);
161 printf("<HPL_1ring> rank[%d] sends to rank[%d]\n", glo_rank, ranks_out[0]);
162 ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, MModAdd1( rank,
163 size ), msgid, comm );
173 if( go != 0 )
174 {
175 MPI_Comm_rank(MPI_COMM_WORLD, &glo_rank);
176 MPI_Comm_group( comm, &g1 );
177 MPI_Comm_group( MPI_COMM_WORLD, &basegroup );
178 ranks[0]=prev;
179 MPI_Group_translate_ranks(g1, 1, ranks, basegroup, ranks_out);
180 printf("<HPL_1ring> rank[%d] receives from rank[%d]\n", glo_rank, ranks_out[0]);
181 ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, prev, msgid,
182 comm, &PANEL->status[0] );
183 next = MModAdd1( rank, size );
184 if( ( ierr == MPI_SUCCESS ) && ( next != root ) )
185 {
186 ranks[0]=next;
187 MPI_Group_translate_ranks(g1, 1, ranks, basegroup, ranks_out);
188 printf("<HPL_1ring> rank[%d] sends to rank[%d]\n", glo_rank, ranks_out[0]);
189 ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next,
190 msgid, comm );