没什么太多东西想写,写了5个实验,第四个用的东西还多些。说白了MPI是用来通讯的,OpenMP用来把串行程序并行化的。
贴个程序就算了结了……
解线性方程组的Jordan消去法
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <mpi.h>
#include <omp.h>
#define A(i,j) A[i*(N+1)+j]
#define D(i,j) D[i*(N+1)+j]
// Rows owned by each process under the cyclic distribution:
// the ceiling of rowN / pN.
int getRowPerProcess(int rowN, int pN)
{
return (rowN + pN - 1) / pN;
}
// Exchange the contents of two float variables in place.
void mySwap(float& loc1, float& loc2)
{
const float tmp = loc1;
loc1 = loc2;
loc2 = tmp;
}
// Exchange the contents of two int variables in place.
void mySwap(int& loc1, int& loc2)
{
const int tmp = loc1;
loc1 = loc2;
loc2 = tmp;
}
int main(int argc,char **argv)
{
//omp_set_num_threads(3);
int rank,p,N,rowPP;
float *A=0, *D=0, *Row=0, *B=0, *B2=0;
int *xLoc = 0;
MPI_Status status;
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD,&p);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
if (rank == 0)
{
FILE *data = fopen("in.txt","rt");
fscanf(data,"%d", &N);
A=(float*)malloc(sizeof(float)*(N+1)*N);
for(int i=0;i<N;i++)
{
for(int j=0;j<=N;j++)
fscanf(data,"%f",&A(i,j));
}
fclose(data);
}
MPI_Bcast(&N,1,MPI_INT,0,MPI_COMM_WORLD);
Row = (float*)malloc(sizeof(float)*(N+1));
rowPP = getRowPerProcess(N,p);
D = (float*)malloc(sizeof(float)*(N+1)*rowPP);
if (rank == 0)
{
xLoc = (int*)malloc(sizeof(int)*N);
B = (float*)malloc(sizeof(int)*N);
#pragma omp parallel for
for(int i=0; i<N; ++i)
{
xLoc[i]=i;
//Send Data to other processes
if (i%p == 0)
memcpy(D+(i/p)*(N+1),&A(i,0),(N+1)*sizeof(float));
else
MPI_Send(&A(i,0),N+1,MPI_FLOAT,i%p,i/p,MPI_COMM_WORLD);
}
}
else
{
int i=rank;
while (i<N)
{
MPI_Recv(D+(i/p)*(N+1),N+1,MPI_FLOAT,0,i/p,MPI_COMM_WORLD,&status);
i+=p;
}
}
//Work
for(int i=0;i<N;i++)
{
if (i%p == rank)
{
int curRow = i/p;
//找到当前行[i,N-1]最大值,进行行交换
int start = curRow*(N+1), end = start+N-1,maxLoc = i;
float max = D[start+i];
#pragma omp parallel for shared(max,maxLoc)
for (int j=start+1+i; j<=end; j++)
{
if (fabs(D[j]) > fabs(max))
{
#pragma omp critical
{
max = D[j];
maxLoc = j%(N+1);
}
}
}
if (rank == 0)
mySwap(xLoc[i],xLoc[maxLoc]);
MPI_Bcast(&maxLoc,1,MPI_INT,rank,MPI_COMM_WORLD);
#pragma omp parallel for firstprivate(i,maxLoc)
for (int j=0; j<rowPP; ++j)
mySwap(D(j,i), D(j,maxLoc));
#pragma omp parallel for firstprivate(max)
for (int j=end+1; j>=start+i; --j)
D[j]/=max;
#pragma omp parallel for firstprivate(curRow)
for (int j=0; j<rowPP; ++j)
{
if (curRow == j) continue;
float factor = D(j,i);
for(int k=i;k<=N; ++k)
D(j,k)-=factor*D(curRow,k);
}
#pragma omp parallel for firstprivate(rank,N,curRow)
for (int j=0; j<p; ++j)
{
if (j == rank) continue;
MPI_Send(&D(curRow,0),N+1,MPI_FLOAT,j,11,MPI_COMM_WORLD);
}
}
else
{
int maxLoc=0;
MPI_Bcast(&maxLoc,1,MPI_INT,i%p,MPI_COMM_WORLD);
#pragma omp parallel for firstprivate(i,maxLoc)
for (int j=0; j<rowPP; ++j)
mySwap(D(j,i), D(j,maxLoc));
if (rank == 0)
mySwap(xLoc[i],xLoc[maxLoc]);
MPI_Recv(Row,N+1,MPI_FLOAT,i%p,11,MPI_COMM_WORLD,&status);
#pragma omp parallel for
for (int j=0; j<rowPP; ++j)
{
float factor = D(j,i);
for(int k=i;k<=N; ++k)
D(j,k)-=factor*Row[k];
}
}
}
if(rank!=0)
{
#pragma omp parallel for firstprivate(rowPP,N,p,rank)
for(int i=0;i<rowPP;i++)
if (rank+i*p < N)
MPI_Send(&D(i,N),1,MPI_FLOAT,0,rank+i*p,MPI_COMM_WORLD);
}
else//接收结果
{
#pragma omp parallel for firstprivate(rowPP,N,p)
for(int i=0;i<rowPP;i++)
if (i*p < N)
B[i*p] = D(i,N);
#pragma omp parallel for firstprivate(N,p)
for (int i=0; i<N; ++i)
{
if (i%p == 0)
continue;
MPI_Recv(B+i,1,MPI_FLOAT,i%p,i,MPI_COMM_WORLD,&status);
}
B2 = (float*)malloc(sizeof(float)*N);
#pragma omp parallel for firstprivate(N)
for (int i=0; i<N; ++i)
B2[xLoc[i]] = B[i];
FILE* out = fopen("out.txt","wt");
fprintf(out,"Answers:/n");
for (int i=0; i<N; ++i)
fprintf(out,"%.3f/n",B2[i]);
fclose(out);
}
if (A != 0) free(A);
if (D != 0) free(D);
if (Row != 0) free(Row);
if (B != 0) free(B);
if (B2 != 0) free(B2);
if (xLoc != 0) free(xLoc);
MPI_Finalize();
return 0;
}