MPI是一种消息传递库的规范,其接口已针对C/C++和Fortran程序定义好。本文提供的示例使用C语言和LAM/MPI。LAM/MPI是消息传递接口(MPI)的一种高质量实现。
例1:demo.c
/*
 * demo.c - minimal MPI program.
 *
 * Every process initialises MPI, queries the size of MPI_COMM_WORLD and
 * its own rank in that communicator, prints both, and shuts MPI down.
 */
#include "mpi.h"
#include <stdio.h>

int main(int argc, char *argv[])
{
    int numtasks, rank, rc;

    /* Check the result of MPI_Init instead of leaving rc unused. */
    rc = MPI_Init(&argc, &argv);
    if (rc != MPI_SUCCESS) {
        fprintf(stderr, "Error starting MPI program. Terminating.\n");
        MPI_Abort(MPI_COMM_WORLD, rc);
        return 1;
    }

    MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* One line per process; the newline keeps output of different ranks apart. */
    printf("Number of tasks= %d My rank= %d\n", numtasks, rank);

    MPI_Finalize();
    return 0;
}
命令
lamboot
mpicc -o demo demo.c
mpirun -np <number of processes> demo
结果
下一个示例使用MPI来设计矩阵乘法。
假设矩阵的大小N可以被工作进程(worker)的数量整除(例如:矩阵大小为4、工作进程数也为4时,每个工作进程从矩阵A中领取1行)。主进程(master)向每个工作进程发送相同行数的矩阵A的若干行、完整的矩阵B,以及这些行在矩阵中的起始行偏移。每个工作进程接收主进程发送的数据,完成这些行的矩阵乘法,生成结果矩阵C的对应行,并将其连同行偏移一起发回主进程。主进程从每个工作进程接收矩阵C的结果行,按行偏移拼成完整的结果矩阵。
例2:
/********************************************************************** * MPI-based matrix multiplication AxB=C *********************************************************************/ #include <stdio.h> #include "mpi.h" #define N 4 /* number of rows and columns in matrix */ MPI_Status status; double a[N][N],b[N][N],c[N][N]; main(int argc, char **argv) { int numtasks,taskid,numworkers,source,dest,rows,offset,i,j,k; struct timeval start, stop; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &taskid); MPI_Comm_size(MPI_COMM_WORLD, &numtasks); numworkers = numtasks-1; /*---------------------------- master ----------------------------*/ if (taskid == 0) { for (i=0; i<N; i++) { for (j=0; j<N; j++) { a[i][j]= 1.0; b[i][j]= 2.0; } } gettimeofday(&start, 0); /* send matrix data to the worker tasks */ rows = N/numworkers; offset = 0; for (dest=1; dest<=numworkers; dest++) { MPI_Send(&offset, 1, MPI_INT, dest, 1, MPI_COMM_WORLD); MPI_Send(&rows, 1, MPI_INT, dest, 1, MPI_COMM_WORLD); MPI_Send(&a[offset][0], rows*N, MPI_DOUBLE,dest,1, MPI_COMM_WORLD); MPI_Send(&b, N*N, MPI_DOUBLE, dest, 1, MPI_COMM_WORLD); offset = offset + rows; } /* wait for results from all worker tasks */ for (i=1; i<=numworkers; i++) { source = i; MPI_Recv(&offset, 1, MPI_INT, source, 2, MPI_COMM_WORLD, &status); MPI_Recv(&rows, 1, MPI_INT, source, 2, MPI_COMM_WORLD, &status); MPI_Recv(&c[offset][0], rows*N, MPI_DOUBLE, source, 2, MPI_COMM_WORLD, &status); } gettimeofday(&stop, 0); printf("Here is the result matrix: "); for (i=0; i<N; i++) { for (j=0; j<N; j++) printf("%6.2f ", c[i][j]); printf (" "); } fprintf(stdout,"Time = %.6f ", (stop.tv_sec+stop.tv_usec*1e-6)-(start.tv_sec+start.tv_usec*1e-6)); } /*---------------------------- worker----------------------------*/ if (taskid > 0) { source = 0; MPI_Recv(&offset, 1, MPI_INT, source, 1, MPI_COMM_WORLD, &status); MPI_Recv(&rows, 1, MPI_INT, source, 1, MPI_COMM_WORLD, &status); MPI_Recv(&a, rows*N, MPI_DOUBLE, source, 1, MPI_COMM_WORLD, &status); 
MPI_Recv(&b, N*N, MPI_DOUBLE, source, 1, MPI_COMM_WORLD, &status); /* Matrix multiplication */ for (k=0; k<N; k++) for (i=0; i<rows; i++) { c[i][k] = 0.0; for (j=0; j<N; j++) c[i][k] = c[i][k] + a[i][j] * b[j][k]; } MPI_Send(&offset, 1, MPI_INT, 0, 2, MPI_COMM_WORLD); MPI_Send(&rows, 1, MPI_INT, 0, 2, MPI_COMM_WORLD); MPI_Send(&c, rows*N, MPI_DOUBLE, 0, 2, MPI_COMM_WORLD); } MPI_Finalize(); }
结果
例3:解决方案
/********************************************************************** * MPI-based matrix multiplication AxB=C *********************************************************************/ #include <stdio.h> #include <sys/time.h> #include "mpi.h" #define N 500 /* number of rows and columns in matrix */ MPI_Status status; float a[N][N],b[N][N],c[N][N]; main(int argc, char **argv) { int numtasks,taskid,numworkers,source,dest,rows,offset,remain,i,j,k; struct timeval start, stop; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &taskid); MPI_Comm_size(MPI_COMM_WORLD, &numtasks); numworkers = numtasks-1; /*---------------------------- master ----------------------------*/ if (taskid == 0) { for (i=0; i<N; i++) { for (j=0; j<N; j++) { a[i][j]= 1.0; b[i][j]= 2.0; } } #ifdef PRINT /* print matrices */ printf("Matrix A: "); for (i=0; i<N; i++){ for (j=0; j<N; j++) printf("%.3f ",a[i][j]); printf(" "); } printf("Matrix B: "); for (i=0; i<N; i++){ for (j=0; j<N; j++) printf("%.3f ",b[i][j]); printf(" "); } #endif gettimeofday(&start, 0); /* send matrix data to the worker tasks */ if (N <= numworkers) { rows = 1; } else { if (N%numworkers!=0) // Not divisible by numworkers { rows = N/numworkers+1; remain = N%numworkers; } else { rows = N/numworkers; } } offset = 0; for (dest=1; dest<=numworkers; dest++, remain--) { MPI_Send(&offset, 1, MPI_INT, dest, 1, MPI_COMM_WORLD); MPI_Send(&rows, 1, MPI_INT, dest, 1, MPI_COMM_WORLD); MPI_Send(&a[offset][0], rows*N, MPI_FLOAT,dest,1, MPI_COMM_WORLD); MPI_Send(&b, N*N, MPI_FLOAT, dest, 1, MPI_COMM_WORLD); offset = offset + rows; if(remain==1) rows-=1; } /* wait for results from all worker tasks */ for (i=1; i<=numworkers; i++) { source = i; MPI_Recv(&offset, 1, MPI_INT, source, 2, MPI_COMM_WORLD, &status); MPI_Recv(&rows, 1, MPI_INT, source, 2, MPI_COMM_WORLD, &status); MPI_Recv(&c[offset][0], rows*N, MPI_FLOAT, source, 2, MPI_COMM_WORLD, &status); } gettimeofday(&stop, 0); #ifdef PRINT printf("Here is the result matrix: "); for (i=0; i<N; 
i++) { for (j=0; j<N; j++) printf("%6.2f ", c[i][j]); printf (" "); } #endif fprintf(stdout,"Time = %.6f ", (stop.tv_sec+stop.tv_usec*1e-6)-(start.tv_sec+start.tv_usec*1e-6)); } /*---------------------------- worker----------------------------*/ if (taskid > 0) { source = 0; MPI_Recv(&offset, 1, MPI_INT, source, 1, MPI_COMM_WORLD, &status); MPI_Recv(&rows, 1, MPI_INT, source, 1, MPI_COMM_WORLD, &status); MPI_Recv(&a, rows*N, MPI_FLOAT, source, 1, MPI_COMM_WORLD, &status); MPI_Recv(&b, N*N, MPI_FLOAT, source, 1, MPI_COMM_WORLD, &status); /* Matrix multiplication */ for (k=0; k<N; k++) for (i=0; i<rows; i++) { c[i][k] = 0.0; for (j=0; j<N; j++) c[i][k] = c[i][k] + a[i][j] * b[j][k]; } MPI_Send(&offset, 1, MPI_INT, 0, 2, MPI_COMM_WORLD); MPI_Send(&rows, 1, MPI_INT, 0, 2, MPI_COMM_WORLD); MPI_Send(&c, rows*N, MPI_FLOAT, 0, 2, MPI_COMM_WORLD); } MPI_Finalize(); }
例4:顺序矩阵代码
/*
 * Matrix Multiplication (sequential version)
 *
 * Fills two N x N matrices with pseudo-random values, multiplies them
 * with the standard triple loop and prints the wall-clock time of the
 * multiplication. Compile with -DPRINT to also print the matrices.
 */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <sys/time.h>
#include <assert.h>

#define RANDLIMIT 5 /* Magnitude limit of generated randno.*/
#define N 500 /* Matrix Size */
#define NUMLIMIT 70.0

float a[N][N];
float b[N][N];
float c[N][N];

#ifdef PRINT
/* Print a title line followed by the matrix, one row per output line. */
static void print_matrix(const char *title, float m[N][N])
{
    int i, j;

    printf("%s\n", title);
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++)
            printf("%.3f ", m[i][j]);
        printf("\n");
    }
}
#endif

int main(int argc, char *argv[])
{
    struct timeval start, stop;
    int i, j, k;

    (void)argc;
    (void)argv;

    /* generate mxs: a holds integers in 1..NUMLIMIT, b integers in 0..RANDLIMIT-1.
     * rand() is never seeded, so every run produces the same matrices. */
    for (i = 0; i < N; i++)
        for (j = 0; j < N; j++) {
            a[i][j] = 1 + (int)(NUMLIMIT * rand() / (RAND_MAX + 1.0));
            b[i][j] = (double)(rand() % RANDLIMIT);
        }

#ifdef PRINT
    /* print matrices; c has not been computed yet and is still all zeros here */
    print_matrix("Matrix A:", a);
    print_matrix("Matrix B:", b);
    print_matrix("Matrix C:", c);
#endif

    gettimeofday(&start, 0);

    /* Standard row-by-row product: c[i][j] = sum over k of a[i][k] * b[k][j]. */
    for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) {
            c[i][j] = 0.0;
            for (k = 0; k < N; k++)
                c[i][j] = c[i][j] + a[i][k] * b[k][j];
        } /* end j loop */
    }

    gettimeofday(&stop, 0);

#ifdef PRINT
    /* print results*/
    print_matrix("Answer c:", c);
#endif

    fprintf(stdout, "Time = %.6f\n",
            (stop.tv_sec - start.tv_sec) + (stop.tv_usec - start.tv_usec) * 1e-6);

    return 0;
}
结果
检查结果,我们可以清楚地看到,当矩阵规模较大时,顺序程序比矩阵乘法的并行程序需要更多的时间。
本文中的所有译文仅用于学习和交流目的,转载请务必注明文章译者、出处、和本文链接。 2KB翻译工作遵照 CC 协议,如果我们的工作有侵犯到您的权益,请及时联系我们。2KB项目(www.2kb.com,源码交易平台),提供担保交易、源码交易、虚拟商品、在家创业、在线创业、任务交易、网站设计、软件设计、网络兼职、站长交易、域名交易、链接买卖、网站交易、广告买卖、站长培训、建站美工等服务