2KB项目,专业的源码交易网站 帮助 收藏 每日签到

MPI - 消息传递接口

  • 时间:2019-01-23 18:46 编辑:2KB 来源:2KB.COM 阅读:386
  • 扫一扫,手机访问
  • 分享
摘要:
LAM/MPI 英文原文:MPI - Message Passing Interface

MPI是一种消息传递库的接口规范,该规范针对C/C++和Fortran程序定义。本文的示例使用C语言和LAM/MPI。LAM/MPI是消息传递接口(MPI)的一种高质量实现。

例1:demo.c

#include "mpi.h"
#include <stdio.h>

   /*
    * Minimal MPI program: each process prints the number of processes in
    * MPI_COMM_WORLD together with its own rank, then shuts MPI down.
    *
    * argc/argv are handed to MPI_Init unchanged.  Returns 0.
    */
   int main(int argc,char *argv[])
   {
   int  numtasks, rank;

   MPI_Init(&argc,&argv);

   MPI_Comm_size(MPI_COMM_WORLD,&numtasks);
   MPI_Comm_rank(MPI_COMM_WORLD,&rank);
   /* The newline must be written as an escape; a string literal cannot
      span source lines. */
   printf ("Number of tasks= %d My rank= %d\n", numtasks,rank);

   MPI_Finalize();
   return 0;
   }

命令

lamboot
mpicc -o demo demo.c
mpirun -np <number of processes> demo

结果

下一个示例使用MPI来设计矩阵乘法。

设矩阵大小为N,并且N可以被工作进程(worker)数整除(例如:矩阵大小为4、工作进程数也为4时,每个工作进程从矩阵A中领取1行)。主进程(master)向每个工作进程发送相同行数的矩阵A、完整的矩阵B,以及这些行在矩阵A中的起始行偏移量。每个工作进程接收主进程发来的数据,对分到的行完成矩阵乘法,得到结果矩阵C中对应的行,再连同行偏移量一起发回主进程。主进程从每个工作进程接收矩阵C的结果行,按偏移量写入,拼出完整的结果矩阵。

例2:

/**********************************************************************
 * MPI-based matrix multiplication AxB=C  
 *********************************************************************/


#include <stdio.h>
#include <sys/time.h>
#include "mpi.h"
#define N    4        /* number of rows and columns in matrix */

/* Receive status passed to every MPI_Recv call in main. */
MPI_Status status;

/* Operand matrices a and b, and c, which receives the rows of a x b. */
double a[N][N],b[N][N],c[N][N];  
       
/*
 * Multiplies two N x N matrices (a x b = c) with a master/worker scheme.
 *
 * Rank 0 (the master) fills a and b, sends every worker an equal band of
 * rows of a plus the whole of b, collects the matching rows of c, then
 * prints c and the elapsed wall-clock time.  Every other rank (a worker)
 * receives its band, computes the corresponding rows of c and returns them.
 *
 * Messages from master to worker use tag 1, replies use tag 2.
 *
 * Returns 0 on success, or 1 when there is no worker or N is not a
 * multiple of the number of workers.
 */
int main(int argc, char **argv)
{
  int numtasks,taskid,numworkers,source,dest,rows,offset,i,j,k;
  struct timeval start, stop;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &taskid);
  MPI_Comm_size(MPI_COMM_WORLD, &numtasks);

  numworkers = numtasks-1;

  /* This version only handles equal bands.  Every rank evaluates the same
     condition, so all of them leave together and nobody is left blocked in
     a receive.  The first test also protects the modulo against zero. */
  if (numworkers < 1 || N % numworkers != 0) {
    if (taskid == 0)
      fprintf(stderr,
              "Need at least one worker, and N (%d) must be a multiple of the number of workers (%d)\n",
              N, numworkers);
    MPI_Finalize();
    return 1;
  }

  /*---------------------------- master ----------------------------*/
  if (taskid == 0) {
    for (i=0; i<N; i++) {
      for (j=0; j<N; j++) {
        a[i][j]= 1.0;
        b[i][j]= 2.0;
      }
    }

    gettimeofday(&start, NULL);

    /* send matrix data to the worker tasks */
    rows = N/numworkers;
    offset = 0;

    for (dest=1; dest<=numworkers; dest++)
    {
      MPI_Send(&offset, 1, MPI_INT, dest, 1, MPI_COMM_WORLD);
      MPI_Send(&rows, 1, MPI_INT, dest, 1, MPI_COMM_WORLD);
      MPI_Send(&a[offset][0], rows*N, MPI_DOUBLE, dest, 1, MPI_COMM_WORLD);
      MPI_Send(&b[0][0], N*N, MPI_DOUBLE, dest, 1, MPI_COMM_WORLD);
      offset = offset + rows;
    }

    /* wait for results from all worker tasks; each reply says where its
       rows belong in c */
    for (source=1; source<=numworkers; source++)
    {
      MPI_Recv(&offset, 1, MPI_INT, source, 2, MPI_COMM_WORLD, &status);
      MPI_Recv(&rows, 1, MPI_INT, source, 2, MPI_COMM_WORLD, &status);
      MPI_Recv(&c[offset][0], rows*N, MPI_DOUBLE, source, 2, MPI_COMM_WORLD, &status);
    }

    gettimeofday(&stop, NULL);

    printf("Here is the result matrix:\n");
    for (i=0; i<N; i++) {
      for (j=0; j<N; j++)
        printf("%6.2f   ", c[i][j]);
      printf ("\n");
    }

    fprintf(stdout,"Time = %.6f\n\n",
         (stop.tv_sec+stop.tv_usec*1e-6)-(start.tv_sec+start.tv_usec*1e-6));

  }

  /*---------------------------- worker----------------------------*/
  if (taskid > 0) {
    source = 0;
    MPI_Recv(&offset, 1, MPI_INT, source, 1, MPI_COMM_WORLD, &status);
    MPI_Recv(&rows, 1, MPI_INT, source, 1, MPI_COMM_WORLD, &status);
    /* The band is stored from row 0 of the local a, whatever its offset
       in the master's matrix. */
    MPI_Recv(&a[0][0], rows*N, MPI_DOUBLE, source, 1, MPI_COMM_WORLD, &status);
    MPI_Recv(&b[0][0], N*N, MPI_DOUBLE, source, 1, MPI_COMM_WORLD, &status);

    /* Matrix multiplication */
    for (k=0; k<N; k++)
      for (i=0; i<rows; i++) {
        c[i][k] = 0.0;
        for (j=0; j<N; j++)
          c[i][k] = c[i][k] + a[i][j] * b[j][k];
      }

    MPI_Send(&offset, 1, MPI_INT, 0, 2, MPI_COMM_WORLD);
    MPI_Send(&rows, 1, MPI_INT, 0, 2, MPI_COMM_WORLD);
    MPI_Send(&c[0][0], rows*N, MPI_DOUBLE, 0, 2, MPI_COMM_WORLD);
  }

  MPI_Finalize();
  return 0;
}

结果

接下来,我们优化代码,让它符合以下要求:
  • 应使用#define N 定义矩阵的大小。
  • 矩阵中的元素类型应该是“float”。
  • 矩阵可以是方阵(例如,N*N),但N不必恰好被处理器的数量整除
  • 应检查计算打印结果矩阵的正确性。可使用#define PRINT定义打印代码段
  • 应测出该代码相对于顺序矩阵乘法代码的加速比(例如:取N = 100、200等)。

例3:解决方案

/**********************************************************************
 * MPI-based matrix multiplication AxB=C  
 *********************************************************************/


#include <stdio.h>
#include <sys/time.h>
#include "mpi.h"
#define N    500        /* number of rows and columns in matrix */

/* Receive status passed to every MPI_Recv call in main. */
MPI_Status status;

/* Operand matrices a and b, and c, which receives the rows of a x b. */
float a[N][N],b[N][N],c[N][N];  
       
/*
 * Multiplies two N x N float matrices (a x b = c) with a master/worker
 * scheme in which N does not have to be a multiple of the worker count.
 *
 * Rank 0 (the master) fills a and b, splits the rows of a into bands whose
 * sizes differ by at most one row, sends each worker its band plus the
 * whole of b, collects the matching rows of c and prints the elapsed
 * wall-clock time.  Every other rank (a worker) computes its rows of c and
 * returns them.  When there are more workers than rows, the surplus
 * workers are given zero rows and exchange only the offset/rows header.
 *
 * Messages from master to worker use tag 1, replies use tag 2.
 * Compile with -DPRINT to dump a, b and c.
 *
 * Returns 0 on success, or 1 when started without any worker process.
 */
int main(int argc, char **argv)
{
  int numtasks,taskid,numworkers,source,dest,rows,offset,remain,i,j,k;
  struct timeval start, stop;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &taskid);
  MPI_Comm_size(MPI_COMM_WORLD, &numtasks);

  numworkers = numtasks-1;

  /* With a single process there is nobody to do the work, and the row
     split below would divide by zero. */
  if (numworkers < 1) {
    fprintf(stderr, "At least two MPI processes are required (one master, one worker)\n");
    MPI_Finalize();
    return 1;
  }

  /*---------------------------- master ----------------------------*/
  if (taskid == 0) {
    for (i=0; i<N; i++) {
      for (j=0; j<N; j++) {
        a[i][j]= 1.0;
        b[i][j]= 2.0;
      }
    }

#ifdef PRINT
    /* print matrices */
    printf("Matrix A:\n");
    for (i=0; i<N; i++){
      for (j=0; j<N; j++)
        printf("%.3f\t",a[i][j]);
      printf("\n");
    }

    printf("Matrix B:\n");
    for (i=0; i<N; i++){
      for (j=0; j<N; j++)
        printf("%.3f\t",b[i][j]);
      printf("\n");
    }
#endif

    gettimeofday(&start, NULL);

    /* send matrix data to the worker tasks: the first `remain` workers
       take one extra row so that all N rows are handed out */
    remain = N % numworkers;
    offset = 0;

    for (dest=1; dest<=numworkers; dest++)
    {
      rows = N/numworkers + (dest <= remain ? 1 : 0);
      MPI_Send(&offset, 1, MPI_INT, dest, 1, MPI_COMM_WORLD);
      MPI_Send(&rows, 1, MPI_INT, dest, 1, MPI_COMM_WORLD);
      if (rows > 0) {
        MPI_Send(&a[offset][0], rows*N, MPI_FLOAT, dest, 1, MPI_COMM_WORLD);
        MPI_Send(&b[0][0], N*N, MPI_FLOAT, dest, 1, MPI_COMM_WORLD);
      }
      offset = offset + rows;
    }

    /* wait for results from all worker tasks; each reply says where its
       rows belong in c */
    for (source=1; source<=numworkers; source++)
    {
      MPI_Recv(&offset, 1, MPI_INT, source, 2, MPI_COMM_WORLD, &status);
      MPI_Recv(&rows, 1, MPI_INT, source, 2, MPI_COMM_WORLD, &status);
      if (rows > 0)
        MPI_Recv(&c[offset][0], rows*N, MPI_FLOAT, source, 2, MPI_COMM_WORLD, &status);
    }

    gettimeofday(&stop, NULL);
#ifdef PRINT
    printf("Here is the result matrix:\n");
    for (i=0; i<N; i++) {
      for (j=0; j<N; j++)
        printf("%6.2f   ", c[i][j]);
      printf ("\n");
    }
#endif
    fprintf(stdout,"Time = %.6f\n\n",
         (stop.tv_sec+stop.tv_usec*1e-6)-(start.tv_sec+start.tv_usec*1e-6));

  }

  /*---------------------------- worker----------------------------*/
  if (taskid > 0) {
    source = 0;
    MPI_Recv(&offset, 1, MPI_INT, source, 1, MPI_COMM_WORLD, &status);
    MPI_Recv(&rows, 1, MPI_INT, source, 1, MPI_COMM_WORLD, &status);

    if (rows > 0) {
      /* The band is stored from row 0 of the local a, whatever its offset
         in the master's matrix. */
      MPI_Recv(&a[0][0], rows*N, MPI_FLOAT, source, 1, MPI_COMM_WORLD, &status);
      MPI_Recv(&b[0][0], N*N, MPI_FLOAT, source, 1, MPI_COMM_WORLD, &status);

      /* Matrix multiplication */
      for (k=0; k<N; k++)
        for (i=0; i<rows; i++) {
          c[i][k] = 0.0;
          for (j=0; j<N; j++)
            c[i][k] = c[i][k] + a[i][j] * b[j][k];
        }
    }

    MPI_Send(&offset, 1, MPI_INT, 0, 2, MPI_COMM_WORLD);
    MPI_Send(&rows, 1, MPI_INT, 0, 2, MPI_COMM_WORLD);
    if (rows > 0)
      MPI_Send(&c[0][0], rows*N, MPI_FLOAT, 0, 2, MPI_COMM_WORLD);
  }

  MPI_Finalize();
  return 0;
}
 

例4:顺序矩阵代码

/* Matrix Multiplication */

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <sys/time.h>
#include <assert.h>

#define RANDLIMIT 5 /* entries of b are rand() % RANDLIMIT, i.e. 0 .. RANDLIMIT-1 */
#define N  500   /* Matrix Size */
#define NUMLIMIT 70.0 /* entries of a are whole numbers in 1 .. NUMLIMIT */

/* Operand matrices a and b, and the product c = a x b. */
float a[N][N];
float b[N][N];
float c[N][N];

/*
 * Sequential reference implementation of the N x N product c = a x b.
 *
 * Fills a and b with pseudo-random values from rand(), multiplies them with
 * the plain triple loop and prints the wall-clock time spent in the
 * multiplication only.  Compile with -DPRINT to dump the matrices as well.
 *
 * argc and argv are not used.  Returns 0.
 */
int main(int argc, char *argv[])
{
    struct timeval start, stop;
    int i,j,k;

    /* generate mxs  */
    for (i=0; i<N; i++)
        for (j=0; j<N; j++) {
            a[i][j] = 1+(int) (NUMLIMIT*rand()/(RAND_MAX+1.0));
            b[i][j] = (double) (rand() % RANDLIMIT);
        }

#ifdef PRINT
    /* print matrices */
    printf("Matrix A:\n");
    for (i=0; i<N; i++){
        for (j=0; j<N; j++)
            printf("%.3f\t",a[i][j]);
        printf("\n");
    }

    printf("Matrix B:\n");
    for (i=0; i<N; i++){
        for (j=0; j<N; j++)
            printf("%.3f\t",b[i][j]);
        printf("\n");
    }

    /* c has not been computed yet, so this shows its zero-initialised
       contents */
    printf("Matrix C:\n");
    for (i=0; i<N; i++){
        for (j=0; j<N; j++)
            printf("%.3f\t",c[i][j]);
        printf("\n");
    }
#endif

    gettimeofday(&start, 0);

    for (i=0; i<N; i++) {
        for (j=0; j<N; j++) {
            c[i][j] = 0.0;
            for (k=0; k<N; k++)
                c[i][j] = c[i][j] + a[i][k]*b[k][j];
        } /* end j loop */
    }

    gettimeofday(&stop, 0);

#ifdef PRINT
    /* print results*/
    printf("Answer c:\n");
    for (i=0; i<N; i++){
        for (j=0; j<N; j++)
            printf("%.3f\t",c[i][j]);
        printf("\n");
    }
#endif

    fprintf(stdout,"Time = %.6f\n\n",
         (stop.tv_sec+stop.tv_usec*1e-6)-(start.tv_sec+start.tv_usec*1e-6));
    return 0;
}


结果

检查结果可以清楚地看到:当矩阵规模较大时,顺序矩阵乘法程序比并行程序需要更多的时间。

本文中的所有译文仅用于学习和交流目的,转载请务必注明文章译者、出处、和本文链接。 2KB翻译工作遵照 CC 协议,如果我们的工作有侵犯到您的权益,请及时联系我们。


2KB项目(www.2kb.com,源码交易平台),提供担保交易、源码交易、虚拟商品、在家创业、在线创业、任务交易、网站设计、软件设计、网络兼职、站长交易、域名交易、链接买卖、网站交易、广告买卖、站长培训、建站美工等服务

  • 全部评论(0)
资讯详情页最新发布上方横幅
最新发布的资讯信息
【计算机/互联网|】Nginx出现502错误(2020-01-20 21:02)
【计算机/互联网|】网站运营全智能软手V0.1版发布(2020-01-20 12:16)
【计算机/互联网|】淘宝这是怎么了?(2020-01-19 19:15)
【行业动态|】谷歌关闭小米智能摄像头,因为窃听器显示了陌生人家中的照片(2020-01-15 09:42)
【行业动态|】据报道谷歌新闻终止了数字杂志,退还主动订阅(2020-01-15 09:39)
【行业动态|】康佳将OLED电视带到美国与LG和索尼竞争(2020-01-15 09:38)
【行业动态|】2020年最佳AV接收机(2020-01-15 09:35)
【行业动态|】2020年最佳流媒体设备:Roku,Apple TV,Firebar,Chromecast等(2020-01-15 09:31)
【行业动态|】CES 2020预览:更多的流媒体服务和订阅即将到来(2020-01-08 21:41)
【行业动态|】从埃隆·马斯克到杰夫·贝佐斯,这30位人物定义了2010年代(2020-01-01 15:14)
联系我们

Q Q: 7090832

电话:400-0011-990

邮箱:7090832@qq.com

时间:9:00-23:00

联系客服
商家入住 服务咨询 投拆建议 联系客服
0577-67068160
手机版

扫一扫进手机版
返回顶部