Actual source code: performance.c

  1: static char help[] = "Time vector operations on GPU\n";
  2: /* This program produces the results for Argonne Technical Report ANL-19/41.
  3:    The technical report and resources for generating data can be found in the
  4:    repository:  https://gitlab.com/hannah_mairs/summit-performance */

  6: #include <petscvec.h>

  8: int main(int argc, char **argv)
  9: {
 10:   Vec            v,w,x;
 11:   PetscInt       n=15;
 12:   PetscScalar    val;
 13:   PetscReal      norm1,norm2;
 14:   PetscRandom    rctx;
 15: #if defined(PETSC_USE_LOG)
 16:   PetscLogStage  stage;
 17: #endif

 19:   PetscInitialize(&argc,&argv,(char*)0,help);
 20:   PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);
 21:   PetscRandomCreate(PETSC_COMM_WORLD,&rctx);
 22:   PetscRandomSetFromOptions(rctx);
 23:   VecCreate(PETSC_COMM_WORLD,&v);
 24:   VecSetSizes(v,PETSC_DECIDE,n);
 25:   VecSetFromOptions(v);
 26:   VecDuplicate(v,&w);
 27:   VecSetRandom(v,rctx);
 28:   VecSetRandom(w,rctx);

 30:   /* create dummy vector to clear cache */
 31:   VecCreate(PETSC_COMM_WORLD,&x);
 32:   VecSetSizes(x,PETSC_DECIDE,10000000);
 33:   VecSetFromOptions(x);
 34:   VecSetRandom(x,rctx);

 36:   /* send v to GPU */
 37:   PetscBarrier(NULL);
 38:   VecNorm(v,NORM_1,&norm1);

 40:   /* register a stage work on GPU */
 41:   PetscLogStageRegister("Work on GPU", &stage);
 42:   PetscLogStagePush(stage);
 43:   VecNorm(w,NORM_1,&norm1); /* send w to GPU */
 44:   VecNorm(x,NORM_1,&norm1); /* clear cache */
 45:   PetscBarrier(NULL);
 46:   VecAXPY(w,1.0,v);
 47:   VecNorm(x,NORM_INFINITY,&norm1);
 48:   PetscBarrier(NULL);
 49:   VecDot(w,v,&val);
 50:   VecNorm(x,NORM_1,&norm1);
 51:   PetscBarrier(NULL);
 52:   VecSet(v,0.0);
 53:   VecNorm(x,NORM_2,&norm2);
 54:   PetscBarrier(NULL);
 55:   VecCopy(v,w);
 56:   PetscLogStagePop();

 58:   PetscPrintf(PETSC_COMM_WORLD,"Test completed successfully!\n");
 59:   VecDestroy(&v);
 60:   VecDestroy(&w);
 61:   VecDestroy(&x);
 62:   PetscRandomDestroy(&rctx);
 63:   PetscFinalize();
 64:   return 0;
 65: }

 67: /*TEST

 69:    testset:
 70:       nsize: 2
 71:       output_file: output/performance_cuda.out

 73:       test:
 74:         suffix: cuda
 75:         args: -vec_type mpicuda
 76:         requires: cuda

 78:       test:
 79:         suffix: hip
 80:         args: -vec_type mpihip
 81:         requires: hip

 83: TEST*/