Actual source code: mpikok.kokkos.cxx
2: /*
3: This file contains routines for Parallel vector operations.
4: */
6: #include <petscvec_kokkos.hpp>
7: #include <petsc/private/vecimpl.h>
8: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
9: #include <../src/vec/vec/impls/seq/kokkos/veckokkosimpl.hpp>
11: PetscErrorCode VecDestroy_MPIKokkos(Vec v)
12: {
13: Vec_Kokkos *veckok = static_cast<Vec_Kokkos*>(v->spptr);
15: delete veckok;
16: VecDestroy_MPI(v);
17: return 0;
18: }
20: PetscErrorCode VecNorm_MPIKokkos(Vec xin,NormType type,PetscReal *z)
21: {
22: PetscReal sum,work = 0.0;
24: if (type == NORM_2 || type == NORM_FROBENIUS) {
25: VecNorm_SeqKokkos(xin,NORM_2,&work);
26: work *= work;
27: MPIU_Allreduce(&work,&sum,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));
28: *z = PetscSqrtReal(sum);
29: } else if (type == NORM_1) {
30: /* Find the local part */
31: VecNorm_SeqKokkos(xin,NORM_1,&work);
32: /* Find the global max */
33: MPIU_Allreduce(&work,z,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));
34: } else if (type == NORM_INFINITY) {
35: /* Find the local max */
36: VecNorm_SeqKokkos(xin,NORM_INFINITY,&work);
37: /* Find the global max */
38: MPIU_Allreduce(&work,z,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));
39: } else if (type == NORM_1_AND_2) {
40: PetscReal temp[2];
41: VecNorm_SeqKokkos(xin,NORM_1,temp);
42: VecNorm_SeqKokkos(xin,NORM_2,temp+1);
43: temp[1] = temp[1]*temp[1];
44: MPIU_Allreduce(temp,z,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));
45: z[1] = PetscSqrtReal(z[1]);
46: }
47: return 0;
48: }
50: /* z = y^H x */
51: PetscErrorCode VecDot_MPIKokkos(Vec xin,Vec yin,PetscScalar *z)
52: {
53: PetscScalar sum,work;
55: VecDot_SeqKokkos(xin,yin,&work);
56: MPIU_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));
57: *z = sum;
58: return 0;
59: }
61: PetscErrorCode VecMDot_MPIKokkos(Vec xin,PetscInt nv,const Vec y[],PetscScalar *z)
62: {
63: PetscScalar awork[128],*work = awork;
65: if (nv > 128) PetscMalloc1(nv,&work);
66: VecMDot_SeqKokkos(xin,nv,y,work);
67: MPIU_Allreduce(work,z,nv,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));
68: if (nv > 128) PetscFree(work);
69: return 0;
70: }
72: /* z = y^T x */
73: PetscErrorCode VecTDot_MPIKokkos(Vec xin,Vec yin,PetscScalar *z)
74: {
75: PetscScalar sum,work;
77: VecTDot_SeqKokkos(xin,yin,&work);
78: MPIU_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));
79: *z = sum;
80: return 0;
81: }
83: PetscErrorCode VecMTDot_MPIKokkos(Vec xin,PetscInt nv,const Vec y[],PetscScalar *z)
84: {
85: PetscScalar awork[128],*work = awork;
87: if (nv > 128) PetscMalloc1(nv,&work);
88: VecMTDot_SeqKokkos(xin,nv,y,work);
89: MPIU_Allreduce(work,z,nv,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));
90: if (nv > 128) PetscFree(work);
91: return 0;
92: }
94: PetscErrorCode VecMax_MPIKokkos(Vec xin,PetscInt *idx,PetscReal *z)
95: {
96: PetscReal work;
98: /* Find the local max */
99: VecMax_SeqKokkos(xin,idx,&work);
100: #if defined(PETSC_HAVE_MPIUNI)
101: *z = work;
102: #else
103: /* Find the global max */
104: if (!idx) { /* User does not need idx */
105: MPIU_Allreduce(&work,z,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));
106: } else {
107: struct { PetscReal v; PetscInt i; } in,out;
109: in.v = work;
110: in.i = *idx + xin->map->rstart;
111: MPIU_Allreduce(&in,&out,1,MPIU_REAL_INT,MPIU_MAXLOC,PetscObjectComm((PetscObject)xin));
112: *z = out.v;
113: *idx = out.i;
114: }
115: #endif
116: return 0;
117: }
119: PetscErrorCode VecMin_MPIKokkos(Vec xin,PetscInt *idx,PetscReal *z)
120: {
121: PetscReal work;
123: /* Find the local Min */
124: VecMin_SeqKokkos(xin,idx,&work);
125: #if defined(PETSC_HAVE_MPIUNI)
126: *z = work;
127: #else
128: /* Find the global Min */
129: if (!idx) {
130: MPIU_Allreduce(&work,z,1,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)xin));
131: } else {
132: struct { PetscReal v; PetscInt i; } in,out;
134: in.v = work;
135: in.i = *idx + xin->map->rstart;
136: MPIU_Allreduce(&in,&out,1,MPIU_REAL_INT,MPIU_MINLOC,PetscObjectComm((PetscObject)xin));
137: *z = out.v;
138: *idx = out.i;
139: }
140: #endif
141: return 0;
142: }
144: PetscErrorCode VecDuplicate_MPIKokkos(Vec win,Vec *vv)
145: {
146: Vec v;
147: Vec_MPI *vecmpi;
148: Vec_Kokkos *veckok;
150: /* Reuse VecDuplicate_MPI, which contains a lot of stuff */
151: VecDuplicate_MPI(win,&v); /* after the call, v is a VECMPI, with data zero'ed */
152: PetscObjectChangeTypeName((PetscObject)v,VECMPIKOKKOS);
153: PetscMemcpy(v->ops,win->ops,sizeof(struct _VecOps));
155: /* Build the Vec_Kokkos struct */
156: vecmpi = static_cast<Vec_MPI*>(v->data);
157: veckok = new Vec_Kokkos(v->map->n,vecmpi->array);
158: Kokkos::deep_copy(veckok->v_dual.view_device(),0.0);
159: v->spptr = veckok;
160: v->offloadmask = PETSC_OFFLOAD_KOKKOS;
161: *vv = v;
162: return 0;
163: }
165: PetscErrorCode VecDotNorm2_MPIKokkos(Vec s,Vec t,PetscScalar *dp,PetscScalar *nm)
166: {
167: PetscScalar work[2],sum[2];
169: VecDotNorm2_SeqKokkos(s,t,work,work+1);
170: MPIU_Allreduce(&work,&sum,2,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)s));
171: *dp = sum[0];
172: *nm = sum[1];
173: return 0;
174: }
176: static PetscErrorCode VecGetSubVector_MPIKokkos(Vec x,IS is,Vec *y)
177: {
178: VecGetSubVector_Kokkos_Private(x,PETSC_TRUE,is,y);
179: return 0;
180: }
182: static PetscErrorCode VecSetOps_MPIKokkos(Vec v)
183: {
184: v->ops->abs = VecAbs_SeqKokkos;
185: v->ops->reciprocal = VecReciprocal_SeqKokkos;
186: v->ops->pointwisemult = VecPointwiseMult_SeqKokkos;
187: v->ops->setrandom = VecSetRandom_SeqKokkos;
188: v->ops->dotnorm2 = VecDotNorm2_MPIKokkos;
189: v->ops->waxpy = VecWAXPY_SeqKokkos;
190: v->ops->norm = VecNorm_MPIKokkos;
191: v->ops->min = VecMin_MPIKokkos;
192: v->ops->max = VecMax_MPIKokkos;
193: v->ops->sum = VecSum_SeqKokkos;
194: v->ops->shift = VecShift_SeqKokkos;
195: v->ops->scale = VecScale_SeqKokkos;
196: v->ops->copy = VecCopy_SeqKokkos;
197: v->ops->set = VecSet_SeqKokkos;
198: v->ops->swap = VecSwap_SeqKokkos;
199: v->ops->axpy = VecAXPY_SeqKokkos;
200: v->ops->axpby = VecAXPBY_SeqKokkos;
201: v->ops->maxpy = VecMAXPY_SeqKokkos;
202: v->ops->aypx = VecAYPX_SeqKokkos;
203: v->ops->axpbypcz = VecAXPBYPCZ_SeqKokkos;
204: v->ops->pointwisedivide = VecPointwiseDivide_SeqKokkos;
205: v->ops->placearray = VecPlaceArray_SeqKokkos;
206: v->ops->replacearray = VecReplaceArray_SeqKokkos;
207: v->ops->resetarray = VecResetArray_SeqKokkos;
209: v->ops->dot = VecDot_MPIKokkos;
210: v->ops->tdot = VecTDot_MPIKokkos;
211: v->ops->mdot = VecMDot_MPIKokkos;
212: v->ops->mtdot = VecMTDot_MPIKokkos;
214: v->ops->dot_local = VecDot_SeqKokkos;
215: v->ops->tdot_local = VecTDot_SeqKokkos;
216: v->ops->mdot_local = VecMDot_SeqKokkos;
217: v->ops->mtdot_local = VecMTDot_SeqKokkos;
219: v->ops->norm_local = VecNorm_SeqKokkos;
220: v->ops->duplicate = VecDuplicate_MPIKokkos;
221: v->ops->destroy = VecDestroy_MPIKokkos;
222: v->ops->getlocalvector = VecGetLocalVector_SeqKokkos;
223: v->ops->restorelocalvector = VecRestoreLocalVector_SeqKokkos;
224: v->ops->getlocalvectorread = VecGetLocalVector_SeqKokkos;
225: v->ops->restorelocalvectorread = VecRestoreLocalVector_SeqKokkos;
226: v->ops->getarraywrite = VecGetArrayWrite_SeqKokkos;
227: v->ops->getarray = VecGetArray_SeqKokkos;
228: v->ops->restorearray = VecRestoreArray_SeqKokkos;
229: v->ops->getarrayandmemtype = VecGetArrayAndMemType_SeqKokkos;
230: v->ops->restorearrayandmemtype = VecRestoreArrayAndMemType_SeqKokkos;
231: v->ops->getarraywriteandmemtype= VecGetArrayWriteAndMemType_SeqKokkos;
232: v->ops->getsubvector = VecGetSubVector_MPIKokkos;
233: v->ops->restoresubvector = VecRestoreSubVector_SeqKokkos;
234: return 0;
235: }
237: /*MC
238: VECMPIKOKKOS - VECMPIKOKKOS = "mpikokkos" - The basic parallel vector, modified to use Kokkos
240: Options Database Keys:
241: . -vec_type mpikokkos - sets the vector type to VECMPIKOKKOS during a call to VecSetFromOptions()
243: Level: beginner
245: .seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateMPIKokkosWithArray(), VECMPI, VecType, VecCreateMPI()
246: M*/
247: PetscErrorCode VecCreate_MPIKokkos(Vec v)
248: {
249: Vec_MPI *vecmpi;
250: Vec_Kokkos *veckok;
252: PetscKokkosInitializeCheck();
253: PetscLayoutSetUp(v->map);
254: VecCreate_MPI(v); /* Calloc host array */
256: vecmpi = static_cast<Vec_MPI*>(v->data);
257: PetscObjectChangeTypeName((PetscObject)v,VECMPIKOKKOS);
258: VecSetOps_MPIKokkos(v);
259: veckok = new Vec_Kokkos(v->map->n,vecmpi->array,NULL); /* Alloc device array but do not init it */
260: v->spptr = static_cast<void*>(veckok);
261: v->offloadmask = PETSC_OFFLOAD_KOKKOS;
262: return 0;
263: }
265: /*@C
266: VecCreateMPIKokkosWithArray - Creates a parallel, array-style vector,
267: where the user provides the GPU array space to store the vector values.
269: Collective
271: Input Parameters:
272: + comm - the MPI communicator to use
273: . bs - block size, same meaning as VecSetBlockSize()
274: . n - local vector length, cannot be PETSC_DECIDE
275: . N - global vector length (or PETSC_DECIDE to have calculated)
276: - array - the user provided GPU array to store the vector values
278: Output Parameter:
279: . vv - the vector
281: Notes:
282: Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
283: same type as an existing vector.
285: If the user-provided array is NULL, then VecKokkosPlaceArray() can be used
286: at a later stage to SET the array for storing the vector values.
288: PETSc does NOT free the array when the vector is destroyed via VecDestroy().
289: The user should not free the array until the vector is destroyed.
291: Level: intermediate
293: .seealso: VecCreateSeqKokkosWithArray(), VecCreateMPIWithArray(), VecCreateSeqWithArray(),
294: VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost(),
295: VecCreateMPI(), VecCreateGhostWithArray(), VecPlaceArray()
297: @*/
298: PetscErrorCode VecCreateMPIKokkosWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,PetscInt N,const PetscScalar darray[],Vec *v)
299: {
300: Vec w;
301: Vec_Kokkos *veckok;
302: Vec_MPI *vecmpi;
303: PetscScalar *harray;
306: PetscKokkosInitializeCheck();
307: PetscSplitOwnership(comm,&n,&N);
308: VecCreate(comm,&w);
309: VecSetSizes(w,n,N);
310: VecSetBlockSize(w,bs);
311: PetscLayoutSetUp(w->map);
313: if (std::is_same<DefaultMemorySpace,Kokkos::HostSpace>::value) {harray = const_cast<PetscScalar*>(darray);}
314: else PetscMalloc1(w->map->n,&harray); /* If device is not the same as host, allocate the host array ourselves */
316: VecCreate_MPI_Private(w,PETSC_FALSE/*alloc*/,0/*nghost*/,harray); /* Build a sequential vector with provided data */
317: vecmpi = static_cast<Vec_MPI*>(w->data);
319: if (!std::is_same<DefaultMemorySpace,Kokkos::HostSpace>::value) vecmpi->array_allocated = harray; /* The host array was allocated by petsc */
321: PetscObjectChangeTypeName((PetscObject)w,VECMPIKOKKOS);
322: VecSetOps_MPIKokkos(w);
323: veckok = new Vec_Kokkos(n,harray,const_cast<PetscScalar*>(darray));
324: veckok->v_dual.modify_device(); /* Mark the device is modified */
325: w->spptr = static_cast<void*>(veckok);
326: w->offloadmask = PETSC_OFFLOAD_KOKKOS;
327: *v = w;
328: return 0;
329: }
331: /*
332: VecCreateMPIKokkosWithArrays_Private - Creates a Kokkos parallel, array-style vector
333: with user-provided arrays on host and device.
335: Collective
337: Input Parameter:
338: + comm - the communicator
339: . bs - the block size
340: . n - the local vector length
341: . N - the global vector length
342: - harray - host memory where the vector elements are to be stored.
343: - darray - device memory where the vector elements are to be stored.
345: Output Parameter:
346: . v - the vector
348: Notes:
349: If there is no device, then harray and darray must be the same.
350: If n is not zero, then harray and darray must be allocated.
351: After the call, the created vector is supposed to be in a synchronized state, i.e.,
352: we suppose harray and darray have the same data.
354: PETSc does NOT free the array when the vector is destroyed via VecDestroy().
355: The user should not free the array until the vector is destroyed.
356: */
357: PetscErrorCode VecCreateMPIKokkosWithArrays_Private(MPI_Comm comm,PetscInt bs,PetscInt n,PetscInt N,const PetscScalar harray[],const PetscScalar darray[],Vec *v)
358: {
359: Vec w;
361: PetscKokkosInitializeCheck();
362: if (n) {
365: }
367: VecCreateMPIWithArray(comm,bs,n,N,harray,&w);
368: PetscObjectChangeTypeName((PetscObject)w,VECMPIKOKKOS); /* Change it to Kokkos */
369: VecSetOps_MPIKokkos(w);
370: w->spptr = new Vec_Kokkos(n,const_cast<PetscScalar*>(harray),const_cast<PetscScalar*>(darray));
371: w->offloadmask = PETSC_OFFLOAD_KOKKOS;
372: *v = w;
373: return 0;
374: }
376: /*MC
377: VECKOKKOS - VECKOKKOS = "kokkos" - The basic vector, modified to use Kokkos
379: Options Database Keys:
380: . -vec_type kokkos - sets the vector type to VECKOKKOS during a call to VecSetFromOptions()
382: Level: beginner
384: .seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateMPIKokkosWithArray(), VECMPI, VecType, VecCreateMPI()
385: M*/
386: PetscErrorCode VecCreate_Kokkos(Vec v)
387: {
388: PetscMPIInt size;
390: MPI_Comm_size(PetscObjectComm((PetscObject)v),&size);
391: if (size == 1) VecSetType(v,VECSEQKOKKOS);
392: else VecSetType(v,VECMPIKOKKOS);
393: return 0;
394: }