Actual source code: math2opusutils.cu
1: #include <petsc/private/matimpl.h>
2: #include <petsc/private/vecimpl.h>
3: #include <petscsf.h>
4: #if defined(PETSC_HAVE_CUDA)
5: #include <thrust/for_each.h>
6: #include <thrust/device_vector.h>
7: #include <thrust/execution_policy.h>
8: #endif
10: PETSC_INTERN PetscErrorCode PetscSFGetVectorSF(PetscSF sf, PetscInt nv, PetscInt ldr, PetscInt ldl, PetscSF *vsf)
11: {
12: PetscSF rankssf;
13: const PetscSFNode *iremote;
14: PetscSFNode *viremote,*rremotes;
15: const PetscInt *ilocal;
16: PetscInt *vilocal = NULL,*ldrs;
17: const PetscMPIInt *ranks;
18: PetscMPIInt *sranks;
19: PetscInt nranks,nr,nl,vnr,vnl,i,v,j,maxl;
20: MPI_Comm comm;
21: PetscErrorCode ierr;
27: if (nv == 1) {
28: PetscObjectReference((PetscObject)sf);
29: *vsf = sf;
30: return(0);
31: }
32: PetscObjectGetComm((PetscObject)sf,&comm);
33: PetscSFGetGraph(sf,&nr,&nl,&ilocal,&iremote);
34: PetscSFGetLeafRange(sf,NULL,&maxl);
35: maxl += 1;
36: if (ldl == PETSC_DECIDE) ldl = maxl;
37: if (ldr == PETSC_DECIDE) ldr = nr;
38: if (ldr < nr) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid leading dimension %D < %D",ldr,nr);
39: if (ldl < maxl) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid leading dimension %D < %D",ldl,maxl);
40: vnr = nr*nv;
41: vnl = nl*nv;
42: PetscMalloc1(vnl,&viremote);
43: if (ilocal) {
44: PetscMalloc1(vnl,&vilocal);
45: }
47: /* TODO: Should this special SF be available, e.g.
48: PetscSFGetRanksSF or similar? */
49: PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);
50: PetscMalloc1(nranks,&sranks);
51: PetscArraycpy(sranks,ranks,nranks);
52: PetscSortMPIInt(nranks,sranks);
53: PetscMalloc1(nranks,&rremotes);
54: for (i=0;i<nranks;i++) {
55: rremotes[i].rank = sranks[i];
56: rremotes[i].index = 0;
57: }
58: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&rankssf);
59: PetscSFSetGraph(rankssf,1,nranks,NULL,PETSC_OWN_POINTER,rremotes,PETSC_OWN_POINTER);
60: PetscMalloc1(nranks,&ldrs);
61: PetscSFBcastBegin(rankssf,MPIU_INT,&ldr,ldrs,MPI_REPLACE);
62: PetscSFBcastEnd(rankssf,MPIU_INT,&ldr,ldrs,MPI_REPLACE);
63: PetscSFDestroy(&rankssf);
65: j = -1;
66: for (i=0;i<nl;i++) {
67: const PetscInt r = iremote[i].rank;
68: const PetscInt ii = iremote[i].index;
70: if (j < 0 || sranks[j] != r) {
71: PetscFindMPIInt(r,nranks,sranks,&j);
72: }
73: if (j < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unable to locate neighbor rank %D",r);
74: for (v=0;v<nv;v++) {
75: viremote[v*nl + i].rank = r;
76: viremote[v*nl + i].index = v*ldrs[j] + ii;
77: if (ilocal) vilocal[v*nl + i] = v*ldl + ilocal[i];
78: }
79: }
80: PetscFree(sranks);
81: PetscFree(ldrs);
82: PetscSFCreate(comm,vsf);
83: PetscSFSetGraph(*vsf,vnr,vnl,vilocal,PETSC_OWN_POINTER,viremote,PETSC_OWN_POINTER);
84: return(0);
85: }
87: #if defined(PETSC_HAVE_CUDA)
88: struct SignVector_Functor
89: {
90: const PetscScalar *v;
91: PetscScalar *s;
92: SignVector_Functor(const PetscScalar *_v, PetscScalar *_s) : v(_v), s(_s) {}
94: __host__ __device__ void operator()(PetscInt i)
95: {
96: s[i] = (v[i] < 0 ? -1 : 1);
97: }
98: };
99: #endif
101: PETSC_INTERN PetscErrorCode VecSign(Vec v, Vec s)
102: {
103: const PetscScalar *av;
104: PetscScalar *as;
105: PetscInt i,n;
106: PetscErrorCode ierr;
107: #if defined(PETSC_HAVE_CUDA)
108: PetscBool viscuda,siscuda;
109: #endif
114: VecGetLocalSize(s,&n);
115: VecGetLocalSize(v,&i);
116: if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Invalid local sizes %D != %D",i,n);
117: #if defined(PETSC_HAVE_CUDA)
118: PetscObjectTypeCompareAny((PetscObject)v,&viscuda,VECSEQCUDA,VECMPICUDA,"");
119: PetscObjectTypeCompareAny((PetscObject)s,&siscuda,VECSEQCUDA,VECMPICUDA,"");
120: viscuda = (PetscBool)(viscuda && !v->boundtocpu);
121: siscuda = (PetscBool)(siscuda && !s->boundtocpu);
122: if (viscuda && siscuda) {
123: VecCUDAGetArrayRead(v,&av);
124: VecCUDAGetArrayWrite(s,&as);
125: SignVector_Functor sign_vector(av, as);
126: thrust::for_each(thrust::device,thrust::counting_iterator<PetscInt>(0),
127: thrust::counting_iterator<PetscInt>(n), sign_vector);
128: VecCUDARestoreArrayWrite(s,&as);
129: VecCUDARestoreArrayRead(v,&av);
130: } else
131: #endif
132: {
133: VecGetArrayRead(v,&av);
134: VecGetArrayWrite(s,&as);
135: for (i=0;i<n;i++) as[i] = PetscAbsScalar(av[i]) < 0 ? -1. : 1.;
136: VecRestoreArrayWrite(s,&as);
137: VecRestoreArrayRead(v,&av);
138: }
139: return(0);
140: }
142: #if defined(PETSC_HAVE_CUDA)
143: struct StandardBasis_Functor
144: {
145: PetscScalar *v;
146: PetscInt j;
148: StandardBasis_Functor(PetscScalar *_v, PetscInt _j) : v(_v), j(_j) {}
149: __host__ __device__ void operator()(PetscInt i)
150: {
151: v[i] = (i == j ? 1 : 0);
152: }
153: };
154: #endif
156: PETSC_INTERN PetscErrorCode VecSetDelta(Vec x, PetscInt i)
157: {
158: #if defined(PETSC_HAVE_CUDA)
159: PetscBool iscuda;
160: #endif
161: PetscInt st,en;
165: VecGetOwnershipRange(x,&st,&en);
166: #if defined(PETSC_HAVE_CUDA)
167: PetscObjectTypeCompareAny((PetscObject)x,&iscuda,VECSEQCUDA,VECMPICUDA,"");
168: iscuda = (PetscBool)(iscuda && !x->boundtocpu);
169: if (iscuda) {
170: PetscScalar *ax;
171: VecCUDAGetArrayWrite(x,&ax);
172: StandardBasis_Functor delta(ax, i-st);
173: thrust::for_each(thrust::device,thrust::counting_iterator<PetscInt>(0),
174: thrust::counting_iterator<PetscInt>(en-st), delta);
175: VecCUDARestoreArrayWrite(x,&ax);
176: } else
177: #endif
178: {
179: VecSet(x,0.);
180: if (st <= i && i < en) {
181: VecSetValue(x,i,1.0,INSERT_VALUES);
182: }
183: VecAssemblyBegin(x);
184: VecAssemblyEnd(x);
185: }
186: return(0);
187: }
189: /* these are approximate norms */
190: /* NORM_2: Estimating the matrix p-norm Nicholas J. Higham
191: NORM_1/NORM_INFINITY: A block algorithm for matrix 1-norm estimation, with an application to 1-norm pseudospectra Higham, Nicholas J. and Tisseur, Francoise */
192: PETSC_INTERN PetscErrorCode MatApproximateNorm_Private(Mat A, NormType normtype, PetscInt normsamples, PetscReal* n)
193: {
194: Vec x,y,w,z;
195: PetscReal normz,adot;
196: PetscScalar dot;
197: PetscInt i,j,N,jold = -1;
199: PetscBool boundtocpu = PETSC_TRUE;
202: #if defined(PETSC_HAVE_DEVICE)
203: boundtocpu = A->boundtocpu;
204: #endif
205: switch (normtype) {
206: case NORM_INFINITY:
207: case NORM_1:
208: if (normsamples < 0) normsamples = 10; /* pure guess */
209: if (normtype == NORM_INFINITY) {
210: Mat B;
211: MatCreateTranspose(A,&B);
212: A = B;
213: } else {
214: PetscObjectReference((PetscObject)A);
215: }
216: MatCreateVecs(A,&x,&y);
217: MatCreateVecs(A,&z,&w);
218: VecBindToCPU(x,boundtocpu);
219: VecBindToCPU(y,boundtocpu);
220: VecBindToCPU(z,boundtocpu);
221: VecBindToCPU(w,boundtocpu);
222: VecGetSize(x,&N);
223: VecSet(x,1./N);
224: *n = 0.0;
225: for (i = 0; i < normsamples; i++) {
226: MatMult(A,x,y);
227: VecSign(y,w);
228: MatMultTranspose(A,w,z);
229: VecNorm(z,NORM_INFINITY,&normz);
230: VecDot(x,z,&dot);
231: adot = PetscAbsScalar(dot);
232: PetscInfo4(A,"%s norm it %D -> (%g %g)\n",NormTypes[normtype],i,(double)normz,(double)adot);
233: if (normz <= adot && i > 0) {
234: VecNorm(y,NORM_1,n);
235: break;
236: }
237: VecMax(z,&j,&normz);
238: if (j == jold) {
239: VecNorm(y,NORM_1,n);
240: PetscInfo2(A,"%s norm it %D -> breakdown (j==jold)\n",NormTypes[normtype],i);
241: break;
242: }
243: jold = j;
244: VecSetDelta(x,j);
245: }
246: MatDestroy(&A);
247: VecDestroy(&x);
248: VecDestroy(&w);
249: VecDestroy(&y);
250: VecDestroy(&z);
251: break;
252: case NORM_2:
253: if (normsamples < 0) normsamples = 20; /* pure guess */
254: MatCreateVecs(A,&x,&y);
255: MatCreateVecs(A,&z,NULL);
256: VecBindToCPU(x,boundtocpu);
257: VecBindToCPU(y,boundtocpu);
258: VecBindToCPU(z,boundtocpu);
259: VecSetRandom(x,NULL);
260: VecNormalize(x,NULL);
261: *n = 0.0;
262: for (i = 0; i < normsamples; i++) {
263: MatMult(A,x,y);
264: VecNormalize(y,n);
265: MatMultTranspose(A,y,z);
266: VecNorm(z,NORM_2,&normz);
267: VecDot(x,z,&dot);
268: adot = PetscAbsScalar(dot);
269: PetscInfo5(A,"%s norm it %D -> %g (%g %g)\n",NormTypes[normtype],i,(double)*n,(double)normz,(double)adot);
270: if (normz <= adot) break;
271: if (i < normsamples - 1) {
272: Vec t;
274: VecNormalize(z,NULL);
275: t = x;
276: x = z;
277: z = t;
278: }
279: }
280: VecDestroy(&x);
281: VecDestroy(&y);
282: VecDestroy(&z);
283: break;
284: default:
285: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"%s norm not supported",NormTypes[normtype]);
286: }
287: PetscInfo3(A,"%s norm %g computed in %D iterations\n",NormTypes[normtype],(double)*n,i);
288: return(0);
289: }