Actual source code: bddcprivate.c
1: #include <../src/mat/impls/aij/seq/aij.h>
2: #include <../src/ksp/pc/impls/bddc/bddc.h>
3: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
4: #include <../src/mat/impls/dense/seq/dense.h>
5: #include <petscdmplex.h>
6: #include <petscblaslapack.h>
7: #include <petsc/private/sfimpl.h>
8: #include <petsc/private/dmpleximpl.h>
9: #include <petscdmda.h>
11: static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*);
13: /* if range is true, it returns B s.t. span{B} = range(A)
14: if range is false, it returns B s.t. range(B) _|_ range(A) */
15: PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
16: {
17: PetscScalar *uwork,*data,*U, ds = 0.;
18: PetscReal *sing;
19: PetscBLASInt bM,bN,lwork,lierr,di = 1;
20: PetscInt ulw,i,nr,nc,n;
22: #if defined(PETSC_USE_COMPLEX)
23: PetscReal *rwork2;
24: #endif
27: MatGetSize(A,&nr,&nc);
28: if (!nr || !nc) return(0);
30: /* workspace */
31: if (!work) {
32: ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc));
33: PetscMalloc1(ulw,&uwork);
34: } else {
35: ulw = lw;
36: uwork = work;
37: }
38: n = PetscMin(nr,nc);
39: if (!rwork) {
40: PetscMalloc1(n,&sing);
41: } else {
42: sing = rwork;
43: }
45: /* SVD */
46: PetscMalloc1(nr*nr,&U);
47: PetscBLASIntCast(nr,&bM);
48: PetscBLASIntCast(nc,&bN);
49: PetscBLASIntCast(ulw,&lwork);
50: MatDenseGetArray(A,&data);
51: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
52: #if !defined(PETSC_USE_COMPLEX)
53: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr));
54: #else
55: PetscMalloc1(5*n,&rwork2);
56: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,rwork2,&lierr));
57: PetscFree(rwork2);
58: #endif
59: PetscFPTrapPop();
60: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
61: MatDenseRestoreArray(A,&data);
62: for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break;
63: if (!rwork) {
64: PetscFree(sing);
65: }
66: if (!work) {
67: PetscFree(uwork);
68: }
69: /* create B */
70: if (!range) {
71: MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);
72: MatDenseGetArray(*B,&data);
73: PetscArraycpy(data,U+nr*i,(nr-i)*nr);
74: } else {
75: MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);
76: MatDenseGetArray(*B,&data);
77: PetscArraycpy(data,U,i*nr);
78: }
79: MatDenseRestoreArray(*B,&data);
80: PetscFree(U);
81: return(0);
82: }
84: /* TODO REMOVE */
85: #if defined(PRINT_GDET)
86: static int inc = 0;
87: static int lev = 0;
88: #endif
90: PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
91: {
93: Mat GE,GEd;
94: PetscInt rsize,csize,esize;
95: PetscScalar *ptr;
98: ISGetSize(edge,&esize);
99: if (!esize) return(0);
100: ISGetSize(extrow,&rsize);
101: ISGetSize(extcol,&csize);
103: /* gradients */
104: ptr = work + 5*esize;
105: MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);
106: MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);
107: MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);
108: MatDestroy(&GE);
110: /* constants */
111: ptr += rsize*csize;
112: MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);
113: MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);
114: MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);
115: MatDestroy(&GE);
116: MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);
117: MatDestroy(&GEd);
119: if (corners) {
120: Mat GEc;
121: const PetscScalar *vals;
122: PetscScalar v;
124: MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);
125: MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);
126: MatDenseGetArrayRead(GEd,&vals);
127: /* v = PetscAbsScalar(vals[0]) */;
128: v = 1.;
129: cvals[0] = vals[0]/v;
130: cvals[1] = vals[1]/v;
131: MatDenseRestoreArrayRead(GEd,&vals);
132: MatScale(*GKins,1./v);
133: #if defined(PRINT_GDET)
134: {
135: PetscViewer viewer;
136: char filename[256];
137: sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++);
138: PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
139: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
140: PetscObjectSetName((PetscObject)GEc,"GEc");
141: MatView(GEc,viewer);
142: PetscObjectSetName((PetscObject)(*GKins),"GK");
143: MatView(*GKins,viewer);
144: PetscObjectSetName((PetscObject)GEd,"Gproj");
145: MatView(GEd,viewer);
146: PetscViewerDestroy(&viewer);
147: }
148: #endif
149: MatDestroy(&GEd);
150: MatDestroy(&GEc);
151: }
153: return(0);
154: }
156: PetscErrorCode PCBDDCNedelecSupport(PC pc)
157: {
158: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
159: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
160: Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit;
161: Vec tvec;
162: PetscSF sfv;
163: ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g;
164: MPI_Comm comm;
165: IS lned,primals,allprimals,nedfieldlocal;
166: IS *eedges,*extrows,*extcols,*alleedges;
167: PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter;
168: PetscScalar *vals,*work;
169: PetscReal *rwork;
170: const PetscInt *idxs,*ii,*jj,*iit,*jjt;
171: PetscInt ne,nv,Lv,order,n,field;
172: PetscInt n_neigh,*neigh,*n_shared,**shared;
173: PetscInt i,j,extmem,cum,maxsize,nee;
174: PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs;
175: PetscInt *sfvleaves,*sfvroots;
176: PetscInt *corners,*cedges;
177: PetscInt *ecount,**eneighs,*vcount,**vneighs;
178: PetscInt *emarks;
179: PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal;
180: PetscErrorCode ierr;
183: /* If the discrete gradient is defined for a subset of dofs and global is true,
184: it assumes G is given in global ordering for all the dofs.
185: Otherwise, the ordering is global for the Nedelec field */
186: order = pcbddc->nedorder;
187: conforming = pcbddc->conforming;
188: field = pcbddc->nedfield;
189: global = pcbddc->nedglobal;
190: setprimal = PETSC_FALSE;
191: print = PETSC_FALSE;
192: singular = PETSC_FALSE;
194: /* Command line customization */
195: PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");
196: PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);
197: PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);
198: PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);
199: /* print debug info TODO: to be removed */
200: PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);
201: PetscOptionsEnd();
203: /* Return if there are no edges in the decomposition and the problem is not singular */
204: MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);
205: ISLocalToGlobalMappingGetSize(al2g,&n);
206: PetscObjectGetComm((PetscObject)pc,&comm);
207: if (!singular) {
208: VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
209: lrc[0] = PETSC_FALSE;
210: for (i=0;i<n;i++) {
211: if (PetscRealPart(vals[i]) > 2.) {
212: lrc[0] = PETSC_TRUE;
213: break;
214: }
215: }
216: VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
217: MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);
218: if (!lrc[1]) return(0);
219: }
221: /* Get Nedelec field */
222: if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %D: number of fields is %D",field,pcbddc->n_ISForDofsLocal);
223: if (pcbddc->n_ISForDofsLocal && field >= 0) {
224: PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);
225: nedfieldlocal = pcbddc->ISForDofsLocal[field];
226: ISGetLocalSize(nedfieldlocal,&ne);
227: } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
228: ne = n;
229: nedfieldlocal = NULL;
230: global = PETSC_TRUE;
231: } else if (field == PETSC_DECIDE) {
232: PetscInt rst,ren,*idx;
234: PetscArrayzero(matis->sf_leafdata,n);
235: PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);
236: MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);
237: for (i=rst;i<ren;i++) {
238: PetscInt nc;
240: MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
241: if (nc > 1) matis->sf_rootdata[i-rst] = 1;
242: MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
243: }
244: PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);
245: PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);
246: PetscMalloc1(n,&idx);
247: for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i;
248: ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);
249: } else {
250: SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified");
251: }
253: /* Sanity checks */
254: if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time");
255: if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis");
256: if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %D it's not a multiple of the order %D",ne,order);
258: /* Just set primal dofs and return */
259: if (setprimal) {
260: IS enedfieldlocal;
261: PetscInt *eidxs;
263: PetscMalloc1(ne,&eidxs);
264: VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
265: if (nedfieldlocal) {
266: ISGetIndices(nedfieldlocal,&idxs);
267: for (i=0,cum=0;i<ne;i++) {
268: if (PetscRealPart(vals[idxs[i]]) > 2.) {
269: eidxs[cum++] = idxs[i];
270: }
271: }
272: ISRestoreIndices(nedfieldlocal,&idxs);
273: } else {
274: for (i=0,cum=0;i<ne;i++) {
275: if (PetscRealPart(vals[i]) > 2.) {
276: eidxs[cum++] = i;
277: }
278: }
279: }
280: VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
281: ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);
282: PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);
283: PetscFree(eidxs);
284: ISDestroy(&nedfieldlocal);
285: ISDestroy(&enedfieldlocal);
286: return(0);
287: }
289: /* Compute some l2g maps */
290: if (nedfieldlocal) {
291: IS is;
293: /* need to map from the local Nedelec field to local numbering */
294: ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);
295: /* need to map from the local Nedelec field to global numbering for the whole dofs*/
296: ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);
297: ISLocalToGlobalMappingCreateIS(is,&al2g);
298: /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
299: if (global) {
300: PetscObjectReference((PetscObject)al2g);
301: el2g = al2g;
302: } else {
303: IS gis;
305: ISRenumber(is,NULL,NULL,&gis);
306: ISLocalToGlobalMappingCreateIS(gis,&el2g);
307: ISDestroy(&gis);
308: }
309: ISDestroy(&is);
310: } else {
311: /* restore default */
312: pcbddc->nedfield = -1;
313: /* one ref for the destruction of al2g, one for el2g */
314: PetscObjectReference((PetscObject)al2g);
315: PetscObjectReference((PetscObject)al2g);
316: el2g = al2g;
317: fl2g = NULL;
318: }
320: /* Start communication to drop connections for interior edges (for cc analysis only) */
321: PetscArrayzero(matis->sf_leafdata,n);
322: PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);
323: if (nedfieldlocal) {
324: ISGetIndices(nedfieldlocal,&idxs);
325: for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1;
326: ISRestoreIndices(nedfieldlocal,&idxs);
327: } else {
328: for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1;
329: }
330: PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);
331: PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);
333: if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */
334: MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);
335: MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
336: if (global) {
337: PetscInt rst;
339: MatGetOwnershipRange(G,&rst,NULL);
340: for (i=0,cum=0;i<pc->pmat->rmap->n;i++) {
341: if (matis->sf_rootdata[i] < 2) {
342: matis->sf_rootdata[cum++] = i + rst;
343: }
344: }
345: MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);
346: MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);
347: } else {
348: PetscInt *tbz;
350: PetscMalloc1(ne,&tbz);
351: PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);
352: PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);
353: ISGetIndices(nedfieldlocal,&idxs);
354: for (i=0,cum=0;i<ne;i++)
355: if (matis->sf_leafdata[idxs[i]] == 1)
356: tbz[cum++] = i;
357: ISRestoreIndices(nedfieldlocal,&idxs);
358: ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);
359: MatZeroRows(G,cum,tbz,0.,NULL,NULL);
360: PetscFree(tbz);
361: }
362: } else { /* we need the entire G to infer the nullspace */
363: PetscObjectReference((PetscObject)pcbddc->discretegradient);
364: G = pcbddc->discretegradient;
365: }
367: /* Extract subdomain relevant rows of G */
368: ISLocalToGlobalMappingGetIndices(el2g,&idxs);
369: ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);
370: MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);
371: ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);
372: ISDestroy(&lned);
373: MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);
374: MatDestroy(&lGall);
375: MatISGetLocalMat(lGis,&lG);
377: /* SF for nodal dofs communications */
378: MatGetLocalSize(G,NULL,&Lv);
379: MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);
380: PetscObjectReference((PetscObject)vl2g);
381: ISLocalToGlobalMappingGetSize(vl2g,&nv);
382: PetscSFCreate(comm,&sfv);
383: ISLocalToGlobalMappingGetIndices(vl2g,&idxs);
384: PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);
385: ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);
386: i = singular ? 2 : 1;
387: PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);
389: /* Destroy temporary G created in MATIS format and modified G */
390: PetscObjectReference((PetscObject)lG);
391: MatDestroy(&lGis);
392: MatDestroy(&G);
394: if (print) {
395: PetscObjectSetName((PetscObject)lG,"initial_lG");
396: MatView(lG,NULL);
397: }
399: /* Save lG for values insertion in change of basis */
400: MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);
402: /* Analyze the edge-nodes connections (duplicate lG) */
403: MatDuplicate(lG,MAT_COPY_VALUES,&lGe);
404: MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
405: PetscBTCreate(nv,&btv);
406: PetscBTCreate(ne,&bte);
407: PetscBTCreate(ne,&btb);
408: PetscBTCreate(ne,&btbd);
409: PetscBTCreate(nv,&btvcand);
410: /* need to import the boundary specification to ensure the
411: proper detection of coarse edges' endpoints */
412: if (pcbddc->DirichletBoundariesLocal) {
413: IS is;
415: if (fl2g) {
416: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);
417: } else {
418: is = pcbddc->DirichletBoundariesLocal;
419: }
420: ISGetLocalSize(is,&cum);
421: ISGetIndices(is,&idxs);
422: for (i=0;i<cum;i++) {
423: if (idxs[i] >= 0) {
424: PetscBTSet(btb,idxs[i]);
425: PetscBTSet(btbd,idxs[i]);
426: }
427: }
428: ISRestoreIndices(is,&idxs);
429: if (fl2g) {
430: ISDestroy(&is);
431: }
432: }
433: if (pcbddc->NeumannBoundariesLocal) {
434: IS is;
436: if (fl2g) {
437: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);
438: } else {
439: is = pcbddc->NeumannBoundariesLocal;
440: }
441: ISGetLocalSize(is,&cum);
442: ISGetIndices(is,&idxs);
443: for (i=0;i<cum;i++) {
444: if (idxs[i] >= 0) {
445: PetscBTSet(btb,idxs[i]);
446: }
447: }
448: ISRestoreIndices(is,&idxs);
449: if (fl2g) {
450: ISDestroy(&is);
451: }
452: }
454: /* Count neighs per dof */
455: ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs);
456: ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs);
458: /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
459: for proper detection of coarse edges' endpoints */
460: PetscBTCreate(ne,&btee);
461: for (i=0;i<ne;i++) {
462: if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) {
463: PetscBTSet(btee,i);
464: }
465: }
466: PetscMalloc1(ne,&marks);
467: if (!conforming) {
468: MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
469: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
470: }
471: MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
472: MatSeqAIJGetArray(lGe,&vals);
473: cum = 0;
474: for (i=0;i<ne;i++) {
475: /* eliminate rows corresponding to edge dofs belonging to coarse faces */
476: if (!PetscBTLookup(btee,i)) {
477: marks[cum++] = i;
478: continue;
479: }
480: /* set badly connected edge dofs as primal */
481: if (!conforming) {
482: if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
483: marks[cum++] = i;
484: PetscBTSet(bte,i);
485: for (j=ii[i];j<ii[i+1];j++) {
486: PetscBTSet(btv,jj[j]);
487: }
488: } else {
489: /* every edge dofs should be connected trough a certain number of nodal dofs
490: to other edge dofs belonging to coarse edges
491: - at most 2 endpoints
492: - order-1 interior nodal dofs
493: - no undefined nodal dofs (nconn < order)
494: */
495: PetscInt ends = 0,ints = 0, undef = 0;
496: for (j=ii[i];j<ii[i+1];j++) {
497: PetscInt v = jj[j],k;
498: PetscInt nconn = iit[v+1]-iit[v];
499: for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--;
500: if (nconn > order) ends++;
501: else if (nconn == order) ints++;
502: else undef++;
503: }
504: if (undef || ends > 2 || ints != order -1) {
505: marks[cum++] = i;
506: PetscBTSet(bte,i);
507: for (j=ii[i];j<ii[i+1];j++) {
508: PetscBTSet(btv,jj[j]);
509: }
510: }
511: }
512: }
513: /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
514: if (!order && ii[i+1] != ii[i]) {
515: PetscScalar val = 1./(ii[i+1]-ii[i]-1);
516: for (j=ii[i];j<ii[i+1];j++) vals[j] = val;
517: }
518: }
519: PetscBTDestroy(&btee);
520: MatSeqAIJRestoreArray(lGe,&vals);
521: MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
522: if (!conforming) {
523: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
524: MatDestroy(&lGt);
525: }
526: MatZeroRows(lGe,cum,marks,0.,NULL,NULL);
528: /* identify splitpoints and corner candidates */
529: MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
530: if (print) {
531: PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");
532: MatView(lGe,NULL);
533: PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");
534: MatView(lGt,NULL);
535: }
536: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
537: MatSeqAIJGetArray(lGt,&vals);
538: for (i=0;i<nv;i++) {
539: PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i];
540: PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
541: if (!order) { /* variable order */
542: PetscReal vorder = 0.;
544: for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]);
545: test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON);
546: if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%D)",vorder,test);
547: ord = 1;
548: }
549: if (PetscUnlikelyDebug(test%ord)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %D connected with nodal dof %D with order %D",test,i,ord);
550: for (j=ii[i];j<ii[i+1] && sneighs;j++) {
551: if (PetscBTLookup(btbd,jj[j])) {
552: bdir = PETSC_TRUE;
553: break;
554: }
555: if (vc != ecount[jj[j]]) {
556: sneighs = PETSC_FALSE;
557: } else {
558: PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]];
559: for (k=0;k<vc;k++) {
560: if (vn[k] != en[k]) {
561: sneighs = PETSC_FALSE;
562: break;
563: }
564: }
565: }
566: }
567: if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */
568: if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %D (%D %D %D)\n",i,!sneighs,test >= 3*ord,bdir);
569: PetscBTSet(btv,i);
570: } else if (test == ord) {
571: if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) {
572: if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %D\n",i);
573: PetscBTSet(btv,i);
574: } else {
575: if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %D\n",i);
576: PetscBTSet(btvcand,i);
577: }
578: }
579: }
580: ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs);
581: ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs);
582: PetscBTDestroy(&btbd);
584: /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
585: if (order != 1) {
586: if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n");
587: MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
588: for (i=0;i<nv;i++) {
589: if (PetscBTLookup(btvcand,i)) {
590: PetscBool found = PETSC_FALSE;
591: for (j=ii[i];j<ii[i+1] && !found;j++) {
592: PetscInt k,e = jj[j];
593: if (PetscBTLookup(bte,e)) continue;
594: for (k=iit[e];k<iit[e+1];k++) {
595: PetscInt v = jjt[k];
596: if (v != i && PetscBTLookup(btvcand,v)) {
597: found = PETSC_TRUE;
598: break;
599: }
600: }
601: }
602: if (!found) {
603: if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D CLEARED\n",i);
604: PetscBTClear(btvcand,i);
605: } else {
606: if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D ACCEPTED\n",i);
607: }
608: }
609: }
610: MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
611: }
612: MatSeqAIJRestoreArray(lGt,&vals);
613: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
614: MatDestroy(&lGe);
616: /* Get the local G^T explicitly */
617: MatDestroy(&lGt);
618: MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);
619: MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
621: /* Mark interior nodal dofs */
622: ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
623: PetscBTCreate(nv,&btvi);
624: for (i=1;i<n_neigh;i++) {
625: for (j=0;j<n_shared[i];j++) {
626: PetscBTSet(btvi,shared[i][j]);
627: }
628: }
629: ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
631: /* communicate corners and splitpoints */
632: PetscMalloc1(nv,&vmarks);
633: PetscArrayzero(sfvleaves,nv);
634: PetscArrayzero(sfvroots,Lv);
635: for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1;
637: if (print) {
638: IS tbz;
640: cum = 0;
641: for (i=0;i<nv;i++)
642: if (sfvleaves[i])
643: vmarks[cum++] = i;
645: ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
646: PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");
647: ISView(tbz,NULL);
648: ISDestroy(&tbz);
649: }
651: PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
652: PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
653: PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE);
654: PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves,MPI_REPLACE);
656: /* Zero rows of lGt corresponding to identified corners
657: and interior nodal dofs */
658: cum = 0;
659: for (i=0;i<nv;i++) {
660: if (sfvleaves[i]) {
661: vmarks[cum++] = i;
662: PetscBTSet(btv,i);
663: }
664: if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i;
665: }
666: PetscBTDestroy(&btvi);
667: if (print) {
668: IS tbz;
670: ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
671: PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");
672: ISView(tbz,NULL);
673: ISDestroy(&tbz);
674: }
675: MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);
676: PetscFree(vmarks);
677: PetscSFDestroy(&sfv);
678: PetscFree2(sfvleaves,sfvroots);
680: /* Recompute G */
681: MatDestroy(&lG);
682: MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);
683: if (print) {
684: PetscObjectSetName((PetscObject)lG,"used_lG");
685: MatView(lG,NULL);
686: PetscObjectSetName((PetscObject)lGt,"used_lGt");
687: MatView(lGt,NULL);
688: }
690: /* Get primal dofs (if any) */
691: cum = 0;
692: for (i=0;i<ne;i++) {
693: if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i;
694: }
695: if (fl2g) {
696: ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);
697: }
698: ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);
699: if (print) {
700: PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");
701: ISView(primals,NULL);
702: }
703: PetscBTDestroy(&bte);
704: /* TODO: what if the user passed in some of them ? */
705: PCBDDCSetPrimalVerticesLocalIS(pc,primals);
706: ISDestroy(&primals);
708: /* Compute edge connectivity */
709: PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");
711: /* Symbolic conn = lG*lGt */
712: MatProductCreate(lG,lGt,NULL,&conn);
713: MatProductSetType(conn,MATPRODUCT_AB);
714: MatProductSetAlgorithm(conn,"default");
715: MatProductSetFill(conn,PETSC_DEFAULT);
716: PetscObjectSetOptionsPrefix((PetscObject)conn,"econn_");
717: MatProductSetFromOptions(conn);
718: MatProductSymbolic(conn);
720: MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
721: if (fl2g) {
722: PetscBT btf;
723: PetscInt *iia,*jja,*iiu,*jju;
724: PetscBool rest = PETSC_FALSE,free = PETSC_FALSE;
726: /* create CSR for all local dofs */
727: PetscMalloc1(n+1,&iia);
728: if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
729: if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %D. Should be %D",pcbddc->mat_graph->nvtxs_csr,n);
730: iiu = pcbddc->mat_graph->xadj;
731: jju = pcbddc->mat_graph->adjncy;
732: } else if (pcbddc->use_local_adj) {
733: rest = PETSC_TRUE;
734: MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
735: } else {
736: free = PETSC_TRUE;
737: PetscMalloc2(n+1,&iiu,n,&jju);
738: iiu[0] = 0;
739: for (i=0;i<n;i++) {
740: iiu[i+1] = i+1;
741: jju[i] = -1;
742: }
743: }
745: /* import sizes of CSR */
746: iia[0] = 0;
747: for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i];
749: /* overwrite entries corresponding to the Nedelec field */
750: PetscBTCreate(n,&btf);
751: ISGetIndices(nedfieldlocal,&idxs);
752: for (i=0;i<ne;i++) {
753: PetscBTSet(btf,idxs[i]);
754: iia[idxs[i]+1] = ii[i+1]-ii[i];
755: }
757: /* iia in CSR */
758: for (i=0;i<n;i++) iia[i+1] += iia[i];
760: /* jja in CSR */
761: PetscMalloc1(iia[n],&jja);
762: for (i=0;i<n;i++)
763: if (!PetscBTLookup(btf,i))
764: for (j=0;j<iiu[i+1]-iiu[i];j++)
765: jja[iia[i]+j] = jju[iiu[i]+j];
767: /* map edge dofs connectivity */
768: if (jj) {
769: ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);
770: for (i=0;i<ne;i++) {
771: PetscInt e = idxs[i];
772: for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j];
773: }
774: }
775: ISRestoreIndices(nedfieldlocal,&idxs);
776: PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);
777: if (rest) {
778: MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
779: }
780: if (free) {
781: PetscFree2(iiu,jju);
782: }
783: PetscBTDestroy(&btf);
784: } else {
785: PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);
786: }
788: /* Analyze interface for edge dofs */
789: PCBDDCAnalyzeInterface(pc);
790: pcbddc->mat_graph->twodim = PETSC_FALSE;
792: /* Get coarse edges in the edge space */
793: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
794: MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
796: if (fl2g) {
797: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
798: PetscMalloc1(nee,&eedges);
799: for (i=0;i<nee;i++) {
800: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
801: }
802: } else {
803: eedges = alleedges;
804: primals = allprimals;
805: }
807: /* Mark fine edge dofs with their coarse edge id */
808: PetscArrayzero(marks,ne);
809: ISGetLocalSize(primals,&cum);
810: ISGetIndices(primals,&idxs);
811: for (i=0;i<cum;i++) marks[idxs[i]] = nee+1;
812: ISRestoreIndices(primals,&idxs);
813: if (print) {
814: PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");
815: ISView(primals,NULL);
816: }
818: maxsize = 0;
819: for (i=0;i<nee;i++) {
820: PetscInt size,mark = i+1;
822: ISGetLocalSize(eedges[i],&size);
823: ISGetIndices(eedges[i],&idxs);
824: for (j=0;j<size;j++) marks[idxs[j]] = mark;
825: ISRestoreIndices(eedges[i],&idxs);
826: maxsize = PetscMax(maxsize,size);
827: }
829: /* Find coarse edge endpoints */
830: MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
831: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
832: for (i=0;i<nee;i++) {
833: PetscInt mark = i+1,size;
835: ISGetLocalSize(eedges[i],&size);
836: if (!size && nedfieldlocal) continue;
837: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
838: ISGetIndices(eedges[i],&idxs);
839: if (print) {
840: PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %D\n",i);
841: ISView(eedges[i],NULL);
842: }
843: for (j=0;j<size;j++) {
844: PetscInt k, ee = idxs[j];
845: if (print) PetscPrintf(PETSC_COMM_SELF," idx %D\n",ee);
846: for (k=ii[ee];k<ii[ee+1];k++) {
847: if (print) PetscPrintf(PETSC_COMM_SELF," inspect %D\n",jj[k]);
848: if (PetscBTLookup(btv,jj[k])) {
849: if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %D\n",jj[k]);
850: } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */
851: PetscInt k2;
852: PetscBool corner = PETSC_FALSE;
853: for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) {
854: if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %D: mark %D (ref mark %D), boundary %D\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2]));
855: /* it's a corner if either is connected with an edge dof belonging to a different cc or
856: if the edge dof lie on the natural part of the boundary */
857: if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) {
858: corner = PETSC_TRUE;
859: break;
860: }
861: }
862: if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
863: if (print) PetscPrintf(PETSC_COMM_SELF," corner found %D\n",jj[k]);
864: PetscBTSet(btv,jj[k]);
865: } else {
866: if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n");
867: }
868: }
869: }
870: }
871: ISRestoreIndices(eedges[i],&idxs);
872: }
873: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
874: MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
875: PetscBTDestroy(&btb);
877: /* Reset marked primal dofs */
878: ISGetLocalSize(primals,&cum);
879: ISGetIndices(primals,&idxs);
880: for (i=0;i<cum;i++) marks[idxs[i]] = 0;
881: ISRestoreIndices(primals,&idxs);
883: /* Now use the initial lG */
884: MatDestroy(&lG);
885: MatDestroy(&lGt);
886: lG = lGinit;
887: MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);
889: /* Compute extended cols indices */
890: PetscBTCreate(nv,&btvc);
891: PetscBTCreate(nee,&bter);
892: MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
893: MatSeqAIJGetMaxRowNonzeros(lG,&i);
894: i *= maxsize;
895: PetscCalloc1(nee,&extcols);
896: PetscMalloc2(i,&extrow,i,&gidxs);
897: eerr = PETSC_FALSE;
898: for (i=0;i<nee;i++) {
899: PetscInt size,found = 0;
901: cum = 0;
902: ISGetLocalSize(eedges[i],&size);
903: if (!size && nedfieldlocal) continue;
904: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
905: ISGetIndices(eedges[i],&idxs);
906: PetscBTMemzero(nv,btvc);
907: for (j=0;j<size;j++) {
908: PetscInt k,ee = idxs[j];
909: for (k=ii[ee];k<ii[ee+1];k++) {
910: PetscInt vv = jj[k];
911: if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv;
912: else if (!PetscBTLookupSet(btvc,vv)) found++;
913: }
914: }
915: ISRestoreIndices(eedges[i],&idxs);
916: PetscSortRemoveDupsInt(&cum,extrow);
917: ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
918: PetscSortIntWithArray(cum,gidxs,extrow);
919: ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
920: /* it may happen that endpoints are not defined at this point
921: if it is the case, mark this edge for a second pass */
922: if (cum != size -1 || found != 2) {
923: PetscBTSet(bter,i);
924: if (print) {
925: PetscObjectSetName((PetscObject)eedges[i],"error_edge");
926: ISView(eedges[i],NULL);
927: PetscObjectSetName((PetscObject)extcols[i],"error_extcol");
928: ISView(extcols[i],NULL);
929: }
930: eerr = PETSC_TRUE;
931: }
932: }
933: /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
934: MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);
935: if (done) {
936: PetscInt *newprimals;
938: PetscMalloc1(ne,&newprimals);
939: ISGetLocalSize(primals,&cum);
940: ISGetIndices(primals,&idxs);
941: PetscArraycpy(newprimals,idxs,cum);
942: ISRestoreIndices(primals,&idxs);
943: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
944: if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %D)\n",eerr);
945: for (i=0;i<nee;i++) {
946: PetscBool has_candidates = PETSC_FALSE;
947: if (PetscBTLookup(bter,i)) {
948: PetscInt size,mark = i+1;
950: ISGetLocalSize(eedges[i],&size);
951: ISGetIndices(eedges[i],&idxs);
952: /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
953: for (j=0;j<size;j++) {
954: PetscInt k,ee = idxs[j];
955: if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %D [%D %D)\n",ee,ii[ee],ii[ee+1]);
956: for (k=ii[ee];k<ii[ee+1];k++) {
957: /* set all candidates located on the edge as corners */
958: if (PetscBTLookup(btvcand,jj[k])) {
959: PetscInt k2,vv = jj[k];
960: has_candidates = PETSC_TRUE;
961: if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %D\n",vv);
962: PetscBTSet(btv,vv);
963: /* set all edge dofs connected to candidate as primals */
964: for (k2=iit[vv];k2<iit[vv+1];k2++) {
965: if (marks[jjt[k2]] == mark) {
966: PetscInt k3,ee2 = jjt[k2];
967: if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %D\n",ee2);
968: newprimals[cum++] = ee2;
969: /* finally set the new corners */
970: for (k3=ii[ee2];k3<ii[ee2+1];k3++) {
971: if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %D\n",jj[k3]);
972: PetscBTSet(btv,jj[k3]);
973: }
974: }
975: }
976: } else {
977: if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %D\n",jj[k]);
978: }
979: }
980: }
981: if (!has_candidates) { /* circular edge */
982: PetscInt k, ee = idxs[0],*tmarks;
984: PetscCalloc1(ne,&tmarks);
985: if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %D\n",i);
986: for (k=ii[ee];k<ii[ee+1];k++) {
987: PetscInt k2;
988: if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %D\n",jj[k]);
989: PetscBTSet(btv,jj[k]);
990: for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++;
991: }
992: for (j=0;j<size;j++) {
993: if (tmarks[idxs[j]] > 1) {
994: if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %D\n",idxs[j]);
995: newprimals[cum++] = idxs[j];
996: }
997: }
998: PetscFree(tmarks);
999: }
1000: ISRestoreIndices(eedges[i],&idxs);
1001: }
1002: ISDestroy(&extcols[i]);
1003: }
1004: PetscFree(extcols);
1005: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
1006: PetscSortRemoveDupsInt(&cum,newprimals);
1007: if (fl2g) {
1008: ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);
1009: ISDestroy(&primals);
1010: for (i=0;i<nee;i++) {
1011: ISDestroy(&eedges[i]);
1012: }
1013: PetscFree(eedges);
1014: }
1015: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1016: ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);
1017: PetscFree(newprimals);
1018: PCBDDCSetPrimalVerticesLocalIS(pc,primals);
1019: ISDestroy(&primals);
1020: PCBDDCAnalyzeInterface(pc);
1021: pcbddc->mat_graph->twodim = PETSC_FALSE;
1022: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1023: if (fl2g) {
1024: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
1025: PetscMalloc1(nee,&eedges);
1026: for (i=0;i<nee;i++) {
1027: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
1028: }
1029: } else {
1030: eedges = alleedges;
1031: primals = allprimals;
1032: }
1033: PetscCalloc1(nee,&extcols);
1035: /* Mark again */
1036: PetscArrayzero(marks,ne);
1037: for (i=0;i<nee;i++) {
1038: PetscInt size,mark = i+1;
1040: ISGetLocalSize(eedges[i],&size);
1041: ISGetIndices(eedges[i],&idxs);
1042: for (j=0;j<size;j++) marks[idxs[j]] = mark;
1043: ISRestoreIndices(eedges[i],&idxs);
1044: }
1045: if (print) {
1046: PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");
1047: ISView(primals,NULL);
1048: }
1050: /* Recompute extended cols */
1051: eerr = PETSC_FALSE;
1052: for (i=0;i<nee;i++) {
1053: PetscInt size;
1055: cum = 0;
1056: ISGetLocalSize(eedges[i],&size);
1057: if (!size && nedfieldlocal) continue;
1058: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
1059: ISGetIndices(eedges[i],&idxs);
1060: for (j=0;j<size;j++) {
1061: PetscInt k,ee = idxs[j];
1062: for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k];
1063: }
1064: ISRestoreIndices(eedges[i],&idxs);
1065: PetscSortRemoveDupsInt(&cum,extrow);
1066: ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
1067: PetscSortIntWithArray(cum,gidxs,extrow);
1068: ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
1069: if (cum != size -1) {
1070: if (print) {
1071: PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");
1072: ISView(eedges[i],NULL);
1073: PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");
1074: ISView(extcols[i],NULL);
1075: }
1076: eerr = PETSC_TRUE;
1077: }
1078: }
1079: }
1080: MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1081: PetscFree2(extrow,gidxs);
1082: PetscBTDestroy(&bter);
1083: if (print) { PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF); }
1084: /* an error should not occur at this point */
1085: if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");
1087: /* Check the number of endpoints */
1088: MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1089: PetscMalloc1(2*nee,&corners);
1090: PetscMalloc1(nee,&cedges);
1091: for (i=0;i<nee;i++) {
1092: PetscInt size, found = 0, gc[2];
1094: /* init with defaults */
1095: cedges[i] = corners[i*2] = corners[i*2+1] = -1;
1096: ISGetLocalSize(eedges[i],&size);
1097: if (!size && nedfieldlocal) continue;
1098: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
1099: ISGetIndices(eedges[i],&idxs);
1100: PetscBTMemzero(nv,btvc);
1101: for (j=0;j<size;j++) {
1102: PetscInt k,ee = idxs[j];
1103: for (k=ii[ee];k<ii[ee+1];k++) {
1104: PetscInt vv = jj[k];
1105: if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) {
1106: if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %D",i);
1107: corners[i*2+found++] = vv;
1108: }
1109: }
1110: }
1111: if (found != 2) {
1112: PetscInt e;
1113: if (fl2g) {
1114: ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);
1115: } else {
1116: e = idxs[0];
1117: }
1118: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %D corners for edge %D (astart %D, estart %D)",found,i,e,idxs[0]);
1119: }
1121: /* get primal dof index on this coarse edge */
1122: ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);
1123: if (gc[0] > gc[1]) {
1124: PetscInt swap = corners[2*i];
1125: corners[2*i] = corners[2*i+1];
1126: corners[2*i+1] = swap;
1127: }
1128: cedges[i] = idxs[size-1];
1129: ISRestoreIndices(eedges[i],&idxs);
1130: if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %D: ce %D, corners (%D,%D)\n",i,cedges[i],corners[2*i],corners[2*i+1]);
1131: }
1132: MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1133: PetscBTDestroy(&btvc);
1135: if (PetscDefined(USE_DEBUG)) {
1136: /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1137: not interfere with neighbouring coarse edges */
1138: PetscMalloc1(nee+1,&emarks);
1139: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1140: for (i=0;i<nv;i++) {
1141: PetscInt emax = 0,eemax = 0;
1143: if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1144: PetscArrayzero(emarks,nee+1);
1145: for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++;
1146: for (j=1;j<nee+1;j++) {
1147: if (emax < emarks[j]) {
1148: emax = emarks[j];
1149: eemax = j;
1150: }
1151: }
1152: /* not relevant for edges */
1153: if (!eemax) continue;
1155: for (j=ii[i];j<ii[i+1];j++) {
1156: if (marks[jj[j]] && marks[jj[j]] != eemax) {
1157: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %D and %D) connected through the %D nodal dof at edge dof %D",marks[jj[j]]-1,eemax,i,jj[j]);
1158: }
1159: }
1160: }
1161: PetscFree(emarks);
1162: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1163: }
1165: /* Compute extended rows indices for edge blocks of the change of basis */
1166: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1167: MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);
1168: extmem *= maxsize;
1169: PetscMalloc1(extmem*nee,&extrow);
1170: PetscMalloc1(nee,&extrows);
1171: PetscCalloc1(nee,&extrowcum);
1172: for (i=0;i<nv;i++) {
1173: PetscInt mark = 0,size,start;
1175: if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1176: for (j=ii[i];j<ii[i+1];j++)
1177: if (marks[jj[j]] && !mark)
1178: mark = marks[jj[j]];
1180: /* not relevant */
1181: if (!mark) continue;
1183: /* import extended row */
1184: mark--;
1185: start = mark*extmem+extrowcum[mark];
1186: size = ii[i+1]-ii[i];
1187: if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %D > %D",extrowcum[mark] + size,extmem);
1188: PetscArraycpy(extrow+start,jj+ii[i],size);
1189: extrowcum[mark] += size;
1190: }
1191: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1192: MatDestroy(&lGt);
1193: PetscFree(marks);
1195: /* Compress extrows */
1196: cum = 0;
1197: for (i=0;i<nee;i++) {
1198: PetscInt size = extrowcum[i],*start = extrow + i*extmem;
1199: PetscSortRemoveDupsInt(&size,start);
1200: ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);
1201: cum = PetscMax(cum,size);
1202: }
1203: PetscFree(extrowcum);
1204: PetscBTDestroy(&btv);
1205: PetscBTDestroy(&btvcand);
1207: /* Workspace for lapack inner calls and VecSetValues */
1208: PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);
1210: /* Create change of basis matrix (preallocation can be improved) */
1211: MatCreate(comm,&T);
1212: MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n,
1213: pc->pmat->rmap->N,pc->pmat->rmap->N);
1214: MatSetType(T,MATAIJ);
1215: MatSeqAIJSetPreallocation(T,10,NULL);
1216: MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);
1217: MatSetLocalToGlobalMapping(T,al2g,al2g);
1218: MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
1219: MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);
1220: ISLocalToGlobalMappingDestroy(&al2g);
1222: /* Defaults to identity */
1223: MatCreateVecs(pc->pmat,&tvec,NULL);
1224: VecSet(tvec,1.0);
1225: MatDiagonalSet(T,tvec,INSERT_VALUES);
1226: VecDestroy(&tvec);
1228: /* Create discrete gradient for the coarser level if needed */
1229: MatDestroy(&pcbddc->nedcG);
1230: ISDestroy(&pcbddc->nedclocal);
1231: if (pcbddc->current_level < pcbddc->max_levels) {
1232: ISLocalToGlobalMapping cel2g,cvl2g;
1233: IS wis,gwis;
1234: PetscInt cnv,cne;
1236: ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);
1237: if (fl2g) {
1238: ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);
1239: } else {
1240: PetscObjectReference((PetscObject)wis);
1241: pcbddc->nedclocal = wis;
1242: }
1243: ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);
1244: ISDestroy(&wis);
1245: ISRenumber(gwis,NULL,&cne,&wis);
1246: ISLocalToGlobalMappingCreateIS(wis,&cel2g);
1247: ISDestroy(&wis);
1248: ISDestroy(&gwis);
1250: ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);
1251: ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);
1252: ISDestroy(&wis);
1253: ISRenumber(gwis,NULL,&cnv,&wis);
1254: ISLocalToGlobalMappingCreateIS(wis,&cvl2g);
1255: ISDestroy(&wis);
1256: ISDestroy(&gwis);
1258: MatCreate(comm,&pcbddc->nedcG);
1259: MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);
1260: MatSetType(pcbddc->nedcG,MATAIJ);
1261: MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);
1262: MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);
1263: MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);
1264: ISLocalToGlobalMappingDestroy(&cel2g);
1265: ISLocalToGlobalMappingDestroy(&cvl2g);
1266: }
1267: ISLocalToGlobalMappingDestroy(&vl2g);
1269: #if defined(PRINT_GDET)
1270: inc = 0;
1271: lev = pcbddc->current_level;
1272: #endif
1274: /* Insert values in the change of basis matrix */
1275: for (i=0;i<nee;i++) {
1276: Mat Gins = NULL, GKins = NULL;
1277: IS cornersis = NULL;
1278: PetscScalar cvals[2];
1280: if (pcbddc->nedcG) {
1281: ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);
1282: }
1283: PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);
1284: if (Gins && GKins) {
1285: const PetscScalar *data;
1286: const PetscInt *rows,*cols;
1287: PetscInt nrh,nch,nrc,ncc;
1289: ISGetIndices(eedges[i],&cols);
1290: /* H1 */
1291: ISGetIndices(extrows[i],&rows);
1292: MatGetSize(Gins,&nrh,&nch);
1293: MatDenseGetArrayRead(Gins,&data);
1294: MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);
1295: MatDenseRestoreArrayRead(Gins,&data);
1296: ISRestoreIndices(extrows[i],&rows);
1297: /* complement */
1298: MatGetSize(GKins,&nrc,&ncc);
1299: if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %D",i);
1300: if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %D and Gins %D does not match %D for coarse edge %D",ncc,nch,nrc,i);
1301: if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %D with ncc %D",i,ncc);
1302: MatDenseGetArrayRead(GKins,&data);
1303: MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);
1304: MatDenseRestoreArrayRead(GKins,&data);
1306: /* coarse discrete gradient */
1307: if (pcbddc->nedcG) {
1308: PetscInt cols[2];
1310: cols[0] = 2*i;
1311: cols[1] = 2*i+1;
1312: MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);
1313: }
1314: ISRestoreIndices(eedges[i],&cols);
1315: }
1316: ISDestroy(&extrows[i]);
1317: ISDestroy(&extcols[i]);
1318: ISDestroy(&cornersis);
1319: MatDestroy(&Gins);
1320: MatDestroy(&GKins);
1321: }
1322: ISLocalToGlobalMappingDestroy(&el2g);
1324: /* Start assembling */
1325: MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);
1326: if (pcbddc->nedcG) {
1327: MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1328: }
1330: /* Free */
1331: if (fl2g) {
1332: ISDestroy(&primals);
1333: for (i=0;i<nee;i++) {
1334: ISDestroy(&eedges[i]);
1335: }
1336: PetscFree(eedges);
1337: }
1339: /* hack mat_graph with primal dofs on the coarse edges */
1340: {
1341: PCBDDCGraph graph = pcbddc->mat_graph;
1342: PetscInt *oqueue = graph->queue;
1343: PetscInt *ocptr = graph->cptr;
1344: PetscInt ncc,*idxs;
1346: /* find first primal edge */
1347: if (pcbddc->nedclocal) {
1348: ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1349: } else {
1350: if (fl2g) {
1351: ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);
1352: }
1353: idxs = cedges;
1354: }
1355: cum = 0;
1356: while (cum < nee && cedges[cum] < 0) cum++;
1358: /* adapt connected components */
1359: PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);
1360: graph->cptr[0] = 0;
1361: for (i=0,ncc=0;i<graph->ncc;i++) {
1362: PetscInt lc = ocptr[i+1]-ocptr[i];
1363: if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */
1364: graph->cptr[ncc+1] = graph->cptr[ncc]+1;
1365: graph->queue[graph->cptr[ncc]] = cedges[cum];
1366: ncc++;
1367: lc--;
1368: cum++;
1369: while (cum < nee && cedges[cum] < 0) cum++;
1370: }
1371: graph->cptr[ncc+1] = graph->cptr[ncc] + lc;
1372: for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j];
1373: ncc++;
1374: }
1375: graph->ncc = ncc;
1376: if (pcbddc->nedclocal) {
1377: ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1378: }
1379: PetscFree2(ocptr,oqueue);
1380: }
1381: ISLocalToGlobalMappingDestroy(&fl2g);
1382: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1383: PCBDDCGraphResetCSR(pcbddc->mat_graph);
1384: MatDestroy(&conn);
1386: ISDestroy(&nedfieldlocal);
1387: PetscFree(extrow);
1388: PetscFree2(work,rwork);
1389: PetscFree(corners);
1390: PetscFree(cedges);
1391: PetscFree(extrows);
1392: PetscFree(extcols);
1393: MatDestroy(&lG);
1395: /* Complete assembling */
1396: MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);
1397: if (pcbddc->nedcG) {
1398: MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1399: #if 0
1400: PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");
1401: MatView(pcbddc->nedcG,NULL);
1402: #endif
1403: }
1405: /* set change of basis */
1406: PCBDDCSetChangeOfBasisMat(pc,T,singular);
1407: MatDestroy(&T);
1409: return(0);
1410: }
1412: /* the near-null space of BDDC carries information on quadrature weights,
1413: and these can be collinear -> so cheat with MatNullSpaceCreate
1414: and create a suitable set of basis vectors first */
1415: PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1416: {
1418: PetscInt i;
1421: for (i=0;i<nvecs;i++) {
1422: PetscInt first,last;
1424: VecGetOwnershipRange(quad_vecs[i],&first,&last);
1425: if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented");
1426: if (i>=first && i < last) {
1427: PetscScalar *data;
1428: VecGetArray(quad_vecs[i],&data);
1429: if (!has_const) {
1430: data[i-first] = 1.;
1431: } else {
1432: data[2*i-first] = 1./PetscSqrtReal(2.);
1433: data[2*i-first+1] = -1./PetscSqrtReal(2.);
1434: }
1435: VecRestoreArray(quad_vecs[i],&data);
1436: }
1437: PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1438: }
1439: MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);
1440: for (i=0;i<nvecs;i++) { /* reset vectors */
1441: PetscInt first,last;
1442: VecLockReadPop(quad_vecs[i]);
1443: VecGetOwnershipRange(quad_vecs[i],&first,&last);
1444: if (i>=first && i < last) {
1445: PetscScalar *data;
1446: VecGetArray(quad_vecs[i],&data);
1447: if (!has_const) {
1448: data[i-first] = 0.;
1449: } else {
1450: data[2*i-first] = 0.;
1451: data[2*i-first+1] = 0.;
1452: }
1453: VecRestoreArray(quad_vecs[i],&data);
1454: }
1455: PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1456: VecLockReadPush(quad_vecs[i]);
1457: }
1458: return(0);
1459: }
1461: PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1462: {
1463: Mat loc_divudotp;
1464: Vec p,v,vins,quad_vec,*quad_vecs;
1465: ISLocalToGlobalMapping map;
1466: PetscScalar *vals;
1467: const PetscScalar *array;
1468: PetscInt i,maxneighs = 0,maxsize,*gidxs;
1469: PetscInt n_neigh,*neigh,*n_shared,**shared;
1470: PetscMPIInt rank;
1471: PetscErrorCode ierr;
1474: ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1475: for (i=0;i<n_neigh;i++) maxneighs = PetscMax(graph->count[shared[i][0]]+1,maxneighs);
1476: MPIU_Allreduce(MPI_IN_PLACE,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));
1477: if (!maxneighs) {
1478: ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1479: *nnsp = NULL;
1480: return(0);
1481: }
1482: maxsize = 0;
1483: for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize);
1484: PetscMalloc2(maxsize,&gidxs,maxsize,&vals);
1485: /* create vectors to hold quadrature weights */
1486: MatCreateVecs(A,&quad_vec,NULL);
1487: if (!transpose) {
1488: MatGetLocalToGlobalMapping(A,&map,NULL);
1489: } else {
1490: MatGetLocalToGlobalMapping(A,NULL,&map);
1491: }
1492: VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);
1493: VecDestroy(&quad_vec);
1494: PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);
1495: for (i=0;i<maxneighs;i++) {
1496: VecLockReadPop(quad_vecs[i]);
1497: }
1499: /* compute local quad vec */
1500: MatISGetLocalMat(divudotp,&loc_divudotp);
1501: if (!transpose) {
1502: MatCreateVecs(loc_divudotp,&v,&p);
1503: } else {
1504: MatCreateVecs(loc_divudotp,&p,&v);
1505: }
1506: VecSet(p,1.);
1507: if (!transpose) {
1508: MatMultTranspose(loc_divudotp,p,v);
1509: } else {
1510: MatMult(loc_divudotp,p,v);
1511: }
1512: if (vl2l) {
1513: Mat lA;
1514: VecScatter sc;
1516: MatISGetLocalMat(A,&lA);
1517: MatCreateVecs(lA,&vins,NULL);
1518: VecScatterCreate(v,NULL,vins,vl2l,&sc);
1519: VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1520: VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1521: VecScatterDestroy(&sc);
1522: } else {
1523: vins = v;
1524: }
1525: VecGetArrayRead(vins,&array);
1526: VecDestroy(&p);
1528: /* insert in global quadrature vecs */
1529: MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
1530: for (i=1;i<n_neigh;i++) {
1531: const PetscInt *idxs;
1532: PetscInt idx,nn,j;
1534: idxs = shared[i];
1535: nn = n_shared[i];
1536: for (j=0;j<nn;j++) vals[j] = array[idxs[j]];
1537: PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);
1538: idx = -(idx+1);
1539: if (idx < 0 || idx >= maxneighs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid index %D not in [0,%D)",idx,maxneighs);
1540: ISLocalToGlobalMappingApply(map,nn,idxs,gidxs);
1541: VecSetValues(quad_vecs[idx],nn,gidxs,vals,INSERT_VALUES);
1542: }
1543: ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1544: VecRestoreArrayRead(vins,&array);
1545: if (vl2l) {
1546: VecDestroy(&vins);
1547: }
1548: VecDestroy(&v);
1549: PetscFree2(gidxs,vals);
1551: /* assemble near null space */
1552: for (i=0;i<maxneighs;i++) {
1553: VecAssemblyBegin(quad_vecs[i]);
1554: }
1555: for (i=0;i<maxneighs;i++) {
1556: VecAssemblyEnd(quad_vecs[i]);
1557: VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");
1558: VecLockReadPush(quad_vecs[i]);
1559: }
1560: VecDestroyVecs(maxneighs,&quad_vecs);
1561: return(0);
1562: }
1564: PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1565: {
1566: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
1570: if (primalv) {
1571: if (pcbddc->user_primal_vertices_local) {
1572: IS list[2], newp;
1574: list[0] = primalv;
1575: list[1] = pcbddc->user_primal_vertices_local;
1576: ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);
1577: ISSortRemoveDups(newp);
1578: ISDestroy(&list[1]);
1579: pcbddc->user_primal_vertices_local = newp;
1580: } else {
1581: PCBDDCSetPrimalVerticesLocalIS(pc,primalv);
1582: }
1583: }
1584: return(0);
1585: }
1587: static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1588: {
1589: PetscInt f, *comp = (PetscInt *)ctx;
1592: for (f=0;f<Nf;f++) out[f] = X[*comp];
1593: return(0);
1594: }
1596: PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1597: {
1599: Vec local,global;
1600: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
1601: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
1602: PetscBool monolithic = PETSC_FALSE;
1605: PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");
1606: PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);
1607: PetscOptionsEnd();
1608: /* need to convert from global to local topology information and remove references to information in global ordering */
1609: MatCreateVecs(pc->pmat,&global,NULL);
1610: MatCreateVecs(matis->A,&local,NULL);
1611: VecBindToCPU(global,PETSC_TRUE);
1612: VecBindToCPU(local,PETSC_TRUE);
1613: if (monolithic) { /* just get block size to properly compute vertices */
1614: if (pcbddc->vertex_size == 1) {
1615: MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);
1616: }
1617: goto boundary;
1618: }
1620: if (pcbddc->user_provided_isfordofs) {
1621: if (pcbddc->n_ISForDofs) {
1622: PetscInt i;
1624: PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);
1625: for (i=0;i<pcbddc->n_ISForDofs;i++) {
1626: PetscInt bs;
1628: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);
1629: ISGetBlockSize(pcbddc->ISForDofs[i],&bs);
1630: ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);
1631: ISDestroy(&pcbddc->ISForDofs[i]);
1632: }
1633: pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1634: pcbddc->n_ISForDofs = 0;
1635: PetscFree(pcbddc->ISForDofs);
1636: }
1637: } else {
1638: if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1639: DM dm;
1641: MatGetDM(pc->pmat, &dm);
1642: if (!dm) {
1643: PCGetDM(pc, &dm);
1644: }
1645: if (dm) {
1646: IS *fields;
1647: PetscInt nf,i;
1649: DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);
1650: PetscMalloc1(nf,&pcbddc->ISForDofsLocal);
1651: for (i=0;i<nf;i++) {
1652: PetscInt bs;
1654: PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);
1655: ISGetBlockSize(fields[i],&bs);
1656: ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);
1657: ISDestroy(&fields[i]);
1658: }
1659: PetscFree(fields);
1660: pcbddc->n_ISForDofsLocal = nf;
1661: } else { /* See if MATIS has fields attached by the conversion from MatNest */
1662: PetscContainer c;
1664: PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);
1665: if (c) {
1666: MatISLocalFields lf;
1667: PetscContainerGetPointer(c,(void**)&lf);
1668: PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);
1669: } else { /* fallback, create the default fields if bs > 1 */
1670: PetscInt i, n = matis->A->rmap->n;
1671: MatGetBlockSize(pc->pmat,&i);
1672: if (i > 1) {
1673: pcbddc->n_ISForDofsLocal = i;
1674: PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);
1675: for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1676: ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);
1677: }
1678: }
1679: }
1680: }
1681: } else {
1682: PetscInt i;
1683: for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1684: PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);
1685: }
1686: }
1687: }
1689: boundary:
1690: if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1691: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);
1692: } else if (pcbddc->DirichletBoundariesLocal) {
1693: PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);
1694: }
1695: if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1696: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);
1697: } else if (pcbddc->NeumannBoundariesLocal) {
1698: PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);
1699: }
1700: if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) {
1701: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);
1702: }
1703: VecDestroy(&global);
1704: VecDestroy(&local);
1705: /* detect local disconnected subdomains if requested (use matis->A) */
1706: if (pcbddc->detect_disconnected) {
1707: IS primalv = NULL;
1708: PetscInt i;
1709: PetscBool filter = pcbddc->detect_disconnected_filter;
1711: for (i=0;i<pcbddc->n_local_subs;i++) {
1712: ISDestroy(&pcbddc->local_subs[i]);
1713: }
1714: PetscFree(pcbddc->local_subs);
1715: PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);
1716: PCBDDCAddPrimalVerticesLocalIS(pc,primalv);
1717: ISDestroy(&primalv);
1718: }
1719: /* early stage corner detection */
1720: {
1721: DM dm;
1723: MatGetDM(pc->pmat,&dm);
1724: if (!dm) {
1725: PCGetDM(pc,&dm);
1726: }
1727: if (dm) {
1728: PetscBool isda;
1730: PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);
1731: if (isda) {
1732: ISLocalToGlobalMapping l2l;
1733: IS corners;
1734: Mat lA;
1735: PetscBool gl,lo;
1737: {
1738: Vec cvec;
1739: const PetscScalar *coords;
1740: PetscInt dof,n,cdim;
1741: PetscBool memc = PETSC_TRUE;
1743: DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);
1744: DMGetCoordinates(dm,&cvec);
1745: VecGetLocalSize(cvec,&n);
1746: VecGetBlockSize(cvec,&cdim);
1747: n /= cdim;
1748: PetscFree(pcbddc->mat_graph->coords);
1749: PetscMalloc1(dof*n*cdim,&pcbddc->mat_graph->coords);
1750: VecGetArrayRead(cvec,&coords);
1751: #if defined(PETSC_USE_COMPLEX)
1752: memc = PETSC_FALSE;
1753: #endif
1754: if (dof != 1) memc = PETSC_FALSE;
1755: if (memc) {
1756: PetscArraycpy(pcbddc->mat_graph->coords,coords,cdim*n*dof);
1757: } else { /* BDDC graph does not use any blocked information, we need to replicate the data */
1758: PetscReal *bcoords = pcbddc->mat_graph->coords;
1759: PetscInt i, b, d;
1761: for (i=0;i<n;i++) {
1762: for (b=0;b<dof;b++) {
1763: for (d=0;d<cdim;d++) {
1764: bcoords[i*dof*cdim + b*cdim + d] = PetscRealPart(coords[i*cdim+d]);
1765: }
1766: }
1767: }
1768: }
1769: VecRestoreArrayRead(cvec,&coords);
1770: pcbddc->mat_graph->cdim = cdim;
1771: pcbddc->mat_graph->cnloc = dof*n;
1772: pcbddc->mat_graph->cloc = PETSC_FALSE;
1773: }
1774: DMDAGetSubdomainCornersIS(dm,&corners);
1775: MatISGetLocalMat(pc->pmat,&lA);
1776: MatGetLocalToGlobalMapping(lA,&l2l,NULL);
1777: MatISRestoreLocalMat(pc->pmat,&lA);
1778: lo = (PetscBool)(l2l && corners);
1779: MPIU_Allreduce(&lo,&gl,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
1780: if (gl) { /* From PETSc's DMDA */
1781: const PetscInt *idx;
1782: PetscInt dof,bs,*idxout,n;
1784: DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);
1785: ISLocalToGlobalMappingGetBlockSize(l2l,&bs);
1786: ISGetLocalSize(corners,&n);
1787: ISGetIndices(corners,&idx);
1788: if (bs == dof) {
1789: PetscMalloc1(n,&idxout);
1790: ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);
1791: } else { /* the original DMDA local-to-local map have been modified */
1792: PetscInt i,d;
1794: PetscMalloc1(dof*n,&idxout);
1795: for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d;
1796: ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout);
1798: bs = 1;
1799: n *= dof;
1800: }
1801: ISRestoreIndices(corners,&idx);
1802: DMDARestoreSubdomainCornersIS(dm,&corners);
1803: ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);
1804: PCBDDCAddPrimalVerticesLocalIS(pc,corners);
1805: ISDestroy(&corners);
1806: pcbddc->corner_selected = PETSC_TRUE;
1807: pcbddc->corner_selection = PETSC_TRUE;
1808: }
1809: if (corners) {
1810: DMDARestoreSubdomainCornersIS(dm,&corners);
1811: }
1812: }
1813: }
1814: }
1815: if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
1816: DM dm;
1818: MatGetDM(pc->pmat,&dm);
1819: if (!dm) {
1820: PCGetDM(pc,&dm);
1821: }
1822: if (dm) { /* this can get very expensive, I need to find a faster alternative */
1823: Vec vcoords;
1824: PetscSection section;
1825: PetscReal *coords;
1826: PetscInt d,cdim,nl,nf,**ctxs;
1827: PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);
1829: DMGetCoordinateDim(dm,&cdim);
1830: DMGetLocalSection(dm,§ion);
1831: PetscSectionGetNumFields(section,&nf);
1832: DMCreateGlobalVector(dm,&vcoords);
1833: VecGetLocalSize(vcoords,&nl);
1834: PetscMalloc1(nl*cdim,&coords);
1835: PetscMalloc2(nf,&funcs,nf,&ctxs);
1836: PetscMalloc1(nf,&ctxs[0]);
1837: for (d=0;d<nf;d++) funcs[d] = func_coords_private;
1838: for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1;
1839: for (d=0;d<cdim;d++) {
1840: PetscInt i;
1841: const PetscScalar *v;
1843: for (i=0;i<nf;i++) ctxs[i][0] = d;
1844: DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);
1845: VecGetArrayRead(vcoords,&v);
1846: for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]);
1847: VecRestoreArrayRead(vcoords,&v);
1848: }
1849: VecDestroy(&vcoords);
1850: PCSetCoordinates(pc,cdim,nl,coords);
1851: PetscFree(coords);
1852: PetscFree(ctxs[0]);
1853: PetscFree2(funcs,ctxs);
1854: }
1855: }
1856: return(0);
1857: }
1859: PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1860: {
1861: Mat_IS *matis = (Mat_IS*)(pc->pmat->data);
1862: PetscErrorCode ierr;
1863: IS nis;
1864: const PetscInt *idxs;
1865: PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd;
1866: PetscBool *ld;
1869: if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR");
1870: if (mop == MPI_LAND) {
1871: /* init rootdata with true */
1872: ld = (PetscBool*) matis->sf_rootdata;
1873: for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE;
1874: } else {
1875: PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);
1876: }
1877: PetscArrayzero(matis->sf_leafdata,n);
1878: ISGetLocalSize(*is,&nd);
1879: ISGetIndices(*is,&idxs);
1880: ld = (PetscBool*) matis->sf_leafdata;
1881: for (i=0;i<nd;i++)
1882: if (-1 < idxs[i] && idxs[i] < n)
1883: ld[idxs[i]] = PETSC_TRUE;
1884: ISRestoreIndices(*is,&idxs);
1885: PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1886: PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1887: PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);
1888: PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata,MPI_REPLACE);
1889: if (mop == MPI_LAND) {
1890: PetscMalloc1(nd,&nidxs);
1891: } else {
1892: PetscMalloc1(n,&nidxs);
1893: }
1894: for (i=0,nnd=0;i<n;i++)
1895: if (ld[i])
1896: nidxs[nnd++] = i;
1897: ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);
1898: ISDestroy(is);
1899: *is = nis;
1900: return(0);
1901: }
1903: PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z)
1904: {
1905: PC_IS *pcis = (PC_IS*)(pc->data);
1906: PC_BDDC *pcbddc = (PC_BDDC*)(pc->data);
1907: PetscErrorCode ierr;
1910: if (!pcbddc->benign_have_null) {
1911: return(0);
1912: }
1913: if (pcbddc->ChangeOfBasisMatrix) {
1914: Vec swap;
1916: MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);
1917: swap = pcbddc->work_change;
1918: pcbddc->work_change = r;
1919: r = swap;
1920: }
1921: VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1922: VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1923: PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0);
1924: KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);
1925: PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0],pc,0,0,0);
1926: KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);
1927: VecSet(z,0.);
1928: VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1929: VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1930: if (pcbddc->ChangeOfBasisMatrix) {
1931: pcbddc->work_change = r;
1932: VecCopy(z,pcbddc->work_change);
1933: MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);
1934: }
1935: return(0);
1936: }
1938: PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
1939: {
1940: PCBDDCBenignMatMult_ctx ctx;
1941: PetscErrorCode ierr;
1942: PetscBool apply_right,apply_left,reset_x;
1945: MatShellGetContext(A,&ctx);
1946: if (transpose) {
1947: apply_right = ctx->apply_left;
1948: apply_left = ctx->apply_right;
1949: } else {
1950: apply_right = ctx->apply_right;
1951: apply_left = ctx->apply_left;
1952: }
1953: reset_x = PETSC_FALSE;
1954: if (apply_right) {
1955: const PetscScalar *ax;
1956: PetscInt nl,i;
1958: VecGetLocalSize(x,&nl);
1959: VecGetArrayRead(x,&ax);
1960: PetscArraycpy(ctx->work,ax,nl);
1961: VecRestoreArrayRead(x,&ax);
1962: for (i=0;i<ctx->benign_n;i++) {
1963: PetscScalar sum,val;
1964: const PetscInt *idxs;
1965: PetscInt nz,j;
1966: ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1967: ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
1968: sum = 0.;
1969: if (ctx->apply_p0) {
1970: val = ctx->work[idxs[nz-1]];
1971: for (j=0;j<nz-1;j++) {
1972: sum += ctx->work[idxs[j]];
1973: ctx->work[idxs[j]] += val;
1974: }
1975: } else {
1976: for (j=0;j<nz-1;j++) {
1977: sum += ctx->work[idxs[j]];
1978: }
1979: }
1980: ctx->work[idxs[nz-1]] -= sum;
1981: ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
1982: }
1983: VecPlaceArray(x,ctx->work);
1984: reset_x = PETSC_TRUE;
1985: }
1986: if (transpose) {
1987: MatMultTranspose(ctx->A,x,y);
1988: } else {
1989: MatMult(ctx->A,x,y);
1990: }
1991: if (reset_x) {
1992: VecResetArray(x);
1993: }
1994: if (apply_left) {
1995: PetscScalar *ay;
1996: PetscInt i;
1998: VecGetArray(y,&ay);
1999: for (i=0;i<ctx->benign_n;i++) {
2000: PetscScalar sum,val;
2001: const PetscInt *idxs;
2002: PetscInt nz,j;
2003: ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
2004: ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
2005: val = -ay[idxs[nz-1]];
2006: if (ctx->apply_p0) {
2007: sum = 0.;
2008: for (j=0;j<nz-1;j++) {
2009: sum += ay[idxs[j]];
2010: ay[idxs[j]] += val;
2011: }
2012: ay[idxs[nz-1]] += sum;
2013: } else {
2014: for (j=0;j<nz-1;j++) {
2015: ay[idxs[j]] += val;
2016: }
2017: ay[idxs[nz-1]] = 0.;
2018: }
2019: ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
2020: }
2021: VecRestoreArray(y,&ay);
2022: }
2023: return(0);
2024: }
2026: PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
2027: {
2031: PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);
2032: return(0);
2033: }
2035: PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
2036: {
2040: PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);
2041: return(0);
2042: }
2044: PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
2045: {
2046: PC_IS *pcis = (PC_IS*)pc->data;
2047: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
2048: PCBDDCBenignMatMult_ctx ctx;
2049: PetscErrorCode ierr;
2052: if (!restore) {
2053: Mat A_IB,A_BI;
2054: PetscScalar *work;
2055: PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;
2057: if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored");
2058: if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) return(0);
2059: PetscMalloc1(pcis->n,&work);
2060: MatCreate(PETSC_COMM_SELF,&A_IB);
2061: MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);
2062: MatSetType(A_IB,MATSHELL);
2063: MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);
2064: MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);
2065: PetscNew(&ctx);
2066: MatShellSetContext(A_IB,ctx);
2067: ctx->apply_left = PETSC_TRUE;
2068: ctx->apply_right = PETSC_FALSE;
2069: ctx->apply_p0 = PETSC_FALSE;
2070: ctx->benign_n = pcbddc->benign_n;
2071: if (reuse) {
2072: ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
2073: ctx->free = PETSC_FALSE;
2074: } else { /* TODO: could be optimized for successive solves */
2075: ISLocalToGlobalMapping N_to_D;
2076: PetscInt i;
2078: ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);
2079: PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);
2080: for (i=0;i<pcbddc->benign_n;i++) {
2081: ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);
2082: }
2083: ISLocalToGlobalMappingDestroy(&N_to_D);
2084: ctx->free = PETSC_TRUE;
2085: }
2086: ctx->A = pcis->A_IB;
2087: ctx->work = work;
2088: MatSetUp(A_IB);
2089: MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);
2090: MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);
2091: pcis->A_IB = A_IB;
2093: /* A_BI as A_IB^T */
2094: MatCreateTranspose(A_IB,&A_BI);
2095: pcbddc->benign_original_mat = pcis->A_BI;
2096: pcis->A_BI = A_BI;
2097: } else {
2098: if (!pcbddc->benign_original_mat) {
2099: return(0);
2100: }
2101: MatShellGetContext(pcis->A_IB,&ctx);
2102: MatDestroy(&pcis->A_IB);
2103: pcis->A_IB = ctx->A;
2104: ctx->A = NULL;
2105: MatDestroy(&pcis->A_BI);
2106: pcis->A_BI = pcbddc->benign_original_mat;
2107: pcbddc->benign_original_mat = NULL;
2108: if (ctx->free) {
2109: PetscInt i;
2110: for (i=0;i<ctx->benign_n;i++) {
2111: ISDestroy(&ctx->benign_zerodiag_subs[i]);
2112: }
2113: PetscFree(ctx->benign_zerodiag_subs);
2114: }
2115: PetscFree(ctx->work);
2116: PetscFree(ctx);
2117: }
2118: return(0);
2119: }
2121: /* used just in bddc debug mode */
2122: PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2123: {
2124: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
2125: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
2126: Mat An;
2130: MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);
2131: MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);
2132: if (is1) {
2133: MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);
2134: MatDestroy(&An);
2135: } else {
2136: *B = An;
2137: }
2138: return(0);
2139: }
2141: /* TODO: add reuse flag */
2142: PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2143: {
2144: Mat Bt;
2145: PetscScalar *a,*bdata;
2146: const PetscInt *ii,*ij;
2147: PetscInt m,n,i,nnz,*bii,*bij;
2148: PetscBool flg_row;
2152: MatGetSize(A,&n,&m);
2153: MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2154: MatSeqAIJGetArray(A,&a);
2155: nnz = n;
2156: for (i=0;i<ii[n];i++) {
2157: if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2158: }
2159: PetscMalloc1(n+1,&bii);
2160: PetscMalloc1(nnz,&bij);
2161: PetscMalloc1(nnz,&bdata);
2162: nnz = 0;
2163: bii[0] = 0;
2164: for (i=0;i<n;i++) {
2165: PetscInt j;
2166: for (j=ii[i];j<ii[i+1];j++) {
2167: PetscScalar entry = a[j];
2168: if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2169: bij[nnz] = ij[j];
2170: bdata[nnz] = entry;
2171: nnz++;
2172: }
2173: }
2174: bii[i+1] = nnz;
2175: }
2176: MatSeqAIJRestoreArray(A,&a);
2177: MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);
2178: MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2179: {
2180: Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data);
2181: b->free_a = PETSC_TRUE;
2182: b->free_ij = PETSC_TRUE;
2183: }
2184: if (*B == A) {
2185: MatDestroy(&A);
2186: }
2187: *B = Bt;
2188: return(0);
2189: }
2191: PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv)
2192: {
2193: Mat B = NULL;
2194: DM dm;
2195: IS is_dummy,*cc_n;
2196: ISLocalToGlobalMapping l2gmap_dummy;
2197: PCBDDCGraph graph;
2198: PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL;
2199: PetscInt i,n;
2200: PetscInt *xadj,*adjncy;
2201: PetscBool isplex = PETSC_FALSE;
2202: PetscErrorCode ierr;
2205: if (ncc) *ncc = 0;
2206: if (cc) *cc = NULL;
2207: if (primalv) *primalv = NULL;
2208: PCBDDCGraphCreate(&graph);
2209: MatGetDM(pc->pmat,&dm);
2210: if (!dm) {
2211: PCGetDM(pc,&dm);
2212: }
2213: if (dm) {
2214: PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);
2215: }
2216: if (filter) isplex = PETSC_FALSE;
2218: if (isplex) { /* this code has been modified from plexpartition.c */
2219: PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots;
2220: PetscInt *adj = NULL;
2221: IS cellNumbering;
2222: const PetscInt *cellNum;
2223: PetscBool useCone, useClosure;
2224: PetscSection section;
2225: PetscSegBuffer adjBuffer;
2226: PetscSF sfPoint;
2229: DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);
2230: DMGetPointSF(dm, &sfPoint);
2231: PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);
2232: /* Build adjacency graph via a section/segbuffer */
2233: PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);
2234: PetscSectionSetChart(section, pStart, pEnd);
2235: PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);
2236: /* Always use FVM adjacency to create partitioner graph */
2237: DMGetBasicAdjacency(dm, &useCone, &useClosure);
2238: DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE);
2239: DMPlexGetCellNumbering(dm, &cellNumbering);
2240: ISGetIndices(cellNumbering, &cellNum);
2241: for (n = 0, p = pStart; p < pEnd; p++) {
2242: /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2243: if (nroots > 0) {if (cellNum[p] < 0) continue;}
2244: adjSize = PETSC_DETERMINE;
2245: DMPlexGetAdjacency(dm, p, &adjSize, &adj);
2246: for (a = 0; a < adjSize; ++a) {
2247: const PetscInt point = adj[a];
2248: if (pStart <= point && point < pEnd) {
2249: PetscInt *PETSC_RESTRICT pBuf;
2250: PetscSectionAddDof(section, p, 1);
2251: PetscSegBufferGetInts(adjBuffer, 1, &pBuf);
2252: *pBuf = point;
2253: }
2254: }
2255: n++;
2256: }
2257: DMSetBasicAdjacency(dm, useCone, useClosure);
2258: /* Derive CSR graph from section/segbuffer */
2259: PetscSectionSetUp(section);
2260: PetscSectionGetStorageSize(section, &size);
2261: PetscMalloc1(n+1, &xadj);
2262: for (idx = 0, p = pStart; p < pEnd; p++) {
2263: if (nroots > 0) {if (cellNum[p] < 0) continue;}
2264: PetscSectionGetOffset(section, p, &(xadj[idx++]));
2265: }
2266: xadj[n] = size;
2267: PetscSegBufferExtractAlloc(adjBuffer, &adjncy);
2268: /* Clean up */
2269: PetscSegBufferDestroy(&adjBuffer);
2270: PetscSectionDestroy(§ion);
2271: PetscFree(adj);
2272: graph->xadj = xadj;
2273: graph->adjncy = adjncy;
2274: } else {
2275: Mat A;
2276: PetscBool isseqaij, flg_row;
2278: MatISGetLocalMat(pc->pmat,&A);
2279: if (!A->rmap->N || !A->cmap->N) {
2280: PCBDDCGraphDestroy(&graph);
2281: return(0);
2282: }
2283: PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);
2284: if (!isseqaij && filter) {
2285: PetscBool isseqdense;
2287: PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);
2288: if (!isseqdense) {
2289: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
2290: } else { /* TODO: rectangular case and LDA */
2291: PetscScalar *array;
2292: PetscReal chop=1.e-6;
2294: MatDuplicate(A,MAT_COPY_VALUES,&B);
2295: MatDenseGetArray(B,&array);
2296: MatGetSize(B,&n,NULL);
2297: for (i=0;i<n;i++) {
2298: PetscInt j;
2299: for (j=i+1;j<n;j++) {
2300: PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)]));
2301: if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.;
2302: if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.;
2303: }
2304: }
2305: MatDenseRestoreArray(B,&array);
2306: MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);
2307: }
2308: } else {
2309: PetscObjectReference((PetscObject)A);
2310: B = A;
2311: }
2312: MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
2314: /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2315: if (filter) {
2316: PetscScalar *data;
2317: PetscInt j,cum;
2319: PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);
2320: MatSeqAIJGetArray(B,&data);
2321: cum = 0;
2322: for (i=0;i<n;i++) {
2323: PetscInt t;
2325: for (j=xadj[i];j<xadj[i+1];j++) {
2326: if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) {
2327: continue;
2328: }
2329: adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j];
2330: }
2331: t = xadj_filtered[i];
2332: xadj_filtered[i] = cum;
2333: cum += t;
2334: }
2335: MatSeqAIJRestoreArray(B,&data);
2336: graph->xadj = xadj_filtered;
2337: graph->adjncy = adjncy_filtered;
2338: } else {
2339: graph->xadj = xadj;
2340: graph->adjncy = adjncy;
2341: }
2342: }
2343: /* compute local connected components using PCBDDCGraph */
2344: ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);
2345: ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);
2346: ISDestroy(&is_dummy);
2347: PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);
2348: ISLocalToGlobalMappingDestroy(&l2gmap_dummy);
2349: PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);
2350: PCBDDCGraphComputeConnectedComponents(graph);
2352: /* partial clean up */
2353: PetscFree2(xadj_filtered,adjncy_filtered);
2354: if (B) {
2355: PetscBool flg_row;
2356: MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
2357: MatDestroy(&B);
2358: }
2359: if (isplex) {
2360: PetscFree(xadj);
2361: PetscFree(adjncy);
2362: }
2364: /* get back data */
2365: if (isplex) {
2366: if (ncc) *ncc = graph->ncc;
2367: if (cc || primalv) {
2368: Mat A;
2369: PetscBT btv,btvt;
2370: PetscSection subSection;
2371: PetscInt *ids,cum,cump,*cids,*pids;
2373: DMPlexGetSubdomainSection(dm,&subSection);
2374: MatISGetLocalMat(pc->pmat,&A);
2375: PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);
2376: PetscBTCreate(A->rmap->n,&btv);
2377: PetscBTCreate(A->rmap->n,&btvt);
2379: cids[0] = 0;
2380: for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2381: PetscInt j;
2383: PetscBTMemzero(A->rmap->n,btvt);
2384: for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) {
2385: PetscInt k, size, *closure = NULL, cell = graph->queue[j];
2387: DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2388: for (k = 0; k < 2*size; k += 2) {
2389: PetscInt s, pp, p = closure[k], off, dof, cdof;
2391: PetscSectionGetConstraintDof(subSection,p,&cdof);
2392: PetscSectionGetOffset(subSection,p,&off);
2393: PetscSectionGetDof(subSection,p,&dof);
2394: for (s = 0; s < dof-cdof; s++) {
2395: if (PetscBTLookupSet(btvt,off+s)) continue;
2396: if (!PetscBTLookup(btv,off+s)) {
2397: ids[cum++] = off+s;
2398: } else { /* cross-vertex */
2399: pids[cump++] = off+s;
2400: }
2401: }
2402: DMPlexGetTreeParent(dm,p,&pp,NULL);
2403: if (pp != p) {
2404: PetscSectionGetConstraintDof(subSection,pp,&cdof);
2405: PetscSectionGetOffset(subSection,pp,&off);
2406: PetscSectionGetDof(subSection,pp,&dof);
2407: for (s = 0; s < dof-cdof; s++) {
2408: if (PetscBTLookupSet(btvt,off+s)) continue;
2409: if (!PetscBTLookup(btv,off+s)) {
2410: ids[cum++] = off+s;
2411: } else { /* cross-vertex */
2412: pids[cump++] = off+s;
2413: }
2414: }
2415: }
2416: }
2417: DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2418: }
2419: cids[i+1] = cum;
2420: /* mark dofs as already assigned */
2421: for (j = cids[i]; j < cids[i+1]; j++) {
2422: PetscBTSet(btv,ids[j]);
2423: }
2424: }
2425: if (cc) {
2426: PetscMalloc1(graph->ncc,&cc_n);
2427: for (i = 0; i < graph->ncc; i++) {
2428: ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);
2429: }
2430: *cc = cc_n;
2431: }
2432: if (primalv) {
2433: ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);
2434: }
2435: PetscFree3(ids,cids,pids);
2436: PetscBTDestroy(&btv);
2437: PetscBTDestroy(&btvt);
2438: }
2439: } else {
2440: if (ncc) *ncc = graph->ncc;
2441: if (cc) {
2442: PetscMalloc1(graph->ncc,&cc_n);
2443: for (i=0;i<graph->ncc;i++) {
2444: ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);
2445: }
2446: *cc = cc_n;
2447: }
2448: }
2449: /* clean up graph */
2450: graph->xadj = NULL;
2451: graph->adjncy = NULL;
2452: PCBDDCGraphDestroy(&graph);
2453: return(0);
2454: }
2456: PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2457: {
2458: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
2459: PC_IS* pcis = (PC_IS*)(pc->data);
2460: IS dirIS = NULL;
2461: PetscInt i;
2465: PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);
2466: if (zerodiag) {
2467: Mat A;
2468: Vec vec3_N;
2469: PetscScalar *vals;
2470: const PetscInt *idxs;
2471: PetscInt nz,*count;
2473: /* p0 */
2474: VecSet(pcis->vec1_N,0.);
2475: PetscMalloc1(pcis->n,&vals);
2476: ISGetLocalSize(zerodiag,&nz);
2477: ISGetIndices(zerodiag,&idxs);
2478: for (i=0;i<nz;i++) vals[i] = 1.;
2479: VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);
2480: VecAssemblyBegin(pcis->vec1_N);
2481: VecAssemblyEnd(pcis->vec1_N);
2482: /* v_I */
2483: VecSetRandom(pcis->vec2_N,NULL);
2484: for (i=0;i<nz;i++) vals[i] = 0.;
2485: VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);
2486: ISRestoreIndices(zerodiag,&idxs);
2487: ISGetIndices(pcis->is_B_local,&idxs);
2488: for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
2489: VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);
2490: ISRestoreIndices(pcis->is_B_local,&idxs);
2491: if (dirIS) {
2492: PetscInt n;
2494: ISGetLocalSize(dirIS,&n);
2495: ISGetIndices(dirIS,&idxs);
2496: for (i=0;i<n;i++) vals[i] = 0.;
2497: VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);
2498: ISRestoreIndices(dirIS,&idxs);
2499: }
2500: VecAssemblyBegin(pcis->vec2_N);
2501: VecAssemblyEnd(pcis->vec2_N);
2502: VecDuplicate(pcis->vec1_N,&vec3_N);
2503: VecSet(vec3_N,0.);
2504: MatISGetLocalMat(pc->pmat,&A);
2505: MatMult(A,pcis->vec1_N,vec3_N);
2506: VecDot(vec3_N,pcis->vec2_N,&vals[0]);
2507: if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0]));
2508: PetscFree(vals);
2509: VecDestroy(&vec3_N);
2511: /* there should not be any pressure dofs lying on the interface */
2512: PetscCalloc1(pcis->n,&count);
2513: ISGetIndices(pcis->is_B_local,&idxs);
2514: for (i=0;i<pcis->n_B;i++) count[idxs[i]]++;
2515: ISRestoreIndices(pcis->is_B_local,&idxs);
2516: ISGetIndices(zerodiag,&idxs);
2517: for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %D is an interface dof",idxs[i]);
2518: ISRestoreIndices(zerodiag,&idxs);
2519: PetscFree(count);
2520: }
2521: ISDestroy(&dirIS);
2523: /* check PCBDDCBenignGetOrSetP0 */
2524: VecSetRandom(pcis->vec1_global,NULL);
2525: for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i;
2526: PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);
2527: for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1;
2528: PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);
2529: for (i=0;i<pcbddc->benign_n;i++) {
2530: PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
2531: if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %D instead of %g",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);
2532: }
2533: return(0);
2534: }
2536: PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal)
2537: {
2538: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
2539: IS pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs;
2540: PetscInt nz,n,benign_n,bsp = 1;
2541: PetscInt *interior_dofs,n_interior_dofs,nneu;
2542: PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb;
2546: if (reuse) goto project_b0;
2547: PetscSFDestroy(&pcbddc->benign_sf);
2548: MatDestroy(&pcbddc->benign_B0);
2549: for (n=0;n<pcbddc->benign_n;n++) {
2550: ISDestroy(&pcbddc->benign_zerodiag_subs[n]);
2551: }
2552: PetscFree(pcbddc->benign_zerodiag_subs);
2553: has_null_pressures = PETSC_TRUE;
2554: have_null = PETSC_TRUE;
2555: /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided)
2556: Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field)
2557: Checks if all the pressure dofs in each subdomain have a zero diagonal
2558: If not, a change of basis on pressures is not needed
2559: since the local Schur complements are already SPD
2560: */
2561: if (pcbddc->n_ISForDofsLocal) {
2562: IS iP = NULL;
2563: PetscInt p,*pp;
2564: PetscBool flg;
2566: PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp);
2567: n = pcbddc->n_ISForDofsLocal;
2568: PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");
2569: PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg);
2570: PetscOptionsEnd();
2571: if (!flg) {
2572: n = 1;
2573: pp[0] = pcbddc->n_ISForDofsLocal-1;
2574: }
2576: bsp = 0;
2577: for (p=0;p<n;p++) {
2578: PetscInt bs;
2580: if (pp[p] < 0 || pp[p] > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",pp[p]);
2581: ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);
2582: bsp += bs;
2583: }
2584: PetscMalloc1(bsp,&bzerodiag);
2585: bsp = 0;
2586: for (p=0;p<n;p++) {
2587: const PetscInt *idxs;
2588: PetscInt b,bs,npl,*bidxs;
2590: ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);
2591: ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl);
2592: ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);
2593: PetscMalloc1(npl/bs,&bidxs);
2594: for (b=0;b<bs;b++) {
2595: PetscInt i;
2597: for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b];
2598: ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp]);
2599: bsp++;
2600: }
2601: PetscFree(bidxs);
2602: ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);
2603: }
2604: ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures);
2606: /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2607: PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);
2608: if (iP) {
2609: IS newpressures;
2611: ISDifference(pressures,iP,&newpressures);
2612: ISDestroy(&pressures);
2613: pressures = newpressures;
2614: }
2615: ISSorted(pressures,&sorted);
2616: if (!sorted) {
2617: ISSort(pressures);
2618: }
2619: PetscFree(pp);
2620: }
2622: /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2623: MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2624: if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2625: MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);
2626: ISSorted(zerodiag,&sorted);
2627: if (!sorted) {
2628: ISSort(zerodiag);
2629: }
2630: PetscObjectReference((PetscObject)zerodiag);
2631: zerodiag_save = zerodiag;
2632: ISGetLocalSize(zerodiag,&nz);
2633: if (!nz) {
2634: if (n) have_null = PETSC_FALSE;
2635: has_null_pressures = PETSC_FALSE;
2636: ISDestroy(&zerodiag);
2637: }
2638: recompute_zerodiag = PETSC_FALSE;
2640: /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2641: zerodiag_subs = NULL;
2642: benign_n = 0;
2643: n_interior_dofs = 0;
2644: interior_dofs = NULL;
2645: nneu = 0;
2646: if (pcbddc->NeumannBoundariesLocal) {
2647: ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);
2648: }
2649: checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2650: if (checkb) { /* need to compute interior nodes */
2651: PetscInt n,i,j;
2652: PetscInt n_neigh,*neigh,*n_shared,**shared;
2653: PetscInt *iwork;
2655: ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);
2656: ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2657: PetscCalloc1(n,&iwork);
2658: PetscMalloc1(n,&interior_dofs);
2659: for (i=1;i<n_neigh;i++)
2660: for (j=0;j<n_shared[i];j++)
2661: iwork[shared[i][j]] += 1;
2662: for (i=0;i<n;i++)
2663: if (!iwork[i])
2664: interior_dofs[n_interior_dofs++] = i;
2665: PetscFree(iwork);
2666: ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2667: }
2668: if (has_null_pressures) {
2669: IS *subs;
2670: PetscInt nsubs,i,j,nl;
2671: const PetscInt *idxs;
2672: PetscScalar *array;
2673: Vec *work;
2674: Mat_IS* matis = (Mat_IS*)(pc->pmat->data);
2676: subs = pcbddc->local_subs;
2677: nsubs = pcbddc->n_local_subs;
2678: /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2679: if (checkb) {
2680: VecDuplicateVecs(matis->y,2,&work);
2681: ISGetLocalSize(zerodiag,&nl);
2682: ISGetIndices(zerodiag,&idxs);
2683: /* work[0] = 1_p */
2684: VecSet(work[0],0.);
2685: VecGetArray(work[0],&array);
2686: for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2687: VecRestoreArray(work[0],&array);
2688: /* work[0] = 1_v */
2689: VecSet(work[1],1.);
2690: VecGetArray(work[1],&array);
2691: for (j=0;j<nl;j++) array[idxs[j]] = 0.;
2692: VecRestoreArray(work[1],&array);
2693: ISRestoreIndices(zerodiag,&idxs);
2694: }
2696: if (nsubs > 1 || bsp > 1) {
2697: IS *is;
2698: PetscInt b,totb;
2700: totb = bsp;
2701: is = bsp > 1 ? bzerodiag : &zerodiag;
2702: nsubs = PetscMax(nsubs,1);
2703: PetscCalloc1(nsubs*totb,&zerodiag_subs);
2704: for (b=0;b<totb;b++) {
2705: for (i=0;i<nsubs;i++) {
2706: ISLocalToGlobalMapping l2g;
2707: IS t_zerodiag_subs;
2708: PetscInt nl;
2710: if (subs) {
2711: ISLocalToGlobalMappingCreateIS(subs[i],&l2g);
2712: } else {
2713: IS tis;
2715: MatGetLocalSize(pcbddc->local_mat,&nl,NULL);
2716: ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis);
2717: ISLocalToGlobalMappingCreateIS(tis,&l2g);
2718: ISDestroy(&tis);
2719: }
2720: ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs);
2721: ISGetLocalSize(t_zerodiag_subs,&nl);
2722: if (nl) {
2723: PetscBool valid = PETSC_TRUE;
2725: if (checkb) {
2726: VecSet(matis->x,0);
2727: ISGetLocalSize(subs[i],&nl);
2728: ISGetIndices(subs[i],&idxs);
2729: VecGetArray(matis->x,&array);
2730: for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2731: VecRestoreArray(matis->x,&array);
2732: ISRestoreIndices(subs[i],&idxs);
2733: VecPointwiseMult(matis->x,work[0],matis->x);
2734: MatMult(matis->A,matis->x,matis->y);
2735: VecPointwiseMult(matis->y,work[1],matis->y);
2736: VecGetArray(matis->y,&array);
2737: for (j=0;j<n_interior_dofs;j++) {
2738: if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2739: valid = PETSC_FALSE;
2740: break;
2741: }
2742: }
2743: VecRestoreArray(matis->y,&array);
2744: }
2745: if (valid && nneu) {
2746: const PetscInt *idxs;
2747: PetscInt nzb;
2749: ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2750: ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);
2751: ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2752: if (nzb) valid = PETSC_FALSE;
2753: }
2754: if (valid && pressures) {
2755: IS t_pressure_subs,tmp;
2756: PetscInt i1,i2;
2758: ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);
2759: ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp);
2760: ISGetLocalSize(tmp,&i1);
2761: ISGetLocalSize(t_zerodiag_subs,&i2);
2762: if (i2 != i1) valid = PETSC_FALSE;
2763: ISDestroy(&t_pressure_subs);
2764: ISDestroy(&tmp);
2765: }
2766: if (valid) {
2767: ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n]);
2768: benign_n++;
2769: } else recompute_zerodiag = PETSC_TRUE;
2770: }
2771: ISDestroy(&t_zerodiag_subs);
2772: ISLocalToGlobalMappingDestroy(&l2g);
2773: }
2774: }
2775: } else { /* there's just one subdomain (or zero if they have not been detected */
2776: PetscBool valid = PETSC_TRUE;
2778: if (nneu) valid = PETSC_FALSE;
2779: if (valid && pressures) {
2780: ISEqual(pressures,zerodiag,&valid);
2781: }
2782: if (valid && checkb) {
2783: MatMult(matis->A,work[0],matis->x);
2784: VecPointwiseMult(matis->x,work[1],matis->x);
2785: VecGetArray(matis->x,&array);
2786: for (j=0;j<n_interior_dofs;j++) {
2787: if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2788: valid = PETSC_FALSE;
2789: break;
2790: }
2791: }
2792: VecRestoreArray(matis->x,&array);
2793: }
2794: if (valid) {
2795: benign_n = 1;
2796: PetscMalloc1(benign_n,&zerodiag_subs);
2797: PetscObjectReference((PetscObject)zerodiag);
2798: zerodiag_subs[0] = zerodiag;
2799: }
2800: }
2801: if (checkb) {
2802: VecDestroyVecs(2,&work);
2803: }
2804: }
2805: PetscFree(interior_dofs);
2807: if (!benign_n) {
2808: PetscInt n;
2810: ISDestroy(&zerodiag);
2811: recompute_zerodiag = PETSC_FALSE;
2812: MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2813: if (n) have_null = PETSC_FALSE;
2814: }
2816: /* final check for null pressures */
2817: if (zerodiag && pressures) {
2818: ISEqual(pressures,zerodiag,&have_null);
2819: }
2821: if (recompute_zerodiag) {
2822: ISDestroy(&zerodiag);
2823: if (benign_n == 1) {
2824: PetscObjectReference((PetscObject)zerodiag_subs[0]);
2825: zerodiag = zerodiag_subs[0];
2826: } else {
2827: PetscInt i,nzn,*new_idxs;
2829: nzn = 0;
2830: for (i=0;i<benign_n;i++) {
2831: PetscInt ns;
2832: ISGetLocalSize(zerodiag_subs[i],&ns);
2833: nzn += ns;
2834: }
2835: PetscMalloc1(nzn,&new_idxs);
2836: nzn = 0;
2837: for (i=0;i<benign_n;i++) {
2838: PetscInt ns,*idxs;
2839: ISGetLocalSize(zerodiag_subs[i],&ns);
2840: ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2841: PetscArraycpy(new_idxs+nzn,idxs,ns);
2842: ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2843: nzn += ns;
2844: }
2845: PetscSortInt(nzn,new_idxs);
2846: ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);
2847: }
2848: have_null = PETSC_FALSE;
2849: }
2851: /* determines if the coarse solver will be singular or not */
2852: MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
2854: /* Prepare matrix to compute no-net-flux */
2855: if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2856: Mat A,loc_divudotp;
2857: ISLocalToGlobalMapping rl2g,cl2g,l2gmap;
2858: IS row,col,isused = NULL;
2859: PetscInt M,N,n,st,n_isused;
2861: if (pressures) {
2862: isused = pressures;
2863: } else {
2864: isused = zerodiag_save;
2865: }
2866: MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);
2867: MatISGetLocalMat(pc->pmat,&A);
2868: MatGetLocalSize(A,&n,NULL);
2869: if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field");
2870: n_isused = 0;
2871: if (isused) {
2872: ISGetLocalSize(isused,&n_isused);
2873: }
2874: MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
2875: st = st-n_isused;
2876: if (n) {
2877: const PetscInt *gidxs;
2879: MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);
2880: ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
2881: /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2882: ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2883: ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);
2884: ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
2885: } else {
2886: MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);
2887: ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2888: ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);
2889: }
2890: MatGetSize(pc->pmat,NULL,&N);
2891: ISGetSize(row,&M);
2892: ISLocalToGlobalMappingCreateIS(row,&rl2g);
2893: ISLocalToGlobalMappingCreateIS(col,&cl2g);
2894: ISDestroy(&row);
2895: ISDestroy(&col);
2896: MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);
2897: MatSetType(pcbddc->divudotp,MATIS);
2898: MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);
2899: MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);
2900: ISLocalToGlobalMappingDestroy(&rl2g);
2901: ISLocalToGlobalMappingDestroy(&cl2g);
2902: MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);
2903: MatDestroy(&loc_divudotp);
2904: MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2905: MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2906: }
2907: ISDestroy(&zerodiag_save);
2908: ISDestroy(&pressures);
2909: if (bzerodiag) {
2910: PetscInt i;
2912: for (i=0;i<bsp;i++) {
2913: ISDestroy(&bzerodiag[i]);
2914: }
2915: PetscFree(bzerodiag);
2916: }
2917: pcbddc->benign_n = benign_n;
2918: pcbddc->benign_zerodiag_subs = zerodiag_subs;
2920: /* determines if the problem has subdomains with 0 pressure block */
2921: have_null = (PetscBool)(!!pcbddc->benign_n);
2922: MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
2924: project_b0:
2925: MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2926: /* change of basis and p0 dofs */
2927: if (pcbddc->benign_n) {
2928: PetscInt i,s,*nnz;
2930: /* local change of basis for pressures */
2931: MatDestroy(&pcbddc->benign_change);
2932: MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);
2933: MatSetType(pcbddc->benign_change,MATAIJ);
2934: MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);
2935: PetscMalloc1(n,&nnz);
2936: for (i=0;i<n;i++) nnz[i] = 1; /* defaults to identity */
2937: for (i=0;i<pcbddc->benign_n;i++) {
2938: const PetscInt *idxs;
2939: PetscInt nzs,j;
2941: ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs);
2942: ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs);
2943: for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */
2944: nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */
2945: ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs);
2946: }
2947: MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);
2948: MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
2949: PetscFree(nnz);
2950: /* set identity by default */
2951: for (i=0;i<n;i++) {
2952: MatSetValue(pcbddc->benign_change,i,i,1.,INSERT_VALUES);
2953: }
2954: PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
2955: PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);
2956: /* set change on pressures */
2957: for (s=0;s<pcbddc->benign_n;s++) {
2958: PetscScalar *array;
2959: const PetscInt *idxs;
2960: PetscInt nzs;
2962: ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs);
2963: ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs);
2964: for (i=0;i<nzs-1;i++) {
2965: PetscScalar vals[2];
2966: PetscInt cols[2];
2968: cols[0] = idxs[i];
2969: cols[1] = idxs[nzs-1];
2970: vals[0] = 1.;
2971: vals[1] = 1.;
2972: MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);
2973: }
2974: PetscMalloc1(nzs,&array);
2975: for (i=0;i<nzs-1;i++) array[i] = -1.;
2976: array[nzs-1] = 1.;
2977: MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);
2978: /* store local idxs for p0 */
2979: pcbddc->benign_p0_lidx[s] = idxs[nzs-1];
2980: ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs);
2981: PetscFree(array);
2982: }
2983: MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2984: MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2986: /* project if needed */
2987: if (pcbddc->benign_change_explicit) {
2988: Mat M;
2990: MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);
2991: MatDestroy(&pcbddc->local_mat);
2992: MatSeqAIJCompress(M,&pcbddc->local_mat);
2993: MatDestroy(&M);
2994: }
2995: /* store global idxs for p0 */
2996: ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);
2997: }
2998: *zerodiaglocal = zerodiag;
2999: return(0);
3000: }
3002: PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
3003: {
3004: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
3005: PetscScalar *array;
3009: if (!pcbddc->benign_sf) {
3010: PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);
3011: PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);
3012: }
3013: if (get) {
3014: VecGetArrayRead(v,(const PetscScalar**)&array);
3015: PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE);
3016: PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0,MPI_REPLACE);
3017: VecRestoreArrayRead(v,(const PetscScalar**)&array);
3018: } else {
3019: VecGetArray(v,&array);
3020: PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE);
3021: PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPI_REPLACE);
3022: VecRestoreArray(v,&array);
3023: }
3024: return(0);
3025: }
3027: PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
3028: {
3029: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
3033: /* TODO: add error checking
3034: - avoid nested pop (or push) calls.
3035: - cannot push before pop.
3036: - cannot call this if pcbddc->local_mat is NULL
3037: */
3038: if (!pcbddc->benign_n) {
3039: return(0);
3040: }
3041: if (pop) {
3042: if (pcbddc->benign_change_explicit) {
3043: IS is_p0;
3044: MatReuse reuse;
3046: /* extract B_0 */
3047: reuse = MAT_INITIAL_MATRIX;
3048: if (pcbddc->benign_B0) {
3049: reuse = MAT_REUSE_MATRIX;
3050: }
3051: ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);
3052: MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);
3053: /* remove rows and cols from local problem */
3054: MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);
3055: MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
3056: MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);
3057: ISDestroy(&is_p0);
3058: } else {
3059: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
3060: PetscScalar *vals;
3061: PetscInt i,n,*idxs_ins;
3063: VecGetLocalSize(matis->y,&n);
3064: PetscMalloc2(n,&idxs_ins,n,&vals);
3065: if (!pcbddc->benign_B0) {
3066: PetscInt *nnz;
3067: MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);
3068: MatSetType(pcbddc->benign_B0,MATAIJ);
3069: MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);
3070: PetscMalloc1(pcbddc->benign_n,&nnz);
3071: for (i=0;i<pcbddc->benign_n;i++) {
3072: ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);
3073: nnz[i] = n - nnz[i];
3074: }
3075: MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);
3076: MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
3077: PetscFree(nnz);
3078: }
3080: for (i=0;i<pcbddc->benign_n;i++) {
3081: PetscScalar *array;
3082: PetscInt *idxs,j,nz,cum;
3084: VecSet(matis->x,0.);
3085: ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);
3086: ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
3087: for (j=0;j<nz;j++) vals[j] = 1.;
3088: VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);
3089: VecAssemblyBegin(matis->x);
3090: VecAssemblyEnd(matis->x);
3091: VecSet(matis->y,0.);
3092: MatMult(matis->A,matis->x,matis->y);
3093: VecGetArray(matis->y,&array);
3094: cum = 0;
3095: for (j=0;j<n;j++) {
3096: if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
3097: vals[cum] = array[j];
3098: idxs_ins[cum] = j;
3099: cum++;
3100: }
3101: }
3102: MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);
3103: VecRestoreArray(matis->y,&array);
3104: ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
3105: }
3106: MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
3107: MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
3108: PetscFree2(idxs_ins,vals);
3109: }
3110: } else { /* push */
3111: if (pcbddc->benign_change_explicit) {
3112: PetscInt i;
3114: for (i=0;i<pcbddc->benign_n;i++) {
3115: PetscScalar *B0_vals;
3116: PetscInt *B0_cols,B0_ncol;
3118: MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
3119: MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);
3120: MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);
3121: MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);
3122: MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
3123: }
3124: MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
3125: MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
3126: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!");
3127: }
3128: return(0);
3129: }
3131: PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
3132: {
3133: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
3134: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3135: PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork;
3136: PetscBLASInt *B_iwork,*B_ifail;
3137: PetscScalar *work,lwork;
3138: PetscScalar *St,*S,*eigv;
3139: PetscScalar *Sarray,*Starray;
3140: PetscReal *eigs,thresh,lthresh,uthresh;
3141: PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
3142: PetscBool allocated_S_St;
3143: #if defined(PETSC_USE_COMPLEX)
3144: PetscReal *rwork;
3145: #endif
3146: PetscErrorCode ierr;
3149: if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data");
3150: if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
3151: if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef);
3152: PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);
3154: if (pcbddc->dbg_flag) {
3155: PetscViewerFlush(pcbddc->dbg_viewer);
3156: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
3157: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");
3158: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
3159: }
3161: if (pcbddc->dbg_flag) {
3162: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %D (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
3163: }
3165: /* max size of subsets */
3166: mss = 0;
3167: for (i=0;i<sub_schurs->n_subs;i++) {
3168: PetscInt subset_size;
3170: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3171: mss = PetscMax(mss,subset_size);
3172: }
3174: /* min/max and threshold */
3175: nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3176: nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3177: nmax = PetscMax(nmin,nmax);
3178: allocated_S_St = PETSC_FALSE;
3179: if (nmin || !sub_schurs->is_posdef) { /* XXX */
3180: allocated_S_St = PETSC_TRUE;
3181: }
3183: /* allocate lapack workspace */
3184: cum = cum2 = 0;
3185: maxneigs = 0;
3186: for (i=0;i<sub_schurs->n_subs;i++) {
3187: PetscInt n,subset_size;
3189: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3190: n = PetscMin(subset_size,nmax);
3191: cum += subset_size;
3192: cum2 += subset_size*n;
3193: maxneigs = PetscMax(maxneigs,n);
3194: }
3195: lwork = 0;
3196: if (mss) {
3197: if (sub_schurs->is_symmetric) {
3198: PetscScalar sdummy = 0.;
3199: PetscBLASInt B_itype = 1;
3200: PetscBLASInt B_N = mss, idummy = 0;
3201: PetscReal rdummy = 0.,zero = 0.0;
3202: PetscReal eps = 0.0; /* dlamch? */
3204: B_lwork = -1;
3205: /* some implementations may complain about NULL pointers, even if we are querying */
3206: S = &sdummy;
3207: St = &sdummy;
3208: eigs = &rdummy;
3209: eigv = &sdummy;
3210: B_iwork = &idummy;
3211: B_ifail = &idummy;
3212: #if defined(PETSC_USE_COMPLEX)
3213: rwork = &rdummy;
3214: #endif
3215: thresh = 1.0;
3216: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3217: #if defined(PETSC_USE_COMPLEX)
3218: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3219: #else
3220: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
3221: #endif
3222: if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
3223: PetscFPTrapPop();
3224: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3225: }
3227: nv = 0;
3228: if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3229: ISGetLocalSize(sub_schurs->is_vertices,&nv);
3230: }
3231: PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
3232: if (allocated_S_St) {
3233: PetscMalloc2(mss*mss,&S,mss*mss,&St);
3234: }
3235: PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);
3236: #if defined(PETSC_USE_COMPLEX)
3237: PetscMalloc1(7*mss,&rwork);
3238: #endif
3239: PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
3240: nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
3241: nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
3242: nv+cum,&pcbddc->adaptive_constraints_idxs,
3243: nv+cum2,&pcbddc->adaptive_constraints_data);
3244: PetscArrayzero(pcbddc->adaptive_constraints_n,nv+sub_schurs->n_subs);
3246: maxneigs = 0;
3247: cum = cumarray = 0;
3248: pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3249: pcbddc->adaptive_constraints_data_ptr[0] = 0;
3250: if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3251: const PetscInt *idxs;
3253: ISGetIndices(sub_schurs->is_vertices,&idxs);
3254: for (cum=0;cum<nv;cum++) {
3255: pcbddc->adaptive_constraints_n[cum] = 1;
3256: pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
3257: pcbddc->adaptive_constraints_data[cum] = 1.0;
3258: pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
3259: pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
3260: }
3261: ISRestoreIndices(sub_schurs->is_vertices,&idxs);
3262: }
3264: if (mss) { /* multilevel */
3265: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3266: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3267: }
3269: lthresh = pcbddc->adaptive_threshold[0];
3270: uthresh = pcbddc->adaptive_threshold[1];
3271: for (i=0;i<sub_schurs->n_subs;i++) {
3272: const PetscInt *idxs;
3273: PetscReal upper,lower;
3274: PetscInt j,subset_size,eigs_start = 0;
3275: PetscBLASInt B_N;
3276: PetscBool same_data = PETSC_FALSE;
3277: PetscBool scal = PETSC_FALSE;
3279: if (pcbddc->use_deluxe_scaling) {
3280: upper = PETSC_MAX_REAL;
3281: lower = uthresh;
3282: } else {
3283: if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling");
3284: upper = 1./uthresh;
3285: lower = 0.;
3286: }
3287: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3288: ISGetIndices(sub_schurs->is_subs[i],&idxs);
3289: PetscBLASIntCast(subset_size,&B_N);
3290: /* this is experimental: we assume the dofs have been properly grouped to have
3291: the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3292: if (!sub_schurs->is_posdef) {
3293: Mat T;
3295: for (j=0;j<subset_size;j++) {
3296: if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) {
3297: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);
3298: MatScale(T,-1.0);
3299: MatDestroy(&T);
3300: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);
3301: MatScale(T,-1.0);
3302: MatDestroy(&T);
3303: if (sub_schurs->change_primal_sub) {
3304: PetscInt nz,k;
3305: const PetscInt *idxs;
3307: ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);
3308: ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);
3309: for (k=0;k<nz;k++) {
3310: *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0;
3311: *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0;
3312: }
3313: ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);
3314: }
3315: scal = PETSC_TRUE;
3316: break;
3317: }
3318: }
3319: }
3321: if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3322: if (sub_schurs->is_symmetric) {
3323: PetscInt j,k;
3324: if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */
3325: PetscArrayzero(S,subset_size*subset_size);
3326: PetscArrayzero(St,subset_size*subset_size);
3327: }
3328: for (j=0;j<subset_size;j++) {
3329: for (k=j;k<subset_size;k++) {
3330: S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3331: St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3332: }
3333: }
3334: } else {
3335: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3336: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3337: }
3338: } else {
3339: S = Sarray + cumarray;
3340: St = Starray + cumarray;
3341: }
3342: /* see if we can save some work */
3343: if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) {
3344: PetscArraycmp(S,St,subset_size*subset_size,&same_data);
3345: }
3347: if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3348: B_neigs = 0;
3349: } else {
3350: if (sub_schurs->is_symmetric) {
3351: PetscBLASInt B_itype = 1;
3352: PetscBLASInt B_IL, B_IU;
3353: PetscReal eps = -1.0; /* dlamch? */
3354: PetscInt nmin_s;
3355: PetscBool compute_range;
3357: B_neigs = 0;
3358: compute_range = (PetscBool)!same_data;
3359: if (nmin >= subset_size) compute_range = PETSC_FALSE;
3361: if (pcbddc->dbg_flag) {
3362: PetscInt nc = 0;
3364: if (sub_schurs->change_primal_sub) {
3365: ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);
3366: }
3367: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %D/%D size %D count %D fid %D (range %d) (change %D).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);
3368: }
3370: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3371: if (compute_range) {
3373: /* ask for eigenvalues larger than thresh */
3374: if (sub_schurs->is_posdef) {
3375: #if defined(PETSC_USE_COMPLEX)
3376: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3377: #else
3378: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3379: #endif
3380: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3381: } else { /* no theory so far, but it works nicely */
3382: PetscInt recipe = 0,recipe_m = 1;
3383: PetscReal bb[2];
3385: PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);
3386: switch (recipe) {
3387: case 0:
3388: if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; }
3389: else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; }
3390: #if defined(PETSC_USE_COMPLEX)
3391: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3392: #else
3393: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3394: #endif
3395: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3396: break;
3397: case 1:
3398: bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh;
3399: #if defined(PETSC_USE_COMPLEX)
3400: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3401: #else
3402: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3403: #endif
3404: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3405: if (!scal) {
3406: PetscBLASInt B_neigs2 = 0;
3408: bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL;
3409: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3410: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3411: #if defined(PETSC_USE_COMPLEX)
3412: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3413: #else
3414: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3415: #endif
3416: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3417: B_neigs += B_neigs2;
3418: }
3419: break;
3420: case 2:
3421: if (scal) {
3422: bb[0] = PETSC_MIN_REAL;
3423: bb[1] = 0;
3424: #if defined(PETSC_USE_COMPLEX)
3425: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3426: #else
3427: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3428: #endif
3429: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3430: } else {
3431: PetscBLASInt B_neigs2 = 0;
3432: PetscBool import = PETSC_FALSE;
3434: lthresh = PetscMax(lthresh,0.0);
3435: if (lthresh > 0.0) {
3436: bb[0] = PETSC_MIN_REAL;
3437: bb[1] = lthresh*lthresh;
3439: import = PETSC_TRUE;
3440: #if defined(PETSC_USE_COMPLEX)
3441: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3442: #else
3443: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3444: #endif
3445: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3446: }
3447: bb[0] = PetscMax(lthresh*lthresh,uthresh);
3448: bb[1] = PETSC_MAX_REAL;
3449: if (import) {
3450: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3451: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3452: }
3453: #if defined(PETSC_USE_COMPLEX)
3454: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3455: #else
3456: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3457: #endif
3458: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3459: B_neigs += B_neigs2;
3460: }
3461: break;
3462: case 3:
3463: if (scal) {
3464: PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);
3465: } else {
3466: PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);
3467: }
3468: if (!scal) {
3469: bb[0] = uthresh;
3470: bb[1] = PETSC_MAX_REAL;
3471: #if defined(PETSC_USE_COMPLEX)
3472: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3473: #else
3474: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3475: #endif
3476: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3477: }
3478: if (recipe_m > 0 && B_N - B_neigs > 0) {
3479: PetscBLASInt B_neigs2 = 0;
3481: B_IL = 1;
3482: PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);
3483: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3484: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3485: #if defined(PETSC_USE_COMPLEX)
3486: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3487: #else
3488: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3489: #endif
3490: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3491: B_neigs += B_neigs2;
3492: }
3493: break;
3494: case 4:
3495: bb[0] = PETSC_MIN_REAL; bb[1] = lthresh;
3496: #if defined(PETSC_USE_COMPLEX)
3497: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3498: #else
3499: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3500: #endif
3501: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3502: {
3503: PetscBLASInt B_neigs2 = 0;
3505: bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL;
3506: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3507: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3508: #if defined(PETSC_USE_COMPLEX)
3509: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3510: #else
3511: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3512: #endif
3513: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3514: B_neigs += B_neigs2;
3515: }
3516: break;
3517: case 5: /* same as before: first compute all eigenvalues, then filter */
3518: #if defined(PETSC_USE_COMPLEX)
3519: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3520: #else
3521: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3522: #endif
3523: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3524: {
3525: PetscInt e,k,ne;
3526: for (e=0,ne=0;e<B_neigs;e++) {
3527: if (eigs[e] < lthresh || eigs[e] > uthresh) {
3528: for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k];
3529: eigs[ne] = eigs[e];
3530: ne++;
3531: }
3532: }
3533: PetscArraycpy(eigv,S,B_N*ne);
3534: B_neigs = ne;
3535: }
3536: break;
3537: default:
3538: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe);
3539: }
3540: }
3541: } else if (!same_data) { /* this is just to see all the eigenvalues */
3542: B_IU = PetscMax(1,PetscMin(B_N,nmax));
3543: B_IL = 1;
3544: #if defined(PETSC_USE_COMPLEX)
3545: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3546: #else
3547: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3548: #endif
3549: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3550: } else { /* same_data is true, so just get the adaptive functional requested by the user */
3551: PetscInt k;
3552: if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
3553: ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);
3554: PetscBLASIntCast(nmax,&B_neigs);
3555: nmin = nmax;
3556: PetscArrayzero(eigv,subset_size*nmax);
3557: for (k=0;k<nmax;k++) {
3558: eigs[k] = 1./PETSC_SMALL;
3559: eigv[k*(subset_size+1)] = 1.0;
3560: }
3561: }
3562: PetscFPTrapPop();
3563: if (B_ierr) {
3564: if (B_ierr < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3565: else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3566: else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3567: }
3569: if (B_neigs > nmax) {
3570: if (pcbddc->dbg_flag) {
3571: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %D.\n",B_neigs,nmax);
3572: }
3573: if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax;
3574: B_neigs = nmax;
3575: }
3577: nmin_s = PetscMin(nmin,B_N);
3578: if (B_neigs < nmin_s) {
3579: PetscBLASInt B_neigs2 = 0;
3581: if (pcbddc->use_deluxe_scaling) {
3582: if (scal) {
3583: B_IU = nmin_s;
3584: B_IL = B_neigs + 1;
3585: } else {
3586: B_IL = B_N - nmin_s + 1;
3587: B_IU = B_N - B_neigs;
3588: }
3589: } else {
3590: B_IL = B_neigs + 1;
3591: B_IU = nmin_s;
3592: }
3593: if (pcbddc->dbg_flag) {
3594: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %D. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
3595: }
3596: if (sub_schurs->is_symmetric) {
3597: PetscInt j,k;
3598: for (j=0;j<subset_size;j++) {
3599: for (k=j;k<subset_size;k++) {
3600: S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3601: St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3602: }
3603: }
3604: } else {
3605: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3606: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3607: }
3608: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3609: #if defined(PETSC_USE_COMPLEX)
3610: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3611: #else
3612: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3613: #endif
3614: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3615: PetscFPTrapPop();
3616: B_neigs += B_neigs2;
3617: }
3618: if (B_ierr) {
3619: if (B_ierr < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3620: else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3621: else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3622: }
3623: if (pcbddc->dbg_flag) {
3624: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);
3625: for (j=0;j<B_neigs;j++) {
3626: if (eigs[j] == 0.0) {
3627: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");
3628: } else {
3629: if (pcbddc->use_deluxe_scaling) {
3630: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);
3631: } else {
3632: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);
3633: }
3634: }
3635: }
3636: }
3637: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3638: }
3639: /* change the basis back to the original one */
3640: if (sub_schurs->change) {
3641: Mat change,phi,phit;
3643: if (pcbddc->dbg_flag > 2) {
3644: PetscInt ii;
3645: for (ii=0;ii<B_neigs;ii++) {
3646: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);
3647: for (j=0;j<B_N;j++) {
3648: #if defined(PETSC_USE_COMPLEX)
3649: PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]);
3650: PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]);
3651: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);
3652: #else
3653: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);
3654: #endif
3655: }
3656: }
3657: }
3658: KSPGetOperators(sub_schurs->change[i],&change,NULL);
3659: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);
3660: MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);
3661: MatCopy(phi,phit,SAME_NONZERO_PATTERN);
3662: MatDestroy(&phit);
3663: MatDestroy(&phi);
3664: }
3665: maxneigs = PetscMax(B_neigs,maxneigs);
3666: pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
3667: if (B_neigs) {
3668: PetscArraycpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size);
3670: if (pcbddc->dbg_flag > 1) {
3671: PetscInt ii;
3672: for (ii=0;ii<B_neigs;ii++) {
3673: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
3674: for (j=0;j<B_N;j++) {
3675: #if defined(PETSC_USE_COMPLEX)
3676: PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3677: PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3678: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);
3679: #else
3680: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3681: #endif
3682: }
3683: }
3684: }
3685: PetscArraycpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size);
3686: pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3687: pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
3688: cum++;
3689: }
3690: ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
3691: /* shift for next computation */
3692: cumarray += subset_size*subset_size;
3693: }
3694: if (pcbddc->dbg_flag) {
3695: PetscViewerFlush(pcbddc->dbg_viewer);
3696: }
3698: if (mss) {
3699: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3700: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3701: /* destroy matrices (junk) */
3702: MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
3703: MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
3704: }
3705: if (allocated_S_St) {
3706: PetscFree2(S,St);
3707: }
3708: PetscFree5(eigv,eigs,work,B_iwork,B_ifail);
3709: #if defined(PETSC_USE_COMPLEX)
3710: PetscFree(rwork);
3711: #endif
3712: if (pcbddc->dbg_flag) {
3713: PetscInt maxneigs_r;
3714: MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
3715: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %D\n",maxneigs_r);
3716: }
3717: PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);
3718: return(0);
3719: }
3721: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3722: {
3723: PetscScalar *coarse_submat_vals;
3727: /* Setup local scatters R_to_B and (optionally) R_to_D */
3728: /* PCBDDCSetUpLocalWorkVectors should be called first! */
3729: PCBDDCSetUpLocalScatters(pc);
3731: /* Setup local neumann solver ksp_R */
3732: /* PCBDDCSetUpLocalScatters should be called first! */
3733: PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);
3735: /*
3736: Setup local correction and local part of coarse basis.
3737: Gives back the dense local part of the coarse matrix in column major ordering
3738: */
3739: PCBDDCSetUpCorrection(pc,&coarse_submat_vals);
3741: /* Compute total number of coarse nodes and setup coarse solver */
3742: PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);
3744: /* free */
3745: PetscFree(coarse_submat_vals);
3746: return(0);
3747: }
3749: PetscErrorCode PCBDDCResetCustomization(PC pc)
3750: {
3751: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3755: ISDestroy(&pcbddc->user_primal_vertices);
3756: ISDestroy(&pcbddc->user_primal_vertices_local);
3757: ISDestroy(&pcbddc->NeumannBoundaries);
3758: ISDestroy(&pcbddc->NeumannBoundariesLocal);
3759: ISDestroy(&pcbddc->DirichletBoundaries);
3760: MatNullSpaceDestroy(&pcbddc->onearnullspace);
3761: PetscFree(pcbddc->onearnullvecs_state);
3762: ISDestroy(&pcbddc->DirichletBoundariesLocal);
3763: PCBDDCSetDofsSplitting(pc,0,NULL);
3764: PCBDDCSetDofsSplittingLocal(pc,0,NULL);
3765: return(0);
3766: }
3768: PetscErrorCode PCBDDCResetTopography(PC pc)
3769: {
3770: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3771: PetscInt i;
3775: MatDestroy(&pcbddc->nedcG);
3776: ISDestroy(&pcbddc->nedclocal);
3777: MatDestroy(&pcbddc->discretegradient);
3778: MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);
3779: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
3780: MatDestroy(&pcbddc->switch_static_change);
3781: VecDestroy(&pcbddc->work_change);
3782: MatDestroy(&pcbddc->ConstraintMatrix);
3783: MatDestroy(&pcbddc->divudotp);
3784: ISDestroy(&pcbddc->divudotp_vl2l);
3785: PCBDDCGraphDestroy(&pcbddc->mat_graph);
3786: for (i=0;i<pcbddc->n_local_subs;i++) {
3787: ISDestroy(&pcbddc->local_subs[i]);
3788: }
3789: pcbddc->n_local_subs = 0;
3790: PetscFree(pcbddc->local_subs);
3791: PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);
3792: pcbddc->graphanalyzed = PETSC_FALSE;
3793: pcbddc->recompute_topography = PETSC_TRUE;
3794: pcbddc->corner_selected = PETSC_FALSE;
3795: return(0);
3796: }
3798: PetscErrorCode PCBDDCResetSolvers(PC pc)
3799: {
3800: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3804: VecDestroy(&pcbddc->coarse_vec);
3805: if (pcbddc->coarse_phi_B) {
3806: PetscScalar *array;
3807: MatDenseGetArray(pcbddc->coarse_phi_B,&array);
3808: PetscFree(array);
3809: }
3810: MatDestroy(&pcbddc->coarse_phi_B);
3811: MatDestroy(&pcbddc->coarse_phi_D);
3812: MatDestroy(&pcbddc->coarse_psi_B);
3813: MatDestroy(&pcbddc->coarse_psi_D);
3814: VecDestroy(&pcbddc->vec1_P);
3815: VecDestroy(&pcbddc->vec1_C);
3816: MatDestroy(&pcbddc->local_auxmat2);
3817: MatDestroy(&pcbddc->local_auxmat1);
3818: VecDestroy(&pcbddc->vec1_R);
3819: VecDestroy(&pcbddc->vec2_R);
3820: ISDestroy(&pcbddc->is_R_local);
3821: VecScatterDestroy(&pcbddc->R_to_B);
3822: VecScatterDestroy(&pcbddc->R_to_D);
3823: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
3824: KSPReset(pcbddc->ksp_D);
3825: KSPReset(pcbddc->ksp_R);
3826: KSPReset(pcbddc->coarse_ksp);
3827: MatDestroy(&pcbddc->local_mat);
3828: PetscFree(pcbddc->primal_indices_local_idxs);
3829: PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
3830: PetscFree(pcbddc->global_primal_indices);
3831: ISDestroy(&pcbddc->coarse_subassembling);
3832: MatDestroy(&pcbddc->benign_change);
3833: VecDestroy(&pcbddc->benign_vec);
3834: PCBDDCBenignShellMat(pc,PETSC_TRUE);
3835: MatDestroy(&pcbddc->benign_B0);
3836: PetscSFDestroy(&pcbddc->benign_sf);
3837: if (pcbddc->benign_zerodiag_subs) {
3838: PetscInt i;
3839: for (i=0;i<pcbddc->benign_n;i++) {
3840: ISDestroy(&pcbddc->benign_zerodiag_subs[i]);
3841: }
3842: PetscFree(pcbddc->benign_zerodiag_subs);
3843: }
3844: PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
3845: return(0);
3846: }
3848: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3849: {
3850: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3851: PC_IS *pcis = (PC_IS*)pc->data;
3852: VecType impVecType;
3853: PetscInt n_constraints,n_R,old_size;
3857: n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3858: n_R = pcis->n - pcbddc->n_vertices;
3859: VecGetType(pcis->vec1_N,&impVecType);
3860: /* local work vectors (try to avoid unneeded work)*/
3861: /* R nodes */
3862: old_size = -1;
3863: if (pcbddc->vec1_R) {
3864: VecGetSize(pcbddc->vec1_R,&old_size);
3865: }
3866: if (n_R != old_size) {
3867: VecDestroy(&pcbddc->vec1_R);
3868: VecDestroy(&pcbddc->vec2_R);
3869: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);
3870: VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);
3871: VecSetType(pcbddc->vec1_R,impVecType);
3872: VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
3873: }
3874: /* local primal dofs */
3875: old_size = -1;
3876: if (pcbddc->vec1_P) {
3877: VecGetSize(pcbddc->vec1_P,&old_size);
3878: }
3879: if (pcbddc->local_primal_size != old_size) {
3880: VecDestroy(&pcbddc->vec1_P);
3881: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);
3882: VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);
3883: VecSetType(pcbddc->vec1_P,impVecType);
3884: }
3885: /* local explicit constraints */
3886: old_size = -1;
3887: if (pcbddc->vec1_C) {
3888: VecGetSize(pcbddc->vec1_C,&old_size);
3889: }
3890: if (n_constraints && n_constraints != old_size) {
3891: VecDestroy(&pcbddc->vec1_C);
3892: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);
3893: VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);
3894: VecSetType(pcbddc->vec1_C,impVecType);
3895: }
3896: return(0);
3897: }
3899: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
3900: {
3901: PetscErrorCode ierr;
3902: /* pointers to pcis and pcbddc */
3903: PC_IS* pcis = (PC_IS*)pc->data;
3904: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
3905: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3906: /* submatrices of local problem */
3907: Mat A_RV,A_VR,A_VV,local_auxmat2_R;
3908: /* submatrices of local coarse problem */
3909: Mat S_VV,S_CV,S_VC,S_CC;
3910: /* working matrices */
3911: Mat C_CR;
3912: /* additional working stuff */
3913: PC pc_R;
3914: Mat F,Brhs = NULL;
3915: Vec dummy_vec;
3916: PetscBool isLU,isCHOL,need_benign_correction,sparserhs;
3917: PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */
3918: PetscScalar *work;
3919: PetscInt *idx_V_B;
3920: PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I;
3921: PetscInt i,n_R,n_D,n_B;
3922: PetscScalar one=1.0,m_one=-1.0;
3925: if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented");
3926: PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);
3928: /* Set Non-overlapping dimensions */
3929: n_vertices = pcbddc->n_vertices;
3930: n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
3931: n_B = pcis->n_B;
3932: n_D = pcis->n - n_B;
3933: n_R = pcis->n - n_vertices;
3935: /* vertices in boundary numbering */
3936: PetscMalloc1(n_vertices,&idx_V_B);
3937: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);
3938: if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",n_vertices,i);
3940: /* Subdomain contribution (Non-overlapping) to coarse matrix */
3941: PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);
3942: MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);
3943: MatDenseSetLDA(S_VV,pcbddc->local_primal_size);
3944: MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);
3945: MatDenseSetLDA(S_CV,pcbddc->local_primal_size);
3946: MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);
3947: MatDenseSetLDA(S_VC,pcbddc->local_primal_size);
3948: MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);
3949: MatDenseSetLDA(S_CC,pcbddc->local_primal_size);
3951: /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
3952: KSPGetPC(pcbddc->ksp_R,&pc_R);
3953: PCSetUp(pc_R);
3954: PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);
3955: PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);
3956: lda_rhs = n_R;
3957: need_benign_correction = PETSC_FALSE;
3958: if (isLU || isCHOL) {
3959: PCFactorGetMatrix(pc_R,&F);
3960: } else if (sub_schurs && sub_schurs->reuse_solver) {
3961: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3962: MatFactorType type;
3964: F = reuse_solver->F;
3965: MatGetFactorType(F,&type);
3966: if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
3967: if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE;
3968: MatGetSize(F,&lda_rhs,NULL);
3969: need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
3970: } else F = NULL;
3972: /* determine if we can use a sparse right-hand side */
3973: sparserhs = PETSC_FALSE;
3974: if (F) {
3975: MatSolverType solver;
3977: MatFactorGetSolverType(F,&solver);
3978: PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);
3979: }
3981: /* allocate workspace */
3982: n = 0;
3983: if (n_constraints) {
3984: n += lda_rhs*n_constraints;
3985: }
3986: if (n_vertices) {
3987: n = PetscMax(2*lda_rhs*n_vertices,n);
3988: n = PetscMax((lda_rhs+n_B)*n_vertices,n);
3989: }
3990: if (!pcbddc->symmetric_primal) {
3991: n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n);
3992: }
3993: PetscMalloc1(n,&work);
3995: /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
3996: dummy_vec = NULL;
3997: if (need_benign_correction && lda_rhs != n_R && F) {
3998: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec);
3999: VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE);
4000: VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name);
4001: }
4003: MatDestroy(&pcbddc->local_auxmat1);
4004: MatDestroy(&pcbddc->local_auxmat2);
4006: /* Precompute stuffs needed for preprocessing and application of BDDC*/
4007: if (n_constraints) {
4008: Mat M3,C_B;
4009: IS is_aux;
4010: PetscScalar *array,*array2;
4012: /* Extract constraints on R nodes: C_{CR} */
4013: ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);
4014: MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);
4015: MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
4017: /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
4018: /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
4019: if (!sparserhs) {
4020: PetscArrayzero(work,lda_rhs*n_constraints);
4021: for (i=0;i<n_constraints;i++) {
4022: const PetscScalar *row_cmat_values;
4023: const PetscInt *row_cmat_indices;
4024: PetscInt size_of_constraint,j;
4026: MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
4027: for (j=0;j<size_of_constraint;j++) {
4028: work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j];
4029: }
4030: MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
4031: }
4032: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);
4033: } else {
4034: Mat tC_CR;
4036: MatScale(C_CR,-1.0);
4037: if (lda_rhs != n_R) {
4038: PetscScalar *aa;
4039: PetscInt r,*ii,*jj;
4040: PetscBool done;
4042: MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4043: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
4044: MatSeqAIJGetArray(C_CR,&aa);
4045: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);
4046: MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4047: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
4048: } else {
4049: PetscObjectReference((PetscObject)C_CR);
4050: tC_CR = C_CR;
4051: }
4052: MatCreateTranspose(tC_CR,&Brhs);
4053: MatDestroy(&tC_CR);
4054: }
4055: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);
4056: if (F) {
4057: if (need_benign_correction) {
4058: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4060: /* rhs is already zero on interior dofs, no need to change the rhs */
4061: PetscArrayzero(reuse_solver->benign_save_vals,pcbddc->benign_n);
4062: }
4063: MatMatSolve(F,Brhs,local_auxmat2_R);
4064: if (need_benign_correction) {
4065: PetscScalar *marr;
4066: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4068: MatDenseGetArray(local_auxmat2_R,&marr);
4069: if (lda_rhs != n_R) {
4070: for (i=0;i<n_constraints;i++) {
4071: VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4072: PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
4073: VecResetArray(dummy_vec);
4074: }
4075: } else {
4076: for (i=0;i<n_constraints;i++) {
4077: VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4078: PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
4079: VecResetArray(pcbddc->vec1_R);
4080: }
4081: }
4082: MatDenseRestoreArray(local_auxmat2_R,&marr);
4083: }
4084: } else {
4085: PetscScalar *marr;
4087: MatDenseGetArray(local_auxmat2_R,&marr);
4088: for (i=0;i<n_constraints;i++) {
4089: VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
4090: VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);
4091: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4092: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4093: VecResetArray(pcbddc->vec1_R);
4094: VecResetArray(pcbddc->vec2_R);
4095: }
4096: MatDenseRestoreArray(local_auxmat2_R,&marr);
4097: }
4098: if (sparserhs) {
4099: MatScale(C_CR,-1.0);
4100: }
4101: MatDestroy(&Brhs);
4102: if (!pcbddc->switch_static) {
4103: MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);
4104: MatDenseGetArray(pcbddc->local_auxmat2,&array);
4105: MatDenseGetArray(local_auxmat2_R,&array2);
4106: for (i=0;i<n_constraints;i++) {
4107: VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);
4108: VecPlaceArray(pcis->vec1_B,array+i*n_B);
4109: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4110: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4111: VecResetArray(pcis->vec1_B);
4112: VecResetArray(pcbddc->vec1_R);
4113: }
4114: MatDenseRestoreArray(local_auxmat2_R,&array2);
4115: MatDenseRestoreArray(pcbddc->local_auxmat2,&array);
4116: MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
4117: } else {
4118: if (lda_rhs != n_R) {
4119: IS dummy;
4121: ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);
4122: MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);
4123: ISDestroy(&dummy);
4124: } else {
4125: PetscObjectReference((PetscObject)local_auxmat2_R);
4126: pcbddc->local_auxmat2 = local_auxmat2_R;
4127: }
4128: MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
4129: }
4130: ISDestroy(&is_aux);
4131: /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */
4132: MatScale(M3,m_one);
4133: if (isCHOL) {
4134: MatCholeskyFactor(M3,NULL,NULL);
4135: } else {
4136: MatLUFactor(M3,NULL,NULL,NULL);
4137: }
4138: MatSeqDenseInvertFactors_Private(M3);
4139: /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
4140: MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
4141: MatDestroy(&C_B);
4142: MatCopy(M3,S_CC,SAME_NONZERO_PATTERN); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
4143: MatDestroy(&M3);
4144: }
4146: /* Get submatrices from subdomain matrix */
4147: if (n_vertices) {
4148: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4149: PetscBool oldpin;
4150: #endif
4151: PetscBool isaij;
4152: IS is_aux;
4154: if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
4155: IS tis;
4157: ISDuplicate(pcbddc->is_R_local,&tis);
4158: ISSort(tis);
4159: ISComplement(tis,0,pcis->n,&is_aux);
4160: ISDestroy(&tis);
4161: } else {
4162: ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);
4163: }
4164: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4165: oldpin = pcbddc->local_mat->boundtocpu;
4166: #endif
4167: MatBindToCPU(pcbddc->local_mat,PETSC_TRUE);
4168: MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
4169: MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
4170: PetscObjectBaseTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isaij);
4171: if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */
4172: MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4173: }
4174: MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
4175: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4176: MatBindToCPU(pcbddc->local_mat,oldpin);
4177: #endif
4178: ISDestroy(&is_aux);
4179: }
4181: /* Matrix of coarse basis functions (local) */
4182: if (pcbddc->coarse_phi_B) {
4183: PetscInt on_B,on_primal,on_D=n_D;
4184: if (pcbddc->coarse_phi_D) {
4185: MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);
4186: }
4187: MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);
4188: if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
4189: PetscScalar *marray;
4191: MatDenseGetArray(pcbddc->coarse_phi_B,&marray);
4192: PetscFree(marray);
4193: MatDestroy(&pcbddc->coarse_phi_B);
4194: MatDestroy(&pcbddc->coarse_psi_B);
4195: MatDestroy(&pcbddc->coarse_phi_D);
4196: MatDestroy(&pcbddc->coarse_psi_D);
4197: }
4198: }
4200: if (!pcbddc->coarse_phi_B) {
4201: PetscScalar *marr;
4203: /* memory size */
4204: n = n_B*pcbddc->local_primal_size;
4205: if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size;
4206: if (!pcbddc->symmetric_primal) n *= 2;
4207: PetscCalloc1(n,&marr);
4208: MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);
4209: marr += n_B*pcbddc->local_primal_size;
4210: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4211: MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);
4212: marr += n_D*pcbddc->local_primal_size;
4213: }
4214: if (!pcbddc->symmetric_primal) {
4215: MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);
4216: marr += n_B*pcbddc->local_primal_size;
4217: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4218: MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);
4219: }
4220: } else {
4221: PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);
4222: pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
4223: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4224: PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);
4225: pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
4226: }
4227: }
4228: }
4230: /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4231: p0_lidx_I = NULL;
4232: if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4233: const PetscInt *idxs;
4235: ISGetIndices(pcis->is_I_local,&idxs);
4236: PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);
4237: for (i=0;i<pcbddc->benign_n;i++) {
4238: PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);
4239: }
4240: ISRestoreIndices(pcis->is_I_local,&idxs);
4241: }
4243: /* vertices */
4244: if (n_vertices) {
4245: PetscBool restoreavr = PETSC_FALSE;
4247: MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);
4249: if (n_R) {
4250: Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */
4251: PetscBLASInt B_N,B_one = 1;
4252: const PetscScalar *x;
4253: PetscScalar *y;
4255: MatScale(A_RV,m_one);
4256: if (need_benign_correction) {
4257: ISLocalToGlobalMapping RtoN;
4258: IS is_p0;
4259: PetscInt *idxs_p0,n;
4261: PetscMalloc1(pcbddc->benign_n,&idxs_p0);
4262: ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);
4263: ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);
4264: if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %D != %D",n,pcbddc->benign_n);
4265: ISLocalToGlobalMappingDestroy(&RtoN);
4266: ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);
4267: MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);
4268: ISDestroy(&is_p0);
4269: }
4271: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);
4272: if (!sparserhs || need_benign_correction) {
4273: if (lda_rhs == n_R) {
4274: MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);
4275: } else {
4276: PetscScalar *av,*array;
4277: const PetscInt *xadj,*adjncy;
4278: PetscInt n;
4279: PetscBool flg_row;
4281: array = work+lda_rhs*n_vertices;
4282: PetscArrayzero(array,lda_rhs*n_vertices);
4283: MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);
4284: MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4285: MatSeqAIJGetArray(A_RV,&av);
4286: for (i=0;i<n;i++) {
4287: PetscInt j;
4288: for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j];
4289: }
4290: MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4291: MatDestroy(&A_RV);
4292: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);
4293: }
4294: if (need_benign_correction) {
4295: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4296: PetscScalar *marr;
4298: MatDenseGetArray(A_RV,&marr);
4299: /* need \Phi^T A_RV = (I+L)A_RV, L given by
4301: | 0 0 0 | (V)
4302: L = | 0 0 -1 | (P-p0)
4303: | 0 0 -1 | (p0)
4305: */
4306: for (i=0;i<reuse_solver->benign_n;i++) {
4307: const PetscScalar *vals;
4308: const PetscInt *idxs,*idxs_zero;
4309: PetscInt n,j,nz;
4311: ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4312: ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4313: MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4314: for (j=0;j<n;j++) {
4315: PetscScalar val = vals[j];
4316: PetscInt k,col = idxs[j];
4317: for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val;
4318: }
4319: MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4320: ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4321: }
4322: MatDenseRestoreArray(A_RV,&marr);
4323: }
4324: PetscObjectReference((PetscObject)A_RV);
4325: Brhs = A_RV;
4326: } else {
4327: Mat tA_RVT,A_RVT;
4329: if (!pcbddc->symmetric_primal) {
4330: /* A_RV already scaled by -1 */
4331: MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);
4332: } else {
4333: restoreavr = PETSC_TRUE;
4334: MatScale(A_VR,-1.0);
4335: PetscObjectReference((PetscObject)A_VR);
4336: A_RVT = A_VR;
4337: }
4338: if (lda_rhs != n_R) {
4339: PetscScalar *aa;
4340: PetscInt r,*ii,*jj;
4341: PetscBool done;
4343: MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4344: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
4345: MatSeqAIJGetArray(A_RVT,&aa);
4346: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);
4347: MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4348: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
4349: } else {
4350: PetscObjectReference((PetscObject)A_RVT);
4351: tA_RVT = A_RVT;
4352: }
4353: MatCreateTranspose(tA_RVT,&Brhs);
4354: MatDestroy(&tA_RVT);
4355: MatDestroy(&A_RVT);
4356: }
4357: if (F) {
4358: /* need to correct the rhs */
4359: if (need_benign_correction) {
4360: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4361: PetscScalar *marr;
4363: MatDenseGetArray(Brhs,&marr);
4364: if (lda_rhs != n_R) {
4365: for (i=0;i<n_vertices;i++) {
4366: VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4367: PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);
4368: VecResetArray(dummy_vec);
4369: }
4370: } else {
4371: for (i=0;i<n_vertices;i++) {
4372: VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4373: PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);
4374: VecResetArray(pcbddc->vec1_R);
4375: }
4376: }
4377: MatDenseRestoreArray(Brhs,&marr);
4378: }
4379: MatMatSolve(F,Brhs,A_RRmA_RV);
4380: if (restoreavr) {
4381: MatScale(A_VR,-1.0);
4382: }
4383: /* need to correct the solution */
4384: if (need_benign_correction) {
4385: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4386: PetscScalar *marr;
4388: MatDenseGetArray(A_RRmA_RV,&marr);
4389: if (lda_rhs != n_R) {
4390: for (i=0;i<n_vertices;i++) {
4391: VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4392: PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
4393: VecResetArray(dummy_vec);
4394: }
4395: } else {
4396: for (i=0;i<n_vertices;i++) {
4397: VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4398: PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
4399: VecResetArray(pcbddc->vec1_R);
4400: }
4401: }
4402: MatDenseRestoreArray(A_RRmA_RV,&marr);
4403: }
4404: } else {
4405: MatDenseGetArray(Brhs,&y);
4406: for (i=0;i<n_vertices;i++) {
4407: VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);
4408: VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);
4409: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4410: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4411: VecResetArray(pcbddc->vec1_R);
4412: VecResetArray(pcbddc->vec2_R);
4413: }
4414: MatDenseRestoreArray(Brhs,&y);
4415: }
4416: MatDestroy(&A_RV);
4417: MatDestroy(&Brhs);
4418: /* S_VV and S_CV */
4419: if (n_constraints) {
4420: Mat B;
4422: PetscArrayzero(work+lda_rhs*n_vertices,n_B*n_vertices);
4423: for (i=0;i<n_vertices;i++) {
4424: VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
4425: VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);
4426: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4427: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4428: VecResetArray(pcis->vec1_B);
4429: VecResetArray(pcbddc->vec1_R);
4430: }
4431: MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);
4432: /* Reuse dense S_C = pcbddc->local_auxmat1 * B */
4433: MatProductCreateWithMat(pcbddc->local_auxmat1,B,NULL,S_CV);
4434: MatProductSetType(S_CV,MATPRODUCT_AB);
4435: MatProductSetFromOptions(S_CV);
4436: MatProductSymbolic(S_CV);
4437: MatProductNumeric(S_CV);
4438: MatProductClear(S_CV);
4440: MatDestroy(&B);
4441: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);
4442: /* Reuse B = local_auxmat2_R * S_CV */
4443: MatProductCreateWithMat(local_auxmat2_R,S_CV,NULL,B);
4444: MatProductSetType(B,MATPRODUCT_AB);
4445: MatProductSetFromOptions(B);
4446: MatProductSymbolic(B);
4447: MatProductNumeric(B);
4449: MatScale(S_CV,m_one);
4450: PetscBLASIntCast(lda_rhs*n_vertices,&B_N);
4451: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one));
4452: MatDestroy(&B);
4453: }
4454: if (lda_rhs != n_R) {
4455: MatDestroy(&A_RRmA_RV);
4456: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);
4457: MatDenseSetLDA(A_RRmA_RV,lda_rhs);
4458: }
4459: MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);
4460: /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4461: if (need_benign_correction) {
4462: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4463: PetscScalar *marr,*sums;
4465: PetscMalloc1(n_vertices,&sums);
4466: MatDenseGetArray(S_VVt,&marr);
4467: for (i=0;i<reuse_solver->benign_n;i++) {
4468: const PetscScalar *vals;
4469: const PetscInt *idxs,*idxs_zero;
4470: PetscInt n,j,nz;
4472: ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4473: ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4474: for (j=0;j<n_vertices;j++) {
4475: PetscInt k;
4476: sums[j] = 0.;
4477: for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs];
4478: }
4479: MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4480: for (j=0;j<n;j++) {
4481: PetscScalar val = vals[j];
4482: PetscInt k;
4483: for (k=0;k<n_vertices;k++) {
4484: marr[idxs[j]+k*n_vertices] += val*sums[k];
4485: }
4486: }
4487: MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4488: ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4489: }
4490: PetscFree(sums);
4491: MatDenseRestoreArray(S_VVt,&marr);
4492: MatDestroy(&A_RV_bcorr);
4493: }
4494: MatDestroy(&A_RRmA_RV);
4495: PetscBLASIntCast(n_vertices*n_vertices,&B_N);
4496: MatDenseGetArrayRead(A_VV,&x);
4497: MatDenseGetArray(S_VVt,&y);
4498: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
4499: MatDenseRestoreArrayRead(A_VV,&x);
4500: MatDenseRestoreArray(S_VVt,&y);
4501: MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);
4502: MatDestroy(&S_VVt);
4503: } else {
4504: MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);
4505: }
4506: MatDestroy(&A_VV);
4508: /* coarse basis functions */
4509: for (i=0;i<n_vertices;i++) {
4510: PetscScalar *y;
4512: VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4513: MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4514: VecPlaceArray(pcis->vec1_B,y+n_B*i);
4515: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4516: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4517: y[n_B*i+idx_V_B[i]] = 1.0;
4518: MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4519: VecResetArray(pcis->vec1_B);
4521: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4522: PetscInt j;
4524: MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4525: VecPlaceArray(pcis->vec1_D,y+n_D*i);
4526: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4527: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4528: VecResetArray(pcis->vec1_D);
4529: for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4530: MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4531: }
4532: VecResetArray(pcbddc->vec1_R);
4533: }
4534: /* if n_R == 0 the object is not destroyed */
4535: MatDestroy(&A_RV);
4536: }
4537: VecDestroy(&dummy_vec);
4539: if (n_constraints) {
4540: Mat B;
4542: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);
4543: MatScale(S_CC,m_one);
4544: MatProductCreateWithMat(local_auxmat2_R,S_CC,NULL,B);
4545: MatProductSetType(B,MATPRODUCT_AB);
4546: MatProductSetFromOptions(B);
4547: MatProductSymbolic(B);
4548: MatProductNumeric(B);
4550: MatScale(S_CC,m_one);
4551: if (n_vertices) {
4552: if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4553: MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);
4554: } else {
4555: Mat S_VCt;
4557: if (lda_rhs != n_R) {
4558: MatDestroy(&B);
4559: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);
4560: MatDenseSetLDA(B,lda_rhs);
4561: }
4562: MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);
4563: MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);
4564: MatDestroy(&S_VCt);
4565: }
4566: }
4567: MatDestroy(&B);
4568: /* coarse basis functions */
4569: for (i=0;i<n_constraints;i++) {
4570: PetscScalar *y;
4572: VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4573: MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4574: VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));
4575: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4576: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4577: MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4578: VecResetArray(pcis->vec1_B);
4579: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4580: PetscInt j;
4582: MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4583: VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));
4584: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4585: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4586: VecResetArray(pcis->vec1_D);
4587: for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4588: MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4589: }
4590: VecResetArray(pcbddc->vec1_R);
4591: }
4592: }
4593: if (n_constraints) {
4594: MatDestroy(&local_auxmat2_R);
4595: }
4596: PetscFree(p0_lidx_I);
4598: /* coarse matrix entries relative to B_0 */
4599: if (pcbddc->benign_n) {
4600: Mat B0_B,B0_BPHI;
4601: IS is_dummy;
4602: const PetscScalar *data;
4603: PetscInt j;
4605: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4606: MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4607: ISDestroy(&is_dummy);
4608: MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4609: MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4610: MatDenseGetArrayRead(B0_BPHI,&data);
4611: for (j=0;j<pcbddc->benign_n;j++) {
4612: PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4613: for (i=0;i<pcbddc->local_primal_size;i++) {
4614: coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j];
4615: coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j];
4616: }
4617: }
4618: MatDenseRestoreArrayRead(B0_BPHI,&data);
4619: MatDestroy(&B0_B);
4620: MatDestroy(&B0_BPHI);
4621: }
4623: /* compute other basis functions for non-symmetric problems */
4624: if (!pcbddc->symmetric_primal) {
4625: Mat B_V=NULL,B_C=NULL;
4626: PetscScalar *marray;
4628: if (n_constraints) {
4629: Mat S_CCT,C_CRT;
4631: MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);
4632: MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);
4633: MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);
4634: MatDestroy(&S_CCT);
4635: if (n_vertices) {
4636: Mat S_VCT;
4638: MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);
4639: MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);
4640: MatDestroy(&S_VCT);
4641: }
4642: MatDestroy(&C_CRT);
4643: } else {
4644: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);
4645: }
4646: if (n_vertices && n_R) {
4647: PetscScalar *av,*marray;
4648: const PetscInt *xadj,*adjncy;
4649: PetscInt n;
4650: PetscBool flg_row;
4652: /* B_V = B_V - A_VR^T */
4653: MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4654: MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4655: MatSeqAIJGetArray(A_VR,&av);
4656: MatDenseGetArray(B_V,&marray);
4657: for (i=0;i<n;i++) {
4658: PetscInt j;
4659: for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j];
4660: }
4661: MatDenseRestoreArray(B_V,&marray);
4662: MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4663: MatDestroy(&A_VR);
4664: }
4666: /* currently there's no support for MatTransposeMatSolve(F,B,X) */
4667: if (n_vertices) {
4668: MatDenseGetArray(B_V,&marray);
4669: for (i=0;i<n_vertices;i++) {
4670: VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);
4671: VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4672: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4673: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4674: VecResetArray(pcbddc->vec1_R);
4675: VecResetArray(pcbddc->vec2_R);
4676: }
4677: MatDenseRestoreArray(B_V,&marray);
4678: }
4679: if (B_C) {
4680: MatDenseGetArray(B_C,&marray);
4681: for (i=n_vertices;i<n_constraints+n_vertices;i++) {
4682: VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);
4683: VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4684: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4685: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4686: VecResetArray(pcbddc->vec1_R);
4687: VecResetArray(pcbddc->vec2_R);
4688: }
4689: MatDenseRestoreArray(B_C,&marray);
4690: }
4691: /* coarse basis functions */
4692: for (i=0;i<pcbddc->local_primal_size;i++) {
4693: PetscScalar *y;
4695: VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
4696: MatDenseGetArray(pcbddc->coarse_psi_B,&y);
4697: VecPlaceArray(pcis->vec1_B,y+n_B*i);
4698: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4699: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4700: if (i<n_vertices) {
4701: y[n_B*i+idx_V_B[i]] = 1.0;
4702: }
4703: MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);
4704: VecResetArray(pcis->vec1_B);
4706: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4707: MatDenseGetArray(pcbddc->coarse_psi_D,&y);
4708: VecPlaceArray(pcis->vec1_D,y+n_D*i);
4709: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4710: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4711: VecResetArray(pcis->vec1_D);
4712: MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);
4713: }
4714: VecResetArray(pcbddc->vec1_R);
4715: }
4716: MatDestroy(&B_V);
4717: MatDestroy(&B_C);
4718: }
4720: /* free memory */
4721: PetscFree(idx_V_B);
4722: MatDestroy(&S_VV);
4723: MatDestroy(&S_CV);
4724: MatDestroy(&S_VC);
4725: MatDestroy(&S_CC);
4726: PetscFree(work);
4727: if (n_vertices) {
4728: MatDestroy(&A_VR);
4729: }
4730: if (n_constraints) {
4731: MatDestroy(&C_CR);
4732: }
4733: PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);
4735: /* Checking coarse_sub_mat and coarse basis functions */
4736: /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4737: /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4738: if (pcbddc->dbg_flag) {
4739: Mat coarse_sub_mat;
4740: Mat AUXMAT,TM1,TM2,TM3,TM4;
4741: Mat coarse_phi_D,coarse_phi_B;
4742: Mat coarse_psi_D,coarse_psi_B;
4743: Mat A_II,A_BB,A_IB,A_BI;
4744: Mat C_B,CPHI;
4745: IS is_dummy;
4746: Vec mones;
4747: MatType checkmattype=MATSEQAIJ;
4748: PetscReal real_value;
4750: if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
4751: Mat A;
4752: PCBDDCBenignProject(pc,NULL,NULL,&A);
4753: MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);
4754: MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);
4755: MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);
4756: MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);
4757: MatDestroy(&A);
4758: } else {
4759: MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
4760: MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
4761: MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
4762: MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
4763: }
4764: MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
4765: MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
4766: if (!pcbddc->symmetric_primal) {
4767: MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);
4768: MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);
4769: }
4770: MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);
4772: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4773: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);
4774: PetscViewerFlush(pcbddc->dbg_viewer);
4775: if (!pcbddc->symmetric_primal) {
4776: MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4777: MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);
4778: MatDestroy(&AUXMAT);
4779: MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4780: MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);
4781: MatDestroy(&AUXMAT);
4782: MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4783: MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4784: MatDestroy(&AUXMAT);
4785: MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4786: MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4787: MatDestroy(&AUXMAT);
4788: } else {
4789: MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
4790: MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
4791: MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4792: MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4793: MatDestroy(&AUXMAT);
4794: MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4795: MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4796: MatDestroy(&AUXMAT);
4797: }
4798: MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
4799: MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
4800: MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
4801: MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);
4802: if (pcbddc->benign_n) {
4803: Mat B0_B,B0_BPHI;
4804: const PetscScalar *data2;
4805: PetscScalar *data;
4806: PetscInt j;
4808: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4809: MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4810: MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4811: MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4812: MatDenseGetArray(TM1,&data);
4813: MatDenseGetArrayRead(B0_BPHI,&data2);
4814: for (j=0;j<pcbddc->benign_n;j++) {
4815: PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4816: for (i=0;i<pcbddc->local_primal_size;i++) {
4817: data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j];
4818: data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j];
4819: }
4820: }
4821: MatDenseRestoreArray(TM1,&data);
4822: MatDenseRestoreArrayRead(B0_BPHI,&data2);
4823: MatDestroy(&B0_B);
4824: ISDestroy(&is_dummy);
4825: MatDestroy(&B0_BPHI);
4826: }
4827: #if 0
4828: {
4829: PetscViewer viewer;
4830: char filename[256];
4831: sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4832: PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
4833: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
4834: PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");
4835: MatView(coarse_sub_mat,viewer);
4836: PetscObjectSetName((PetscObject)TM1,"projected");
4837: MatView(TM1,viewer);
4838: if (pcbddc->coarse_phi_B) {
4839: PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");
4840: MatView(pcbddc->coarse_phi_B,viewer);
4841: }
4842: if (pcbddc->coarse_phi_D) {
4843: PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");
4844: MatView(pcbddc->coarse_phi_D,viewer);
4845: }
4846: if (pcbddc->coarse_psi_B) {
4847: PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");
4848: MatView(pcbddc->coarse_psi_B,viewer);
4849: }
4850: if (pcbddc->coarse_psi_D) {
4851: PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");
4852: MatView(pcbddc->coarse_psi_D,viewer);
4853: }
4854: PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");
4855: MatView(pcbddc->local_mat,viewer);
4856: PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");
4857: MatView(pcbddc->ConstraintMatrix,viewer);
4858: PetscObjectSetName((PetscObject)pcis->is_I_local,"I");
4859: ISView(pcis->is_I_local,viewer);
4860: PetscObjectSetName((PetscObject)pcis->is_B_local,"B");
4861: ISView(pcis->is_B_local,viewer);
4862: PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");
4863: ISView(pcbddc->is_R_local,viewer);
4864: PetscViewerDestroy(&viewer);
4865: }
4866: #endif
4867: MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);
4868: MatNorm(TM1,NORM_FROBENIUS,&real_value);
4869: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
4870: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);
4872: /* check constraints */
4873: ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);
4874: MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
4875: if (!pcbddc->benign_n) { /* TODO: add benign case */
4876: MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);
4877: } else {
4878: PetscScalar *data;
4879: Mat tmat;
4880: MatDenseGetArray(pcbddc->coarse_phi_B,&data);
4881: MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);
4882: MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);
4883: MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);
4884: MatDestroy(&tmat);
4885: }
4886: MatCreateVecs(CPHI,&mones,NULL);
4887: VecSet(mones,-1.0);
4888: MatDiagonalSet(CPHI,mones,ADD_VALUES);
4889: MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4890: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4891: if (!pcbddc->symmetric_primal) {
4892: MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);
4893: VecSet(mones,-1.0);
4894: MatDiagonalSet(CPHI,mones,ADD_VALUES);
4895: MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4896: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4897: }
4898: MatDestroy(&C_B);
4899: MatDestroy(&CPHI);
4900: ISDestroy(&is_dummy);
4901: VecDestroy(&mones);
4902: PetscViewerFlush(pcbddc->dbg_viewer);
4903: MatDestroy(&A_II);
4904: MatDestroy(&A_BB);
4905: MatDestroy(&A_IB);
4906: MatDestroy(&A_BI);
4907: MatDestroy(&TM1);
4908: MatDestroy(&TM2);
4909: MatDestroy(&TM3);
4910: MatDestroy(&TM4);
4911: MatDestroy(&coarse_phi_D);
4912: MatDestroy(&coarse_phi_B);
4913: if (!pcbddc->symmetric_primal) {
4914: MatDestroy(&coarse_psi_D);
4915: MatDestroy(&coarse_psi_B);
4916: }
4917: MatDestroy(&coarse_sub_mat);
4918: }
4919: /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors) */
4920: {
4921: PetscBool gpu;
4923: PetscObjectTypeCompare((PetscObject)pcis->vec1_N,VECSEQCUDA,&gpu);
4924: if (gpu) {
4925: if (pcbddc->local_auxmat1) {
4926: MatConvert(pcbddc->local_auxmat1,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat1);
4927: }
4928: if (pcbddc->local_auxmat2) {
4929: MatConvert(pcbddc->local_auxmat2,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat2);
4930: }
4931: if (pcbddc->coarse_phi_B) {
4932: MatConvert(pcbddc->coarse_phi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_B);
4933: }
4934: if (pcbddc->coarse_phi_D) {
4935: MatConvert(pcbddc->coarse_phi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_D);
4936: }
4937: if (pcbddc->coarse_psi_B) {
4938: MatConvert(pcbddc->coarse_psi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_B);
4939: }
4940: if (pcbddc->coarse_psi_D) {
4941: MatConvert(pcbddc->coarse_psi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_D);
4942: }
4943: }
4944: }
4945: /* get back data */
4946: *coarse_submat_vals_n = coarse_submat_vals;
4947: return(0);
4948: }
4950: PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
4951: {
4952: Mat *work_mat;
4953: IS isrow_s,iscol_s;
4954: PetscBool rsorted,csorted;
4955: PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL;
4959: ISSorted(isrow,&rsorted);
4960: ISSorted(iscol,&csorted);
4961: ISGetLocalSize(isrow,&rsize);
4962: ISGetLocalSize(iscol,&csize);
4964: if (!rsorted) {
4965: const PetscInt *idxs;
4966: PetscInt *idxs_sorted,i;
4968: PetscMalloc1(rsize,&idxs_perm_r);
4969: PetscMalloc1(rsize,&idxs_sorted);
4970: for (i=0;i<rsize;i++) {
4971: idxs_perm_r[i] = i;
4972: }
4973: ISGetIndices(isrow,&idxs);
4974: PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);
4975: for (i=0;i<rsize;i++) {
4976: idxs_sorted[i] = idxs[idxs_perm_r[i]];
4977: }
4978: ISRestoreIndices(isrow,&idxs);
4979: ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);
4980: } else {
4981: PetscObjectReference((PetscObject)isrow);
4982: isrow_s = isrow;
4983: }
4985: if (!csorted) {
4986: if (isrow == iscol) {
4987: PetscObjectReference((PetscObject)isrow_s);
4988: iscol_s = isrow_s;
4989: } else {
4990: const PetscInt *idxs;
4991: PetscInt *idxs_sorted,i;
4993: PetscMalloc1(csize,&idxs_perm_c);
4994: PetscMalloc1(csize,&idxs_sorted);
4995: for (i=0;i<csize;i++) {
4996: idxs_perm_c[i] = i;
4997: }
4998: ISGetIndices(iscol,&idxs);
4999: PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);
5000: for (i=0;i<csize;i++) {
5001: idxs_sorted[i] = idxs[idxs_perm_c[i]];
5002: }
5003: ISRestoreIndices(iscol,&idxs);
5004: ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);
5005: }
5006: } else {
5007: PetscObjectReference((PetscObject)iscol);
5008: iscol_s = iscol;
5009: }
5011: MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);
5013: if (!rsorted || !csorted) {
5014: Mat new_mat;
5015: IS is_perm_r,is_perm_c;
5017: if (!rsorted) {
5018: PetscInt *idxs_r,i;
5019: PetscMalloc1(rsize,&idxs_r);
5020: for (i=0;i<rsize;i++) {
5021: idxs_r[idxs_perm_r[i]] = i;
5022: }
5023: PetscFree(idxs_perm_r);
5024: ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);
5025: } else {
5026: ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);
5027: }
5028: ISSetPermutation(is_perm_r);
5030: if (!csorted) {
5031: if (isrow_s == iscol_s) {
5032: PetscObjectReference((PetscObject)is_perm_r);
5033: is_perm_c = is_perm_r;
5034: } else {
5035: PetscInt *idxs_c,i;
5036: if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present");
5037: PetscMalloc1(csize,&idxs_c);
5038: for (i=0;i<csize;i++) {
5039: idxs_c[idxs_perm_c[i]] = i;
5040: }
5041: PetscFree(idxs_perm_c);
5042: ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);
5043: }
5044: } else {
5045: ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);
5046: }
5047: ISSetPermutation(is_perm_c);
5049: MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);
5050: MatDestroy(&work_mat[0]);
5051: work_mat[0] = new_mat;
5052: ISDestroy(&is_perm_r);
5053: ISDestroy(&is_perm_c);
5054: }
5056: PetscObjectReference((PetscObject)work_mat[0]);
5057: *B = work_mat[0];
5058: MatDestroyMatrices(1,&work_mat);
5059: ISDestroy(&isrow_s);
5060: ISDestroy(&iscol_s);
5061: return(0);
5062: }
5064: PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
5065: {
5066: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5067: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
5068: Mat new_mat,lA;
5069: IS is_local,is_global;
5070: PetscInt local_size;
5071: PetscBool isseqaij;
5075: MatDestroy(&pcbddc->local_mat);
5076: MatGetSize(matis->A,&local_size,NULL);
5077: ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);
5078: ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);
5079: ISDestroy(&is_local);
5080: MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);
5081: ISDestroy(&is_global);
5083: if (pcbddc->dbg_flag) {
5084: Vec x,x_change;
5085: PetscReal error;
5087: MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);
5088: VecSetRandom(x,NULL);
5089: MatMult(ChangeOfBasisMatrix,x,x_change);
5090: VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
5091: VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
5092: MatMult(new_mat,matis->x,matis->y);
5093: if (!pcbddc->change_interior) {
5094: const PetscScalar *x,*y,*v;
5095: PetscReal lerror = 0.;
5096: PetscInt i;
5098: VecGetArrayRead(matis->x,&x);
5099: VecGetArrayRead(matis->y,&y);
5100: VecGetArrayRead(matis->counter,&v);
5101: for (i=0;i<local_size;i++)
5102: if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror)
5103: lerror = PetscAbsScalar(x[i]-y[i]);
5104: VecRestoreArrayRead(matis->x,&x);
5105: VecRestoreArrayRead(matis->y,&y);
5106: VecRestoreArrayRead(matis->counter,&v);
5107: MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));
5108: if (error > PETSC_SMALL) {
5109: if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5110: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",error);
5111: } else {
5112: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",error);
5113: }
5114: }
5115: }
5116: VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
5117: VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
5118: VecAXPY(x,-1.0,x_change);
5119: VecNorm(x,NORM_INFINITY,&error);
5120: if (error > PETSC_SMALL) {
5121: if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5122: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error);
5123: } else {
5124: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",error);
5125: }
5126: }
5127: VecDestroy(&x);
5128: VecDestroy(&x_change);
5129: }
5131: /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
5132: PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);
5134: /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
5135: PetscObjectBaseTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);
5136: if (isseqaij) {
5137: MatDestroy(&pcbddc->local_mat);
5138: MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
5139: if (lA) {
5140: Mat work;
5141: MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
5142: PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
5143: MatDestroy(&work);
5144: }
5145: } else {
5146: Mat work_mat;
5148: MatDestroy(&pcbddc->local_mat);
5149: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
5150: MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
5151: MatDestroy(&work_mat);
5152: if (lA) {
5153: Mat work;
5154: MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
5155: MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
5156: PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
5157: MatDestroy(&work);
5158: }
5159: }
5160: if (matis->A->symmetric_set) {
5161: MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);
5162: #if !defined(PETSC_USE_COMPLEX)
5163: MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);
5164: #endif
5165: }
5166: MatDestroy(&new_mat);
5167: return(0);
5168: }
5170: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
5171: {
5172: PC_IS* pcis = (PC_IS*)(pc->data);
5173: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
5174: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5175: PetscInt *idx_R_local=NULL;
5176: PetscInt n_vertices,i,j,n_R,n_D,n_B;
5177: PetscInt vbs,bs;
5178: PetscBT bitmask=NULL;
5179: PetscErrorCode ierr;
5182: /*
5183: No need to setup local scatters if
5184: - primal space is unchanged
5185: AND
5186: - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
5187: AND
5188: - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
5189: */
5190: if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
5191: return(0);
5192: }
5193: /* destroy old objects */
5194: ISDestroy(&pcbddc->is_R_local);
5195: VecScatterDestroy(&pcbddc->R_to_B);
5196: VecScatterDestroy(&pcbddc->R_to_D);
5197: /* Set Non-overlapping dimensions */
5198: n_B = pcis->n_B;
5199: n_D = pcis->n - n_B;
5200: n_vertices = pcbddc->n_vertices;
5202: /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
5204: /* create auxiliary bitmask and allocate workspace */
5205: if (!sub_schurs || !sub_schurs->reuse_solver) {
5206: PetscMalloc1(pcis->n-n_vertices,&idx_R_local);
5207: PetscBTCreate(pcis->n,&bitmask);
5208: for (i=0;i<n_vertices;i++) {
5209: PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);
5210: }
5212: for (i=0, n_R=0; i<pcis->n; i++) {
5213: if (!PetscBTLookup(bitmask,i)) {
5214: idx_R_local[n_R++] = i;
5215: }
5216: }
5217: } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5218: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5220: ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5221: ISGetLocalSize(reuse_solver->is_R,&n_R);
5222: }
5224: /* Block code */
5225: vbs = 1;
5226: MatGetBlockSize(pcbddc->local_mat,&bs);
5227: if (bs>1 && !(n_vertices%bs)) {
5228: PetscBool is_blocked = PETSC_TRUE;
5229: PetscInt *vary;
5230: if (!sub_schurs || !sub_schurs->reuse_solver) {
5231: PetscMalloc1(pcis->n/bs,&vary);
5232: PetscArrayzero(vary,pcis->n/bs);
5233: /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5234: /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5235: for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
5236: for (i=0; i<pcis->n/bs; i++) {
5237: if (vary[i]!=0 && vary[i]!=bs) {
5238: is_blocked = PETSC_FALSE;
5239: break;
5240: }
5241: }
5242: PetscFree(vary);
5243: } else {
5244: /* Verify directly the R set */
5245: for (i=0; i<n_R/bs; i++) {
5246: PetscInt j,node=idx_R_local[bs*i];
5247: for (j=1; j<bs; j++) {
5248: if (node != idx_R_local[bs*i+j]-j) {
5249: is_blocked = PETSC_FALSE;
5250: break;
5251: }
5252: }
5253: }
5254: }
5255: if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5256: vbs = bs;
5257: for (i=0;i<n_R/vbs;i++) {
5258: idx_R_local[i] = idx_R_local[vbs*i]/vbs;
5259: }
5260: }
5261: }
5262: ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);
5263: if (sub_schurs && sub_schurs->reuse_solver) {
5264: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5266: ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5267: ISDestroy(&reuse_solver->is_R);
5268: PetscObjectReference((PetscObject)pcbddc->is_R_local);
5269: reuse_solver->is_R = pcbddc->is_R_local;
5270: } else {
5271: PetscFree(idx_R_local);
5272: }
5274: /* print some info if requested */
5275: if (pcbddc->dbg_flag) {
5276: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5277: PetscViewerFlush(pcbddc->dbg_viewer);
5278: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5279: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
5280: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B);
5281: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);
5282: PetscViewerFlush(pcbddc->dbg_viewer);
5283: }
5285: /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5286: if (!sub_schurs || !sub_schurs->reuse_solver) {
5287: IS is_aux1,is_aux2;
5288: PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
5290: ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
5291: PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);
5292: PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);
5293: ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
5294: for (i=0; i<n_D; i++) {
5295: PetscBTSet(bitmask,is_indices[i]);
5296: }
5297: ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
5298: for (i=0, j=0; i<n_R; i++) {
5299: if (!PetscBTLookup(bitmask,idx_R_local[i])) {
5300: aux_array1[j++] = i;
5301: }
5302: }
5303: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
5304: ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5305: for (i=0, j=0; i<n_B; i++) {
5306: if (!PetscBTLookup(bitmask,is_indices[i])) {
5307: aux_array2[j++] = i;
5308: }
5309: }
5310: ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5311: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);
5312: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
5313: ISDestroy(&is_aux1);
5314: ISDestroy(&is_aux2);
5316: if (pcbddc->switch_static || pcbddc->dbg_flag) {
5317: PetscMalloc1(n_D,&aux_array1);
5318: for (i=0, j=0; i<n_R; i++) {
5319: if (PetscBTLookup(bitmask,idx_R_local[i])) {
5320: aux_array1[j++] = i;
5321: }
5322: }
5323: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
5324: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5325: ISDestroy(&is_aux1);
5326: }
5327: PetscBTDestroy(&bitmask);
5328: ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
5329: } else {
5330: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5331: IS tis;
5332: PetscInt schur_size;
5334: ISGetLocalSize(reuse_solver->is_B,&schur_size);
5335: ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);
5336: VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);
5337: ISDestroy(&tis);
5338: if (pcbddc->switch_static || pcbddc->dbg_flag) {
5339: ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);
5340: VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5341: ISDestroy(&tis);
5342: }
5343: }
5344: return(0);
5345: }
5347: static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B)
5348: {
5349: MatNullSpace NullSpace;
5350: Mat dmat;
5351: const Vec *nullvecs;
5352: Vec v,v2,*nullvecs2;
5353: VecScatter sct = NULL;
5354: PetscContainer c;
5355: PetscScalar *ddata;
5356: PetscInt k,nnsp_size,bsiz,bsiz2,n,N,bs;
5357: PetscBool nnsp_has_cnst;
5361: if (!is && !B) { /* MATIS */
5362: Mat_IS* matis = (Mat_IS*)A->data;
5364: if (!B) {
5365: MatISGetLocalMat(A,&B);
5366: }
5367: sct = matis->cctx;
5368: PetscObjectReference((PetscObject)sct);
5369: } else {
5370: MatGetNullSpace(B,&NullSpace);
5371: if (!NullSpace) {
5372: MatGetNearNullSpace(B,&NullSpace);
5373: }
5374: if (NullSpace) return(0);
5375: }
5376: MatGetNullSpace(A,&NullSpace);
5377: if (!NullSpace) {
5378: MatGetNearNullSpace(A,&NullSpace);
5379: }
5380: if (!NullSpace) return(0);
5382: MatCreateVecs(A,&v,NULL);
5383: MatCreateVecs(B,&v2,NULL);
5384: if (!sct) {
5385: VecScatterCreate(v,is,v2,NULL,&sct);
5386: }
5387: MatNullSpaceGetVecs(NullSpace,&nnsp_has_cnst,&nnsp_size,(const Vec**)&nullvecs);
5388: bsiz = bsiz2 = nnsp_size+!!nnsp_has_cnst;
5389: PetscMalloc1(bsiz,&nullvecs2);
5390: VecGetBlockSize(v2,&bs);
5391: VecGetSize(v2,&N);
5392: VecGetLocalSize(v2,&n);
5393: PetscMalloc1(n*bsiz,&ddata);
5394: for (k=0;k<nnsp_size;k++) {
5395: VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*k,&nullvecs2[k]);
5396: VecScatterBegin(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);
5397: VecScatterEnd(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);
5398: }
5399: if (nnsp_has_cnst) {
5400: VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*nnsp_size,&nullvecs2[nnsp_size]);
5401: VecSet(nullvecs2[nnsp_size],1.0);
5402: }
5403: PCBDDCOrthonormalizeVecs(&bsiz2,nullvecs2);
5404: MatNullSpaceCreate(PetscObjectComm((PetscObject)B),PETSC_FALSE,bsiz2,nullvecs2,&NullSpace);
5406: MatCreateDense(PetscObjectComm((PetscObject)B),n,PETSC_DECIDE,N,bsiz2,ddata,&dmat);
5407: PetscContainerCreate(PetscObjectComm((PetscObject)B),&c);
5408: PetscContainerSetPointer(c,ddata);
5409: PetscContainerSetUserDestroy(c,PetscContainerUserDestroyDefault);
5410: PetscObjectCompose((PetscObject)dmat,"_PBDDC_Null_dmat_arr",(PetscObject)c);
5411: PetscContainerDestroy(&c);
5412: PetscObjectCompose((PetscObject)NullSpace,"_PBDDC_Null_dmat",(PetscObject)dmat);
5413: MatDestroy(&dmat);
5415: for (k=0;k<bsiz;k++) {
5416: VecDestroy(&nullvecs2[k]);
5417: }
5418: PetscFree(nullvecs2);
5419: MatSetNearNullSpace(B,NullSpace);
5420: MatNullSpaceDestroy(&NullSpace);
5421: VecDestroy(&v);
5422: VecDestroy(&v2);
5423: VecScatterDestroy(&sct);
5424: return(0);
5425: }
5427: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5428: {
5429: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
5430: PC_IS *pcis = (PC_IS*)pc->data;
5431: PC pc_temp;
5432: Mat A_RR;
5433: MatNullSpace nnsp;
5434: MatReuse reuse;
5435: PetscScalar m_one = -1.0;
5436: PetscReal value;
5437: PetscInt n_D,n_R;
5438: PetscBool issbaij,opts;
5440: void (*f)(void) = NULL;
5441: char dir_prefix[256],neu_prefix[256],str_level[16];
5442: size_t len;
5445: PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);
5446: /* approximate solver, propagate NearNullSpace if needed */
5447: if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) {
5448: MatNullSpace gnnsp1,gnnsp2;
5449: PetscBool lhas,ghas;
5451: MatGetNearNullSpace(pcbddc->local_mat,&nnsp);
5452: MatGetNearNullSpace(pc->pmat,&gnnsp1);
5453: MatGetNullSpace(pc->pmat,&gnnsp2);
5454: lhas = nnsp ? PETSC_TRUE : PETSC_FALSE;
5455: MPIU_Allreduce(&lhas,&ghas,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
5456: if (!ghas && (gnnsp1 || gnnsp2)) {
5457: MatNullSpacePropagateAny_Private(pc->pmat,NULL,NULL);
5458: }
5459: }
5461: /* compute prefixes */
5462: PetscStrcpy(dir_prefix,"");
5463: PetscStrcpy(neu_prefix,"");
5464: if (!pcbddc->current_level) {
5465: PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));
5466: PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));
5467: PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5468: PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5469: } else {
5470: PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
5471: PetscStrlen(((PetscObject)pc)->prefix,&len);
5472: len -= 15; /* remove "pc_bddc_coarse_" */
5473: if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
5474: if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
5475: /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5476: PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);
5477: PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);
5478: PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5479: PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5480: PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));
5481: PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));
5482: }
5484: /* DIRICHLET PROBLEM */
5485: if (dirichlet) {
5486: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5487: if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5488: if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
5489: if (pcbddc->dbg_flag) {
5490: Mat A_IIn;
5492: PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);
5493: MatDestroy(&pcis->A_II);
5494: pcis->A_II = A_IIn;
5495: }
5496: }
5497: if (pcbddc->local_mat->symmetric_set) {
5498: MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);
5499: }
5500: /* Matrix for Dirichlet problem is pcis->A_II */
5501: n_D = pcis->n - pcis->n_B;
5502: opts = PETSC_FALSE;
5503: if (!pcbddc->ksp_D) { /* create object if not yet build */
5504: opts = PETSC_TRUE;
5505: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
5506: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
5507: /* default */
5508: KSPSetType(pcbddc->ksp_D,KSPPREONLY);
5509: KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);
5510: PetscObjectTypeCompare((PetscObject)pcis->pA_II,MATSEQSBAIJ,&issbaij);
5511: KSPGetPC(pcbddc->ksp_D,&pc_temp);
5512: if (issbaij) {
5513: PCSetType(pc_temp,PCCHOLESKY);
5514: } else {
5515: PCSetType(pc_temp,PCLU);
5516: }
5517: KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure);
5518: }
5519: MatSetOptionsPrefix(pcis->pA_II,((PetscObject)pcbddc->ksp_D)->prefix);
5520: KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->pA_II);
5521: /* Allow user's customization */
5522: if (opts) {
5523: KSPSetFromOptions(pcbddc->ksp_D);
5524: }
5525: MatGetNearNullSpace(pcis->pA_II,&nnsp);
5526: if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5527: MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcis->is_I_local,pcis->pA_II);
5528: }
5529: MatGetNearNullSpace(pcis->pA_II,&nnsp);
5530: KSPGetPC(pcbddc->ksp_D,&pc_temp);
5531: PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
5532: if (f && pcbddc->mat_graph->cloc && !nnsp) {
5533: PetscReal *coords = pcbddc->mat_graph->coords,*scoords;
5534: const PetscInt *idxs;
5535: PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d;
5537: ISGetLocalSize(pcis->is_I_local,&nl);
5538: ISGetIndices(pcis->is_I_local,&idxs);
5539: PetscMalloc1(nl*cdim,&scoords);
5540: for (i=0;i<nl;i++) {
5541: for (d=0;d<cdim;d++) {
5542: scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5543: }
5544: }
5545: ISRestoreIndices(pcis->is_I_local,&idxs);
5546: PCSetCoordinates(pc_temp,cdim,nl,scoords);
5547: PetscFree(scoords);
5548: }
5549: if (sub_schurs && sub_schurs->reuse_solver) {
5550: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5552: KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);
5553: }
5555: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5556: if (!n_D) {
5557: KSPGetPC(pcbddc->ksp_D,&pc_temp);
5558: PCSetType(pc_temp,PCNONE);
5559: }
5560: KSPSetUp(pcbddc->ksp_D);
5561: /* set ksp_D into pcis data */
5562: PetscObjectReference((PetscObject)pcbddc->ksp_D);
5563: KSPDestroy(&pcis->ksp_D);
5564: pcis->ksp_D = pcbddc->ksp_D;
5565: }
5567: /* NEUMANN PROBLEM */
5568: A_RR = NULL;
5569: if (neumann) {
5570: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5571: PetscInt ibs,mbs;
5572: PetscBool issbaij, reuse_neumann_solver;
5573: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5575: reuse_neumann_solver = PETSC_FALSE;
5576: if (sub_schurs && sub_schurs->reuse_solver) {
5577: IS iP;
5579: reuse_neumann_solver = PETSC_TRUE;
5580: PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);
5581: if (iP) reuse_neumann_solver = PETSC_FALSE;
5582: }
5583: /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
5584: ISGetSize(pcbddc->is_R_local,&n_R);
5585: if (pcbddc->ksp_R) { /* already created ksp */
5586: PetscInt nn_R;
5587: KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);
5588: PetscObjectReference((PetscObject)A_RR);
5589: MatGetSize(A_RR,&nn_R,NULL);
5590: if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
5591: KSPReset(pcbddc->ksp_R);
5592: MatDestroy(&A_RR);
5593: reuse = MAT_INITIAL_MATRIX;
5594: } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5595: if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5596: MatDestroy(&A_RR);
5597: reuse = MAT_INITIAL_MATRIX;
5598: } else { /* safe to reuse the matrix */
5599: reuse = MAT_REUSE_MATRIX;
5600: }
5601: }
5602: /* last check */
5603: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5604: MatDestroy(&A_RR);
5605: reuse = MAT_INITIAL_MATRIX;
5606: }
5607: } else { /* first time, so we need to create the matrix */
5608: reuse = MAT_INITIAL_MATRIX;
5609: }
5610: /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection
5611: TODO: Get Rid of these conversions */
5612: MatGetBlockSize(pcbddc->local_mat,&mbs);
5613: ISGetBlockSize(pcbddc->is_R_local,&ibs);
5614: PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
5615: if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5616: if (matis->A == pcbddc->local_mat) {
5617: MatDestroy(&pcbddc->local_mat);
5618: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5619: } else {
5620: MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5621: }
5622: } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
5623: if (matis->A == pcbddc->local_mat) {
5624: MatDestroy(&pcbddc->local_mat);
5625: MatConvert(matis->A,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5626: } else {
5627: MatConvert(pcbddc->local_mat,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5628: }
5629: }
5630: /* extract A_RR */
5631: if (reuse_neumann_solver) {
5632: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5634: if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
5635: MatDestroy(&A_RR);
5636: if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
5637: PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);
5638: } else {
5639: MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);
5640: }
5641: } else {
5642: MatDestroy(&A_RR);
5643: PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);
5644: PetscObjectReference((PetscObject)A_RR);
5645: }
5646: } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
5647: MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
5648: }
5649: if (pcbddc->local_mat->symmetric_set) {
5650: MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);
5651: }
5652: opts = PETSC_FALSE;
5653: if (!pcbddc->ksp_R) { /* create object if not present */
5654: opts = PETSC_TRUE;
5655: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
5656: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
5657: /* default */
5658: KSPSetType(pcbddc->ksp_R,KSPPREONLY);
5659: KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);
5660: KSPGetPC(pcbddc->ksp_R,&pc_temp);
5661: PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);
5662: if (issbaij) {
5663: PCSetType(pc_temp,PCCHOLESKY);
5664: } else {
5665: PCSetType(pc_temp,PCLU);
5666: }
5667: KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure);
5668: }
5669: KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);
5670: MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix);
5671: if (opts) { /* Allow user's customization once */
5672: KSPSetFromOptions(pcbddc->ksp_R);
5673: }
5674: MatGetNearNullSpace(A_RR,&nnsp);
5675: if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5676: MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcbddc->is_R_local,A_RR);
5677: }
5678: MatGetNearNullSpace(A_RR,&nnsp);
5679: KSPGetPC(pcbddc->ksp_R,&pc_temp);
5680: PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
5681: if (f && pcbddc->mat_graph->cloc && !nnsp) {
5682: PetscReal *coords = pcbddc->mat_graph->coords,*scoords;
5683: const PetscInt *idxs;
5684: PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d;
5686: ISGetLocalSize(pcbddc->is_R_local,&nl);
5687: ISGetIndices(pcbddc->is_R_local,&idxs);
5688: PetscMalloc1(nl*cdim,&scoords);
5689: for (i=0;i<nl;i++) {
5690: for (d=0;d<cdim;d++) {
5691: scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5692: }
5693: }
5694: ISRestoreIndices(pcbddc->is_R_local,&idxs);
5695: PCSetCoordinates(pc_temp,cdim,nl,scoords);
5696: PetscFree(scoords);
5697: }
5699: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5700: if (!n_R) {
5701: KSPGetPC(pcbddc->ksp_R,&pc_temp);
5702: PCSetType(pc_temp,PCNONE);
5703: }
5704: /* Reuse solver if it is present */
5705: if (reuse_neumann_solver) {
5706: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5708: KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);
5709: }
5710: KSPSetUp(pcbddc->ksp_R);
5711: }
5713: if (pcbddc->dbg_flag) {
5714: PetscViewerFlush(pcbddc->dbg_viewer);
5715: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5716: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5717: }
5718: PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);
5720: /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
5721: if (pcbddc->NullSpace_corr[0]) {
5722: PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);
5723: }
5724: if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) {
5725: PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);
5726: }
5727: if (neumann && pcbddc->NullSpace_corr[2]) {
5728: PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);
5729: }
5730: /* check Dirichlet and Neumann solvers */
5731: if (pcbddc->dbg_flag) {
5732: if (dirichlet) { /* Dirichlet */
5733: VecSetRandom(pcis->vec1_D,NULL);
5734: MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
5735: KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);
5736: KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);
5737: VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);
5738: VecNorm(pcis->vec1_D,NORM_INFINITY,&value);
5739: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);
5740: PetscViewerFlush(pcbddc->dbg_viewer);
5741: }
5742: if (neumann) { /* Neumann */
5743: VecSetRandom(pcbddc->vec1_R,NULL);
5744: MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
5745: KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);
5746: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
5747: VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);
5748: VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);
5749: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);
5750: PetscViewerFlush(pcbddc->dbg_viewer);
5751: }
5752: }
5753: /* free Neumann problem's matrix */
5754: MatDestroy(&A_RR);
5755: return(0);
5756: }
5758: static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
5759: {
5760: PetscErrorCode ierr;
5761: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5762: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5763: PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE;
5766: if (!reuse_solver) {
5767: VecSet(pcbddc->vec1_R,0.);
5768: }
5769: if (!pcbddc->switch_static) {
5770: if (applytranspose && pcbddc->local_auxmat1) {
5771: MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);
5772: MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5773: }
5774: if (!reuse_solver) {
5775: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5776: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5777: } else {
5778: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5780: VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5781: VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5782: }
5783: } else {
5784: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5785: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5786: VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5787: VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5788: if (applytranspose && pcbddc->local_auxmat1) {
5789: MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);
5790: MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5791: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5792: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5793: }
5794: }
5795: PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0);
5796: if (!reuse_solver || pcbddc->switch_static) {
5797: if (applytranspose) {
5798: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5799: } else {
5800: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5801: }
5802: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec1_R);
5803: } else {
5804: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5806: if (applytranspose) {
5807: MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5808: } else {
5809: MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5810: }
5811: }
5812: PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1],pc,0,0,0);
5813: VecSet(inout_B,0.);
5814: if (!pcbddc->switch_static) {
5815: if (!reuse_solver) {
5816: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5817: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5818: } else {
5819: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5821: VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5822: VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5823: }
5824: if (!applytranspose && pcbddc->local_auxmat1) {
5825: MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5826: MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);
5827: }
5828: } else {
5829: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5830: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5831: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5832: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5833: if (!applytranspose && pcbddc->local_auxmat1) {
5834: MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5835: MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
5836: }
5837: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5838: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5839: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5840: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5841: }
5842: return(0);
5843: }
5845: /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
5846: PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
5847: {
5849: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5850: PC_IS* pcis = (PC_IS*) (pc->data);
5851: const PetscScalar zero = 0.0;
5854: /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
5855: if (!pcbddc->benign_apply_coarse_only) {
5856: if (applytranspose) {
5857: MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
5858: if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5859: } else {
5860: MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);
5861: if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5862: }
5863: } else {
5864: VecSet(pcbddc->vec1_P,zero);
5865: }
5867: /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
5868: if (pcbddc->benign_n) {
5869: PetscScalar *array;
5870: PetscInt j;
5872: VecGetArray(pcbddc->vec1_P,&array);
5873: for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j];
5874: VecRestoreArray(pcbddc->vec1_P,&array);
5875: }
5877: /* start communications from local primal nodes to rhs of coarse solver */
5878: VecSet(pcbddc->coarse_vec,zero);
5879: PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);
5880: PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);
5882: /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
5883: if (pcbddc->coarse_ksp) {
5884: Mat coarse_mat;
5885: Vec rhs,sol;
5886: MatNullSpace nullsp;
5887: PetscBool isbddc = PETSC_FALSE;
5889: if (pcbddc->benign_have_null) {
5890: PC coarse_pc;
5892: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5893: PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);
5894: /* we need to propagate to coarser levels the need for a possible benign correction */
5895: if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
5896: PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5897: coarsepcbddc->benign_skip_correction = PETSC_FALSE;
5898: coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
5899: }
5900: }
5901: KSPGetRhs(pcbddc->coarse_ksp,&rhs);
5902: KSPGetSolution(pcbddc->coarse_ksp,&sol);
5903: KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
5904: if (applytranspose) {
5905: if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented");
5906: PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);
5907: KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);
5908: PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);
5909: KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);
5910: MatGetTransposeNullSpace(coarse_mat,&nullsp);
5911: if (nullsp) {
5912: MatNullSpaceRemove(nullsp,sol);
5913: }
5914: } else {
5915: MatGetNullSpace(coarse_mat,&nullsp);
5916: if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
5917: PC coarse_pc;
5919: if (nullsp) {
5920: MatNullSpaceRemove(nullsp,rhs);
5921: }
5922: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5923: PCPreSolve(coarse_pc,pcbddc->coarse_ksp);
5924: PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);
5925: PCPostSolve(coarse_pc,pcbddc->coarse_ksp);
5926: } else {
5927: PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);
5928: KSPSolve(pcbddc->coarse_ksp,rhs,sol);
5929: PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2],pc,0,0,0);
5930: KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);
5931: if (nullsp) {
5932: MatNullSpaceRemove(nullsp,sol);
5933: }
5934: }
5935: }
5936: /* we don't need the benign correction at coarser levels anymore */
5937: if (pcbddc->benign_have_null && isbddc) {
5938: PC coarse_pc;
5939: PC_BDDC* coarsepcbddc;
5941: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5942: coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5943: coarsepcbddc->benign_skip_correction = PETSC_TRUE;
5944: coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
5945: }
5946: }
5948: /* Local solution on R nodes */
5949: if (pcis->n && !pcbddc->benign_apply_coarse_only) {
5950: PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);
5951: }
5952: /* communications from coarse sol to local primal nodes */
5953: PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);
5954: PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);
5956: /* Sum contributions from the two levels */
5957: if (!pcbddc->benign_apply_coarse_only) {
5958: if (applytranspose) {
5959: MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5960: if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5961: } else {
5962: MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5963: if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5964: }
5965: /* store p0 */
5966: if (pcbddc->benign_n) {
5967: PetscScalar *array;
5968: PetscInt j;
5970: VecGetArray(pcbddc->vec1_P,&array);
5971: for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j];
5972: VecRestoreArray(pcbddc->vec1_P,&array);
5973: }
5974: } else { /* expand the coarse solution */
5975: if (applytranspose) {
5976: MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);
5977: } else {
5978: MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);
5979: }
5980: }
5981: return(0);
5982: }
5984: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
5985: {
5986: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5987: Vec from,to;
5988: const PetscScalar *array;
5989: PetscErrorCode ierr;
5992: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5993: from = pcbddc->coarse_vec;
5994: to = pcbddc->vec1_P;
5995: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5996: Vec tvec;
5998: KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5999: VecResetArray(tvec);
6000: KSPGetSolution(pcbddc->coarse_ksp,&tvec);
6001: VecGetArrayRead(tvec,&array);
6002: VecPlaceArray(from,array);
6003: VecRestoreArrayRead(tvec,&array);
6004: }
6005: } else { /* from local to global -> put data in coarse right hand side */
6006: from = pcbddc->vec1_P;
6007: to = pcbddc->coarse_vec;
6008: }
6009: VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
6010: return(0);
6011: }
6013: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
6014: {
6015: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
6016: Vec from,to;
6017: const PetscScalar *array;
6018: PetscErrorCode ierr;
6021: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
6022: from = pcbddc->coarse_vec;
6023: to = pcbddc->vec1_P;
6024: } else { /* from local to global -> put data in coarse right hand side */
6025: from = pcbddc->vec1_P;
6026: to = pcbddc->coarse_vec;
6027: }
6028: VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
6029: if (smode == SCATTER_FORWARD) {
6030: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
6031: Vec tvec;
6033: KSPGetRhs(pcbddc->coarse_ksp,&tvec);
6034: VecGetArrayRead(to,&array);
6035: VecPlaceArray(tvec,array);
6036: VecRestoreArrayRead(to,&array);
6037: }
6038: } else {
6039: if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
6040: VecResetArray(from);
6041: }
6042: }
6043: return(0);
6044: }
6046: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
6047: {
6048: PetscErrorCode ierr;
6049: PC_IS* pcis = (PC_IS*)(pc->data);
6050: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
6051: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
6052: /* one and zero */
6053: PetscScalar one=1.0,zero=0.0;
6054: /* space to store constraints and their local indices */
6055: PetscScalar *constraints_data;
6056: PetscInt *constraints_idxs,*constraints_idxs_B;
6057: PetscInt *constraints_idxs_ptr,*constraints_data_ptr;
6058: PetscInt *constraints_n;
6059: /* iterators */
6060: PetscInt i,j,k,total_counts,total_counts_cc,cum;
6061: /* BLAS integers */
6062: PetscBLASInt lwork,lierr;
6063: PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1;
6064: PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC;
6065: /* reuse */
6066: PetscInt olocal_primal_size,olocal_primal_size_cc;
6067: PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult;
6068: /* change of basis */
6069: PetscBool qr_needed;
6070: PetscBT change_basis,qr_needed_idx;
6071: /* auxiliary stuff */
6072: PetscInt *nnz,*is_indices;
6073: PetscInt ncc;
6074: /* some quantities */
6075: PetscInt n_vertices,total_primal_vertices,valid_constraints;
6076: PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
6077: PetscReal tol; /* tolerance for retaining eigenmodes */
6080: tol = PetscSqrtReal(PETSC_SMALL);
6081: /* Destroy Mat objects computed previously */
6082: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
6083: MatDestroy(&pcbddc->ConstraintMatrix);
6084: MatDestroy(&pcbddc->switch_static_change);
6085: /* save info on constraints from previous setup (if any) */
6086: olocal_primal_size = pcbddc->local_primal_size;
6087: olocal_primal_size_cc = pcbddc->local_primal_size_cc;
6088: PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);
6089: PetscArraycpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc);
6090: PetscArraycpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc);
6091: PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
6092: PetscFree(pcbddc->primal_indices_local_idxs);
6094: if (!pcbddc->adaptive_selection) {
6095: IS ISForVertices,*ISForFaces,*ISForEdges;
6096: MatNullSpace nearnullsp;
6097: const Vec *nearnullvecs;
6098: Vec *localnearnullsp;
6099: PetscScalar *array;
6100: PetscInt n_ISForFaces,n_ISForEdges,nnsp_size;
6101: PetscBool nnsp_has_cnst;
6102: /* LAPACK working arrays for SVD or POD */
6103: PetscBool skip_lapack,boolforchange;
6104: PetscScalar *work;
6105: PetscReal *singular_vals;
6106: #if defined(PETSC_USE_COMPLEX)
6107: PetscReal *rwork;
6108: #endif
6109: PetscScalar *temp_basis = NULL,*correlation_mat = NULL;
6110: PetscBLASInt dummy_int=1;
6111: PetscScalar dummy_scalar=1.;
6112: PetscBool use_pod = PETSC_FALSE;
6114: /* MKL SVD with same input gives different results on different processes! */
6115: #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL)
6116: use_pod = PETSC_TRUE;
6117: #endif
6118: /* Get index sets for faces, edges and vertices from graph */
6119: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
6120: /* print some info */
6121: if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
6122: PetscInt nv;
6124: PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
6125: ISGetSize(ISForVertices,&nv);
6126: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
6127: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
6128: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,nv,pcbddc->use_vertices);
6129: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);
6130: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);
6131: PetscViewerFlush(pcbddc->dbg_viewer);
6132: PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
6133: }
6135: /* free unneeded index sets */
6136: if (!pcbddc->use_vertices) {
6137: ISDestroy(&ISForVertices);
6138: }
6139: if (!pcbddc->use_edges) {
6140: for (i=0;i<n_ISForEdges;i++) {
6141: ISDestroy(&ISForEdges[i]);
6142: }
6143: PetscFree(ISForEdges);
6144: n_ISForEdges = 0;
6145: }
6146: if (!pcbddc->use_faces) {
6147: for (i=0;i<n_ISForFaces;i++) {
6148: ISDestroy(&ISForFaces[i]);
6149: }
6150: PetscFree(ISForFaces);
6151: n_ISForFaces = 0;
6152: }
6154: /* check if near null space is attached to global mat */
6155: if (pcbddc->use_nnsp) {
6156: MatGetNearNullSpace(pc->pmat,&nearnullsp);
6157: } else nearnullsp = NULL;
6159: if (nearnullsp) {
6160: MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
6161: /* remove any stored info */
6162: MatNullSpaceDestroy(&pcbddc->onearnullspace);
6163: PetscFree(pcbddc->onearnullvecs_state);
6164: /* store information for BDDC solver reuse */
6165: PetscObjectReference((PetscObject)nearnullsp);
6166: pcbddc->onearnullspace = nearnullsp;
6167: PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);
6168: for (i=0;i<nnsp_size;i++) {
6169: PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);
6170: }
6171: } else { /* if near null space is not provided BDDC uses constants by default */
6172: nnsp_size = 0;
6173: nnsp_has_cnst = PETSC_TRUE;
6174: }
6175: /* get max number of constraints on a single cc */
6176: max_constraints = nnsp_size;
6177: if (nnsp_has_cnst) max_constraints++;
6179: /*
6180: Evaluate maximum storage size needed by the procedure
6181: - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
6182: - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
6183: There can be multiple constraints per connected component
6184: */
6185: n_vertices = 0;
6186: if (ISForVertices) {
6187: ISGetSize(ISForVertices,&n_vertices);
6188: }
6189: ncc = n_vertices+n_ISForFaces+n_ISForEdges;
6190: PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);
6192: total_counts = n_ISForFaces+n_ISForEdges;
6193: total_counts *= max_constraints;
6194: total_counts += n_vertices;
6195: PetscBTCreate(total_counts,&change_basis);
6197: total_counts = 0;
6198: max_size_of_constraint = 0;
6199: for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
6200: IS used_is;
6201: if (i<n_ISForEdges) {
6202: used_is = ISForEdges[i];
6203: } else {
6204: used_is = ISForFaces[i-n_ISForEdges];
6205: }
6206: ISGetSize(used_is,&j);
6207: total_counts += j;
6208: max_size_of_constraint = PetscMax(j,max_size_of_constraint);
6209: }
6210: PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);
6212: /* get local part of global near null space vectors */
6213: PetscMalloc1(nnsp_size,&localnearnullsp);
6214: for (k=0;k<nnsp_size;k++) {
6215: VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
6216: VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
6217: VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
6218: }
6220: /* whether or not to skip lapack calls */
6221: skip_lapack = PETSC_TRUE;
6222: if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
6224: /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
6225: if (!skip_lapack) {
6226: PetscScalar temp_work;
6228: if (use_pod) {
6229: /* Proper Orthogonal Decomposition (POD) using the snapshot method */
6230: PetscMalloc1(max_constraints*max_constraints,&correlation_mat);
6231: PetscMalloc1(max_constraints,&singular_vals);
6232: PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);
6233: #if defined(PETSC_USE_COMPLEX)
6234: PetscMalloc1(3*max_constraints,&rwork);
6235: #endif
6236: /* now we evaluate the optimal workspace using query with lwork=-1 */
6237: PetscBLASIntCast(max_constraints,&Blas_N);
6238: PetscBLASIntCast(max_constraints,&Blas_LDA);
6239: lwork = -1;
6240: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6241: #if !defined(PETSC_USE_COMPLEX)
6242: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
6243: #else
6244: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
6245: #endif
6246: PetscFPTrapPop();
6247: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
6248: } else {
6249: #if !defined(PETSC_MISSING_LAPACK_GESVD)
6250: /* SVD */
6251: PetscInt max_n,min_n;
6252: max_n = max_size_of_constraint;
6253: min_n = max_constraints;
6254: if (max_size_of_constraint < max_constraints) {
6255: min_n = max_size_of_constraint;
6256: max_n = max_constraints;
6257: }
6258: PetscMalloc1(min_n,&singular_vals);
6259: #if defined(PETSC_USE_COMPLEX)
6260: PetscMalloc1(5*min_n,&rwork);
6261: #endif
6262: /* now we evaluate the optimal workspace using query with lwork=-1 */
6263: lwork = -1;
6264: PetscBLASIntCast(max_n,&Blas_M);
6265: PetscBLASIntCast(min_n,&Blas_N);
6266: PetscBLASIntCast(max_n,&Blas_LDA);
6267: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6268: #if !defined(PETSC_USE_COMPLEX)
6269: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
6270: #else
6271: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
6272: #endif
6273: PetscFPTrapPop();
6274: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
6275: #else
6276: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen");
6277: #endif /* on missing GESVD */
6278: }
6279: /* Allocate optimal workspace */
6280: PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);
6281: PetscMalloc1(lwork,&work);
6282: }
6283: /* Now we can loop on constraining sets */
6284: total_counts = 0;
6285: constraints_idxs_ptr[0] = 0;
6286: constraints_data_ptr[0] = 0;
6287: /* vertices */
6288: if (n_vertices) {
6289: ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);
6290: PetscArraycpy(constraints_idxs,is_indices,n_vertices);
6291: for (i=0;i<n_vertices;i++) {
6292: constraints_n[total_counts] = 1;
6293: constraints_data[total_counts] = 1.0;
6294: constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
6295: constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
6296: total_counts++;
6297: }
6298: ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);
6299: n_vertices = total_counts;
6300: }
6302: /* edges and faces */
6303: total_counts_cc = total_counts;
6304: for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
6305: IS used_is;
6306: PetscBool idxs_copied = PETSC_FALSE;
6308: if (ncc<n_ISForEdges) {
6309: used_is = ISForEdges[ncc];
6310: boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6311: } else {
6312: used_is = ISForFaces[ncc-n_ISForEdges];
6313: boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6314: }
6315: temp_constraints = 0; /* zero the number of constraints I have on this conn comp */
6317: ISGetSize(used_is,&size_of_constraint);
6318: ISGetIndices(used_is,(const PetscInt**)&is_indices);
6319: /* change of basis should not be performed on local periodic nodes */
6320: if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
6321: if (nnsp_has_cnst) {
6322: PetscScalar quad_value;
6324: PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);
6325: idxs_copied = PETSC_TRUE;
6327: if (!pcbddc->use_nnsp_true) {
6328: quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
6329: } else {
6330: quad_value = 1.0;
6331: }
6332: for (j=0;j<size_of_constraint;j++) {
6333: constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
6334: }
6335: temp_constraints++;
6336: total_counts++;
6337: }
6338: for (k=0;k<nnsp_size;k++) {
6339: PetscReal real_value;
6340: PetscScalar *ptr_to_data;
6342: VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6343: ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
6344: for (j=0;j<size_of_constraint;j++) {
6345: ptr_to_data[j] = array[is_indices[j]];
6346: }
6347: VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6348: /* check if array is null on the connected component */
6349: PetscBLASIntCast(size_of_constraint,&Blas_N);
6350: PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
6351: if (real_value > tol*size_of_constraint) { /* keep indices and values */
6352: temp_constraints++;
6353: total_counts++;
6354: if (!idxs_copied) {
6355: PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);
6356: idxs_copied = PETSC_TRUE;
6357: }
6358: }
6359: }
6360: ISRestoreIndices(used_is,(const PetscInt**)&is_indices);
6361: valid_constraints = temp_constraints;
6362: if (!pcbddc->use_nnsp_true && temp_constraints) {
6363: if (temp_constraints == 1) { /* just normalize the constraint */
6364: PetscScalar norm,*ptr_to_data;
6366: ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6367: PetscBLASIntCast(size_of_constraint,&Blas_N);
6368: PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
6369: norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
6370: PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
6371: } else { /* perform SVD */
6372: PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6374: if (use_pod) {
6375: /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6376: POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6377: -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6378: the constraints basis will differ (by a complex factor with absolute value equal to 1)
6379: from that computed using LAPACKgesvd
6380: -> This is due to a different computation of eigenvectors in LAPACKheev
6381: -> The quality of the POD-computed basis will be the same */
6382: PetscArrayzero(correlation_mat,temp_constraints*temp_constraints);
6383: /* Store upper triangular part of correlation matrix */
6384: PetscBLASIntCast(size_of_constraint,&Blas_N);
6385: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6386: for (j=0;j<temp_constraints;j++) {
6387: for (k=0;k<j+1;k++) {
6388: PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
6389: }
6390: }
6391: /* compute eigenvalues and eigenvectors of correlation matrix */
6392: PetscBLASIntCast(temp_constraints,&Blas_N);
6393: PetscBLASIntCast(temp_constraints,&Blas_LDA);
6394: #if !defined(PETSC_USE_COMPLEX)
6395: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
6396: #else
6397: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
6398: #endif
6399: PetscFPTrapPop();
6400: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
6401: /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6402: j = 0;
6403: while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++;
6404: total_counts = total_counts-j;
6405: valid_constraints = temp_constraints-j;
6406: /* scale and copy POD basis into used quadrature memory */
6407: PetscBLASIntCast(size_of_constraint,&Blas_M);
6408: PetscBLASIntCast(temp_constraints,&Blas_N);
6409: PetscBLASIntCast(temp_constraints,&Blas_K);
6410: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6411: PetscBLASIntCast(temp_constraints,&Blas_LDB);
6412: PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6413: if (j<temp_constraints) {
6414: PetscInt ii;
6415: for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
6416: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6417: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
6418: PetscFPTrapPop();
6419: for (k=0;k<temp_constraints-j;k++) {
6420: for (ii=0;ii<size_of_constraint;ii++) {
6421: ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
6422: }
6423: }
6424: }
6425: } else {
6426: #if !defined(PETSC_MISSING_LAPACK_GESVD)
6427: PetscBLASIntCast(size_of_constraint,&Blas_M);
6428: PetscBLASIntCast(temp_constraints,&Blas_N);
6429: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6430: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6431: #if !defined(PETSC_USE_COMPLEX)
6432: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
6433: #else
6434: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
6435: #endif
6436: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
6437: PetscFPTrapPop();
6438: /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
6439: k = temp_constraints;
6440: if (k > size_of_constraint) k = size_of_constraint;
6441: j = 0;
6442: while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++;
6443: valid_constraints = k-j;
6444: total_counts = total_counts-temp_constraints+valid_constraints;
6445: #else
6446: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen");
6447: #endif /* on missing GESVD */
6448: }
6449: }
6450: }
6451: /* update pointers information */
6452: if (valid_constraints) {
6453: constraints_n[total_counts_cc] = valid_constraints;
6454: constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
6455: constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
6456: /* set change_of_basis flag */
6457: if (boolforchange) {
6458: PetscBTSet(change_basis,total_counts_cc);
6459: }
6460: total_counts_cc++;
6461: }
6462: }
6463: /* free workspace */
6464: if (!skip_lapack) {
6465: PetscFree(work);
6466: #if defined(PETSC_USE_COMPLEX)
6467: PetscFree(rwork);
6468: #endif
6469: PetscFree(singular_vals);
6470: PetscFree(correlation_mat);
6471: PetscFree(temp_basis);
6472: }
6473: for (k=0;k<nnsp_size;k++) {
6474: VecDestroy(&localnearnullsp[k]);
6475: }
6476: PetscFree(localnearnullsp);
6477: /* free index sets of faces, edges and vertices */
6478: for (i=0;i<n_ISForFaces;i++) {
6479: ISDestroy(&ISForFaces[i]);
6480: }
6481: if (n_ISForFaces) {
6482: PetscFree(ISForFaces);
6483: }
6484: for (i=0;i<n_ISForEdges;i++) {
6485: ISDestroy(&ISForEdges[i]);
6486: }
6487: if (n_ISForEdges) {
6488: PetscFree(ISForEdges);
6489: }
6490: ISDestroy(&ISForVertices);
6491: } else {
6492: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6494: total_counts = 0;
6495: n_vertices = 0;
6496: if (sub_schurs->is_vertices && pcbddc->use_vertices) {
6497: ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
6498: }
6499: max_constraints = 0;
6500: total_counts_cc = 0;
6501: for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6502: total_counts += pcbddc->adaptive_constraints_n[i];
6503: if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6504: max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
6505: }
6506: constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6507: constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6508: constraints_idxs = pcbddc->adaptive_constraints_idxs;
6509: constraints_data = pcbddc->adaptive_constraints_data;
6510: /* constraints_n differs from pcbddc->adaptive_constraints_n */
6511: PetscMalloc1(total_counts_cc,&constraints_n);
6512: total_counts_cc = 0;
6513: for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6514: if (pcbddc->adaptive_constraints_n[i]) {
6515: constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6516: }
6517: }
6519: max_size_of_constraint = 0;
6520: for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
6521: PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);
6522: /* Change of basis */
6523: PetscBTCreate(total_counts_cc,&change_basis);
6524: if (pcbddc->use_change_of_basis) {
6525: for (i=0;i<sub_schurs->n_subs;i++) {
6526: if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
6527: PetscBTSet(change_basis,i+n_vertices);
6528: }
6529: }
6530: }
6531: }
6532: pcbddc->local_primal_size = total_counts;
6533: PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);
6535: /* map constraints_idxs in boundary numbering */
6536: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);
6537: if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D",constraints_idxs_ptr[total_counts_cc],i);
6539: /* Create constraint matrix */
6540: MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
6541: MatSetType(pcbddc->ConstraintMatrix,MATAIJ);
6542: MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);
6544: /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6545: /* determine if a QR strategy is needed for change of basis */
6546: qr_needed = pcbddc->use_qr_single;
6547: PetscBTCreate(total_counts_cc,&qr_needed_idx);
6548: total_primal_vertices=0;
6549: pcbddc->local_primal_size_cc = 0;
6550: for (i=0;i<total_counts_cc;i++) {
6551: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6552: if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6553: pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6554: pcbddc->local_primal_size_cc += 1;
6555: } else if (PetscBTLookup(change_basis,i)) {
6556: for (k=0;k<constraints_n[i];k++) {
6557: pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6558: }
6559: pcbddc->local_primal_size_cc += constraints_n[i];
6560: if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6561: PetscBTSet(qr_needed_idx,i);
6562: qr_needed = PETSC_TRUE;
6563: }
6564: } else {
6565: pcbddc->local_primal_size_cc += 1;
6566: }
6567: }
6568: /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6569: pcbddc->n_vertices = total_primal_vertices;
6570: /* permute indices in order to have a sorted set of vertices */
6571: PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);
6572: PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);
6573: PetscArraycpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices);
6574: for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
6576: /* nonzero structure of constraint matrix */
6577: /* and get reference dof for local constraints */
6578: PetscMalloc1(pcbddc->local_primal_size,&nnz);
6579: for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
6581: j = total_primal_vertices;
6582: total_counts = total_primal_vertices;
6583: cum = total_primal_vertices;
6584: for (i=n_vertices;i<total_counts_cc;i++) {
6585: if (!PetscBTLookup(change_basis,i)) {
6586: pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
6587: pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
6588: cum++;
6589: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6590: for (k=0;k<constraints_n[i];k++) {
6591: pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6592: nnz[j+k] = size_of_constraint;
6593: }
6594: j += constraints_n[i];
6595: }
6596: }
6597: MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
6598: MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
6599: PetscFree(nnz);
6601: /* set values in constraint matrix */
6602: for (i=0;i<total_primal_vertices;i++) {
6603: MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);
6604: }
6605: total_counts = total_primal_vertices;
6606: for (i=n_vertices;i<total_counts_cc;i++) {
6607: if (!PetscBTLookup(change_basis,i)) {
6608: PetscInt *cols;
6610: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6611: cols = constraints_idxs+constraints_idxs_ptr[i];
6612: for (k=0;k<constraints_n[i];k++) {
6613: PetscInt row = total_counts+k;
6614: PetscScalar *vals;
6616: vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
6617: MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);
6618: }
6619: total_counts += constraints_n[i];
6620: }
6621: }
6622: /* assembling */
6623: MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6624: MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6625: MatViewFromOptions(pcbddc->ConstraintMatrix,(PetscObject)pc,"-pc_bddc_constraint_mat_view");
6627: /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6628: if (pcbddc->use_change_of_basis) {
6629: /* dual and primal dofs on a single cc */
6630: PetscInt dual_dofs,primal_dofs;
6631: /* working stuff for GEQRF */
6632: PetscScalar *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t;
6633: PetscBLASInt lqr_work;
6634: /* working stuff for UNGQR */
6635: PetscScalar *gqr_work = NULL,lgqr_work_t=0.0;
6636: PetscBLASInt lgqr_work;
6637: /* working stuff for TRTRS */
6638: PetscScalar *trs_rhs = NULL;
6639: PetscBLASInt Blas_NRHS;
6640: /* pointers for values insertion into change of basis matrix */
6641: PetscInt *start_rows,*start_cols;
6642: PetscScalar *start_vals;
6643: /* working stuff for values insertion */
6644: PetscBT is_primal;
6645: PetscInt *aux_primal_numbering_B;
6646: /* matrix sizes */
6647: PetscInt global_size,local_size;
6648: /* temporary change of basis */
6649: Mat localChangeOfBasisMatrix;
6650: /* extra space for debugging */
6651: PetscScalar *dbg_work = NULL;
6653: /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
6654: MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);
6655: MatSetType(localChangeOfBasisMatrix,MATAIJ);
6656: MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);
6657: /* nonzeros for local mat */
6658: PetscMalloc1(pcis->n,&nnz);
6659: if (!pcbddc->benign_change || pcbddc->fake_change) {
6660: for (i=0;i<pcis->n;i++) nnz[i]=1;
6661: } else {
6662: const PetscInt *ii;
6663: PetscInt n;
6664: PetscBool flg_row;
6665: MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6666: for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i];
6667: MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6668: }
6669: for (i=n_vertices;i<total_counts_cc;i++) {
6670: if (PetscBTLookup(change_basis,i)) {
6671: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6672: if (PetscBTLookup(qr_needed_idx,i)) {
6673: for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
6674: } else {
6675: nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6676: for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
6677: }
6678: }
6679: }
6680: MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);
6681: MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
6682: PetscFree(nnz);
6683: /* Set interior change in the matrix */
6684: if (!pcbddc->benign_change || pcbddc->fake_change) {
6685: for (i=0;i<pcis->n;i++) {
6686: MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);
6687: }
6688: } else {
6689: const PetscInt *ii,*jj;
6690: PetscScalar *aa;
6691: PetscInt n;
6692: PetscBool flg_row;
6693: MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6694: MatSeqAIJGetArray(pcbddc->benign_change,&aa);
6695: for (i=0;i<n;i++) {
6696: MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);
6697: }
6698: MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);
6699: MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6700: }
6702: if (pcbddc->dbg_flag) {
6703: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
6704: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);
6705: }
6707: /* Now we loop on the constraints which need a change of basis */
6708: /*
6709: Change of basis matrix is evaluated similarly to the FIRST APPROACH in
6710: Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
6712: Basic blocks of change of basis matrix T computed by
6714: - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
6716: | 1 0 ... 0 s_1/S |
6717: | 0 1 ... 0 s_2/S |
6718: | ... |
6719: | 0 ... 1 s_{n-1}/S |
6720: | -s_1/s_n ... -s_{n-1}/s_n s_n/S |
6722: with S = \sum_{i=1}^n s_i^2
6723: NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
6724: in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
6726: - QR decomposition of constraints otherwise
6727: */
6728: if (qr_needed && max_size_of_constraint) {
6729: /* space to store Q */
6730: PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);
6731: /* array to store scaling factors for reflectors */
6732: PetscMalloc1(max_constraints,&qr_tau);
6733: /* first we issue queries for optimal work */
6734: PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6735: PetscBLASIntCast(max_constraints,&Blas_N);
6736: PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6737: lqr_work = -1;
6738: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
6739: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
6740: PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);
6741: PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);
6742: lgqr_work = -1;
6743: PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6744: PetscBLASIntCast(max_size_of_constraint,&Blas_N);
6745: PetscBLASIntCast(max_constraints,&Blas_K);
6746: PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6747: if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
6748: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
6749: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr);
6750: PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);
6751: PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);
6752: /* array to store rhs and solution of triangular solver */
6753: PetscMalloc1(max_constraints*max_constraints,&trs_rhs);
6754: /* allocating workspace for check */
6755: if (pcbddc->dbg_flag) {
6756: PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);
6757: }
6758: }
6759: /* array to store whether a node is primal or not */
6760: PetscBTCreate(pcis->n_B,&is_primal);
6761: PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);
6762: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);
6763: if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",total_primal_vertices,i);
6764: for (i=0;i<total_primal_vertices;i++) {
6765: PetscBTSet(is_primal,aux_primal_numbering_B[i]);
6766: }
6767: PetscFree(aux_primal_numbering_B);
6769: /* loop on constraints and see whether or not they need a change of basis and compute it */
6770: for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
6771: size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
6772: if (PetscBTLookup(change_basis,total_counts)) {
6773: /* get constraint info */
6774: primal_dofs = constraints_n[total_counts];
6775: dual_dofs = size_of_constraint-primal_dofs;
6777: if (pcbddc->dbg_flag) {
6778: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %D: %D need a change of basis (size %D)\n",total_counts,primal_dofs,size_of_constraint);
6779: }
6781: if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
6783: /* copy quadrature constraints for change of basis check */
6784: if (pcbddc->dbg_flag) {
6785: PetscArraycpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);
6786: }
6787: /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
6788: PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);
6790: /* compute QR decomposition of constraints */
6791: PetscBLASIntCast(size_of_constraint,&Blas_M);
6792: PetscBLASIntCast(primal_dofs,&Blas_N);
6793: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6794: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6795: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
6796: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
6797: PetscFPTrapPop();
6799: /* explicitly compute R^-T */
6800: PetscArrayzero(trs_rhs,primal_dofs*primal_dofs);
6801: for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
6802: PetscBLASIntCast(primal_dofs,&Blas_N);
6803: PetscBLASIntCast(primal_dofs,&Blas_NRHS);
6804: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6805: PetscBLASIntCast(primal_dofs,&Blas_LDB);
6806: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6807: PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
6808: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
6809: PetscFPTrapPop();
6811: /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */
6812: PetscBLASIntCast(size_of_constraint,&Blas_M);
6813: PetscBLASIntCast(size_of_constraint,&Blas_N);
6814: PetscBLASIntCast(primal_dofs,&Blas_K);
6815: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6816: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6817: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
6818: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr);
6819: PetscFPTrapPop();
6821: /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
6822: i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
6823: where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
6824: PetscBLASIntCast(size_of_constraint,&Blas_M);
6825: PetscBLASIntCast(primal_dofs,&Blas_N);
6826: PetscBLASIntCast(primal_dofs,&Blas_K);
6827: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6828: PetscBLASIntCast(primal_dofs,&Blas_LDB);
6829: PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6830: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6831: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
6832: PetscFPTrapPop();
6833: PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);
6835: /* insert values in change of basis matrix respecting global ordering of new primal dofs */
6836: start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
6837: /* insert cols for primal dofs */
6838: for (j=0;j<primal_dofs;j++) {
6839: start_vals = &qr_basis[j*size_of_constraint];
6840: start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6841: MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6842: }
6843: /* insert cols for dual dofs */
6844: for (j=0,k=0;j<dual_dofs;k++) {
6845: if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
6846: start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
6847: start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6848: MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6849: j++;
6850: }
6851: }
6853: /* check change of basis */
6854: if (pcbddc->dbg_flag) {
6855: PetscInt ii,jj;
6856: PetscBool valid_qr=PETSC_TRUE;
6857: PetscBLASIntCast(primal_dofs,&Blas_M);
6858: PetscBLASIntCast(size_of_constraint,&Blas_N);
6859: PetscBLASIntCast(size_of_constraint,&Blas_K);
6860: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6861: PetscBLASIntCast(size_of_constraint,&Blas_LDB);
6862: PetscBLASIntCast(primal_dofs,&Blas_LDC);
6863: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6864: PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
6865: PetscFPTrapPop();
6866: for (jj=0;jj<size_of_constraint;jj++) {
6867: for (ii=0;ii<primal_dofs;ii++) {
6868: if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
6869: if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
6870: }
6871: }
6872: if (!valid_qr) {
6873: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");
6874: for (jj=0;jj<size_of_constraint;jj++) {
6875: for (ii=0;ii<primal_dofs;ii++) {
6876: if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
6877: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not orthogonal to constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6878: }
6879: if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) {
6880: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not unitary w.r.t constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6881: }
6882: }
6883: }
6884: } else {
6885: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");
6886: }
6887: }
6888: } else { /* simple transformation block */
6889: PetscInt row,col;
6890: PetscScalar val,norm;
6892: PetscBLASIntCast(size_of_constraint,&Blas_N);
6893: PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
6894: for (j=0;j<size_of_constraint;j++) {
6895: PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
6896: row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6897: if (!PetscBTLookup(is_primal,row_B)) {
6898: col = constraints_idxs[constraints_idxs_ptr[total_counts]];
6899: MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);
6900: MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);
6901: } else {
6902: for (k=0;k<size_of_constraint;k++) {
6903: col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6904: if (row != col) {
6905: val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
6906: } else {
6907: val = constraints_data[constraints_data_ptr[total_counts]]/norm;
6908: }
6909: MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);
6910: }
6911: }
6912: }
6913: if (pcbddc->dbg_flag) {
6914: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");
6915: }
6916: }
6917: } else {
6918: if (pcbddc->dbg_flag) {
6919: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %D does not need a change of basis (size %D)\n",total_counts,size_of_constraint);
6920: }
6921: }
6922: }
6924: /* free workspace */
6925: if (qr_needed) {
6926: if (pcbddc->dbg_flag) {
6927: PetscFree(dbg_work);
6928: }
6929: PetscFree(trs_rhs);
6930: PetscFree(qr_tau);
6931: PetscFree(qr_work);
6932: PetscFree(gqr_work);
6933: PetscFree(qr_basis);
6934: }
6935: PetscBTDestroy(&is_primal);
6936: MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
6937: MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
6939: /* assembling of global change of variable */
6940: if (!pcbddc->fake_change) {
6941: Mat tmat;
6942: PetscInt bs;
6944: VecGetSize(pcis->vec1_global,&global_size);
6945: VecGetLocalSize(pcis->vec1_global,&local_size);
6946: MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);
6947: MatISSetLocalMat(tmat,localChangeOfBasisMatrix);
6948: MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY);
6949: MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY);
6950: MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);
6951: MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);
6952: MatGetBlockSize(pc->pmat,&bs);
6953: MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);
6954: MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);
6955: MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);
6956: MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);
6957: MatDestroy(&tmat);
6958: VecSet(pcis->vec1_global,0.0);
6959: VecSet(pcis->vec1_N,1.0);
6960: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6961: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6962: VecReciprocal(pcis->vec1_global);
6963: MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);
6965: /* check */
6966: if (pcbddc->dbg_flag) {
6967: PetscReal error;
6968: Vec x,x_change;
6970: VecDuplicate(pcis->vec1_global,&x);
6971: VecDuplicate(pcis->vec1_global,&x_change);
6972: VecSetRandom(x,NULL);
6973: VecCopy(x,pcis->vec1_global);
6974: VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6975: VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6976: MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);
6977: VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6978: VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6979: MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);
6980: VecAXPY(x,-1.0,x_change);
6981: VecNorm(x,NORM_INFINITY,&error);
6982: if (error > PETSC_SMALL) {
6983: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error);
6984: }
6985: VecDestroy(&x);
6986: VecDestroy(&x_change);
6987: }
6988: /* adapt sub_schurs computed (if any) */
6989: if (pcbddc->use_deluxe_scaling) {
6990: PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
6992: if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
6993: if (sub_schurs && sub_schurs->S_Ej_all) {
6994: Mat S_new,tmat;
6995: IS is_all_N,is_V_Sall = NULL;
6997: ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);
6998: MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);
6999: if (pcbddc->deluxe_zerorows) {
7000: ISLocalToGlobalMapping NtoSall;
7001: IS is_V;
7002: ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);
7003: ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);
7004: ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);
7005: ISLocalToGlobalMappingDestroy(&NtoSall);
7006: ISDestroy(&is_V);
7007: }
7008: ISDestroy(&is_all_N);
7009: MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
7010: MatDestroy(&sub_schurs->S_Ej_all);
7011: PetscObjectReference((PetscObject)S_new);
7012: if (pcbddc->deluxe_zerorows) {
7013: const PetscScalar *array;
7014: const PetscInt *idxs_V,*idxs_all;
7015: PetscInt i,n_V;
7017: MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
7018: ISGetLocalSize(is_V_Sall,&n_V);
7019: ISGetIndices(is_V_Sall,&idxs_V);
7020: ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);
7021: VecGetArrayRead(pcis->D,&array);
7022: for (i=0;i<n_V;i++) {
7023: PetscScalar val;
7024: PetscInt idx;
7026: idx = idxs_V[i];
7027: val = array[idxs_all[idxs_V[i]]];
7028: MatSetValue(S_new,idx,idx,val,INSERT_VALUES);
7029: }
7030: MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);
7031: MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);
7032: VecRestoreArrayRead(pcis->D,&array);
7033: ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);
7034: ISRestoreIndices(is_V_Sall,&idxs_V);
7035: }
7036: sub_schurs->S_Ej_all = S_new;
7037: MatDestroy(&S_new);
7038: if (sub_schurs->sum_S_Ej_all) {
7039: MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
7040: MatDestroy(&sub_schurs->sum_S_Ej_all);
7041: PetscObjectReference((PetscObject)S_new);
7042: if (pcbddc->deluxe_zerorows) {
7043: MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
7044: }
7045: sub_schurs->sum_S_Ej_all = S_new;
7046: MatDestroy(&S_new);
7047: }
7048: ISDestroy(&is_V_Sall);
7049: MatDestroy(&tmat);
7050: }
7051: /* destroy any change of basis context in sub_schurs */
7052: if (sub_schurs && sub_schurs->change) {
7053: PetscInt i;
7055: for (i=0;i<sub_schurs->n_subs;i++) {
7056: KSPDestroy(&sub_schurs->change[i]);
7057: }
7058: PetscFree(sub_schurs->change);
7059: }
7060: }
7061: if (pcbddc->switch_static) { /* need to save the local change */
7062: pcbddc->switch_static_change = localChangeOfBasisMatrix;
7063: } else {
7064: MatDestroy(&localChangeOfBasisMatrix);
7065: }
7066: /* determine if any process has changed the pressures locally */
7067: pcbddc->change_interior = pcbddc->benign_have_null;
7068: } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
7069: MatDestroy(&pcbddc->ConstraintMatrix);
7070: pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
7071: pcbddc->use_qr_single = qr_needed;
7072: }
7073: } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
7074: if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
7075: PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
7076: pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
7077: } else {
7078: Mat benign_global = NULL;
7079: if (pcbddc->benign_have_null) {
7080: Mat M;
7082: pcbddc->change_interior = PETSC_TRUE;
7083: VecCopy(matis->counter,pcis->vec1_N);
7084: VecReciprocal(pcis->vec1_N);
7085: MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global);
7086: if (pcbddc->benign_change) {
7087: MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);
7088: MatDiagonalScale(M,pcis->vec1_N,NULL);
7089: } else {
7090: MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M);
7091: MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES);
7092: }
7093: MatISSetLocalMat(benign_global,M);
7094: MatDestroy(&M);
7095: MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY);
7096: MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY);
7097: }
7098: if (pcbddc->user_ChangeOfBasisMatrix) {
7099: MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);
7100: MatDestroy(&benign_global);
7101: } else if (pcbddc->benign_have_null) {
7102: pcbddc->ChangeOfBasisMatrix = benign_global;
7103: }
7104: }
7105: if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
7106: IS is_global;
7107: const PetscInt *gidxs;
7109: ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);
7110: ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);
7111: ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);
7112: MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);
7113: ISDestroy(&is_global);
7114: }
7115: }
7116: if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) {
7117: VecDuplicate(pcis->vec1_global,&pcbddc->work_change);
7118: }
7120: if (!pcbddc->fake_change) {
7121: /* add pressure dofs to set of primal nodes for numbering purposes */
7122: for (i=0;i<pcbddc->benign_n;i++) {
7123: pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
7124: pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
7125: pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
7126: pcbddc->local_primal_size_cc++;
7127: pcbddc->local_primal_size++;
7128: }
7130: /* check if a new primal space has been introduced (also take into account benign trick) */
7131: pcbddc->new_primal_space_local = PETSC_TRUE;
7132: if (olocal_primal_size == pcbddc->local_primal_size) {
7133: PetscArraycmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);
7134: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7135: if (!pcbddc->new_primal_space_local) {
7136: PetscArraycmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);
7137: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7138: }
7139: }
7140: /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
7141: MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
7142: }
7143: PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);
7145: /* flush dbg viewer */
7146: if (pcbddc->dbg_flag) {
7147: PetscViewerFlush(pcbddc->dbg_viewer);
7148: }
7150: /* free workspace */
7151: PetscBTDestroy(&qr_needed_idx);
7152: PetscBTDestroy(&change_basis);
7153: if (!pcbddc->adaptive_selection) {
7154: PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);
7155: PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);
7156: } else {
7157: PetscFree5(pcbddc->adaptive_constraints_n,
7158: pcbddc->adaptive_constraints_idxs_ptr,
7159: pcbddc->adaptive_constraints_data_ptr,
7160: pcbddc->adaptive_constraints_idxs,
7161: pcbddc->adaptive_constraints_data);
7162: PetscFree(constraints_n);
7163: PetscFree(constraints_idxs_B);
7164: }
7165: return(0);
7166: }
7168: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
7169: {
7170: ISLocalToGlobalMapping map;
7171: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
7172: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
7173: PetscInt i,N;
7174: PetscBool rcsr = PETSC_FALSE;
7175: PetscErrorCode ierr;
7178: if (pcbddc->recompute_topography) {
7179: pcbddc->graphanalyzed = PETSC_FALSE;
7180: /* Reset previously computed graph */
7181: PCBDDCGraphReset(pcbddc->mat_graph);
7182: /* Init local Graph struct */
7183: MatGetSize(pc->pmat,&N,NULL);
7184: MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);
7185: PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);
7187: if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) {
7188: PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);
7189: }
7190: /* Check validity of the csr graph passed in by the user */
7191: if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs);
7193: /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
7194: if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
7195: PetscInt *xadj,*adjncy;
7196: PetscInt nvtxs;
7197: PetscBool flg_row=PETSC_FALSE;
7199: MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
7200: if (flg_row) {
7201: PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);
7202: pcbddc->computed_rowadj = PETSC_TRUE;
7203: }
7204: MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
7205: rcsr = PETSC_TRUE;
7206: }
7207: if (pcbddc->dbg_flag) {
7208: PetscViewerFlush(pcbddc->dbg_viewer);
7209: }
7211: if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
7212: PetscReal *lcoords;
7213: PetscInt n;
7214: MPI_Datatype dimrealtype;
7216: /* TODO: support for blocked */
7217: if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n);
7218: MatGetLocalSize(matis->A,&n,NULL);
7219: PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);
7220: MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);
7221: MPI_Type_commit(&dimrealtype);
7222: PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE);
7223: PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords,MPI_REPLACE);
7224: MPI_Type_free(&dimrealtype);
7225: PetscFree(pcbddc->mat_graph->coords);
7227: pcbddc->mat_graph->coords = lcoords;
7228: pcbddc->mat_graph->cloc = PETSC_TRUE;
7229: pcbddc->mat_graph->cnloc = n;
7230: }
7231: if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs);
7232: pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected);
7234: /* Setup of Graph */
7235: pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */
7236: PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);
7238: /* attach info on disconnected subdomains if present */
7239: if (pcbddc->n_local_subs) {
7240: PetscInt *local_subs,n,totn;
7242: MatGetLocalSize(matis->A,&n,NULL);
7243: PetscMalloc1(n,&local_subs);
7244: for (i=0;i<n;i++) local_subs[i] = pcbddc->n_local_subs;
7245: for (i=0;i<pcbddc->n_local_subs;i++) {
7246: const PetscInt *idxs;
7247: PetscInt nl,j;
7249: ISGetLocalSize(pcbddc->local_subs[i],&nl);
7250: ISGetIndices(pcbddc->local_subs[i],&idxs);
7251: for (j=0;j<nl;j++) local_subs[idxs[j]] = i;
7252: ISRestoreIndices(pcbddc->local_subs[i],&idxs);
7253: }
7254: for (i=0,totn=0;i<n;i++) totn = PetscMax(totn,local_subs[i]);
7255: pcbddc->mat_graph->n_local_subs = totn + 1;
7256: pcbddc->mat_graph->local_subs = local_subs;
7257: }
7258: }
7260: if (!pcbddc->graphanalyzed) {
7261: /* Graph's connected components analysis */
7262: PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);
7263: pcbddc->graphanalyzed = PETSC_TRUE;
7264: pcbddc->corner_selected = pcbddc->corner_selection;
7265: }
7266: if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
7267: return(0);
7268: }
7270: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[])
7271: {
7272: PetscInt i,j,n;
7273: PetscScalar *alphas;
7274: PetscReal norm,*onorms;
7278: n = *nio;
7279: if (!n) return(0);
7280: PetscMalloc2(n,&alphas,n,&onorms);
7281: VecNormalize(vecs[0],&norm);
7282: if (norm < PETSC_SMALL) {
7283: onorms[0] = 0.0;
7284: VecSet(vecs[0],0.0);
7285: } else {
7286: onorms[0] = norm;
7287: }
7289: for (i=1;i<n;i++) {
7290: VecMDot(vecs[i],i,vecs,alphas);
7291: for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]);
7292: VecMAXPY(vecs[i],i,alphas,vecs);
7293: VecNormalize(vecs[i],&norm);
7294: if (norm < PETSC_SMALL) {
7295: onorms[i] = 0.0;
7296: VecSet(vecs[i],0.0);
7297: } else {
7298: onorms[i] = norm;
7299: }
7300: }
7301: /* push nonzero vectors at the beginning */
7302: for (i=0;i<n;i++) {
7303: if (onorms[i] == 0.0) {
7304: for (j=i+1;j<n;j++) {
7305: if (onorms[j] != 0.0) {
7306: VecCopy(vecs[j],vecs[i]);
7307: onorms[j] = 0.0;
7308: }
7309: }
7310: }
7311: }
7312: for (i=0,*nio=0;i<n;i++) *nio += onorms[i] != 0.0 ? 1 : 0;
7313: PetscFree2(alphas,onorms);
7314: return(0);
7315: }
7317: PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void)
7318: {
7319: Mat A;
7320: PetscInt n_neighs,*neighs,*n_shared,**shared;
7321: PetscMPIInt size,rank,color;
7322: PetscInt *xadj,*adjncy;
7323: PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
7324: PetscInt im_active,active_procs,N,n,i,j,threshold = 2;
7325: PetscInt void_procs,*procs_candidates = NULL;
7326: PetscInt xadj_count,*count;
7327: PetscBool ismatis,use_vwgt=PETSC_FALSE;
7328: PetscSubcomm psubcomm;
7329: MPI_Comm subcomm;
7334: PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
7335: if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
7338: if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %D",*n_subdomains);
7340: if (have_void) *have_void = PETSC_FALSE;
7341: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
7342: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
7343: MatISGetLocalMat(mat,&A);
7344: MatGetLocalSize(A,&n,NULL);
7345: im_active = !!n;
7346: MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));
7347: void_procs = size - active_procs;
7348: /* get ranks of non-active processes in mat communicator */
7349: if (void_procs) {
7350: PetscInt ncand;
7352: if (have_void) *have_void = PETSC_TRUE;
7353: PetscMalloc1(size,&procs_candidates);
7354: MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));
7355: for (i=0,ncand=0;i<size;i++) {
7356: if (!procs_candidates[i]) {
7357: procs_candidates[ncand++] = i;
7358: }
7359: }
7360: /* force n_subdomains to be not greater than the number of non-active processes */
7361: *n_subdomains = PetscMin(void_procs,*n_subdomains);
7362: }
7364: /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7365: number of subdomains requested 1 -> send to rank-0 or first candidate in voids */
7366: MatGetSize(mat,&N,NULL);
7367: if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7368: PetscInt issize,isidx,dest;
7369: if (*n_subdomains == 1) dest = 0;
7370: else dest = rank;
7371: if (im_active) {
7372: issize = 1;
7373: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7374: isidx = procs_candidates[dest];
7375: } else {
7376: isidx = dest;
7377: }
7378: } else {
7379: issize = 0;
7380: isidx = -1;
7381: }
7382: if (*n_subdomains != 1) *n_subdomains = active_procs;
7383: ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);
7384: PetscFree(procs_candidates);
7385: return(0);
7386: }
7387: PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);
7388: PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);
7389: threshold = PetscMax(threshold,2);
7391: /* Get info on mapping */
7392: ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);
7394: /* build local CSR graph of subdomains' connectivity */
7395: PetscMalloc1(2,&xadj);
7396: xadj[0] = 0;
7397: xadj[1] = PetscMax(n_neighs-1,0);
7398: PetscMalloc1(xadj[1],&adjncy);
7399: PetscMalloc1(xadj[1],&adjncy_wgt);
7400: PetscCalloc1(n,&count);
7401: for (i=1;i<n_neighs;i++)
7402: for (j=0;j<n_shared[i];j++)
7403: count[shared[i][j]] += 1;
7405: xadj_count = 0;
7406: for (i=1;i<n_neighs;i++) {
7407: for (j=0;j<n_shared[i];j++) {
7408: if (count[shared[i][j]] < threshold) {
7409: adjncy[xadj_count] = neighs[i];
7410: adjncy_wgt[xadj_count] = n_shared[i];
7411: xadj_count++;
7412: break;
7413: }
7414: }
7415: }
7416: xadj[1] = xadj_count;
7417: PetscFree(count);
7418: ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);
7419: PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
7421: PetscMalloc1(1,&ranks_send_to_idx);
7423: /* Restrict work on active processes only */
7424: PetscMPIIntCast(im_active,&color);
7425: if (void_procs) {
7426: PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);
7427: PetscSubcommSetNumber(psubcomm,2); /* 2 groups, active process and not active processes */
7428: PetscSubcommSetTypeGeneral(psubcomm,color,rank);
7429: subcomm = PetscSubcommChild(psubcomm);
7430: } else {
7431: psubcomm = NULL;
7432: subcomm = PetscObjectComm((PetscObject)mat);
7433: }
7435: v_wgt = NULL;
7436: if (!color) {
7437: PetscFree(xadj);
7438: PetscFree(adjncy);
7439: PetscFree(adjncy_wgt);
7440: } else {
7441: Mat subdomain_adj;
7442: IS new_ranks,new_ranks_contig;
7443: MatPartitioning partitioner;
7444: PetscInt rstart=0,rend=0;
7445: PetscInt *is_indices,*oldranks;
7446: PetscMPIInt size;
7447: PetscBool aggregate;
7449: MPI_Comm_size(subcomm,&size);
7450: if (void_procs) {
7451: PetscInt prank = rank;
7452: PetscMalloc1(size,&oldranks);
7453: MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);
7454: for (i=0;i<xadj[1];i++) {
7455: PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);
7456: }
7457: PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
7458: } else {
7459: oldranks = NULL;
7460: }
7461: aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7462: if (aggregate) { /* TODO: all this part could be made more efficient */
7463: PetscInt lrows,row,ncols,*cols;
7464: PetscMPIInt nrank;
7465: PetscScalar *vals;
7467: MPI_Comm_rank(subcomm,&nrank);
7468: lrows = 0;
7469: if (nrank<redprocs) {
7470: lrows = size/redprocs;
7471: if (nrank<size%redprocs) lrows++;
7472: }
7473: MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);
7474: MatGetOwnershipRange(subdomain_adj,&rstart,&rend);
7475: MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
7476: MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
7477: row = nrank;
7478: ncols = xadj[1]-xadj[0];
7479: cols = adjncy;
7480: PetscMalloc1(ncols,&vals);
7481: for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
7482: MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);
7483: MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);
7484: MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);
7485: PetscFree(xadj);
7486: PetscFree(adjncy);
7487: PetscFree(adjncy_wgt);
7488: PetscFree(vals);
7489: if (use_vwgt) {
7490: Vec v;
7491: const PetscScalar *array;
7492: PetscInt nl;
7494: MatCreateVecs(subdomain_adj,&v,NULL);
7495: VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);
7496: VecAssemblyBegin(v);
7497: VecAssemblyEnd(v);
7498: VecGetLocalSize(v,&nl);
7499: VecGetArrayRead(v,&array);
7500: PetscMalloc1(nl,&v_wgt);
7501: for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7502: VecRestoreArrayRead(v,&array);
7503: VecDestroy(&v);
7504: }
7505: } else {
7506: MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);
7507: if (use_vwgt) {
7508: PetscMalloc1(1,&v_wgt);
7509: v_wgt[0] = n;
7510: }
7511: }
7512: /* MatView(subdomain_adj,0); */
7514: /* Partition */
7515: MatPartitioningCreate(subcomm,&partitioner);
7516: #if defined(PETSC_HAVE_PTSCOTCH)
7517: MatPartitioningSetType(partitioner,MATPARTITIONINGPTSCOTCH);
7518: #elif defined(PETSC_HAVE_PARMETIS)
7519: MatPartitioningSetType(partitioner,MATPARTITIONINGPARMETIS);
7520: #else
7521: MatPartitioningSetType(partitioner,MATPARTITIONINGAVERAGE);
7522: #endif
7523: MatPartitioningSetAdjacency(partitioner,subdomain_adj);
7524: if (v_wgt) {
7525: MatPartitioningSetVertexWeights(partitioner,v_wgt);
7526: }
7527: *n_subdomains = PetscMin((PetscInt)size,*n_subdomains);
7528: MatPartitioningSetNParts(partitioner,*n_subdomains);
7529: MatPartitioningSetFromOptions(partitioner);
7530: MatPartitioningApply(partitioner,&new_ranks);
7531: /* MatPartitioningView(partitioner,0); */
7533: /* renumber new_ranks to avoid "holes" in new set of processors */
7534: ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);
7535: ISDestroy(&new_ranks);
7536: ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7537: if (!aggregate) {
7538: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7539: if (PetscUnlikelyDebug(!oldranks)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7540: ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7541: } else if (oldranks) {
7542: ranks_send_to_idx[0] = oldranks[is_indices[0]];
7543: } else {
7544: ranks_send_to_idx[0] = is_indices[0];
7545: }
7546: } else {
7547: PetscInt idx = 0;
7548: PetscMPIInt tag;
7549: MPI_Request *reqs;
7551: PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);
7552: PetscMalloc1(rend-rstart,&reqs);
7553: for (i=rstart;i<rend;i++) {
7554: MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);
7555: }
7556: MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);
7557: MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);
7558: PetscFree(reqs);
7559: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7560: if (PetscUnlikelyDebug(!oldranks)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7561: ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7562: } else if (oldranks) {
7563: ranks_send_to_idx[0] = oldranks[idx];
7564: } else {
7565: ranks_send_to_idx[0] = idx;
7566: }
7567: }
7568: ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7569: /* clean up */
7570: PetscFree(oldranks);
7571: ISDestroy(&new_ranks_contig);
7572: MatDestroy(&subdomain_adj);
7573: MatPartitioningDestroy(&partitioner);
7574: }
7575: PetscSubcommDestroy(&psubcomm);
7576: PetscFree(procs_candidates);
7578: /* assemble parallel IS for sends */
7579: i = 1;
7580: if (!color) i=0;
7581: ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);
7582: return(0);
7583: }
/* Tags for the storage type of a local matrix; PCBDDCMatISSubassemble() writes one of
   these as the first entry of every index message it sends */
typedef enum {
  MATDENSE_PRIVATE = 0,
  MATAIJ_PRIVATE,
  MATBAIJ_PRIVATE,
  MATSBAIJ_PRIVATE
} MatTypePrivate;
7587: PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7588: {
7589: Mat local_mat;
7590: IS is_sends_internal;
7591: PetscInt rows,cols,new_local_rows;
7592: PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs;
7593: PetscBool ismatis,isdense,newisdense,destroy_mat;
7594: ISLocalToGlobalMapping l2gmap;
7595: PetscInt* l2gmap_indices;
7596: const PetscInt* is_indices;
7597: MatType new_local_type;
7598: /* buffers */
7599: PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
7600: PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
7601: PetscInt *recv_buffer_idxs_local;
7602: PetscScalar *ptr_vals,*recv_buffer_vals;
7603: const PetscScalar *send_buffer_vals;
7604: PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs;
7605: /* MPI */
7606: MPI_Comm comm,comm_n;
7607: PetscSubcomm subcomm;
7608: PetscMPIInt n_sends,n_recvs,size;
7609: PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
7610: PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
7611: PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest;
7612: MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs;
7613: MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs;
7614: PetscErrorCode ierr;
7618: PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
7619: if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
7626: if (nvecs) {
7627: if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported");
7629: }
7630: /* further checks */
7631: MatISGetLocalMat(mat,&local_mat);
7632: PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);
7633: if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
7634: MatGetSize(local_mat,&rows,&cols);
7635: if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
7636: if (reuse && *mat_n) {
7637: PetscInt mrows,mcols,mnrows,mncols;
7639: PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);
7640: if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
7641: MatGetSize(mat,&mrows,&mcols);
7642: MatGetSize(*mat_n,&mnrows,&mncols);
7643: if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
7644: if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
7645: }
7646: MatGetBlockSize(local_mat,&bs);
7649: /* prepare IS for sending if not provided */
7650: if (!is_sends) {
7651: if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
7652: PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);
7653: } else {
7654: PetscObjectReference((PetscObject)is_sends);
7655: is_sends_internal = is_sends;
7656: }
7658: /* get comm */
7659: PetscObjectGetComm((PetscObject)mat,&comm);
7661: /* compute number of sends */
7662: ISGetLocalSize(is_sends_internal,&i);
7663: PetscMPIIntCast(i,&n_sends);
7665: /* compute number of receives */
7666: MPI_Comm_size(comm,&size);
7667: PetscMalloc1(size,&iflags);
7668: PetscArrayzero(iflags,size);
7669: ISGetIndices(is_sends_internal,&is_indices);
7670: for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
7671: PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);
7672: PetscFree(iflags);
7674: /* restrict comm if requested */
7675: subcomm = NULL;
7676: destroy_mat = PETSC_FALSE;
7677: if (restrict_comm) {
7678: PetscMPIInt color,subcommsize;
7680: color = 0;
7681: if (restrict_full) {
7682: if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
7683: } else {
7684: if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
7685: }
7686: MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);
7687: subcommsize = size - subcommsize;
7688: /* check if reuse has been requested */
7689: if (reuse) {
7690: if (*mat_n) {
7691: PetscMPIInt subcommsize2;
7692: MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);
7693: if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
7694: comm_n = PetscObjectComm((PetscObject)*mat_n);
7695: } else {
7696: comm_n = PETSC_COMM_SELF;
7697: }
7698: } else { /* MAT_INITIAL_MATRIX */
7699: PetscMPIInt rank;
7701: MPI_Comm_rank(comm,&rank);
7702: PetscSubcommCreate(comm,&subcomm);
7703: PetscSubcommSetNumber(subcomm,2);
7704: PetscSubcommSetTypeGeneral(subcomm,color,rank);
7705: comm_n = PetscSubcommChild(subcomm);
7706: }
7707: /* flag to destroy *mat_n if not significative */
7708: if (color) destroy_mat = PETSC_TRUE;
7709: } else {
7710: comm_n = comm;
7711: }
7713: /* prepare send/receive buffers */
7714: PetscMalloc1(size,&ilengths_idxs);
7715: PetscArrayzero(ilengths_idxs,size);
7716: PetscMalloc1(size,&ilengths_vals);
7717: PetscArrayzero(ilengths_vals,size);
7718: if (nis) {
7719: PetscCalloc1(size,&ilengths_idxs_is);
7720: }
7722: /* Get data from local matrices */
7723: if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
7724: /* TODO: See below some guidelines on how to prepare the local buffers */
7725: /*
7726: send_buffer_vals should contain the raw values of the local matrix
7727: send_buffer_idxs should contain:
7728: - MatType_PRIVATE type
7729: - PetscInt size_of_l2gmap
7730: - PetscInt global_row_indices[size_of_l2gmap]
7731: - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values
7732: */
7733: else {
7734: MatDenseGetArrayRead(local_mat,&send_buffer_vals);
7735: ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);
7736: PetscMalloc1(i+2,&send_buffer_idxs);
7737: send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
7738: send_buffer_idxs[1] = i;
7739: ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7740: PetscArraycpy(&send_buffer_idxs[2],ptr_idxs,i);
7741: ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7742: PetscMPIIntCast(i,&len);
7743: for (i=0;i<n_sends;i++) {
7744: ilengths_vals[is_indices[i]] = len*len;
7745: ilengths_idxs[is_indices[i]] = len+2;
7746: }
7747: }
7748: PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);
7749: /* additional is (if any) */
7750: if (nis) {
7751: PetscMPIInt psum;
7752: PetscInt j;
7753: for (j=0,psum=0;j<nis;j++) {
7754: PetscInt plen;
7755: ISGetLocalSize(isarray[j],&plen);
7756: PetscMPIIntCast(plen,&len);
7757: psum += len+1; /* indices + lenght */
7758: }
7759: PetscMalloc1(psum,&send_buffer_idxs_is);
7760: for (j=0,psum=0;j<nis;j++) {
7761: PetscInt plen;
7762: const PetscInt *is_array_idxs;
7763: ISGetLocalSize(isarray[j],&plen);
7764: send_buffer_idxs_is[psum] = plen;
7765: ISGetIndices(isarray[j],&is_array_idxs);
7766: PetscArraycpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen);
7767: ISRestoreIndices(isarray[j],&is_array_idxs);
7768: psum += plen+1; /* indices + lenght */
7769: }
7770: for (i=0;i<n_sends;i++) {
7771: ilengths_idxs_is[is_indices[i]] = psum;
7772: }
7773: PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);
7774: }
7775: MatISRestoreLocalMat(mat,&local_mat);
7777: buf_size_idxs = 0;
7778: buf_size_vals = 0;
7779: buf_size_idxs_is = 0;
7780: buf_size_vecs = 0;
7781: for (i=0;i<n_recvs;i++) {
7782: buf_size_idxs += (PetscInt)olengths_idxs[i];
7783: buf_size_vals += (PetscInt)olengths_vals[i];
7784: if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
7785: if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i];
7786: }
7787: PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);
7788: PetscMalloc1(buf_size_vals,&recv_buffer_vals);
7789: PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);
7790: PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);
7792: /* get new tags for clean communications */
7793: PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);
7794: PetscObjectGetNewTag((PetscObject)mat,&tag_vals);
7795: PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);
7796: PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);
7798: /* allocate for requests */
7799: PetscMalloc1(n_sends,&send_req_idxs);
7800: PetscMalloc1(n_sends,&send_req_vals);
7801: PetscMalloc1(n_sends,&send_req_idxs_is);
7802: PetscMalloc1(n_sends,&send_req_vecs);
7803: PetscMalloc1(n_recvs,&recv_req_idxs);
7804: PetscMalloc1(n_recvs,&recv_req_vals);
7805: PetscMalloc1(n_recvs,&recv_req_idxs_is);
7806: PetscMalloc1(n_recvs,&recv_req_vecs);
7808: /* communications */
7809: ptr_idxs = recv_buffer_idxs;
7810: ptr_vals = recv_buffer_vals;
7811: ptr_idxs_is = recv_buffer_idxs_is;
7812: ptr_vecs = recv_buffer_vecs;
7813: for (i=0;i<n_recvs;i++) {
7814: source_dest = onodes[i];
7815: MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);
7816: MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);
7817: ptr_idxs += olengths_idxs[i];
7818: ptr_vals += olengths_vals[i];
7819: if (nis) {
7820: source_dest = onodes_is[i];
7821: MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);
7822: ptr_idxs_is += olengths_idxs_is[i];
7823: }
7824: if (nvecs) {
7825: source_dest = onodes[i];
7826: MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);
7827: ptr_vecs += olengths_idxs[i]-2;
7828: }
7829: }
7830: for (i=0;i<n_sends;i++) {
7831: PetscMPIIntCast(is_indices[i],&source_dest);
7832: MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);
7833: MPI_Isend((PetscScalar*)send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);
7834: if (nis) {
7835: MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);
7836: }
7837: if (nvecs) {
7838: VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7839: MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);
7840: }
7841: }
7842: ISRestoreIndices(is_sends_internal,&is_indices);
7843: ISDestroy(&is_sends_internal);
7845: /* assemble new l2g map */
7846: MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);
7847: ptr_idxs = recv_buffer_idxs;
7848: new_local_rows = 0;
7849: for (i=0;i<n_recvs;i++) {
7850: new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7851: ptr_idxs += olengths_idxs[i];
7852: }
7853: PetscMalloc1(new_local_rows,&l2gmap_indices);
7854: ptr_idxs = recv_buffer_idxs;
7855: new_local_rows = 0;
7856: for (i=0;i<n_recvs;i++) {
7857: PetscArraycpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,*(ptr_idxs+1));
7858: new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7859: ptr_idxs += olengths_idxs[i];
7860: }
7861: PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);
7862: ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);
7863: PetscFree(l2gmap_indices);
7865: /* infer new local matrix type from received local matrices type */
7866: /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
7867: /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
7868: if (n_recvs) {
7869: MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
7870: ptr_idxs = recv_buffer_idxs;
7871: for (i=0;i<n_recvs;i++) {
7872: if ((PetscInt)new_local_type_private != *ptr_idxs) {
7873: new_local_type_private = MATAIJ_PRIVATE;
7874: break;
7875: }
7876: ptr_idxs += olengths_idxs[i];
7877: }
7878: switch (new_local_type_private) {
7879: case MATDENSE_PRIVATE:
7880: new_local_type = MATSEQAIJ;
7881: bs = 1;
7882: break;
7883: case MATAIJ_PRIVATE:
7884: new_local_type = MATSEQAIJ;
7885: bs = 1;
7886: break;
7887: case MATBAIJ_PRIVATE:
7888: new_local_type = MATSEQBAIJ;
7889: break;
7890: case MATSBAIJ_PRIVATE:
7891: new_local_type = MATSEQSBAIJ;
7892: break;
7893: default:
7894: SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME);
7895: }
7896: } else { /* by default, new_local_type is seqaij */
7897: new_local_type = MATSEQAIJ;
7898: bs = 1;
7899: }
7901: /* create MATIS object if needed */
7902: if (!reuse) {
7903: MatGetSize(mat,&rows,&cols);
7904: MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7905: } else {
7906: /* it also destroys the local matrices */
7907: if (*mat_n) {
7908: MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);
7909: } else { /* this is a fake object */
7910: MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7911: }
7912: }
7913: MatISGetLocalMat(*mat_n,&local_mat);
7914: MatSetType(local_mat,new_local_type);
7916: MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);
7918: /* Global to local map of received indices */
7919: PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local); /* needed for values insertion */
7920: ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);
7921: ISLocalToGlobalMappingDestroy(&l2gmap);
7923: /* restore attributes -> type of incoming data and its size */
7924: buf_size_idxs = 0;
7925: for (i=0;i<n_recvs;i++) {
7926: recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
7927: recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
7928: buf_size_idxs += (PetscInt)olengths_idxs[i];
7929: }
7930: PetscFree(recv_buffer_idxs);
7932: /* set preallocation */
7933: PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);
7934: if (!newisdense) {
7935: PetscInt *new_local_nnz=NULL;
7937: ptr_idxs = recv_buffer_idxs_local;
7938: if (n_recvs) {
7939: PetscCalloc1(new_local_rows,&new_local_nnz);
7940: }
7941: for (i=0;i<n_recvs;i++) {
7942: PetscInt j;
7943: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
7944: for (j=0;j<*(ptr_idxs+1);j++) {
7945: new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
7946: }
7947: } else {
7948: /* TODO */
7949: }
7950: ptr_idxs += olengths_idxs[i];
7951: }
7952: if (new_local_nnz) {
7953: for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
7954: MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);
7955: for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
7956: MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7957: for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
7958: MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7959: } else {
7960: MatSetUp(local_mat);
7961: }
7962: PetscFree(new_local_nnz);
7963: } else {
7964: MatSetUp(local_mat);
7965: }
7967: /* set values */
7968: ptr_vals = recv_buffer_vals;
7969: ptr_idxs = recv_buffer_idxs_local;
7970: for (i=0;i<n_recvs;i++) {
7971: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
7972: MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);
7973: MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);
7974: MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);
7975: MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);
7976: MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);
7977: } else {
7978: /* TODO */
7979: }
7980: ptr_idxs += olengths_idxs[i];
7981: ptr_vals += olengths_vals[i];
7982: }
7983: MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);
7984: MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);
7985: MatISRestoreLocalMat(*mat_n,&local_mat);
7986: MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);
7987: MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);
7988: PetscFree(recv_buffer_vals);
7990: #if 0
7991: if (!restrict_comm) { /* check */
7992: Vec lvec,rvec;
7993: PetscReal infty_error;
7995: MatCreateVecs(mat,&rvec,&lvec);
7996: VecSetRandom(rvec,NULL);
7997: MatMult(mat,rvec,lvec);
7998: VecScale(lvec,-1.0);
7999: MatMultAdd(*mat_n,rvec,lvec,lvec);
8000: VecNorm(lvec,NORM_INFINITY,&infty_error);
8001: PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
8002: VecDestroy(&rvec);
8003: VecDestroy(&lvec);
8004: }
8005: #endif
8007: /* assemble new additional is (if any) */
8008: if (nis) {
8009: PetscInt **temp_idxs,*count_is,j,psum;
8011: MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);
8012: PetscCalloc1(nis,&count_is);
8013: ptr_idxs = recv_buffer_idxs_is;
8014: psum = 0;
8015: for (i=0;i<n_recvs;i++) {
8016: for (j=0;j<nis;j++) {
8017: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8018: count_is[j] += plen; /* increment counting of buffer for j-th IS */
8019: psum += plen;
8020: ptr_idxs += plen+1; /* shift pointer to received data */
8021: }
8022: }
8023: PetscMalloc1(nis,&temp_idxs);
8024: PetscMalloc1(psum,&temp_idxs[0]);
8025: for (i=1;i<nis;i++) {
8026: temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
8027: }
8028: PetscArrayzero(count_is,nis);
8029: ptr_idxs = recv_buffer_idxs_is;
8030: for (i=0;i<n_recvs;i++) {
8031: for (j=0;j<nis;j++) {
8032: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8033: PetscArraycpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen);
8034: count_is[j] += plen; /* increment starting point of buffer for j-th IS */
8035: ptr_idxs += plen+1; /* shift pointer to received data */
8036: }
8037: }
8038: for (i=0;i<nis;i++) {
8039: ISDestroy(&isarray[i]);
8040: PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);
8041: ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);
8042: }
8043: PetscFree(count_is);
8044: PetscFree(temp_idxs[0]);
8045: PetscFree(temp_idxs);
8046: }
8047: /* free workspace */
8048: PetscFree(recv_buffer_idxs_is);
8049: MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);
8050: PetscFree(send_buffer_idxs);
8051: MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);
8052: if (isdense) {
8053: MatISGetLocalMat(mat,&local_mat);
8054: MatDenseRestoreArrayRead(local_mat,&send_buffer_vals);
8055: MatISRestoreLocalMat(mat,&local_mat);
8056: } else {
8057: /* PetscFree(send_buffer_vals); */
8058: }
8059: if (nis) {
8060: MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);
8061: PetscFree(send_buffer_idxs_is);
8062: }
8064: if (nvecs) {
8065: MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);
8066: MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);
8067: VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
8068: VecDestroy(&nnsp_vec[0]);
8069: VecCreate(comm_n,&nnsp_vec[0]);
8070: VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);
8071: VecSetType(nnsp_vec[0],VECSTANDARD);
8072: /* set values */
8073: ptr_vals = recv_buffer_vecs;
8074: ptr_idxs = recv_buffer_idxs_local;
8075: VecGetArray(nnsp_vec[0],&send_buffer_vecs);
8076: for (i=0;i<n_recvs;i++) {
8077: PetscInt j;
8078: for (j=0;j<*(ptr_idxs+1);j++) {
8079: send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j);
8080: }
8081: ptr_idxs += olengths_idxs[i];
8082: ptr_vals += olengths_idxs[i]-2;
8083: }
8084: VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
8085: VecAssemblyBegin(nnsp_vec[0]);
8086: VecAssemblyEnd(nnsp_vec[0]);
8087: }
8089: PetscFree(recv_buffer_vecs);
8090: PetscFree(recv_buffer_idxs_local);
8091: PetscFree(recv_req_idxs);
8092: PetscFree(recv_req_vals);
8093: PetscFree(recv_req_vecs);
8094: PetscFree(recv_req_idxs_is);
8095: PetscFree(send_req_idxs);
8096: PetscFree(send_req_vals);
8097: PetscFree(send_req_vecs);
8098: PetscFree(send_req_idxs_is);
8099: PetscFree(ilengths_vals);
8100: PetscFree(ilengths_idxs);
8101: PetscFree(olengths_vals);
8102: PetscFree(olengths_idxs);
8103: PetscFree(onodes);
8104: if (nis) {
8105: PetscFree(ilengths_idxs_is);
8106: PetscFree(olengths_idxs_is);
8107: PetscFree(onodes_is);
8108: }
8109: PetscSubcommDestroy(&subcomm);
8110: if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
8111: MatDestroy(mat_n);
8112: for (i=0;i<nis;i++) {
8113: ISDestroy(&isarray[i]);
8114: }
8115: if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
8116: VecDestroy(&nnsp_vec[0]);
8117: }
8118: *mat_n = NULL;
8119: }
8120: return(0);
8121: }
8123: /* temporary hack into ksp private data structure */
8124: #include <petsc/private/kspimpl.h>
8126: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
8127: {
8128: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
8129: PC_IS *pcis = (PC_IS*)pc->data;
8130: Mat coarse_mat,coarse_mat_is,coarse_submat_dense;
8131: Mat coarsedivudotp = NULL;
8132: Mat coarseG,t_coarse_mat_is;
8133: MatNullSpace CoarseNullSpace = NULL;
8134: ISLocalToGlobalMapping coarse_islg;
8135: IS coarse_is,*isarray,corners;
8136: PetscInt i,im_active=-1,active_procs=-1;
8137: PetscInt nis,nisdofs,nisneu,nisvert;
8138: PetscInt coarse_eqs_per_proc;
8139: PC pc_temp;
8140: PCType coarse_pc_type;
8141: KSPType coarse_ksp_type;
8142: PetscBool multilevel_requested,multilevel_allowed;
8143: PetscBool coarse_reuse;
8144: PetscInt ncoarse,nedcfield;
8145: PetscBool compute_vecs = PETSC_FALSE;
8146: PetscScalar *array;
8147: MatReuse coarse_mat_reuse;
8148: PetscBool restr, full_restr, have_void;
8149: PetscMPIInt size;
8150: PetscErrorCode ierr;
8153: PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);
8154: /* Assign global numbering to coarse dofs */
8155: if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
8156: PetscInt ocoarse_size;
8157: compute_vecs = PETSC_TRUE;
8159: pcbddc->new_primal_space = PETSC_TRUE;
8160: ocoarse_size = pcbddc->coarse_size;
8161: PetscFree(pcbddc->global_primal_indices);
8162: PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);
8163: /* see if we can avoid some work */
8164: if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
8165: /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
8166: if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
8167: KSPReset(pcbddc->coarse_ksp);
8168: coarse_reuse = PETSC_FALSE;
8169: } else { /* we can safely reuse already computed coarse matrix */
8170: coarse_reuse = PETSC_TRUE;
8171: }
8172: } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
8173: coarse_reuse = PETSC_FALSE;
8174: }
8175: /* reset any subassembling information */
8176: if (!coarse_reuse || pcbddc->recompute_topography) {
8177: ISDestroy(&pcbddc->coarse_subassembling);
8178: }
8179: } else { /* primal space is unchanged, so we can reuse coarse matrix */
8180: coarse_reuse = PETSC_TRUE;
8181: }
8182: if (coarse_reuse && pcbddc->coarse_ksp) {
8183: KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
8184: PetscObjectReference((PetscObject)coarse_mat);
8185: coarse_mat_reuse = MAT_REUSE_MATRIX;
8186: } else {
8187: coarse_mat = NULL;
8188: coarse_mat_reuse = MAT_INITIAL_MATRIX;
8189: }
8191: /* creates temporary l2gmap and IS for coarse indexes */
8192: ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);
8193: ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);
8195: /* creates temporary MATIS object for coarse matrix */
8196: MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);
8197: MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);
8198: MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);
8199: MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
8200: MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
8201: MatDestroy(&coarse_submat_dense);
8203: /* count "active" (i.e. with positive local size) and "void" processes */
8204: im_active = !!(pcis->n);
8205: MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
8207: /* determine number of processes partecipating to coarse solver and compute subassembling pattern */
8208: /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
8209: /* full_restr : just use the receivers from the subassembling pattern */
8210: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
8211: coarse_mat_is = NULL;
8212: multilevel_allowed = PETSC_FALSE;
8213: multilevel_requested = PETSC_FALSE;
8214: coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc);
8215: if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size;
8216: if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
8217: if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
8218: if (multilevel_requested) {
8219: ncoarse = active_procs/pcbddc->coarsening_ratio;
8220: restr = PETSC_FALSE;
8221: full_restr = PETSC_FALSE;
8222: } else {
8223: ncoarse = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc);
8224: restr = PETSC_TRUE;
8225: full_restr = PETSC_TRUE;
8226: }
8227: if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
8228: ncoarse = PetscMax(1,ncoarse);
8229: if (!pcbddc->coarse_subassembling) {
8230: if (pcbddc->coarsening_ratio > 1) {
8231: if (multilevel_requested) {
8232: PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
8233: } else {
8234: PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
8235: }
8236: } else {
8237: PetscMPIInt rank;
8239: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
8240: have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE;
8241: ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);
8242: }
8243: } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
8244: PetscInt psum;
8245: if (pcbddc->coarse_ksp) psum = 1;
8246: else psum = 0;
8247: MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
8248: have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
8249: }
8250: /* determine if we can go multilevel */
8251: if (multilevel_requested) {
8252: if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
8253: else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */
8254: }
8255: if (multilevel_allowed && have_void) restr = PETSC_TRUE;
8257: /* dump subassembling pattern */
8258: if (pcbddc->dbg_flag && multilevel_allowed) {
8259: ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);
8260: }
8261: /* compute dofs splitting and neumann boundaries for coarse dofs */
8262: nedcfield = -1;
8263: corners = NULL;
8264: if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */
8265: PetscInt *tidxs,*tidxs2,nout,tsize,i;
8266: const PetscInt *idxs;
8267: ISLocalToGlobalMapping tmap;
8269: /* create map between primal indices (in local representative ordering) and local primal numbering */
8270: ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);
8271: /* allocate space for temporary storage */
8272: PetscMalloc1(pcbddc->local_primal_size,&tidxs);
8273: PetscMalloc1(pcbddc->local_primal_size,&tidxs2);
8274: /* allocate for IS array */
8275: nisdofs = pcbddc->n_ISForDofsLocal;
8276: if (pcbddc->nedclocal) {
8277: if (pcbddc->nedfield > -1) {
8278: nedcfield = pcbddc->nedfield;
8279: } else {
8280: nedcfield = 0;
8281: if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%D)",nisdofs);
8282: nisdofs = 1;
8283: }
8284: }
8285: nisneu = !!pcbddc->NeumannBoundariesLocal;
8286: nisvert = 0; /* nisvert is not used */
8287: nis = nisdofs + nisneu + nisvert;
8288: PetscMalloc1(nis,&isarray);
8289: /* dofs splitting */
8290: for (i=0;i<nisdofs;i++) {
8291: /* ISView(pcbddc->ISForDofsLocal[i],0); */
8292: if (nedcfield != i) {
8293: ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);
8294: ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);
8295: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8296: ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);
8297: } else {
8298: ISGetLocalSize(pcbddc->nedclocal,&tsize);
8299: ISGetIndices(pcbddc->nedclocal,&idxs);
8300: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8301: if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %D != %D",tsize,nout);
8302: ISRestoreIndices(pcbddc->nedclocal,&idxs);
8303: }
8304: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8305: ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);
8306: /* ISView(isarray[i],0); */
8307: }
8308: /* neumann boundaries */
8309: if (pcbddc->NeumannBoundariesLocal) {
8310: /* ISView(pcbddc->NeumannBoundariesLocal,0); */
8311: ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);
8312: ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
8313: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8314: ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
8315: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8316: ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);
8317: /* ISView(isarray[nisdofs],0); */
8318: }
8319: /* coordinates */
8320: if (pcbddc->corner_selected) {
8321: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);
8322: ISGetLocalSize(corners,&tsize);
8323: ISGetIndices(corners,&idxs);
8324: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8325: if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping corners! %D != %D",tsize,nout);
8326: ISRestoreIndices(corners,&idxs);
8327: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);
8328: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8329: ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&corners);
8330: }
8331: PetscFree(tidxs);
8332: PetscFree(tidxs2);
8333: ISLocalToGlobalMappingDestroy(&tmap);
8334: } else {
8335: nis = 0;
8336: nisdofs = 0;
8337: nisneu = 0;
8338: nisvert = 0;
8339: isarray = NULL;
8340: }
8341: /* destroy no longer needed map */
8342: ISLocalToGlobalMappingDestroy(&coarse_islg);
8344: /* subassemble */
8345: if (multilevel_allowed) {
8346: Vec vp[1];
8347: PetscInt nvecs = 0;
8348: PetscBool reuse,reuser;
8350: if (coarse_mat) reuse = PETSC_TRUE;
8351: else reuse = PETSC_FALSE;
8352: MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8353: vp[0] = NULL;
8354: if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
8355: VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);
8356: VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);
8357: VecSetType(vp[0],VECSTANDARD);
8358: nvecs = 1;
8360: if (pcbddc->divudotp) {
8361: Mat B,loc_divudotp;
8362: Vec v,p;
8363: IS dummy;
8364: PetscInt np;
8366: MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);
8367: MatGetSize(loc_divudotp,&np,NULL);
8368: ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);
8369: MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);
8370: MatCreateVecs(B,&v,&p);
8371: VecSet(p,1.);
8372: MatMultTranspose(B,p,v);
8373: VecDestroy(&p);
8374: MatDestroy(&B);
8375: VecGetArray(vp[0],&array);
8376: VecPlaceArray(pcbddc->vec1_P,array);
8377: VecRestoreArray(vp[0],&array);
8378: MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);
8379: VecResetArray(pcbddc->vec1_P);
8380: ISDestroy(&dummy);
8381: VecDestroy(&v);
8382: }
8383: }
8384: if (reuser) {
8385: PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);
8386: } else {
8387: PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);
8388: }
8389: if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8390: PetscScalar *arraym;
8391: const PetscScalar *arrayv;
8392: PetscInt nl;
8393: VecGetLocalSize(vp[0],&nl);
8394: MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);
8395: MatDenseGetArray(coarsedivudotp,&arraym);
8396: VecGetArrayRead(vp[0],&arrayv);
8397: PetscArraycpy(arraym,arrayv,nl);
8398: VecRestoreArrayRead(vp[0],&arrayv);
8399: MatDenseRestoreArray(coarsedivudotp,&arraym);
8400: VecDestroy(&vp[0]);
8401: } else {
8402: MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);
8403: }
8404: } else {
8405: PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);
8406: }
8407: if (coarse_mat_is || coarse_mat) {
8408: if (!multilevel_allowed) {
8409: MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat);
8410: } else {
8411: /* if this matrix is present, it means we are not reusing the coarse matrix */
8412: if (coarse_mat_is) {
8413: if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen");
8414: PetscObjectReference((PetscObject)coarse_mat_is);
8415: coarse_mat = coarse_mat_is;
8416: }
8417: }
8418: }
8419: MatDestroy(&t_coarse_mat_is);
8420: MatDestroy(&coarse_mat_is);
8422: /* create local to global scatters for coarse problem */
8423: if (compute_vecs) {
8424: PetscInt lrows;
8425: VecDestroy(&pcbddc->coarse_vec);
8426: if (coarse_mat) {
8427: MatGetLocalSize(coarse_mat,&lrows,NULL);
8428: } else {
8429: lrows = 0;
8430: }
8431: VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);
8432: VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);
8433: VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD);
8434: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
8435: VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);
8436: }
8437: ISDestroy(&coarse_is);
8439: /* set defaults for coarse KSP and PC */
8440: if (multilevel_allowed) {
8441: coarse_ksp_type = KSPRICHARDSON;
8442: coarse_pc_type = PCBDDC;
8443: } else {
8444: coarse_ksp_type = KSPPREONLY;
8445: coarse_pc_type = PCREDUNDANT;
8446: }
8448: /* print some info if requested */
8449: if (pcbddc->dbg_flag) {
8450: if (!multilevel_allowed) {
8451: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8452: if (multilevel_requested) {
8453: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %D (active processes %D, coarsening ratio %D)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);
8454: } else if (pcbddc->max_levels) {
8455: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%D)\n",pcbddc->max_levels);
8456: }
8457: PetscViewerFlush(pcbddc->dbg_viewer);
8458: }
8459: }
8461: /* communicate coarse discrete gradient */
8462: coarseG = NULL;
8463: if (pcbddc->nedcG && multilevel_allowed) {
8464: MPI_Comm ccomm;
8465: if (coarse_mat) {
8466: ccomm = PetscObjectComm((PetscObject)coarse_mat);
8467: } else {
8468: ccomm = MPI_COMM_NULL;
8469: }
8470: MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);
8471: }
8473: /* create the coarse KSP object only once with defaults */
8474: if (coarse_mat) {
8475: PetscBool isredundant,isbddc,force,valid;
8476: PetscViewer dbg_viewer = NULL;
8478: if (pcbddc->dbg_flag) {
8479: dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8480: PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);
8481: }
8482: if (!pcbddc->coarse_ksp) {
8483: char prefix[256],str_level[16];
8484: size_t len;
8486: KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);
8487: KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
8488: PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
8489: KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);
8490: KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8491: KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
8492: KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);
8493: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8494: /* TODO is this logic correct? should check for coarse_mat type */
8495: PCSetType(pc_temp,coarse_pc_type);
8496: /* prefix */
8497: PetscStrcpy(prefix,"");
8498: PetscStrcpy(str_level,"");
8499: if (!pcbddc->current_level) {
8500: PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));
8501: PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));
8502: } else {
8503: PetscStrlen(((PetscObject)pc)->prefix,&len);
8504: if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
8505: if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
8506: /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8507: PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);
8508: PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
8509: PetscStrlcat(prefix,str_level,sizeof(prefix));
8510: }
8511: KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);
8512: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8513: PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
8514: PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
8515: PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
8516: /* allow user customization */
8517: KSPSetFromOptions(pcbddc->coarse_ksp);
8518: /* get some info after set from options */
8519: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8520: /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8521: force = PETSC_FALSE;
8522: PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);
8523: PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");
8524: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8525: if (multilevel_allowed && !force && !valid) {
8526: isbddc = PETSC_TRUE;
8527: PCSetType(pc_temp,PCBDDC);
8528: PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
8529: PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
8530: PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
8531: if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */
8532: PetscObjectOptionsBegin((PetscObject)pc_temp);
8533: (*pc_temp->ops->setfromoptions)(PetscOptionsObject,pc_temp);
8534: PetscObjectProcessOptionsHandlers(PetscOptionsObject,(PetscObject)pc_temp);
8535: PetscOptionsEnd();
8536: pc_temp->setfromoptionscalled++;
8537: }
8538: }
8539: }
8540: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8541: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8542: if (nisdofs) {
8543: PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);
8544: for (i=0;i<nisdofs;i++) {
8545: ISDestroy(&isarray[i]);
8546: }
8547: }
8548: if (nisneu) {
8549: PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);
8550: ISDestroy(&isarray[nisdofs]);
8551: }
8552: if (nisvert) {
8553: PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);
8554: ISDestroy(&isarray[nis-1]);
8555: }
8556: if (coarseG) {
8557: PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);
8558: }
8560: /* get some info after set from options */
8561: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8563: /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8564: if (isbddc && !multilevel_allowed) {
8565: PCSetType(pc_temp,coarse_pc_type);
8566: }
8567: /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8568: force = PETSC_FALSE;
8569: PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);
8570: PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");
8571: if (multilevel_requested && multilevel_allowed && !valid && !force) {
8572: PCSetType(pc_temp,PCBDDC);
8573: }
8574: PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);
8575: if (isredundant) {
8576: KSP inner_ksp;
8577: PC inner_pc;
8579: PCRedundantGetKSP(pc_temp,&inner_ksp);
8580: KSPGetPC(inner_ksp,&inner_pc);
8581: }
8583: /* parameters which miss an API */
8584: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8585: if (isbddc) {
8586: PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data;
8588: pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8589: pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8590: pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit;
8591: pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8592: if (pcbddc_coarse->benign_saddle_point) {
8593: Mat coarsedivudotp_is;
8594: ISLocalToGlobalMapping l2gmap,rl2g,cl2g;
8595: IS row,col;
8596: const PetscInt *gidxs;
8597: PetscInt n,st,M,N;
8599: MatGetSize(coarsedivudotp,&n,NULL);
8600: MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));
8601: st = st-n;
8602: ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);
8603: MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);
8604: ISLocalToGlobalMappingGetSize(l2gmap,&n);
8605: ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
8606: ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);
8607: ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
8608: ISLocalToGlobalMappingCreateIS(row,&rl2g);
8609: ISLocalToGlobalMappingCreateIS(col,&cl2g);
8610: ISGetSize(row,&M);
8611: MatGetSize(coarse_mat,&N,NULL);
8612: ISDestroy(&row);
8613: ISDestroy(&col);
8614: MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);
8615: MatSetType(coarsedivudotp_is,MATIS);
8616: MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);
8617: MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);
8618: ISLocalToGlobalMappingDestroy(&rl2g);
8619: ISLocalToGlobalMappingDestroy(&cl2g);
8620: MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);
8621: MatDestroy(&coarsedivudotp);
8622: PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);
8623: MatDestroy(&coarsedivudotp_is);
8624: pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
8625: if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
8626: }
8627: }
8629: /* propagate symmetry info of coarse matrix */
8630: MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
8631: if (pc->pmat->symmetric_set) {
8632: MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);
8633: }
8634: if (pc->pmat->hermitian_set) {
8635: MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);
8636: }
8637: if (pc->pmat->spd_set) {
8638: MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);
8639: }
8640: if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) {
8641: MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);
8642: }
8643: /* set operators */
8644: MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view");
8645: MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix);
8646: KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8647: if (pcbddc->dbg_flag) {
8648: PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);
8649: }
8650: }
8651: MatDestroy(&coarseG);
8652: PetscFree(isarray);
8653: #if 0
8654: {
8655: PetscViewer viewer;
8656: char filename[256];
8657: sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
8658: PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);
8659: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
8660: MatView(coarse_mat,viewer);
8661: PetscViewerPopFormat(viewer);
8662: PetscViewerDestroy(&viewer);
8663: }
8664: #endif
8666: if (corners) {
8667: Vec gv;
8668: IS is;
8669: const PetscInt *idxs;
8670: PetscInt i,d,N,n,cdim = pcbddc->mat_graph->cdim;
8671: PetscScalar *coords;
8673: if (!pcbddc->mat_graph->cloc) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing local coordinates");
8674: VecGetSize(pcbddc->coarse_vec,&N);
8675: VecGetLocalSize(pcbddc->coarse_vec,&n);
8676: VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec),&gv);
8677: VecSetBlockSize(gv,cdim);
8678: VecSetSizes(gv,n*cdim,N*cdim);
8679: VecSetType(gv,VECSTANDARD);
8680: VecSetFromOptions(gv);
8681: VecSet(gv,PETSC_MAX_REAL); /* we only propagate coordinates from vertices constraints */
8683: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);
8684: ISGetLocalSize(is,&n);
8685: ISGetIndices(is,&idxs);
8686: PetscMalloc1(n*cdim,&coords);
8687: for (i=0;i<n;i++) {
8688: for (d=0;d<cdim;d++) {
8689: coords[cdim*i+d] = pcbddc->mat_graph->coords[cdim*idxs[i]+d];
8690: }
8691: }
8692: ISRestoreIndices(is,&idxs);
8693: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);
8695: ISGetLocalSize(corners,&n);
8696: ISGetIndices(corners,&idxs);
8697: VecSetValuesBlocked(gv,n,idxs,coords,INSERT_VALUES);
8698: ISRestoreIndices(corners,&idxs);
8699: PetscFree(coords);
8700: VecAssemblyBegin(gv);
8701: VecAssemblyEnd(gv);
8702: VecGetArray(gv,&coords);
8703: if (pcbddc->coarse_ksp) {
8704: PC coarse_pc;
8705: PetscBool isbddc;
8707: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
8708: PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);
8709: if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */
8710: PetscReal *realcoords;
8712: VecGetLocalSize(gv,&n);
8713: #if defined(PETSC_USE_COMPLEX)
8714: PetscMalloc1(n,&realcoords);
8715: for (i=0;i<n;i++) realcoords[i] = PetscRealPart(coords[i]);
8716: #else
8717: realcoords = coords;
8718: #endif
8719: PCSetCoordinates(coarse_pc,cdim,n/cdim,realcoords);
8720: #if defined(PETSC_USE_COMPLEX)
8721: PetscFree(realcoords);
8722: #endif
8723: }
8724: }
8725: VecRestoreArray(gv,&coords);
8726: VecDestroy(&gv);
8727: }
8728: ISDestroy(&corners);
8730: if (pcbddc->coarse_ksp) {
8731: Vec crhs,csol;
8733: KSPGetSolution(pcbddc->coarse_ksp,&csol);
8734: KSPGetRhs(pcbddc->coarse_ksp,&crhs);
8735: if (!csol) {
8736: MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);
8737: }
8738: if (!crhs) {
8739: MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));
8740: }
8741: }
8742: MatDestroy(&coarsedivudotp);
8744: /* compute null space for coarse solver if the benign trick has been requested */
8745: if (pcbddc->benign_null) {
8747: VecSet(pcbddc->vec1_P,0.);
8748: for (i=0;i<pcbddc->benign_n;i++) {
8749: VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);
8750: }
8751: VecAssemblyBegin(pcbddc->vec1_P);
8752: VecAssemblyEnd(pcbddc->vec1_P);
8753: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8754: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8755: if (coarse_mat) {
8756: Vec nullv;
8757: PetscScalar *array,*array2;
8758: PetscInt nl;
8760: MatCreateVecs(coarse_mat,&nullv,NULL);
8761: VecGetLocalSize(nullv,&nl);
8762: VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8763: VecGetArray(nullv,&array2);
8764: PetscArraycpy(array2,array,nl);
8765: VecRestoreArray(nullv,&array2);
8766: VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8767: VecNormalize(nullv,NULL);
8768: MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);
8769: VecDestroy(&nullv);
8770: }
8771: }
8772: PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);
8774: PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);
8775: if (pcbddc->coarse_ksp) {
8776: PetscBool ispreonly;
8778: if (CoarseNullSpace) {
8779: PetscBool isnull;
8780: MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);
8781: if (isnull) {
8782: MatSetNullSpace(coarse_mat,CoarseNullSpace);
8783: }
8784: /* TODO: add local nullspaces (if any) */
8785: }
8786: /* setup coarse ksp */
8787: KSPSetUp(pcbddc->coarse_ksp);
8788: /* Check coarse problem if in debug mode or if solving with an iterative method */
8789: PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);
8790: if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) {
8791: KSP check_ksp;
8792: KSPType check_ksp_type;
8793: PC check_pc;
8794: Vec check_vec,coarse_vec;
8795: PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
8796: PetscInt its;
8797: PetscBool compute_eigs;
8798: PetscReal *eigs_r,*eigs_c;
8799: PetscInt neigs;
8800: const char *prefix;
8802: /* Create ksp object suitable for estimation of extreme eigenvalues */
8803: KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);
8804: PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0);
8805: KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE);
8806: KSPSetOperators(check_ksp,coarse_mat,coarse_mat);
8807: KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);
8808: /* avoid setting up unneeded objects */
8809: KSPGetPC(check_ksp,&check_pc);
8810: PCSetType(check_pc,PCNONE);
8811: if (ispreonly) {
8812: check_ksp_type = KSPPREONLY;
8813: compute_eigs = PETSC_FALSE;
8814: } else {
8815: check_ksp_type = KSPGMRES;
8816: compute_eigs = PETSC_TRUE;
8817: }
8818: KSPSetType(check_ksp,check_ksp_type);
8819: KSPSetComputeSingularValues(check_ksp,compute_eigs);
8820: KSPSetComputeEigenvalues(check_ksp,compute_eigs);
8821: KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);
8822: KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);
8823: KSPSetOptionsPrefix(check_ksp,prefix);
8824: KSPAppendOptionsPrefix(check_ksp,"check_");
8825: KSPSetFromOptions(check_ksp);
8826: KSPSetUp(check_ksp);
8827: KSPGetPC(pcbddc->coarse_ksp,&check_pc);
8828: KSPSetPC(check_ksp,check_pc);
8829: /* create random vec */
8830: MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);
8831: VecSetRandom(check_vec,NULL);
8832: MatMult(coarse_mat,check_vec,coarse_vec);
8833: /* solve coarse problem */
8834: KSPSolve(check_ksp,coarse_vec,coarse_vec);
8835: KSPCheckSolve(check_ksp,pc,coarse_vec);
8836: /* set eigenvalue estimation if preonly has not been requested */
8837: if (compute_eigs) {
8838: PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);
8839: PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);
8840: KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);
8841: if (neigs) {
8842: lambda_max = eigs_r[neigs-1];
8843: lambda_min = eigs_r[0];
8844: if (pcbddc->use_coarse_estimates) {
8845: if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
8846: KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);
8847: KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));
8848: }
8849: }
8850: }
8851: }
8853: /* check coarse problem residual error */
8854: if (pcbddc->dbg_flag) {
8855: PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
8856: PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));
8857: VecAXPY(check_vec,-1.0,coarse_vec);
8858: VecNorm(check_vec,NORM_INFINITY,&infty_error);
8859: MatMult(coarse_mat,check_vec,coarse_vec);
8860: VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);
8861: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);
8862: PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);
8863: PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);
8864: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);
8865: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);
8866: if (CoarseNullSpace) {
8867: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");
8868: }
8869: if (compute_eigs) {
8870: PetscReal lambda_max_s,lambda_min_s;
8871: KSPConvergedReason reason;
8872: KSPGetType(check_ksp,&check_ksp_type);
8873: KSPGetIterationNumber(check_ksp,&its);
8874: KSPGetConvergedReason(check_ksp,&reason);
8875: KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);
8876: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);
8877: for (i=0;i<neigs;i++) {
8878: PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);
8879: }
8880: }
8881: PetscViewerFlush(dbg_viewer);
8882: PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));
8883: }
8884: VecDestroy(&check_vec);
8885: VecDestroy(&coarse_vec);
8886: KSPDestroy(&check_ksp);
8887: if (compute_eigs) {
8888: PetscFree(eigs_r);
8889: PetscFree(eigs_c);
8890: }
8891: }
8892: }
8893: MatNullSpaceDestroy(&CoarseNullSpace);
8894: /* print additional info */
8895: if (pcbddc->dbg_flag) {
8896: /* wait until all processes reach this point */
8897: PetscBarrier((PetscObject)pc);
8898: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %D\n",pcbddc->current_level);
8899: PetscViewerFlush(pcbddc->dbg_viewer);
8900: }
8902: /* free memory */
8903: MatDestroy(&coarse_mat);
8904: PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);
8905: return(0);
8906: }
8908: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
8909: {
8910: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
8911: PC_IS* pcis = (PC_IS*)pc->data;
8912: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
8913: IS subset,subset_mult,subset_n;
8914: PetscInt local_size,coarse_size=0;
8915: PetscInt *local_primal_indices=NULL;
8916: const PetscInt *t_local_primal_indices;
8920: /* Compute global number of coarse dofs */
8921: if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
8922: ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);
8923: ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);
8924: ISDestroy(&subset_n);
8925: ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);
8926: ISRenumber(subset,subset_mult,&coarse_size,&subset_n);
8927: ISDestroy(&subset);
8928: ISDestroy(&subset_mult);
8929: ISGetLocalSize(subset_n,&local_size);
8930: if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size);
8931: PetscMalloc1(local_size,&local_primal_indices);
8932: ISGetIndices(subset_n,&t_local_primal_indices);
8933: PetscArraycpy(local_primal_indices,t_local_primal_indices,local_size);
8934: ISRestoreIndices(subset_n,&t_local_primal_indices);
8935: ISDestroy(&subset_n);
8937: /* check numbering */
8938: if (pcbddc->dbg_flag) {
8939: PetscScalar coarsesum,*array,*array2;
8940: PetscInt i;
8941: PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
8943: PetscViewerFlush(pcbddc->dbg_viewer);
8944: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8945: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");
8946: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8947: /* counter */
8948: VecSet(pcis->vec1_global,0.0);
8949: VecSet(pcis->vec1_N,1.0);
8950: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8951: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8952: VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8953: VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8954: VecSet(pcis->vec1_N,0.0);
8955: for (i=0;i<pcbddc->local_primal_size;i++) {
8956: VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);
8957: }
8958: VecAssemblyBegin(pcis->vec1_N);
8959: VecAssemblyEnd(pcis->vec1_N);
8960: VecSet(pcis->vec1_global,0.0);
8961: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8962: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8963: VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8964: VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8965: VecGetArray(pcis->vec1_N,&array);
8966: VecGetArray(pcis->vec2_N,&array2);
8967: for (i=0;i<pcis->n;i++) {
8968: if (array[i] != 0.0 && array[i] != array2[i]) {
8969: PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi;
8970: PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
8971: set_error = PETSC_TRUE;
8972: ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);
8973: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh);
8974: }
8975: }
8976: VecRestoreArray(pcis->vec2_N,&array2);
8977: MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8978: PetscViewerFlush(pcbddc->dbg_viewer);
8979: for (i=0;i<pcis->n;i++) {
8980: if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
8981: }
8982: VecRestoreArray(pcis->vec1_N,&array);
8983: VecSet(pcis->vec1_global,0.0);
8984: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8985: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8986: VecSum(pcis->vec1_global,&coarsesum);
8987: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,PetscRealPart(coarsesum));
8988: if (pcbddc->dbg_flag > 1 || set_error_reduced) {
8989: PetscInt *gidxs;
8991: PetscMalloc1(pcbddc->local_primal_size,&gidxs);
8992: ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);
8993: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");
8994: PetscViewerFlush(pcbddc->dbg_viewer);
8995: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);
8996: for (i=0;i<pcbddc->local_primal_size;i++) {
8997: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);
8998: }
8999: PetscViewerFlush(pcbddc->dbg_viewer);
9000: PetscFree(gidxs);
9001: }
9002: PetscViewerFlush(pcbddc->dbg_viewer);
9003: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
9004: if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
9005: }
9007: /* get back data */
9008: *coarse_size_n = coarse_size;
9009: *local_primal_indices_n = local_primal_indices;
9010: return(0);
9011: }
9013: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
9014: {
9015: IS localis_t;
9016: PetscInt i,lsize,*idxs,n;
9017: PetscScalar *vals;
9021: /* get indices in local ordering exploiting local to global map */
9022: ISGetLocalSize(globalis,&lsize);
9023: PetscMalloc1(lsize,&vals);
9024: for (i=0;i<lsize;i++) vals[i] = 1.0;
9025: ISGetIndices(globalis,(const PetscInt**)&idxs);
9026: VecSet(gwork,0.0);
9027: VecSet(lwork,0.0);
9028: if (idxs) { /* multilevel guard */
9029: VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);
9030: VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);
9031: }
9032: VecAssemblyBegin(gwork);
9033: ISRestoreIndices(globalis,(const PetscInt**)&idxs);
9034: PetscFree(vals);
9035: VecAssemblyEnd(gwork);
9036: /* now compute set in local ordering */
9037: VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
9038: VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
9039: VecGetArrayRead(lwork,(const PetscScalar**)&vals);
9040: VecGetSize(lwork,&n);
9041: for (i=0,lsize=0;i<n;i++) {
9042: if (PetscRealPart(vals[i]) > 0.5) {
9043: lsize++;
9044: }
9045: }
9046: PetscMalloc1(lsize,&idxs);
9047: for (i=0,lsize=0;i<n;i++) {
9048: if (PetscRealPart(vals[i]) > 0.5) {
9049: idxs[lsize++] = i;
9050: }
9051: }
9052: VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);
9053: ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);
9054: *localis = localis_t;
9055: return(0);
9056: }
9058: PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
9059: {
9060: PC_IS *pcis=(PC_IS*)pc->data;
9061: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
9062: PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
9063: Mat S_j;
9064: PetscInt *used_xadj,*used_adjncy;
9065: PetscBool free_used_adj;
9066: PetscErrorCode ierr;
9069: PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);
9070: /* decide the adjacency to be used for determining internal problems for local schur on subsets */
9071: free_used_adj = PETSC_FALSE;
9072: if (pcbddc->sub_schurs_layers == -1) {
9073: used_xadj = NULL;
9074: used_adjncy = NULL;
9075: } else {
9076: if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
9077: used_xadj = pcbddc->mat_graph->xadj;
9078: used_adjncy = pcbddc->mat_graph->adjncy;
9079: } else if (pcbddc->computed_rowadj) {
9080: used_xadj = pcbddc->mat_graph->xadj;
9081: used_adjncy = pcbddc->mat_graph->adjncy;
9082: } else {
9083: PetscBool flg_row=PETSC_FALSE;
9084: const PetscInt *xadj,*adjncy;
9085: PetscInt nvtxs;
9087: MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
9088: if (flg_row) {
9089: PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);
9090: PetscArraycpy(used_xadj,xadj,nvtxs+1);
9091: PetscArraycpy(used_adjncy,adjncy,xadj[nvtxs]);
9092: free_used_adj = PETSC_TRUE;
9093: } else {
9094: pcbddc->sub_schurs_layers = -1;
9095: used_xadj = NULL;
9096: used_adjncy = NULL;
9097: }
9098: MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
9099: }
9100: }
9102: /* setup sub_schurs data */
9103: MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
9104: if (!sub_schurs->schur_explicit) {
9105: /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
9106: MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
9107: PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);
9108: } else {
9109: Mat change = NULL;
9110: Vec scaling = NULL;
9111: IS change_primal = NULL, iP;
9112: PetscInt benign_n;
9113: PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
9114: PetscBool need_change = PETSC_FALSE;
9115: PetscBool discrete_harmonic = PETSC_FALSE;
9117: if (!pcbddc->use_vertices && reuse_solvers) {
9118: PetscInt n_vertices;
9120: ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
9121: reuse_solvers = (PetscBool)!n_vertices;
9122: }
9123: if (!pcbddc->benign_change_explicit) {
9124: benign_n = pcbddc->benign_n;
9125: } else {
9126: benign_n = 0;
9127: }
9128: /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
9129: We need a global reduction to avoid possible deadlocks.
9130: We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
9131: if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
9132: PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
9133: MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
9134: need_change = (PetscBool)(!need_change);
9135: }
9136: /* If the user defines additional constraints, we import them here.
9137: We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */
9138: if (need_change) {
9139: PC_IS *pcisf;
9140: PC_BDDC *pcbddcf;
9141: PC pcf;
9143: if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph");
9144: PCCreate(PetscObjectComm((PetscObject)pc),&pcf);
9145: PCSetOperators(pcf,pc->mat,pc->pmat);
9146: PCSetType(pcf,PCBDDC);
9148: /* hacks */
9149: pcisf = (PC_IS*)pcf->data;
9150: pcisf->is_B_local = pcis->is_B_local;
9151: pcisf->vec1_N = pcis->vec1_N;
9152: pcisf->BtoNmap = pcis->BtoNmap;
9153: pcisf->n = pcis->n;
9154: pcisf->n_B = pcis->n_B;
9155: pcbddcf = (PC_BDDC*)pcf->data;
9156: PetscFree(pcbddcf->mat_graph);
9157: pcbddcf->mat_graph = pcbddc->mat_graph;
9158: pcbddcf->use_faces = PETSC_TRUE;
9159: pcbddcf->use_change_of_basis = PETSC_TRUE;
9160: pcbddcf->use_change_on_faces = PETSC_TRUE;
9161: pcbddcf->use_qr_single = PETSC_TRUE;
9162: pcbddcf->fake_change = PETSC_TRUE;
9164: /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */
9165: PCBDDCConstraintsSetUp(pcf);
9166: sub_schurs->change_with_qr = pcbddcf->use_qr_single;
9167: ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);
9168: change = pcbddcf->ConstraintMatrix;
9169: pcbddcf->ConstraintMatrix = NULL;
9171: /* free unneeded memory allocated in PCBDDCConstraintsSetUp */
9172: PetscFree(pcbddcf->sub_schurs);
9173: MatNullSpaceDestroy(&pcbddcf->onearnullspace);
9174: PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);
9175: PetscFree(pcbddcf->primal_indices_local_idxs);
9176: PetscFree(pcbddcf->onearnullvecs_state);
9177: PetscFree(pcf->data);
9178: pcf->ops->destroy = NULL;
9179: pcf->ops->reset = NULL;
9180: PCDestroy(&pcf);
9181: }
9182: if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;
9184: PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);
9185: if (iP) {
9186: PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");
9187: PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);
9188: PetscOptionsEnd();
9189: }
9190: if (discrete_harmonic) {
9191: Mat A;
9192: MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);
9193: MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);
9194: PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);
9195: PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
9196: MatDestroy(&A);
9197: } else {
9198: PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
9199: }
9200: MatDestroy(&change);
9201: ISDestroy(&change_primal);
9202: }
9203: MatDestroy(&S_j);
9205: /* free adjacency */
9206: if (free_used_adj) {
9207: PetscFree2(used_xadj,used_adjncy);
9208: }
9209: PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);
9210: return(0);
9211: }
9213: PetscErrorCode PCBDDCInitSubSchurs(PC pc)
9214: {
9215: PC_IS *pcis=(PC_IS*)pc->data;
9216: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
9217: PCBDDCGraph graph;
9218: PetscErrorCode ierr;
9221: /* attach interface graph for determining subsets */
9222: if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
9223: IS verticesIS,verticescomm;
9224: PetscInt vsize,*idxs;
9226: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
9227: ISGetSize(verticesIS,&vsize);
9228: ISGetIndices(verticesIS,(const PetscInt**)&idxs);
9229: ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);
9230: ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);
9231: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
9232: PCBDDCGraphCreate(&graph);
9233: PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);
9234: PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);
9235: ISDestroy(&verticescomm);
9236: PCBDDCGraphComputeConnectedComponents(graph);
9237: } else {
9238: graph = pcbddc->mat_graph;
9239: }
9240: /* print some info */
9241: if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
9242: IS vertices;
9243: PetscInt nv,nedges,nfaces;
9244: PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
9245: PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
9246: ISGetSize(vertices,&nv);
9247: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
9248: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
9249: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,(int)nv,pcbddc->use_vertices);
9250: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,(int)nedges,pcbddc->use_edges);
9251: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,(int)nfaces,pcbddc->use_faces);
9252: PetscViewerFlush(pcbddc->dbg_viewer);
9253: PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
9254: PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
9255: }
9257: /* sub_schurs init */
9258: if (!pcbddc->sub_schurs) {
9259: PCBDDCSubSchursCreate(&pcbddc->sub_schurs);
9260: }
9261: PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);
9263: /* free graph struct */
9264: if (pcbddc->sub_schurs_rebuild) {
9265: PCBDDCGraphDestroy(&graph);
9266: }
9267: return(0);
9268: }
9270: PetscErrorCode PCBDDCCheckOperator(PC pc)
9271: {
9272: PC_IS *pcis=(PC_IS*)pc->data;
9273: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
9274: PetscErrorCode ierr;
9277: if (pcbddc->n_vertices == pcbddc->local_primal_size) {
9278: IS zerodiag = NULL;
9279: Mat S_j,B0_B=NULL;
9280: Vec dummy_vec=NULL,vec_check_B,vec_scale_P;
9281: PetscScalar *p0_check,*array,*array2;
9282: PetscReal norm;
9283: PetscInt i;
9285: /* B0 and B0_B */
9286: if (zerodiag) {
9287: IS dummy;
9289: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);
9290: MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
9291: MatCreateVecs(B0_B,NULL,&dummy_vec);
9292: ISDestroy(&dummy);
9293: }
9294: /* I need a primal vector to scale primal nodes since BDDC sums contibutions */
9295: VecDuplicate(pcbddc->vec1_P,&vec_scale_P);
9296: VecSet(pcbddc->vec1_P,1.0);
9297: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9298: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9299: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
9300: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
9301: VecReciprocal(vec_scale_P);
9302: /* S_j */
9303: MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
9304: MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
9306: /* mimic vector in \widetilde{W}_\Gamma */
9307: VecSetRandom(pcis->vec1_N,NULL);
9308: /* continuous in primal space */
9309: VecSetRandom(pcbddc->coarse_vec,NULL);
9310: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9311: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9312: VecGetArray(pcbddc->vec1_P,&array);
9313: PetscCalloc1(pcbddc->benign_n,&p0_check);
9314: for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i];
9315: VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
9316: VecRestoreArray(pcbddc->vec1_P,&array);
9317: VecAssemblyBegin(pcis->vec1_N);
9318: VecAssemblyEnd(pcis->vec1_N);
9319: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
9320: VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
9321: VecDuplicate(pcis->vec2_B,&vec_check_B);
9322: VecCopy(pcis->vec2_B,vec_check_B);
9324: /* assemble rhs for coarse problem */
9325: /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
9326: /* local with Schur */
9327: MatMult(S_j,pcis->vec2_B,pcis->vec1_B);
9328: if (zerodiag) {
9329: VecGetArray(dummy_vec,&array);
9330: for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i];
9331: VecRestoreArray(dummy_vec,&array);
9332: MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);
9333: }
9334: /* sum on primal nodes the local contributions */
9335: VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
9336: VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
9337: VecGetArray(pcis->vec1_N,&array);
9338: VecGetArray(pcbddc->vec1_P,&array2);
9339: for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
9340: VecRestoreArray(pcbddc->vec1_P,&array2);
9341: VecRestoreArray(pcis->vec1_N,&array);
9342: VecSet(pcbddc->coarse_vec,0.);
9343: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9344: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9345: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9346: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9347: VecGetArray(pcbddc->vec1_P,&array);
9348: /* scale primal nodes (BDDC sums contibutions) */
9349: VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);
9350: VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
9351: VecRestoreArray(pcbddc->vec1_P,&array);
9352: VecAssemblyBegin(pcis->vec1_N);
9353: VecAssemblyEnd(pcis->vec1_N);
9354: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
9355: VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
9356: /* global: \widetilde{B0}_B w_\Gamma */
9357: if (zerodiag) {
9358: MatMult(B0_B,pcis->vec2_B,dummy_vec);
9359: VecGetArray(dummy_vec,&array);
9360: for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i];
9361: VecRestoreArray(dummy_vec,&array);
9362: }
9363: /* BDDC */
9364: VecSet(pcis->vec1_D,0.);
9365: PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);
9367: VecCopy(pcis->vec1_B,pcis->vec2_B);
9368: VecAXPY(pcis->vec1_B,-1.0,vec_check_B);
9369: VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);
9370: PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);
9371: for (i=0;i<pcbddc->benign_n;i++) {
9372: PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%D] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));
9373: }
9374: PetscFree(p0_check);
9375: VecDestroy(&vec_scale_P);
9376: VecDestroy(&vec_check_B);
9377: VecDestroy(&dummy_vec);
9378: MatDestroy(&S_j);
9379: MatDestroy(&B0_B);
9380: }
9381: return(0);
9382: }
9384: #include <../src/mat/impls/aij/mpi/mpiaij.h>
9385: PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
9386: {
9387: Mat At;
9388: IS rows;
9389: PetscInt rst,ren;
9391: PetscLayout rmap;
9394: rst = ren = 0;
9395: if (ccomm != MPI_COMM_NULL) {
9396: PetscLayoutCreate(ccomm,&rmap);
9397: PetscLayoutSetSize(rmap,A->rmap->N);
9398: PetscLayoutSetBlockSize(rmap,1);
9399: PetscLayoutSetUp(rmap);
9400: PetscLayoutGetRange(rmap,&rst,&ren);
9401: }
9402: ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);
9403: MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);
9404: ISDestroy(&rows);
9406: if (ccomm != MPI_COMM_NULL) {
9407: Mat_MPIAIJ *a,*b;
9408: IS from,to;
9409: Vec gvec;
9410: PetscInt lsize;
9412: MatCreate(ccomm,B);
9413: MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);
9414: MatSetType(*B,MATAIJ);
9415: PetscLayoutDestroy(&((*B)->rmap));
9416: PetscLayoutSetUp((*B)->cmap);
9417: a = (Mat_MPIAIJ*)At->data;
9418: b = (Mat_MPIAIJ*)(*B)->data;
9419: MPI_Comm_size(ccomm,&b->size);
9420: MPI_Comm_rank(ccomm,&b->rank);
9421: PetscObjectReference((PetscObject)a->A);
9422: PetscObjectReference((PetscObject)a->B);
9423: b->A = a->A;
9424: b->B = a->B;
9426: b->donotstash = a->donotstash;
9427: b->roworiented = a->roworiented;
9428: b->rowindices = NULL;
9429: b->rowvalues = NULL;
9430: b->getrowactive = PETSC_FALSE;
9432: (*B)->rmap = rmap;
9433: (*B)->factortype = A->factortype;
9434: (*B)->assembled = PETSC_TRUE;
9435: (*B)->insertmode = NOT_SET_VALUES;
9436: (*B)->preallocated = PETSC_TRUE;
9438: if (a->colmap) {
9439: #if defined(PETSC_USE_CTABLE)
9440: PetscTableCreateCopy(a->colmap,&b->colmap);
9441: #else
9442: PetscMalloc1(At->cmap->N,&b->colmap);
9443: PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));
9444: PetscArraycpy(b->colmap,a->colmap,At->cmap->N);
9445: #endif
9446: } else b->colmap = NULL;
9447: if (a->garray) {
9448: PetscInt len;
9449: len = a->B->cmap->n;
9450: PetscMalloc1(len+1,&b->garray);
9451: PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));
9452: if (len) { PetscArraycpy(b->garray,a->garray,len); }
9453: } else b->garray = NULL;
9455: PetscObjectReference((PetscObject)a->lvec);
9456: b->lvec = a->lvec;
9457: PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);
9459: /* cannot use VecScatterCopy */
9460: VecGetLocalSize(b->lvec,&lsize);
9461: ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);
9462: ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);
9463: MatCreateVecs(*B,&gvec,NULL);
9464: VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);
9465: PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);
9466: ISDestroy(&from);
9467: ISDestroy(&to);
9468: VecDestroy(&gvec);
9469: }
9470: MatDestroy(&At);
9471: return(0);
9472: }