Actual source code: bddcnullspace.c

  1: #include <../src/ksp/pc/impls/bddc/bddc.h>
  2: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
  3: #include <../src/mat/impls/dense/seq/dense.h>

  5: /* E + small_solve */
  6: static PetscErrorCode PCBDDCNullSpaceCorrPreSolve(KSP ksp,Vec y,Vec x, void* ctx)
  7: {
  8:   NullSpaceCorrection_ctx corr_ctx = (NullSpaceCorrection_ctx)ctx;
  9:   Mat                     K;
 10:   PetscErrorCode          ierr;

 13:   PetscLogEventBegin(corr_ctx->evapply,ksp,0,0,0);
 14:   MatMultTranspose(corr_ctx->basis_mat,y,corr_ctx->sw[0]);
 15:   if (corr_ctx->symm) {
 16:     MatMult(corr_ctx->inv_smat,corr_ctx->sw[0],corr_ctx->sw[1]);
 17:   } else {
 18:     MatMultTranspose(corr_ctx->inv_smat,corr_ctx->sw[0],corr_ctx->sw[1]);
 19:   }
 20:   VecScale(corr_ctx->sw[1],-1.0);
 21:   MatMult(corr_ctx->basis_mat,corr_ctx->sw[1],corr_ctx->fw[0]);
 22:   VecScale(corr_ctx->sw[1],-1.0);
 23:   KSPGetOperators(ksp,&K,NULL);
 24:   MatMultAdd(K,corr_ctx->fw[0],y,y);
 25:   PetscLogEventEnd(corr_ctx->evapply,ksp,0,0,0);
 26:   return(0);
 27: }

 29: /* E^t + small */
 30: static PetscErrorCode PCBDDCNullSpaceCorrPostSolve(KSP ksp,Vec y,Vec x, void* ctx)
 31: {
 32:   NullSpaceCorrection_ctx corr_ctx = (NullSpaceCorrection_ctx)ctx;
 33:   PetscErrorCode          ierr;
 34:   Mat                     K;

 37:   PetscLogEventBegin(corr_ctx->evapply,ksp,0,0,0);
 38:   KSPGetOperators(ksp,&K,NULL);
 39:   if (corr_ctx->symm) {
 40:     MatMult(K,x,corr_ctx->fw[0]);
 41:   } else {
 42:     MatMultTranspose(K,x,corr_ctx->fw[0]);
 43:   }
 44:   MatMultTranspose(corr_ctx->basis_mat,corr_ctx->fw[0],corr_ctx->sw[0]);
 45:   VecScale(corr_ctx->sw[0],-1.0);
 46:   MatMult(corr_ctx->inv_smat,corr_ctx->sw[0],corr_ctx->sw[2]);
 47:   MatMultAdd(corr_ctx->basis_mat,corr_ctx->sw[2],x,corr_ctx->fw[0]);
 48:   VecScale(corr_ctx->fw[0],corr_ctx->scale);
 49:   /* Sum contributions from approximate solver and projected system */
 50:   MatMultAdd(corr_ctx->basis_mat,corr_ctx->sw[1],corr_ctx->fw[0],x);
 51:   PetscLogEventEnd(corr_ctx->evapply,ksp,0,0,0);
 52:   return(0);
 53: }

 55: static PetscErrorCode PCBDDCNullSpaceCorrDestroy(void * ctx)
 56: {
 57:   NullSpaceCorrection_ctx corr_ctx = (NullSpaceCorrection_ctx)ctx;
 58:   PetscErrorCode          ierr;

 61:   VecDestroyVecs(3,&corr_ctx->sw);
 62:   VecDestroyVecs(1,&corr_ctx->fw);
 63:   MatDestroy(&corr_ctx->basis_mat);
 64:   MatDestroy(&corr_ctx->inv_smat);
 65:   PetscFree(corr_ctx);
 66:   return(0);
 67: }

 69: PetscErrorCode PCBDDCNullSpaceAssembleCorrection(PC pc, PetscBool isdir, PetscBool needscaling)
 70: {
 71:   PC_BDDC                  *pcbddc = (PC_BDDC*)pc->data;
 72:   MatNullSpace             NullSpace = NULL;
 73:   KSP                      local_ksp;
 74:   NullSpaceCorrection_ctx  shell_ctx;
 75:   Mat                      local_mat,local_pmat,dmat,Kbasis_mat;
 76:   Vec                      v;
 77:   PetscContainer           c;
 78:   PetscInt                 basis_size;
 79:   IS                       zerorows;
 80:   PetscBool                iscusp;
 81:   PetscErrorCode           ierr;

 84:   if (isdir) local_ksp = pcbddc->ksp_D; /* Dirichlet solver */
 85:   else local_ksp = pcbddc->ksp_R; /* Neumann solver */
 86:   KSPGetOperators(local_ksp,&local_mat,&local_pmat);
 87:   MatGetNearNullSpace(local_pmat,&NullSpace);
 88:   if (!NullSpace) {
 89:     if (pcbddc->dbg_flag) {
 90:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d doesn't have local (near) nullspace: no need for correction in %s solver \n",PetscGlobalRank,isdir ? "Dirichlet" : "Neumann");
 91:     }
 92:     return(0);
 93:   }
 94:   PetscObjectQuery((PetscObject)NullSpace,"_PBDDC_Null_dmat",(PetscObject*)&dmat);
 95:   if (!dmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing dense matrix");
 96:   PetscLogEventBegin(PC_BDDC_ApproxSetUp[pcbddc->current_level],pc,0,0,0);

 98:   PetscNew(&shell_ctx);
 99:   shell_ctx->scale = 1.0;
100:   PetscObjectReference((PetscObject)dmat);
101:   shell_ctx->basis_mat = dmat;
102:   MatGetSize(dmat,NULL,&basis_size);
103:   shell_ctx->evapply = PC_BDDC_ApproxApply[pcbddc->current_level];

105:   MatGetOption(local_mat,MAT_SYMMETRIC,&shell_ctx->symm);

107:   /* explicit construct (Phi^T K Phi)^-1 */
108:   PetscObjectTypeCompare((PetscObject)local_mat,MATSEQAIJCUSPARSE,&iscusp);
109:   if (iscusp) {
110:     MatConvert(shell_ctx->basis_mat,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&shell_ctx->basis_mat);
111:   }
112:   MatMatMult(local_mat,shell_ctx->basis_mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Kbasis_mat);
113:   MatTransposeMatMult(Kbasis_mat,shell_ctx->basis_mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&shell_ctx->inv_smat);
114:   MatDestroy(&Kbasis_mat);
115:   MatBindToCPU(shell_ctx->inv_smat,PETSC_TRUE);
116:   MatFindZeroRows(shell_ctx->inv_smat,&zerorows);
117:   if (zerorows) { /* linearly dependent basis */
118:     const PetscInt *idxs;
119:     PetscInt       i,nz;

121:     ISGetLocalSize(zerorows,&nz);
122:     ISGetIndices(zerorows,&idxs);
123:     for (i=0;i<nz;i++) {
124:       MatSetValue(shell_ctx->inv_smat,idxs[i],idxs[i],1.0,INSERT_VALUES);
125:     }
126:     ISRestoreIndices(zerorows,&idxs);
127:     MatAssemblyBegin(shell_ctx->inv_smat,MAT_FINAL_ASSEMBLY);
128:     MatAssemblyEnd(shell_ctx->inv_smat,MAT_FINAL_ASSEMBLY);
129:   }
130:   MatLUFactor(shell_ctx->inv_smat,NULL,NULL,NULL);
131:   MatSeqDenseInvertFactors_Private(shell_ctx->inv_smat);
132:   if (zerorows) { /* linearly dependent basis */
133:     const PetscInt *idxs;
134:     PetscInt       i,nz;

136:     ISGetLocalSize(zerorows,&nz);
137:     ISGetIndices(zerorows,&idxs);
138:     for (i=0;i<nz;i++) {
139:       MatSetValue(shell_ctx->inv_smat,idxs[i],idxs[i],0.0,INSERT_VALUES);
140:     }
141:     ISRestoreIndices(zerorows,&idxs);
142:     MatAssemblyBegin(shell_ctx->inv_smat,MAT_FINAL_ASSEMBLY);
143:     MatAssemblyEnd(shell_ctx->inv_smat,MAT_FINAL_ASSEMBLY);
144:   }
145:   ISDestroy(&zerorows);

147:   /* Create work vectors in shell context */
148:   MatCreateVecs(shell_ctx->inv_smat,&v,NULL);
149:   KSPCreateVecs(local_ksp,1,&shell_ctx->fw,0,NULL);
150:   VecDuplicateVecs(v,3,&shell_ctx->sw);
151:   VecDestroy(&v);

153:   /* add special pre/post solve to KSP (see [1], eq. 48) */
154:   KSPSetPreSolve(local_ksp,PCBDDCNullSpaceCorrPreSolve,shell_ctx);
155:   KSPSetPostSolve(local_ksp,PCBDDCNullSpaceCorrPostSolve,shell_ctx);
156:   PetscContainerCreate(PetscObjectComm((PetscObject)local_ksp),&c);
157:   PetscContainerSetPointer(c,shell_ctx);
158:   PetscContainerSetUserDestroy(c,PCBDDCNullSpaceCorrDestroy);
159:   PetscObjectCompose((PetscObject)local_ksp,"_PCBDDC_Null_PrePost_ctx",(PetscObject)c);
160:   PetscContainerDestroy(&c);

162:   /* Create ksp object suitable for extreme eigenvalues' estimation */
163:   if (needscaling || pcbddc->dbg_flag) {
164:     KSP         check_ksp;
165:     PC          local_pc;
166:     Vec         work1,work2;
167:     const char* prefix;
168:     PetscReal   test_err,lambda_min,lambda_max;
169:     PetscInt    k,maxit;

171:     VecDuplicate(shell_ctx->fw[0],&work1);
172:     VecDuplicate(shell_ctx->fw[0],&work2);
173:     KSPCreate(PETSC_COMM_SELF,&check_ksp);
174:     if (local_mat->spd) {
175:       KSPSetType(check_ksp,KSPCG);
176:     }
177:     PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)local_ksp,0);
178:     KSPGetOptionsPrefix(local_ksp,&prefix);
179:     KSPSetOptionsPrefix(check_ksp,prefix);
180:     KSPAppendOptionsPrefix(check_ksp,"approximate_scale_");
181:     KSPSetErrorIfNotConverged(check_ksp,PETSC_FALSE);
182:     KSPSetOperators(check_ksp,local_mat,local_pmat);
183:     KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);
184:     KSPSetPreSolve(check_ksp,PCBDDCNullSpaceCorrPreSolve,shell_ctx);
185:     KSPSetPostSolve(check_ksp,PCBDDCNullSpaceCorrPostSolve,shell_ctx);
186:     KSPSetTolerances(check_ksp,PETSC_SMALL,PETSC_SMALL,PETSC_DEFAULT,PETSC_DEFAULT);
187:     KSPSetFromOptions(check_ksp);
188:     /* setup with default maxit, then set maxit to min(10,any_set_from_command_line) (bug in computing eigenvalues when chaning the number of iterations */
189:     KSPSetUp(check_ksp);
190:     KSPGetPC(local_ksp,&local_pc);
191:     KSPSetPC(check_ksp,local_pc);
192:     KSPGetTolerances(check_ksp,NULL,NULL,NULL,&maxit);
193:     KSPSetTolerances(check_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,PetscMin(10,maxit));
194:     VecSetRandom(work2,NULL);
195:     MatMult(local_mat,work2,work1);
196:     KSPSolve(check_ksp,work1,work1);
197:     KSPCheckSolve(check_ksp,pc,work1);
198:     VecAXPY(work1,-1.,work2);
199:     VecNorm(work1,NORM_INFINITY,&test_err);
200:     KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);
201:     KSPGetIterationNumber(check_ksp,&k);
202:     if (pcbddc->dbg_flag) {
203:       if (isdir) {
204:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet adapted solver (no scale) %1.14e (it %D, eigs %1.6e %1.6e)\n",PetscGlobalRank,test_err,k,lambda_min,lambda_max);
205:       } else {
206:         PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann adapted solver (no scale) %1.14e (it %D, eigs %1.6e %1.6e)\n",PetscGlobalRank,test_err,k,lambda_min,lambda_max);
207:       }
208:     }
209:     if (needscaling) shell_ctx->scale = 1.0/lambda_max;

211:     if (needscaling && pcbddc->dbg_flag) { /* test for scaling factor */
212:       PC new_pc;

214:       VecSetRandom(work2,NULL);
215:       MatMult(local_mat,work2,work1);
216:       PCCreate(PetscObjectComm((PetscObject)check_ksp),&new_pc);
217:       PCSetType(new_pc,PCKSP);
218:       PCSetOperators(new_pc,local_mat,local_pmat);
219:       PCKSPSetKSP(new_pc,local_ksp);
220:       KSPSetPC(check_ksp,new_pc);
221:       PCDestroy(&new_pc);
222:       KSPSetTolerances(check_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,maxit);
223:       KSPSetPreSolve(check_ksp,NULL,NULL);
224:       KSPSetPostSolve(check_ksp,NULL,NULL);
225:       KSPSolve(check_ksp,work1,work1);
226:       KSPCheckSolve(check_ksp,pc,work1);
227:       VecAXPY(work1,-1.,work2);
228:       VecNorm(work1,NORM_INFINITY,&test_err);
229:       KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);
230:       KSPGetIterationNumber(check_ksp,&k);
231:       if (pcbddc->dbg_flag) {
232:         if (isdir) {
233:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet adapted solver (scale %g) %1.14e (it %D, eigs %1.6e %1.6e)\n",PetscGlobalRank,(double)PetscRealPart(shell_ctx->scale),test_err,k,lambda_min,lambda_max);
234:         } else {
235:           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann adapted solver (scale %g) %1.14e (it %D, eigs %1.6e %1.6e)\n",PetscGlobalRank,(double)PetscRealPart(shell_ctx->scale),test_err,k,lambda_min,lambda_max);
236:         }
237:       }
238:     }
239:     KSPDestroy(&check_ksp);
240:     VecDestroy(&work1);
241:     VecDestroy(&work2);
242:   }
243:   PetscLogEventEnd(PC_BDDC_ApproxSetUp[pcbddc->current_level],pc,0,0,0);

245:   if (pcbddc->dbg_flag) {
246:     Vec       work1,work2,work3;
247:     PetscReal test_err;

249:     /* check nullspace basis is solved exactly */
250:     VecDuplicate(shell_ctx->fw[0],&work1);
251:     VecDuplicate(shell_ctx->fw[0],&work2);
252:     VecDuplicate(shell_ctx->fw[0],&work3);
253:     VecSetRandom(shell_ctx->sw[0],NULL);
254:     MatMult(shell_ctx->basis_mat,shell_ctx->sw[0],work1);
255:     VecCopy(work1,work2);
256:     MatMult(local_mat,work1,work3);
257:     KSPSolve(local_ksp,work3,work1);
258:     VecAXPY(work1,-1.,work2);
259:     VecNorm(work1,NORM_INFINITY,&test_err);
260:     if (isdir) {
261:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet nullspace correction solver: %1.14e\n",PetscGlobalRank,test_err);
262:     } else {
263:       PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann nullspace correction solver: %1.14e\n",PetscGlobalRank,test_err);
264:     }
265:     VecDestroy(&work1);
266:     VecDestroy(&work2);
267:     VecDestroy(&work3);
268:   }
269:   return(0);
270: }