Actual source code: ex18.c
static char help[] = "Tests the use of MatZeroRowsColumns() for parallel matrices.\n\
Contributed-by: Stephan Kramer <s.kramer@imperial.ac.uk>\n\n";

#include <petscmat.h>

int main(int argc,char **args)
{
  Mat            A;
  Vec            x, rhs, y;
  PetscInt       i,j,k,b,m = 3,n,nlocal=2,bs=1,Ii,J;
  PetscInt       *boundary_nodes, nboundary_nodes, *boundary_indices;
  PetscMPIInt    rank,size;
  PetscErrorCode ierr;
  PetscScalar    v,v0,v1,v2,a0=0.1,a,rhsval, *boundary_values,diag = 1.0;
  PetscReal      norm;
  char           convname[64];
  PetscBool      upwind = PETSC_FALSE, nonlocalBC = PETSC_FALSE, zerorhs = PETSC_TRUE, convert = PETSC_FALSE;

  ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr;
  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
  MPI_Comm_size(PETSC_COMM_WORLD,&size);
  n = nlocal*size;

  PetscOptionsGetInt(NULL,NULL, "-bs", &bs, NULL);
  PetscOptionsGetBool(NULL,NULL, "-nonlocal_bc", &nonlocalBC, NULL);
  PetscOptionsGetScalar(NULL,NULL, "-diag", &diag, NULL);
  PetscOptionsGetString(NULL,NULL,"-convname",convname,sizeof(convname),&convert);
  PetscOptionsGetBool(NULL,NULL, "-zerorhs", &zerorhs, NULL);
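
  /* Summary of the runtime options parsed above (inferred from their use in this file):
       -bs <n>          number of components (block size) per grid node
       -nonlocal_bc     register boundary nodes from ranks that do not necessarily own them
       -diag <v>        value passed as the diagonal argument of MatZeroRowsColumns()
       -convname <type> MatType the assembled matrix is converted to before the test
       -zerorhs <bool>  zero the right-hand side before the test (default: true) */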

  MatCreate(PETSC_COMM_WORLD,&A);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m*n*bs,m*n*bs);
  MatSetFromOptions(A);
  MatSetUp(A);

  MatCreateVecs(A, NULL, &rhs);
  VecSetFromOptions(rhs);
  VecSetUp(rhs);

  rhsval = 0.0;
  for (i=0; i<m; i++) {
    for (j=nlocal*rank; j<nlocal*(rank+1); j++) {
      a = a0;
      for (b=0; b<bs; b++) {
        /* let's start with a 5-point stencil diffusion term */
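        /* Sketch of the operator assembled below (assuming unit grid spacing): every node (i,j)
           receives the standard 2D Laplacian stencil
                   -1
                -1  4 -1
                   -1
           and the same pattern is repeated independently for each of the bs components. */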
        v = -1.0; Ii = (j + n*i)*bs + b;
        if (i>0)   {J = Ii - n*bs; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
        if (i<m-1) {J = Ii + n*bs; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
        if (j>0)   {J = Ii - 1*bs; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
        if (j<n-1) {J = Ii + 1*bs; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
        v = 4.0; MatSetValues(A,1,&Ii,1,&Ii,&v,ADD_VALUES);
        if (upwind) {
          /* now add a 2nd order upwind advection term to add a little asymmetry */
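          /* Discretization used below (a reading aid, assuming unit grid spacing and a > 0):
               2nd order upwind:  a*du/dx |_j ~ a*(1.5*u_j - 2.0*u_{j-1} + 0.5*u_{j-2})
               1st order upwind:  a*du/dx |_j ~ a*(u_j - u_{j-1})   (fallback near the inflow boundary) */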
          if (j>2) {
            J = Ii-2*bs; v2 = 0.5*a; v1 = -2.0*a; v0 = 1.5*a;
            MatSetValues(A,1,&Ii,1,&J,&v2,ADD_VALUES);
          } else {
            /* fall back to 1st order upwind */
            v1 = -1.0*a; v0 = 1.0*a;
          };
          if (j>1) {J = Ii-1*bs; MatSetValues(A,1,&Ii,1,&J,&v1,ADD_VALUES);}
          MatSetValues(A,1,&Ii,1,&Ii,&v0,ADD_VALUES);
          a /= 10.; /* use a different velocity for the next component */
          /* add a coupling to the previous and next components */
          v = 0.5;
          if (b>0)    {J = Ii - 1; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
          if (b<bs-1) {J = Ii + 1; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
        }
        /* make up some rhs */
        VecSetValue(rhs, Ii, rhsval, INSERT_VALUES);
        rhsval += 1.0;
      }
    }
  }
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
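
  /* Note: the rows assembled above follow the j-partitioning of the grid, which need not coincide
     with the row ownership of A, so some MatSetValues() calls target off-process rows; those
     entries are stashed locally and only moved to their owning rank by the assembly calls above. */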

  if (convert) { /* Test different Mat implementations */
    Mat B;

    MatConvert(A,convname,MAT_INITIAL_MATRIX,&B);
    MatDestroy(&A);
    A = B;
  }

  VecAssemblyBegin(rhs);
  VecAssemblyEnd(rhs);
  /* set rhs to zero to simplify */
  if (zerorhs) {
    VecZeroEntries(rhs);
  }

  if (nonlocalBC) {
    /* version where boundary conditions are set by processes that don't necessarily own the nodes */
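    /* Both VecSetValues() and MatZeroRowsColumns() are expected to accept indices of entries/rows
       owned by other ranks (off-process values are communicated during vector assembly, and the
       zeroing routine handles non-local rows itself); this branch exercises exactly that path. */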
    if (rank == 0) {
      nboundary_nodes = size>m ? nlocal : m-size+nlocal;
      PetscMalloc1(nboundary_nodes,&boundary_nodes);
      k = 0;
      for (i=size; i<m; i++,k++) {boundary_nodes[k] = n*i;};
    } else if (rank < m) {
      nboundary_nodes = nlocal+1;
      PetscMalloc1(nboundary_nodes,&boundary_nodes);
      boundary_nodes[0] = rank*n;
      k = 1;
    } else {
      nboundary_nodes = nlocal;
      PetscMalloc1(nboundary_nodes,&boundary_nodes);
      k = 0;
    }
    for (j=nlocal*rank; j<nlocal*(rank+1); j++,k++) {boundary_nodes[k] = j;};
  } else {
    /* version where boundary conditions are set by the node owners only */
    PetscMalloc1(m*n,&boundary_nodes);
    k = 0;
    for (j=0; j<n; j++) {
      Ii = j;
      if (Ii>=rank*m*nlocal && Ii<(rank+1)*m*nlocal) boundary_nodes[k++] = Ii;
    }
    for (i=1; i<m; i++) {
      Ii = n*i;
      if (Ii>=rank*m*nlocal && Ii<(rank+1)*m*nlocal) boundary_nodes[k++] = Ii;
    }
    nboundary_nodes = k;
  }
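
  /* In both variants the global boundary set is the same: the nodes of the first grid row (i=0,
     node indices 0..n-1) and of the first grid column (j=0, node indices n*i); the two branches
     only differ in which rank registers each node. */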

  VecDuplicate(rhs, &x);
  VecZeroEntries(x);
  PetscMalloc2(nboundary_nodes*bs,&boundary_indices,nboundary_nodes*bs,&boundary_values);
  for (k=0; k<nboundary_nodes; k++) {
    Ii = boundary_nodes[k]*bs;
    v  = 1.0*boundary_nodes[k];
    for (b=0; b<bs; b++, Ii++) {
      boundary_indices[k*bs+b] = Ii;
      boundary_values[k*bs+b]  = v;
      PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%d %D %f\n", rank, Ii, (double)PetscRealPart(v));
      v += 0.1;
    }
  }
  PetscSynchronizedFlush(PETSC_COMM_WORLD, NULL);
  VecSetValues(x, nboundary_nodes*bs, boundary_indices, boundary_values, INSERT_VALUES);
  VecAssemblyBegin(x);
  VecAssemblyEnd(x);

  /* We can check the rhs returned by MatZeroRowsColumns() by computing y=rhs-A*x and overwriting the boundary entries with the boundary values */
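  /* Rationale (assuming the documented behaviour of MatZeroRowsColumns(A,nb,rows,diag,x,rhs)):
     for every zeroed row i it sets rhs[i] = diag*x[i], and for every other row it subtracts the
     zeroed-column contributions A[i][j]*x[j] from rhs[i].  Since x is zero away from the boundary,
     rhs - A*x with its boundary entries overwritten by diag times the boundary values reproduces
     exactly that result, so the vector y built below must match the modified rhs. */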
  VecDuplicate(x, &y);
  MatMult(A, x, y);
  VecAYPX(y, -1.0, rhs);
  for (k=0; k<nboundary_nodes*bs; k++) boundary_values[k] *= diag;
  VecSetValues(y, nboundary_nodes*bs, boundary_indices, boundary_values, INSERT_VALUES);
  VecAssemblyBegin(y);
  VecAssemblyEnd(y);

  PetscPrintf(PETSC_COMM_WORLD, "*** Matrix A and vector x:\n");
  MatView(A, PETSC_VIEWER_STDOUT_WORLD);
  VecView(x,PETSC_VIEWER_STDOUT_WORLD);

  MatZeroRowsColumns(A, nboundary_nodes*bs, boundary_indices, diag, x, rhs);
  PetscPrintf(PETSC_COMM_WORLD, "*** Vector rhs returned by MatZeroRowsColumns\n");
  VecView(rhs,PETSC_VIEWER_STDOUT_WORLD);
  VecAXPY(y, -1.0, rhs);
  VecNorm(y, NORM_INFINITY, &norm);
  if (norm > 1.0e-10) {
    PetscPrintf(PETSC_COMM_WORLD, "*** Difference between rhs and y, inf-norm: %f\n", (double)norm);
    VecView(y,PETSC_VIEWER_STDOUT_WORLD);
    SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "Bug in MatZeroRowsColumns");
  }

  PetscFree(boundary_nodes);
  PetscFree2(boundary_indices,boundary_values);
  VecDestroy(&x);
  VecDestroy(&y);
  VecDestroy(&rhs);
  MatDestroy(&A);

  ierr = PetscFinalize();
  return ierr;
}
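
/* The block below drives the PETSc test harness: each test/testset entry runs the example with the
   given number of MPI ranks (nsize, default 1) and command-line args, and compares the output with
   the stored reference output selected by `suffix' (a brief summary; see the PETSc developer
   documentation for the exact harness semantics). */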

/*TEST

   test:
      diff_args: -j
      suffix: 0

   test:
      diff_args: -j
      suffix: 1
      nsize: 2

   test:
      diff_args: -j
      suffix: 10
      nsize: 2
      args: -bs 2 -nonlocal_bc

   test:
      diff_args: -j
      suffix: 11
      nsize: 7
      args: -bs 2 -nonlocal_bc

   test:
      diff_args: -j
      suffix: 12
      args: -bs 2 -nonlocal_bc -mat_type baij

   test:
      diff_args: -j
      suffix: 13
      nsize: 2
      args: -bs 2 -nonlocal_bc -mat_type baij

   test:
      diff_args: -j
      suffix: 14
      nsize: 7
      args: -bs 2 -nonlocal_bc -mat_type baij

   test:
      diff_args: -j
      suffix: 2
      nsize: 7

   test:
      diff_args: -j
      suffix: 3
      args: -mat_type baij

   test:
      diff_args: -j
      suffix: 4
      nsize: 2
      args: -mat_type baij

   test:
      diff_args: -j
      suffix: 5
      nsize: 7
      args: -mat_type baij

   test:
      diff_args: -j
      suffix: 6
      args: -bs 2 -mat_type baij

   test:
      diff_args: -j
      suffix: 7
      nsize: 2
      args: -bs 2 -mat_type baij

   test:
      diff_args: -j
      suffix: 8
      nsize: 7
      args: -bs 2 -mat_type baij

   test:
      diff_args: -j
      suffix: 9
      args: -bs 2 -nonlocal_bc

   test:
      diff_args: -j
      suffix: 15
      args: -bs 2 -nonlocal_bc -convname shell

   test:
      diff_args: -j
      suffix: 16
      nsize: 2
      args: -bs 2 -nonlocal_bc -convname shell

   test:
      diff_args: -j
      suffix: 17
      args: -bs 2 -nonlocal_bc -convname dense

   testset:
      diff_args: -j
      suffix: full
      nsize: {{1 3}separate output}
      args: -diag {{0.12 -0.13}separate output} -convname {{aij shell baij}separate output} -zerorhs 0

   test:
      diff_args: -j
      requires: cuda
      suffix: cusparse_1
      nsize: 1
      args: -diag {{0.12 -0.13}separate output} -convname {{seqaijcusparse mpiaijcusparse}separate output} -zerorhs 0 -mat_type {{seqaijcusparse mpiaijcusparse}separate output}

   test:
      diff_args: -j
      requires: cuda
      suffix: cusparse_3
      nsize: 3
      args: -diag {{0.12 -0.13}separate output} -convname mpiaijcusparse -zerorhs 0 -mat_type mpiaijcusparse

TEST*/