Actual source code: spbas_cholesky.h
2: /*
3: spbas_cholesky_row_alloc:
4: in the data arrays, find a place where another row may be stored.
5: Return PETSC_ERR_MEM if there is insufficient space to store the
6: row, so garbage collection and/or re-allocation may be done.
7: */
8: PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used)
9: {
11: retval.icols[k] = &retval.alloc_icol[*n_alloc_used];
12: retval.values[k] = &retval.alloc_val[*n_alloc_used];
13: *n_alloc_used += r_nnz;
14: if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM);
15: return(0);
16: }
18: /*
19: spbas_cholesky_garbage_collect:
20: move the rows which have been calculated so far, as well as
21: those currently under construction, to the front of the
22: array, while putting them in the proper order.
23: When it seems necessary, enlarge the current arrays.
25: NB: re-allocation is being simulated using
26: PetscMalloc, memcpy, PetscFree, because
27: PetscRealloc does not seem to exist.
29: */
30: PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed.
31: Only the storage, not the contents of this matrix is changed in this function */
32: PetscInt i_row, /* I : Number of rows for which the final contents are known */
33: PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
34: places in the arrays: they need not be moved any more */
35: PetscInt *n_alloc_used, /* I/O: */
36: PetscInt *max_row_nnz) /* I : Over-estimate of the number of nonzeros needed to store each row */
37: {
38: /* PSEUDO-CODE:
39: 1. Choose the appropriate size for the arrays
40: 2. Rescue the arrays which would be lost during garbage collection
41: 3. Reallocate and correct administration
42: 4. Move all arrays so that they are in proper order */
44: PetscInt i,j;
45: PetscInt nrows = result->nrows;
46: PetscInt n_alloc_ok =0;
47: PetscInt n_alloc_ok_max=0;
48: PetscErrorCode ierr;
49: PetscInt need_already = 0;
50: PetscInt n_rows_ahead =0;
51: PetscInt max_need_extra= 0;
52: PetscInt n_alloc_max, n_alloc_est, n_alloc;
53: PetscInt n_alloc_now = result->n_alloc_icol;
54: PetscInt *alloc_icol_old = result->alloc_icol;
55: PetscScalar *alloc_val_old = result->alloc_val;
56: PetscInt *icol_rescue;
57: PetscScalar *val_rescue;
58: PetscInt n_rescue;
59: PetscInt n_row_rescue;
60: PetscInt i_here, i_last, n_copy;
61: const PetscReal xtra_perc = 20;
64: /*********************************************************
65: 1. Choose appropriate array size
66: Count number of rows and memory usage which is already final */
67: for (i=0; i<i_row; i++) {
68: n_alloc_ok += result->row_nnz[i];
69: n_alloc_ok_max += max_row_nnz[i];
70: }
72: /* Count currently needed memory usage and future memory requirements
73: (max, predicted)*/
74: for (i=i_row; i<nrows; i++) {
75: if (!result->row_nnz[i]) {
76: max_need_extra += max_row_nnz[i];
77: } else {
78: need_already += max_row_nnz[i];
79: n_rows_ahead++;
80: }
81: }
83: /* Make maximal and realistic memory requirement estimates */
84: n_alloc_max = n_alloc_ok + need_already + max_need_extra;
85: n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) * ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max));
87: /* Choose array sizes */
88: if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max;
89: else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now;
90: else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) n_alloc = n_alloc_max;
91: else n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0));
93: /* If new estimate is less than what we already have,
94: don't reallocate, just garbage-collect */
95: if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) {
96: n_alloc = result->n_alloc_icol;
97: }
99: /* Motivate dimension choice */
100: PetscInfo1(NULL," Allocating %d nonzeros: ",n_alloc);
101: if (n_alloc_max == n_alloc_est) {
102: PetscInfo(NULL,"this is the correct size\n");
103: } else if (n_alloc_now >= n_alloc_est) {
104: PetscInfo(NULL,"the current size, which seems enough\n");
105: } else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) {
106: PetscInfo(NULL,"the maximum estimate\n");
107: } else {
108: PetscInfo1(NULL,"%6.2f %% more than the estimate\n",xtra_perc);
109: }
111: /**********************************************************
112: 2. Rescue arrays which would be lost
113: Count how many rows and nonzeros will have to be rescued
114: when moving all arrays in place */
115: n_row_rescue = 0; n_rescue = 0;
116: if (*n_row_alloc_ok==0) *n_alloc_used = 0;
117: else {
118: i = *n_row_alloc_ok - 1;
120: *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i];
121: }
123: for (i=*n_row_alloc_ok; i<nrows; i++) {
124: i_here = result->icols[i]-result->alloc_icol;
125: i_last = i_here + result->row_nnz[i];
126: if (result->row_nnz[i]>0) {
127: if (*n_alloc_used > i_here || i_last > n_alloc) {
128: n_rescue += result->row_nnz[i];
129: n_row_rescue++;
130: }
132: if (i<i_row) *n_alloc_used += result->row_nnz[i];
133: else *n_alloc_used += max_row_nnz[i];
134: }
135: }
137: /* Allocate rescue arrays */
138: PetscMalloc1(n_rescue, &icol_rescue);
139: PetscMalloc1(n_rescue, &val_rescue);
141: /* Rescue the arrays which need rescuing */
142: n_row_rescue = 0; n_rescue = 0;
143: if (*n_row_alloc_ok==0) *n_alloc_used = 0;
144: else {
145: i = *n_row_alloc_ok - 1;
146: *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i];
147: }
149: for (i=*n_row_alloc_ok; i<nrows; i++) {
150: i_here = result->icols[i]-result->alloc_icol;
151: i_last = i_here + result->row_nnz[i];
152: if (result->row_nnz[i]>0) {
153: if (*n_alloc_used > i_here || i_last > n_alloc) {
154: PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i]);
155: PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i]);
156: n_rescue += result->row_nnz[i];
157: n_row_rescue++;
158: }
160: if (i<i_row) *n_alloc_used += result->row_nnz[i];
161: else *n_alloc_used += max_row_nnz[i];
162: }
163: }
165: /**********************************************************
166: 3. Reallocate and correct administration */
168: if (n_alloc != result->n_alloc_icol) {
169: n_copy = PetscMin(n_alloc,result->n_alloc_icol);
171: /* PETSC knows no REALLOC, so we'll REALLOC ourselves.
173: Allocate new icol-data, copy old contents */
174: PetscMalloc1(n_alloc, &result->alloc_icol);
175: PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy);
177: /* Update administration, Reset pointers to new arrays */
178: result->n_alloc_icol = n_alloc;
179: for (i=0; i<nrows; i++) {
180: result->icols[i] = result->alloc_icol + (result->icols[i] - alloc_icol_old);
181: result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);
182: }
184: /* Delete old array */
185: PetscFree(alloc_icol_old);
187: /* Allocate new value-data, copy old contents */
188: PetscMalloc1(n_alloc, &result->alloc_val);
189: PetscArraycpy(result->alloc_val, alloc_val_old, n_copy);
191: /* Update administration, Reset pointers to new arrays */
192: result->n_alloc_val = n_alloc;
193: for (i=0; i<nrows; i++) {
194: result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);
195: }
197: /* Delete old array */
198: PetscFree(alloc_val_old);
199: }
201: /*********************************************************
202: 4. Copy all the arrays to their proper places */
203: n_row_rescue = 0; n_rescue = 0;
204: if (*n_row_alloc_ok==0) *n_alloc_used = 0;
205: else {
206: i = *n_row_alloc_ok - 1;
208: *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i];
209: }
211: for (i=*n_row_alloc_ok; i<nrows; i++) {
212: i_here = result->icols[i]-result->alloc_icol;
213: i_last = i_here + result->row_nnz[i];
215: result->icols[i] = result->alloc_icol + *n_alloc_used;
216: result->values[i]= result->alloc_val + *n_alloc_used;
218: if (result->row_nnz[i]>0) {
219: if (*n_alloc_used > i_here || i_last > n_alloc) {
220: PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i]);
221: PetscArraycpy(result->values[i],&val_rescue[n_rescue],result->row_nnz[i]);
223: n_rescue += result->row_nnz[i];
224: n_row_rescue++;
225: } else {
226: for (j=0; j<result->row_nnz[i]; j++) {
227: result->icols[i][j] = result->alloc_icol[i_here+j];
228: result->values[i][j] = result->alloc_val[i_here+j];
229: }
230: }
231: if (i<i_row) *n_alloc_used += result->row_nnz[i];
232: else *n_alloc_used += max_row_nnz[i];
233: }
234: }
236: /* Delete the rescue arrays */
237: PetscFree(icol_rescue);
238: PetscFree(val_rescue);
240: *n_row_alloc_ok = i_row;
241: return(0);
242: }
244: /*
245: spbas_incomplete_cholesky:
246: incomplete Cholesky decomposition of a square, symmetric,
247: positive definite matrix.
249: In case negative diagonals are encountered, function returns
250: NEGATIVE_DIAGONAL. When this happens, call this function again
251: with a larger epsdiag_in, a less sparse pattern, and/or a smaller
252: droptol
253: */
254: PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L)
255: {
256: PetscInt jL;
257: Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data;
258: PetscInt *ai=a->i,*aj=a->j;
259: MatScalar *aa=a->a;
260: PetscInt nrows, ncols;
261: PetscInt *max_row_nnz;
262: PetscErrorCode ierr;
263: spbas_matrix retval;
264: PetscScalar *diag;
265: PetscScalar *val;
266: PetscScalar *lvec;
267: PetscScalar epsdiag;
268: PetscInt i,j,k;
269: const PetscBool do_values = PETSC_TRUE;
270: PetscInt *r1_icol;
271: PetscScalar *r1_val;
272: PetscInt *r_icol;
273: PetscInt r_nnz;
274: PetscScalar *r_val;
275: PetscInt *A_icol;
276: PetscInt A_nnz;
277: PetscScalar *A_val;
278: PetscInt *p_icol;
279: PetscInt p_nnz;
280: PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored correctly in the matrix */
281: PetscInt n_alloc_used = 0; /* part of result->icols and result->values which is currently being used */
284: /* Convert the Manteuffel shift from 'fraction of average diagonal' to dimensioned value */
285: MatGetSize(A, &nrows, &ncols);
286: MatGetTrace(A, &epsdiag);
288: epsdiag *= epsdiag_in / nrows;
290: PetscInfo2(NULL," Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag),(double)droptol);
292: if (droptol<1e-10) droptol=1e-10;
294: if ((nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n");
296: retval.nrows = nrows;
297: retval.ncols = nrows;
298: retval.nnz = pattern.nnz/10;
299: retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
300: retval.block_data = PETSC_TRUE;
302: spbas_allocate_pattern(&retval, do_values);
303: PetscArrayzero(retval.row_nnz, nrows);
304: spbas_allocate_data(&retval);
305: retval.nnz = 0;
307: PetscMalloc1(nrows, &diag);
308: PetscCalloc1(nrows, &val);
309: PetscCalloc1(nrows, &lvec);
310: PetscCalloc1(nrows, &max_row_nnz);
312: /* Check correct format of sparseness pattern */
313: if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n");
315: /* Count the nonzeros on transpose of pattern */
316: for (i = 0; i<nrows; i++) {
317: p_nnz = pattern.row_nnz[i];
318: p_icol = pattern.icols[i];
319: for (j=0; j<p_nnz; j++) {
320: max_row_nnz[i+p_icol[j]]++;
321: }
322: }
324: /* Calculate rows of L */
325: for (i = 0; i<nrows; i++) {
326: p_nnz = pattern.row_nnz[i];
327: p_icol = pattern.icols[i];
329: r_nnz = retval.row_nnz[i];
330: r_icol = retval.icols[i];
331: r_val = retval.values[i];
332: A_nnz = ai[rip[i]+1] - ai[rip[i]];
333: A_icol = &aj[ai[rip[i]]];
334: A_val = &aa[ai[rip[i]]];
336: /* Calculate val += A(i,i:n)'; */
337: for (j=0; j<A_nnz; j++) {
338: k = riip[A_icol[j]];
339: if (k>=i) val[k] = A_val[j];
340: }
342: /* Add regularization */
343: val[i] += epsdiag;
345: /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i);
346: val(i) = A(i,i) - L(0:i-1,i)' * lvec */
347: for (j=0; j<r_nnz; j++) {
348: k = r_icol[j];
349: lvec[k] = diag[k] * r_val[j];
350: val[i] -= r_val[j] * lvec[k];
351: }
353: /* Calculate the new diagonal */
354: diag[i] = val[i];
355: if (PetscRealPart(diag[i])<droptol) {
356: PetscInfo(NULL,"Error in spbas_incomplete_cholesky:\n");
357: PetscInfo1(NULL,"Negative diagonal in row %d\n",i+1);
359: /* Delete the whole matrix at once. */
360: spbas_delete(retval);
361: return NEGATIVE_DIAGONAL;
362: }
364: /* If necessary, allocate arrays */
365: if (r_nnz==0) {
366: spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
367: if (ierr == PETSC_ERR_MEM) {
368: spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
369: spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
370: } else
371: r_icol = retval.icols[i];
372: r_val = retval.values[i];
373: }
375: /* Now, fill in */
376: r_icol[r_nnz] = i;
377: r_val [r_nnz] = 1.0;
378: r_nnz++;
379: retval.row_nnz[i]++;
381: retval.nnz += r_nnz;
383: /* Calculate
384: val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
385: for (j=1; j<p_nnz; j++) {
386: k = i+p_icol[j];
387: r1_icol = retval.icols[k];
388: r1_val = retval.values[k];
389: for (jL=0; jL<retval.row_nnz[k]; jL++) {
390: val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
391: }
392: val[k] /= diag[i];
394: if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) {
395: /* If necessary, allocate arrays */
396: if (retval.row_nnz[k]==0) {
397: spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
398: if (ierr == PETSC_ERR_MEM) {
399: spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
400: spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
401: r_icol = retval.icols[i];
402: } else
403: }
405: retval.icols[k][retval.row_nnz[k]] = i;
406: retval.values[k][retval.row_nnz[k]] = val[k];
407: retval.row_nnz[k]++;
408: }
409: val[k] = 0;
410: }
412: /* Erase the values used in the work arrays */
413: for (j=0; j<r_nnz; j++) lvec[r_icol[j]] = 0;
414: }
416: PetscFree(lvec);
417: PetscFree(val);
419: spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);
420: PetscFree(max_row_nnz);
422: /* Place the inverse of the diagonals in the matrix */
423: for (i=0; i<nrows; i++) {
424: r_nnz = retval.row_nnz[i];
426: retval.values[i][r_nnz-1] = 1.0 / diag[i];
427: for (j=0; j<r_nnz-1; j++) {
428: retval.values[i][j] *= -1;
429: }
430: }
431: PetscFree(diag);
432: *matrix_L = retval;
433: return(0);
434: }