Actual source code: symtranspose.c
2: /*
3: Defines symbolic transpose routines for SeqAIJ matrices.
5: Currently Get/Restore only allocates/frees memory for holding the
6: (i,j) info for the transpose. Someday, this info could be
7: maintained so successive calls to Get will not recompute the info.
9: Also defined is a faster implementation of MatTranspose for SeqAIJ
10: matrices which avoids calls to MatSetValues. This routine is the new
11: standard since it is much faster than MatTranspose_AIJ.
13: */
15: #include <../src/mat/impls/aij/seq/aij.h>
17: PetscErrorCode MatGetSymbolicTranspose_SeqAIJ(Mat A,PetscInt *Ati[],PetscInt *Atj[])
18: {
20: PetscInt i,j,anzj;
21: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
22: PetscInt an=A->cmap->N,am=A->rmap->N;
23: PetscInt *ati,*atj,*atfill,*ai=a->i,*aj=a->j;
26: PetscInfo(A,"Getting Symbolic Transpose.\n");
28: /* Set up timers */
29: PetscLogEventBegin(MAT_Getsymtranspose,A,0,0,0);
31: /* Allocate space for symbolic transpose info and work array */
32: PetscCalloc1(an+1,&ati);
33: PetscMalloc1(ai[am],&atj);
34: PetscMalloc1(an,&atfill);
36: /* Walk through aj and count ## of non-zeros in each row of A^T. */
37: /* Note: offset by 1 for fast conversion into csr format. */
38: for (i=0;i<ai[am];i++) {
39: ati[aj[i]+1] += 1;
40: }
41: /* Form ati for csr format of A^T. */
42: for (i=0;i<an;i++) {
43: ati[i+1] += ati[i];
44: }
46: /* Copy ati into atfill so we have locations of the next free space in atj */
47: PetscArraycpy(atfill,ati,an);
49: /* Walk through A row-wise and mark nonzero entries of A^T. */
50: for (i=0; i<am; i++) {
51: anzj = ai[i+1] - ai[i];
52: for (j=0; j<anzj; j++) {
53: atj[atfill[*aj]] = i;
54: atfill[*aj++] += 1;
55: }
56: }
58: /* Clean up temporary space and complete requests. */
59: PetscFree(atfill);
60: *Ati = ati;
61: *Atj = atj;
63: PetscLogEventEnd(MAT_Getsymtranspose,A,0,0,0);
64: return(0);
65: }
66: /*
67: MatGetSymbolicTransposeReduced_SeqAIJ() - Get symbolic matrix structure of submatrix A[rstart:rend,:],
68: modified from MatGetSymbolicTranspose_SeqAIJ()
69: */
70: PetscErrorCode MatGetSymbolicTransposeReduced_SeqAIJ(Mat A,PetscInt rstart,PetscInt rend,PetscInt *Ati[],PetscInt *Atj[])
71: {
73: PetscInt i,j,anzj;
74: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
75: PetscInt an=A->cmap->N;
76: PetscInt *ati,*atj,*atfill,*ai=a->i,*aj=a->j;
79: PetscInfo(A,"Getting Symbolic Transpose\n");
80: PetscLogEventBegin(MAT_Getsymtransreduced,A,0,0,0);
82: /* Allocate space for symbolic transpose info and work array */
83: PetscCalloc1(an+1,&ati);
84: anzj = ai[rend] - ai[rstart];
85: PetscMalloc1(anzj+1,&atj);
86: PetscMalloc1(an+1,&atfill);
88: /* Walk through aj and count ## of non-zeros in each row of A^T. */
89: /* Note: offset by 1 for fast conversion into csr format. */
90: for (i=ai[rstart]; i<ai[rend]; i++) {
91: ati[aj[i]+1] += 1;
92: }
93: /* Form ati for csr format of A^T. */
94: for (i=0;i<an;i++) {
95: ati[i+1] += ati[i];
96: }
98: /* Copy ati into atfill so we have locations of the next free space in atj */
99: PetscArraycpy(atfill,ati,an);
101: /* Walk through A row-wise and mark nonzero entries of A^T. */
102: aj = aj + ai[rstart];
103: for (i=rstart; i<rend; i++) {
104: anzj = ai[i+1] - ai[i];
105: for (j=0; j<anzj; j++) {
106: atj[atfill[*aj]] = i-rstart;
107: atfill[*aj++] += 1;
108: }
109: }
111: /* Clean up temporary space and complete requests. */
112: PetscFree(atfill);
113: *Ati = ati;
114: *Atj = atj;
116: PetscLogEventEnd(MAT_Getsymtransreduced,A,0,0,0);
117: return(0);
118: }
120: PetscErrorCode MatTranspose_SeqAIJ(Mat A,MatReuse reuse,Mat *B)
121: {
122: PetscErrorCode ierr;
123: PetscInt i,j,anzj;
124: Mat At;
125: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*at;
126: PetscInt an=A->cmap->N,am=A->rmap->N;
127: PetscInt *ati,*atj,*atfill,*ai=a->i,*aj=a->j;
128: MatScalar *ata;
129: const MatScalar *aa,*av;
132: MatSeqAIJGetArrayRead(A,&av);
133: aa = av;
134: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
135: /* Allocate space for symbolic transpose info and work array */
136: PetscCalloc1(an+1,&ati);
137: PetscMalloc1(ai[am],&atj);
138: PetscMalloc1(ai[am],&ata);
139: /* Walk through aj and count ## of non-zeros in each row of A^T. */
140: /* Note: offset by 1 for fast conversion into csr format. */
141: for (i=0;i<ai[am];i++) {
142: ati[aj[i]+1] += 1; /* count ## of non-zeros for row aj[i] of A^T */
143: }
144: /* Form ati for csr format of A^T. */
145: for (i=0;i<an;i++) {
146: ati[i+1] += ati[i];
147: }
148: } else { /* This segment is called by MatTranspose_MPIAIJ(...,MAT_INITIAL_MATRIX,..) directly! */
149: Mat_SeqAIJ *sub_B = (Mat_SeqAIJ*) (*B)->data;
150: ati = sub_B->i;
151: atj = sub_B->j;
152: ata = sub_B->a;
153: At = *B;
154: }
156: /* Copy ati into atfill so we have locations of the next free space in atj */
157: PetscMalloc1(an,&atfill);
158: PetscArraycpy(atfill,ati,an);
160: /* Walk through A row-wise and mark nonzero entries of A^T. */
161: for (i=0;i<am;i++) {
162: anzj = ai[i+1] - ai[i];
163: for (j=0;j<anzj;j++) {
164: atj[atfill[*aj]] = i;
165: ata[atfill[*aj]] = *aa++;
166: atfill[*aj++] += 1;
167: }
168: }
169: MatSeqAIJRestoreArrayRead(A,&av);
171: /* Clean up temporary space and complete requests. */
172: PetscFree(atfill);
173: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
174: MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),an,am,ati,atj,ata,&At);
175: MatSetBlockSizes(At,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));
177: at = (Mat_SeqAIJ*)(At->data);
178: at->free_a = PETSC_TRUE;
179: at->free_ij = PETSC_TRUE;
180: at->nonew = 0;
181: at->maxnz = ati[an];
183: MatSetType(At,((PetscObject)A)->type_name);
184: }
186: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
187: *B = At;
188: } else {
189: MatHeaderMerge(A,&At);
190: }
191: #if defined(PETSC_HAVE_DEVICE)
192: (*B)->offloadmask = PETSC_OFFLOAD_CPU;
193: #endif
194: return(0);
195: }
197: PetscErrorCode MatRestoreSymbolicTranspose_SeqAIJ(Mat A,PetscInt *ati[],PetscInt *atj[])
198: {
202: PetscInfo(A,"Restoring Symbolic Transpose.\n");
203: PetscFree(*ati);
204: PetscFree(*atj);
205: return(0);
206: }