Actual source code: matimpl.h
2: #ifndef __MATIMPL_H
5: #include <petscmat.h>
6: #include <petscmatcoarsen.h>
7: #include <petsc/private/petscimpl.h>
9: PETSC_EXTERN PetscBool MatRegisterAllCalled;
10: PETSC_EXTERN PetscBool MatSeqAIJRegisterAllCalled;
11: PETSC_EXTERN PetscBool MatOrderingRegisterAllCalled;
12: PETSC_EXTERN PetscBool MatColoringRegisterAllCalled;
13: PETSC_EXTERN PetscBool MatPartitioningRegisterAllCalled;
14: PETSC_EXTERN PetscBool MatCoarsenRegisterAllCalled;
15: PETSC_EXTERN PetscErrorCode MatRegisterAll(void);
16: PETSC_EXTERN PetscErrorCode MatOrderingRegisterAll(void);
17: PETSC_EXTERN PetscErrorCode MatColoringRegisterAll(void);
18: PETSC_EXTERN PetscErrorCode MatPartitioningRegisterAll(void);
19: PETSC_EXTERN PetscErrorCode MatCoarsenRegisterAll(void);
20: PETSC_EXTERN PetscErrorCode MatSeqAIJRegisterAll(void);
22: /*
23: This file defines the parts of the matrix data structure that are
24: shared by all matrix types.
25: */
27: /*
28: If you add entries here also add them to the MATOP enum
29: in include/petscmat.h and src/mat/f90-mod/petscmat.h
30: */
31: typedef struct _MatOps *MatOps;
32: struct _MatOps {
33: /* 0*/
34: PetscErrorCode (*setvalues)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
35: PetscErrorCode (*getrow)(Mat,PetscInt,PetscInt *,PetscInt*[],PetscScalar*[]);
36: PetscErrorCode (*restorerow)(Mat,PetscInt,PetscInt *,PetscInt *[],PetscScalar *[]);
37: PetscErrorCode (*mult)(Mat,Vec,Vec);
38: PetscErrorCode (*multadd)(Mat,Vec,Vec,Vec);
39: /* 5*/
40: PetscErrorCode (*multtranspose)(Mat,Vec,Vec);
41: PetscErrorCode (*multtransposeadd)(Mat,Vec,Vec,Vec);
42: PetscErrorCode (*solve)(Mat,Vec,Vec);
43: PetscErrorCode (*solveadd)(Mat,Vec,Vec,Vec);
44: PetscErrorCode (*solvetranspose)(Mat,Vec,Vec);
45: /*10*/
46: PetscErrorCode (*solvetransposeadd)(Mat,Vec,Vec,Vec);
47: PetscErrorCode (*lufactor)(Mat,IS,IS,const MatFactorInfo*);
48: PetscErrorCode (*choleskyfactor)(Mat,IS,const MatFactorInfo*);
49: PetscErrorCode (*sor)(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
50: PetscErrorCode (*transpose)(Mat,MatReuse,Mat*);
51: /*15*/
52: PetscErrorCode (*getinfo)(Mat,MatInfoType,MatInfo*);
53: PetscErrorCode (*equal)(Mat,Mat,PetscBool*);
54: PetscErrorCode (*getdiagonal)(Mat,Vec);
55: PetscErrorCode (*diagonalscale)(Mat,Vec,Vec);
56: PetscErrorCode (*norm)(Mat,NormType,PetscReal*);
57: /*20*/
58: PetscErrorCode (*assemblybegin)(Mat,MatAssemblyType);
59: PetscErrorCode (*assemblyend)(Mat,MatAssemblyType);
60: PetscErrorCode (*setoption)(Mat,MatOption,PetscBool);
61: PetscErrorCode (*zeroentries)(Mat);
62: /*24*/
63: PetscErrorCode (*zerorows)(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
64: PetscErrorCode (*lufactorsymbolic)(Mat,Mat,IS,IS,const MatFactorInfo*);
65: PetscErrorCode (*lufactornumeric)(Mat,Mat,const MatFactorInfo*);
66: PetscErrorCode (*choleskyfactorsymbolic)(Mat,Mat,IS,const MatFactorInfo*);
67: PetscErrorCode (*choleskyfactornumeric)(Mat,Mat,const MatFactorInfo*);
68: /*29*/
69: PetscErrorCode (*setup)(Mat);
70: PetscErrorCode (*ilufactorsymbolic)(Mat,Mat,IS,IS,const MatFactorInfo*);
71: PetscErrorCode (*iccfactorsymbolic)(Mat,Mat,IS,const MatFactorInfo*);
72: PetscErrorCode (*getdiagonalblock)(Mat,Mat*);
73: PetscErrorCode (*setinf)(Mat);
74: /*34*/
75: PetscErrorCode (*duplicate)(Mat,MatDuplicateOption,Mat*);
76: PetscErrorCode (*forwardsolve)(Mat,Vec,Vec);
77: PetscErrorCode (*backwardsolve)(Mat,Vec,Vec);
78: PetscErrorCode (*ilufactor)(Mat,IS,IS,const MatFactorInfo*);
79: PetscErrorCode (*iccfactor)(Mat,IS,const MatFactorInfo*);
80: /*39*/
81: PetscErrorCode (*axpy)(Mat,PetscScalar,Mat,MatStructure);
82: PetscErrorCode (*createsubmatrices)(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
83: PetscErrorCode (*increaseoverlap)(Mat,PetscInt,IS[],PetscInt);
84: PetscErrorCode (*getvalues)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
85: PetscErrorCode (*copy)(Mat,Mat,MatStructure);
86: /*44*/
87: PetscErrorCode (*getrowmax)(Mat,Vec,PetscInt[]);
88: PetscErrorCode (*scale)(Mat,PetscScalar);
89: PetscErrorCode (*shift)(Mat,PetscScalar);
90: PetscErrorCode (*diagonalset)(Mat,Vec,InsertMode);
91: PetscErrorCode (*zerorowscolumns)(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
92: /*49*/
93: PetscErrorCode (*setrandom)(Mat,PetscRandom);
94: PetscErrorCode (*getrowij)(Mat,PetscInt,PetscBool ,PetscBool ,PetscInt*,const PetscInt *[],const PetscInt *[],PetscBool *);
95: PetscErrorCode (*restorerowij)(Mat,PetscInt,PetscBool ,PetscBool ,PetscInt *,const PetscInt *[],const PetscInt *[],PetscBool *);
96: PetscErrorCode (*getcolumnij)(Mat,PetscInt,PetscBool ,PetscBool ,PetscInt*,const PetscInt *[],const PetscInt *[],PetscBool *);
97: PetscErrorCode (*restorecolumnij)(Mat,PetscInt,PetscBool ,PetscBool ,PetscInt*,const PetscInt *[],const PetscInt *[],PetscBool *);
98: /*54*/
99: PetscErrorCode (*fdcoloringcreate)(Mat,ISColoring,MatFDColoring);
100: PetscErrorCode (*coloringpatch)(Mat,PetscInt,PetscInt,ISColoringValue[],ISColoring*);
101: PetscErrorCode (*setunfactored)(Mat);
102: PetscErrorCode (*permute)(Mat,IS,IS,Mat*);
103: PetscErrorCode (*setvaluesblocked)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
104: /*59*/
105: PetscErrorCode (*createsubmatrix)(Mat,IS,IS,MatReuse,Mat*);
106: PetscErrorCode (*destroy)(Mat);
107: PetscErrorCode (*view)(Mat,PetscViewer);
108: PetscErrorCode (*convertfrom)(Mat,MatType,MatReuse,Mat*);
109: PetscErrorCode (*placeholder_63)(void);
110: /*64*/
111: PetscErrorCode (*matmatmultsymbolic)(Mat,Mat,Mat,PetscReal,Mat);
112: PetscErrorCode (*matmatmultnumeric)(Mat,Mat,Mat,Mat);
113: PetscErrorCode (*setlocaltoglobalmapping)(Mat,ISLocalToGlobalMapping,ISLocalToGlobalMapping);
114: PetscErrorCode (*setvalueslocal)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
115: PetscErrorCode (*zerorowslocal)(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
116: /*69*/
117: PetscErrorCode (*getrowmaxabs)(Mat,Vec,PetscInt[]);
118: PetscErrorCode (*getrowminabs)(Mat,Vec,PetscInt[]);
119: PetscErrorCode (*convert)(Mat, MatType,MatReuse,Mat*);
120: PetscErrorCode (*hasoperation)(Mat,MatOperation,PetscBool*);
121: PetscErrorCode (*placeholder_73)(void);
122: /*74*/
123: PetscErrorCode (*setvaluesadifor)(Mat,PetscInt,void*);
124: PetscErrorCode (*fdcoloringapply)(Mat,MatFDColoring,Vec,void*);
125: PetscErrorCode (*setfromoptions)(PetscOptionItems*,Mat);
126: PetscErrorCode (*multconstrained)(Mat,Vec,Vec);
127: PetscErrorCode (*multtransposeconstrained)(Mat,Vec,Vec);
128: /*79*/
129: PetscErrorCode (*findzerodiagonals)(Mat,IS*);
130: PetscErrorCode (*mults)(Mat,Vecs,Vecs);
131: PetscErrorCode (*solves)(Mat,Vecs,Vecs);
132: PetscErrorCode (*getinertia)(Mat,PetscInt*,PetscInt*,PetscInt*);
133: PetscErrorCode (*load)(Mat,PetscViewer);
134: /*84*/
135: PetscErrorCode (*issymmetric)(Mat,PetscReal,PetscBool*);
136: PetscErrorCode (*ishermitian)(Mat,PetscReal,PetscBool*);
137: PetscErrorCode (*isstructurallysymmetric)(Mat,PetscBool *);
138: PetscErrorCode (*setvaluesblockedlocal)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
139: PetscErrorCode (*getvecs)(Mat,Vec*,Vec*);
140: /*89*/
141: PetscErrorCode (*placeholder_89)(void);
142: PetscErrorCode (*matmultsymbolic)(Mat,Mat,PetscReal,Mat);
143: PetscErrorCode (*matmultnumeric)(Mat,Mat,Mat);
144: PetscErrorCode (*placeholder_92)(void);
145: PetscErrorCode (*ptapsymbolic)(Mat,Mat,PetscReal,Mat); /* double dispatch wrapper routine */
146: /*94*/
147: PetscErrorCode (*ptapnumeric)(Mat,Mat,Mat); /* double dispatch wrapper routine */
148: PetscErrorCode (*placeholder_95)(void);
149: PetscErrorCode (*mattransposemultsymbolic)(Mat,Mat,PetscReal,Mat);
150: PetscErrorCode (*mattransposemultnumeric)(Mat,Mat,Mat);
151: PetscErrorCode (*bindtocpu)(Mat,PetscBool);
152: /*99*/
153: PetscErrorCode (*productsetfromoptions)(Mat);
154: PetscErrorCode (*productsymbolic)(Mat);
155: PetscErrorCode (*productnumeric)(Mat);
156: PetscErrorCode (*conjugate)(Mat); /* complex conjugate */
157: PetscErrorCode (*viewnative)(Mat,PetscViewer);
158: /*104*/
159: PetscErrorCode (*setvaluesrow)(Mat,PetscInt,const PetscScalar[]);
160: PetscErrorCode (*realpart)(Mat);
161: PetscErrorCode (*imaginarypart)(Mat);
162: PetscErrorCode (*getrowuppertriangular)(Mat);
163: PetscErrorCode (*restorerowuppertriangular)(Mat);
164: /*109*/
165: PetscErrorCode (*matsolve)(Mat,Mat,Mat);
166: PetscErrorCode (*matsolvetranspose)(Mat,Mat,Mat);
167: PetscErrorCode (*getrowmin)(Mat,Vec,PetscInt[]);
168: PetscErrorCode (*getcolumnvector)(Mat,Vec,PetscInt);
169: PetscErrorCode (*missingdiagonal)(Mat,PetscBool *,PetscInt*);
170: /*114*/
171: PetscErrorCode (*getseqnonzerostructure)(Mat,Mat *);
172: PetscErrorCode (*create)(Mat);
173: PetscErrorCode (*getghosts)(Mat,PetscInt*,const PetscInt *[]);
174: PetscErrorCode (*getlocalsubmatrix)(Mat,IS,IS,Mat*);
175: PetscErrorCode (*restorelocalsubmatrix)(Mat,IS,IS,Mat*);
176: /*119*/
177: PetscErrorCode (*multdiagonalblock)(Mat,Vec,Vec);
178: PetscErrorCode (*hermitiantranspose)(Mat,MatReuse,Mat*);
179: PetscErrorCode (*multhermitiantranspose)(Mat,Vec,Vec);
180: PetscErrorCode (*multhermitiantransposeadd)(Mat,Vec,Vec,Vec);
181: PetscErrorCode (*getmultiprocblock)(Mat,MPI_Comm,MatReuse,Mat*);
182: /*124*/
183: PetscErrorCode (*findnonzerorows)(Mat,IS*);
184: PetscErrorCode (*getcolumnreductions)(Mat,PetscInt,PetscReal*);
185: PetscErrorCode (*invertblockdiagonal)(Mat,const PetscScalar**);
186: PetscErrorCode (*invertvariableblockdiagonal)(Mat,PetscInt,const PetscInt*,PetscScalar*);
187: PetscErrorCode (*createsubmatricesmpi)(Mat,PetscInt,const IS[], const IS[], MatReuse, Mat**);
188: /*129*/
189: PetscErrorCode (*setvaluesbatch)(Mat,PetscInt,PetscInt,PetscInt*,const PetscScalar*);
190: PetscErrorCode (*placeholder_130)(void);
191: PetscErrorCode (*transposematmultsymbolic)(Mat,Mat,PetscReal,Mat);
192: PetscErrorCode (*transposematmultnumeric)(Mat,Mat,Mat);
193: PetscErrorCode (*transposecoloringcreate)(Mat,ISColoring,MatTransposeColoring);
194: /*134*/
195: PetscErrorCode (*transcoloringapplysptoden)(MatTransposeColoring,Mat,Mat);
196: PetscErrorCode (*transcoloringapplydentosp)(MatTransposeColoring,Mat,Mat);
197: PetscErrorCode (*placeholder_136)(void);
198: PetscErrorCode (*rartsymbolic)(Mat,Mat,PetscReal,Mat); /* double dispatch wrapper routine */
199: PetscErrorCode (*rartnumeric)(Mat,Mat,Mat); /* double dispatch wrapper routine */
200: /*139*/
201: PetscErrorCode (*setblocksizes)(Mat,PetscInt,PetscInt);
202: PetscErrorCode (*aypx)(Mat,PetscScalar,Mat,MatStructure);
203: PetscErrorCode (*residual)(Mat,Vec,Vec,Vec);
204: PetscErrorCode (*fdcoloringsetup)(Mat,ISColoring,MatFDColoring);
205: PetscErrorCode (*findoffblockdiagonalentries)(Mat,IS*);
206: PetscErrorCode (*creatempimatconcatenateseqmat)(MPI_Comm,Mat,PetscInt,MatReuse,Mat*);
207: /*145*/
208: PetscErrorCode (*destroysubmatrices)(PetscInt,Mat*[]);
209: PetscErrorCode (*mattransposesolve)(Mat,Mat,Mat);
210: PetscErrorCode (*getvalueslocal)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
211: };
212: /*
213: If you add MatOps entries above also add them to the MATOP enum
214: in include/petscmat.h and src/mat/f90-mod/petscmat.h
215: */
217: #include <petscsys.h>
218: PETSC_EXTERN PetscErrorCode MatRegisterOp(MPI_Comm, const char[], PetscVoidFunction, const char[], PetscInt, ...);
219: PETSC_EXTERN PetscErrorCode MatQueryOp(MPI_Comm, PetscVoidFunction*, const char[], PetscInt, ...);
221: typedef struct _p_MatRootName* MatRootName;
222: struct _p_MatRootName {
223: char *rname,*sname,*mname;
224: MatRootName next;
225: };
227: PETSC_EXTERN MatRootName MatRootNameList;
229: /*
230: Utility private matrix routines
231: */
232: PETSC_INTERN PetscErrorCode MatFindNonzeroRowsOrCols_Basic(Mat,PetscBool,PetscReal,IS*);
233: PETSC_INTERN PetscErrorCode MatConvert_Basic(Mat,MatType,MatReuse,Mat*);
234: PETSC_INTERN PetscErrorCode MatConvert_Shell(Mat,MatType,MatReuse,Mat*);
235: PETSC_INTERN PetscErrorCode MatConvertFrom_Shell(Mat,MatType,MatReuse,Mat*);
236: PETSC_INTERN PetscErrorCode MatCopy_Basic(Mat,Mat,MatStructure);
237: PETSC_INTERN PetscErrorCode MatDiagonalSet_Default(Mat,Vec,InsertMode);
238: #if defined(PETSC_HAVE_SCALAPACK)
239: PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
240: #endif
241: PETSC_INTERN PetscErrorCode MatSetPreallocationCOO_Basic(Mat,PetscInt,const PetscInt[],const PetscInt[]);
242: PETSC_INTERN PetscErrorCode MatSetValuesCOO_Basic(Mat,const PetscScalar[],InsertMode);
244: /* these callbacks rely on the old matrix function pointers for
245: matmat operations. They are unsafe, and should be removed.
246: However, the amount of work needed to clean up all the
247: implementations is not negligible */
248: PETSC_INTERN PetscErrorCode MatProductSymbolic_AB(Mat);
249: PETSC_INTERN PetscErrorCode MatProductNumeric_AB(Mat);
250: PETSC_INTERN PetscErrorCode MatProductSymbolic_AtB(Mat);
251: PETSC_INTERN PetscErrorCode MatProductNumeric_AtB(Mat);
252: PETSC_INTERN PetscErrorCode MatProductSymbolic_ABt(Mat);
253: PETSC_INTERN PetscErrorCode MatProductNumeric_ABt(Mat);
254: PETSC_INTERN PetscErrorCode MatProductNumeric_PtAP(Mat);
255: PETSC_INTERN PetscErrorCode MatProductNumeric_RARt(Mat);
256: PETSC_INTERN PetscErrorCode MatProductSymbolic_ABC(Mat);
257: PETSC_INTERN PetscErrorCode MatProductNumeric_ABC(Mat);
259: PETSC_INTERN PetscErrorCode MatProductCreate_Private(Mat,Mat,Mat,Mat);
260: /* this callback handles all the different triple products and
261: does not rely on the function pointers; used by cuSPARSE and KOKKOS-KERNELS */
262: PETSC_INTERN PetscErrorCode MatProductSymbolic_ABC_Basic(Mat);
264: #if !defined(PETSC_CLANG_STATIC_ANALYZER)
265: #if defined(PETSC_USE_DEBUG)
266: # define MatCheckPreallocated(A,arg) do { \
267: if (PetscUnlikely(!(A)->preallocated)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatXXXSetPreallocation(), MatSetUp() or the matrix has not yet been factored on argument %D \"%s\" before %s()",(arg),#A,PETSC_FUNCTION_NAME); \
268: } while (0)
269: #else
270: # define MatCheckPreallocated(A,arg) do {} while (0)
271: #endif
273: #if defined(PETSC_USE_DEBUG)
274: # define MatCheckProduct(A,arg) do { \
275: if (PetscUnlikely(!(A)->product)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Argument %D \"%s\" is not a matrix obtained from MatProductCreate()",(arg),#A); \
276: } while (0)
277: #else
278: # define MatCheckProduct(A,arg) do {} while (0)
279: #endif
280: #else /* PETSC_CLANG_STATIC_ANALYZER */
281: template <typename Tm>
282: void MatCheckPreallocated(Tm,int);
283: template <typename Tm>
284: void MatCheckProduct(Tm,int);
285: #endif /* PETSC_CLANG_STATIC_ANALYZER */
287: /*
288: The stash is used to temporarily store inserted matrix values that
289: belong to another processor. During the assembly phase the stashed
290: values are moved to the correct processor and
291: */
293: typedef struct _MatStashSpace *PetscMatStashSpace;
295: struct _MatStashSpace {
296: PetscMatStashSpace next;
297: PetscScalar *space_head,*val;
298: PetscInt *idx,*idy;
299: PetscInt total_space_size;
300: PetscInt local_used;
301: PetscInt local_remaining;
302: };
304: PETSC_EXTERN PetscErrorCode PetscMatStashSpaceGet(PetscInt,PetscInt,PetscMatStashSpace *);
305: PETSC_EXTERN PetscErrorCode PetscMatStashSpaceContiguous(PetscInt,PetscMatStashSpace *,PetscScalar *,PetscInt *,PetscInt *);
306: PETSC_EXTERN PetscErrorCode PetscMatStashSpaceDestroy(PetscMatStashSpace*);
308: typedef struct {
309: PetscInt count;
310: } MatStashHeader;
312: typedef struct {
313: void *buffer; /* Of type blocktype, dynamically constructed */
314: PetscInt count;
315: char pending;
316: } MatStashFrame;
318: typedef struct _MatStash MatStash;
319: struct _MatStash {
320: PetscInt nmax; /* maximum stash size */
321: PetscInt umax; /* user specified max-size */
322: PetscInt oldnmax; /* the nmax value used previously */
323: PetscInt n; /* stash size */
324: PetscInt bs; /* block size of the stash */
325: PetscInt reallocs; /* preserve the no of mallocs invoked */
326: PetscMatStashSpace space_head,space; /* linked list to hold stashed global row/column numbers and matrix values */
328: PetscErrorCode (*ScatterBegin)(Mat,MatStash*,PetscInt*);
329: PetscErrorCode (*ScatterGetMesg)(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
330: PetscErrorCode (*ScatterEnd)(MatStash*);
331: PetscErrorCode (*ScatterDestroy)(MatStash*);
333: /* The following variables are used for communication */
334: MPI_Comm comm;
335: PetscMPIInt size,rank;
336: PetscMPIInt tag1,tag2;
337: MPI_Request *send_waits; /* array of send requests */
338: MPI_Request *recv_waits; /* array of receive requests */
339: MPI_Status *send_status; /* array of send status */
340: PetscInt nsends,nrecvs; /* numbers of sends and receives */
341: PetscScalar *svalues; /* sending data */
342: PetscInt *sindices;
343: PetscScalar **rvalues; /* receiving data (values) */
344: PetscInt **rindices; /* receiving data (indices) */
345: PetscInt nprocessed; /* number of messages already processed */
346: PetscMPIInt *flg_v; /* indicates what messages have arrived so far and from whom */
347: PetscBool reproduce;
348: PetscInt reproduce_count;
350: /* The following variables are used for BTS communication */
351: PetscBool first_assembly_done; /* Is the first time matrix assembly done? */
352: PetscBool use_status; /* Use MPI_Status to determine number of items in each message */
353: PetscMPIInt nsendranks;
354: PetscMPIInt nrecvranks;
355: PetscMPIInt *sendranks;
356: PetscMPIInt *recvranks;
357: MatStashHeader *sendhdr,*recvhdr;
358: MatStashFrame *sendframes; /* pointers to the main messages */
359: MatStashFrame *recvframes;
360: MatStashFrame *recvframe_active;
361: PetscInt recvframe_i; /* index of block within active frame */
362: PetscMPIInt recvframe_count; /* Count actually sent for current frame */
363: PetscInt recvcount; /* Number of receives processed so far */
364: PetscMPIInt *some_indices; /* From last call to MPI_Waitsome */
365: MPI_Status *some_statuses; /* Statuses from last call to MPI_Waitsome */
366: PetscMPIInt some_count; /* Number of requests completed in last call to MPI_Waitsome */
367: PetscMPIInt some_i; /* Index of request currently being processed */
368: MPI_Request *sendreqs;
369: MPI_Request *recvreqs;
370: PetscSegBuffer segsendblocks;
371: PetscSegBuffer segrecvframe;
372: PetscSegBuffer segrecvblocks;
373: MPI_Datatype blocktype;
374: size_t blocktype_size;
375: InsertMode *insertmode; /* Pointer to check mat->insertmode and set upon message arrival in case no local values have been set. */
376: };
378: #if !defined(PETSC_HAVE_MPIUNI)
379: PETSC_INTERN PetscErrorCode MatStashScatterDestroy_BTS(MatStash*);
380: #endif
381: PETSC_INTERN PetscErrorCode MatStashCreate_Private(MPI_Comm,PetscInt,MatStash*);
382: PETSC_INTERN PetscErrorCode MatStashDestroy_Private(MatStash*);
383: PETSC_INTERN PetscErrorCode MatStashScatterEnd_Private(MatStash*);
384: PETSC_INTERN PetscErrorCode MatStashSetInitialSize_Private(MatStash*,PetscInt);
385: PETSC_INTERN PetscErrorCode MatStashGetInfo_Private(MatStash*,PetscInt*,PetscInt*);
386: PETSC_INTERN PetscErrorCode MatStashValuesRow_Private(MatStash*,PetscInt,PetscInt,const PetscInt[],const PetscScalar[],PetscBool);
387: PETSC_INTERN PetscErrorCode MatStashValuesCol_Private(MatStash*,PetscInt,PetscInt,const PetscInt[],const PetscScalar[],PetscInt,PetscBool);
388: PETSC_INTERN PetscErrorCode MatStashValuesRowBlocked_Private(MatStash*,PetscInt,PetscInt,const PetscInt[],const PetscScalar[],PetscInt,PetscInt,PetscInt);
389: PETSC_INTERN PetscErrorCode MatStashValuesColBlocked_Private(MatStash*,PetscInt,PetscInt,const PetscInt[],const PetscScalar[],PetscInt,PetscInt,PetscInt);
390: PETSC_INTERN PetscErrorCode MatStashScatterBegin_Private(Mat,MatStash*,PetscInt*);
391: PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Private(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
392: PETSC_INTERN PetscErrorCode MatGetInfo_External(Mat,MatInfoType,MatInfo*);
394: typedef struct {
395: PetscInt dim;
396: PetscInt dims[4];
397: PetscInt starts[4];
398: PetscBool noc; /* this is a single component problem, hence user will not set MatStencil.c */
399: } MatStencilInfo;
401: /* Info about using compressed row format */
402: typedef struct {
403: PetscBool use; /* indicates compressed rows have been checked and will be used */
404: PetscInt nrows; /* number of non-zero rows */
405: PetscInt *i; /* compressed row pointer */
406: PetscInt *rindex; /* compressed row index */
407: } Mat_CompressedRow;
408: PETSC_EXTERN PetscErrorCode MatCheckCompressedRow(Mat,PetscInt,Mat_CompressedRow*,PetscInt*,PetscInt,PetscReal);
410: typedef struct { /* used by MatCreateRedundantMatrix() for reusing matredundant */
411: PetscInt nzlocal,nsends,nrecvs;
412: PetscMPIInt *send_rank,*recv_rank;
413: PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
414: PetscScalar *sbuf_a,**rbuf_a;
415: MPI_Comm subcomm; /* when user does not provide a subcomm */
416: IS isrow,iscol;
417: Mat *matseq;
418: } Mat_Redundant;
420: typedef struct { /* used by MatProduct() */
421: MatProductType type;
422: char *alg;
423: Mat A,B,C,Dwork;
424: PetscReal fill;
425: PetscBool api_user; /* used to distinguish command line options and to indicate the matrix values are ready to be consumed at symbolic phase if needed */
427: /* Some products may display the information on the algorithm used */
428: PetscErrorCode (*view)(Mat,PetscViewer);
430: /* many products have intermediate data structures, each specific to Mat types and product type */
431: PetscBool clear; /* whether or not to clear the data structures after MatProductNumeric has been called */
432: void *data; /* where to stash those structures */
433: PetscErrorCode (*destroy)(void*); /* destroy routine */
434: } Mat_Product;
436: struct _p_Mat {
437: PETSCHEADER(struct _MatOps);
438: PetscLayout rmap,cmap;
439: void *data; /* implementation-specific data */
440: MatFactorType factortype; /* MAT_FACTOR_LU, ILU, CHOLESKY or ICC */
441: PetscBool trivialsymbolic; /* indicates the symbolic factorization doesn't actually do a symbolic factorization, it is delayed to the numeric factorization */
442: PetscBool canuseordering; /* factorization can use ordering provide to routine (most PETSc implementations) */
443: MatOrderingType preferredordering[MAT_FACTOR_NUM_TYPES] ;/* what is the preferred (or default) ordering for the matrix solver type */
444: PetscBool assembled; /* is the matrix assembled? */
445: PetscBool was_assembled; /* new values inserted into assembled mat */
446: PetscInt num_ass; /* number of times matrix has been assembled */
447: PetscObjectState nonzerostate; /* each time new nonzeros locations are introduced into the matrix this is updated */
448: PetscObjectState ass_nonzerostate; /* nonzero state at last assembly */
449: MatInfo info; /* matrix information */
450: InsertMode insertmode; /* have values been inserted in matrix or added? */
451: MatStash stash,bstash; /* used for assembling off-proc mat emements */
452: MatNullSpace nullsp; /* null space (operator is singular) */
453: MatNullSpace transnullsp; /* null space of transpose of operator */
454: MatNullSpace nearnullsp; /* near null space to be used by multigrid methods */
455: PetscInt congruentlayouts; /* are the rows and columns layouts congruent? */
456: PetscBool preallocated;
457: MatStencilInfo stencil; /* information for structured grid */
458: PetscBool symmetric,hermitian,structurally_symmetric,spd;
459: PetscBool symmetric_set,hermitian_set,structurally_symmetric_set,spd_set; /* if true, then corresponding flag is correct*/
460: PetscBool symmetric_eternal;
461: PetscBool nooffprocentries,nooffproczerorows;
462: PetscBool assembly_subset; /* set by MAT_SUBSET_OFF_PROC_ENTRIES */
463: PetscBool submat_singleis; /* for efficient PCSetUp_ASM() */
464: PetscBool structure_only;
465: PetscBool sortedfull; /* full, sorted rows are inserted */
466: PetscBool force_diagonals; /* set by MAT_FORCE_DIAGONAL_ENTRIES */
467: #if defined(PETSC_HAVE_DEVICE)
468: PetscOffloadMask offloadmask; /* a mask which indicates where the valid matrix data is (GPU, CPU or both) */
469: PetscBool boundtocpu;
470: #endif
471: void *spptr; /* pointer for special library like SuperLU */
472: char *solvertype;
473: PetscBool checksymmetryonassembly,checknullspaceonassembly;
474: PetscReal checksymmetrytol;
475: Mat schur; /* Schur complement matrix */
476: MatFactorSchurStatus schur_status; /* status of the Schur complement matrix */
477: Mat_Redundant *redundant; /* used by MatCreateRedundantMatrix() */
478: PetscBool erroriffailure; /* Generate an error if detected (for example a zero pivot) instead of returning */
479: MatFactorError factorerrortype; /* type of error in factorization */
480: PetscReal factorerror_zeropivot_value; /* If numerical zero pivot was detected this is the computed value */
481: PetscInt factorerror_zeropivot_row; /* Row where zero pivot was detected */
482: PetscInt nblocks,*bsizes; /* support for MatSetVariableBlockSizes() */
483: char *defaultvectype;
484: Mat_Product *product;
485: PetscBool form_explicit_transpose; /* hint to generate an explicit mat tranpsose for operations like MatMultTranspose() */
486: PetscBool transupdated; /* whether or not the explicitly generated transpose is up-to-date */
487: };
489: PETSC_INTERN PetscErrorCode MatAXPY_Basic(Mat,PetscScalar,Mat,MatStructure);
490: PETSC_INTERN PetscErrorCode MatAXPY_BasicWithPreallocation(Mat,Mat,PetscScalar,Mat,MatStructure);
491: PETSC_INTERN PetscErrorCode MatAXPY_Basic_Preallocate(Mat,Mat,Mat*);
492: PETSC_INTERN PetscErrorCode MatAXPY_Dense_Nest(Mat,PetscScalar,Mat);
494: /*
495: Utility for MatFactor (Schur complement)
496: */
497: PETSC_INTERN PetscErrorCode MatFactorFactorizeSchurComplement_Private(Mat);
498: PETSC_INTERN PetscErrorCode MatFactorInvertSchurComplement_Private(Mat);
499: PETSC_INTERN PetscErrorCode MatFactorUpdateSchurStatus_Private(Mat);
500: PETSC_INTERN PetscErrorCode MatFactorSetUpInPlaceSchur_Private(Mat);
502: /*
503: Utility for MatZeroRows
504: */
505: PETSC_INTERN PetscErrorCode MatZeroRowsMapLocal_Private(Mat,PetscInt,const PetscInt*,PetscInt*,PetscInt**);
507: /*
508: Utility for MatView/MatLoad
509: */
510: PETSC_INTERN PetscErrorCode MatView_Binary_BlockSizes(Mat,PetscViewer);
511: PETSC_INTERN PetscErrorCode MatLoad_Binary_BlockSizes(Mat,PetscViewer);
513: /*
514: Object for partitioning graphs
515: */
517: typedef struct _MatPartitioningOps *MatPartitioningOps;
518: struct _MatPartitioningOps {
519: PetscErrorCode (*apply)(MatPartitioning,IS*);
520: PetscErrorCode (*applynd)(MatPartitioning,IS*);
521: PetscErrorCode (*setfromoptions)(PetscOptionItems*,MatPartitioning);
522: PetscErrorCode (*destroy)(MatPartitioning);
523: PetscErrorCode (*view)(MatPartitioning,PetscViewer);
524: PetscErrorCode (*improve)(MatPartitioning,IS*);
525: };
527: struct _p_MatPartitioning {
528: PETSCHEADER(struct _MatPartitioningOps);
529: Mat adj;
530: PetscInt *vertex_weights;
531: PetscReal *part_weights;
532: PetscInt n; /* number of partitions */
533: void *data;
534: PetscInt setupcalled;
535: PetscBool use_edge_weights; /* A flag indicates whether or not to use edge weights */
536: };
538: /* needed for parallel nested dissection by ParMetis and PTSCOTCH */
539: PETSC_INTERN PetscErrorCode MatPartitioningSizesToSep_Private(PetscInt,PetscInt[],PetscInt[],PetscInt[]);
541: /*
542: Object for coarsen graphs
543: */
544: typedef struct _MatCoarsenOps *MatCoarsenOps;
545: struct _MatCoarsenOps {
546: PetscErrorCode (*apply)(MatCoarsen);
547: PetscErrorCode (*setfromoptions)(PetscOptionItems*,MatCoarsen);
548: PetscErrorCode (*destroy)(MatCoarsen);
549: PetscErrorCode (*view)(MatCoarsen,PetscViewer);
550: };
552: struct _p_MatCoarsen {
553: PETSCHEADER(struct _MatCoarsenOps);
554: Mat graph;
555: void *subctx;
556: /* */
557: PetscBool strict_aggs;
558: IS perm;
559: PetscCoarsenData *agg_lists;
560: };
562: /*
563: MatFDColoring is used to compute Jacobian matrices efficiently
564: via coloring. The data structure is explained below in an example.
566: Color = 0 1 0 2 | 2 3 0
567: ---------------------------------------------------
568: 00 01 | 05
569: 10 11 | 14 15 Processor 0
570: 22 23 | 25
571: 32 33 |
572: ===================================================
573: | 44 45 46
574: 50 | 55 Processor 1
575: | 64 66
576: ---------------------------------------------------
578: ncolors = 4;
580: ncolumns = {2,1,1,0}
581: columns = {{0,2},{1},{3},{}}
582: nrows = {4,2,3,3}
583: rows = {{0,1,2,3},{0,1},{1,2,3},{0,1,2}}
584: vwscale = {dx(0),dx(1),dx(2),dx(3)} MPI Vec
585: vscale = {dx(0),dx(1),dx(2),dx(3),dx(4),dx(5)} Seq Vec
587: ncolumns = {1,0,1,1}
588: columns = {{6},{},{4},{5}}
589: nrows = {3,0,2,2}
590: rows = {{0,1,2},{},{1,2},{1,2}}
591: vwscale = {dx(4),dx(5),dx(6)} MPI Vec
592: vscale = {dx(0),dx(4),dx(5),dx(6)} Seq Vec
594: See the routine MatFDColoringApply() for how this data is used
595: to compute the Jacobian.
597: */
598: typedef struct {
599: PetscInt row;
600: PetscInt col;
601: PetscScalar *valaddr; /* address of value */
602: } MatEntry;
604: typedef struct {
605: PetscInt row;
606: PetscScalar *valaddr; /* address of value */
607: } MatEntry2;
609: struct _p_MatFDColoring{
610: PETSCHEADER(int);
611: PetscInt M,N,m; /* total rows, columns; local rows */
612: PetscInt rstart; /* first row owned by local processor */
613: PetscInt ncolors; /* number of colors */
614: PetscInt *ncolumns; /* number of local columns for a color */
615: PetscInt **columns; /* lists the local columns of each color (using global column numbering) */
616: IS *isa; /* these are the IS that contain the column values given in columns */
617: PetscInt *nrows; /* number of local rows for each color */
618: MatEntry *matentry; /* holds (row, column, address of value) for Jacobian matrix entry */
619: MatEntry2 *matentry2; /* holds (row, address of value) for Jacobian matrix entry */
620: PetscScalar *dy; /* store a block of F(x+dx)-F(x) when J is in BAIJ format */
621: PetscReal error_rel; /* square root of relative error in computing function */
622: PetscReal umin; /* minimum allowable u'dx value */
623: Vec w1,w2,w3; /* work vectors used in computing Jacobian */
624: PetscBool fset; /* indicates that the initial function value F(X) is set */
625: PetscErrorCode (*f)(void); /* function that defines Jacobian */
626: void *fctx; /* optional user-defined context for use by the function f */
627: Vec vscale; /* holds FD scaling, i.e. 1/dx for each perturbed column */
628: PetscInt currentcolor; /* color for which function evaluation is being done now */
629: const char *htype; /* "wp" or "ds" */
630: ISColoringType ctype; /* IS_COLORING_GLOBAL or IS_COLORING_LOCAL */
631: PetscInt brows,bcols; /* number of block rows or columns for speedup inserting the dense matrix into sparse Jacobian */
632: PetscBool setupcalled; /* true if setup has been called */
633: PetscBool viewed; /* true if the -mat_fd_coloring_view has been triggered already */
634: void (*ftn_func_pointer)(void),*ftn_func_cntx; /* serve the same purpose as *fortran_func_pointers in PETSc objects */
635: PetscObjectId matid; /* matrix this object was created with, must always be the same */
636: };
638: typedef struct _MatColoringOps *MatColoringOps;
639: struct _MatColoringOps {
640: PetscErrorCode (*destroy)(MatColoring);
641: PetscErrorCode (*setfromoptions)(PetscOptionItems*,MatColoring);
642: PetscErrorCode (*view)(MatColoring,PetscViewer);
643: PetscErrorCode (*apply)(MatColoring,ISColoring*);
644: PetscErrorCode (*weights)(MatColoring,PetscReal**,PetscInt**);
645: };
647: struct _p_MatColoring {
648: PETSCHEADER(struct _MatColoringOps);
649: Mat mat;
650: PetscInt dist; /* distance of the coloring */
651: PetscInt maxcolors; /* the maximum number of colors returned, maxcolors=1 for MIS */
652: void *data; /* inner context */
653: PetscBool valid; /* check to see if what is produced is a valid coloring */
654: MatColoringWeightType weight_type; /* type of weight computation to be performed */
655: PetscReal *user_weights; /* custom weights and permutation */
656: PetscInt *user_lperm;
657: PetscBool valid_iscoloring; /* check to see if matcoloring is produced a valid iscoloring */
658: };
660: struct _p_MatTransposeColoring{
661: PETSCHEADER(int);
662: PetscInt M,N,m; /* total rows, columns; local rows */
663: PetscInt rstart; /* first row owned by local processor */
664: PetscInt ncolors; /* number of colors */
665: PetscInt *ncolumns; /* number of local columns for a color */
666: PetscInt *nrows; /* number of local rows for each color */
667: PetscInt currentcolor; /* color for which function evaluation is being done now */
668: ISColoringType ctype; /* IS_COLORING_GLOBAL or IS_COLORING_LOCAL */
670: PetscInt *colorforrow,*colorforcol; /* pointer to rows and columns */
671: PetscInt *rows; /* lists the local rows for each color (using the local row numbering) */
672: PetscInt *den2sp; /* maps (row,color) in the dense matrix to index of sparse matrix array a->a */
673: PetscInt *columns; /* lists the local columns of each color (using global column numbering) */
674: PetscInt brows; /* number of rows for efficient implementation of MatTransColoringApplyDenToSp() */
675: PetscInt *lstart; /* array used for loop over row blocks of Csparse */
676: };
678: /*
679: Null space context for preconditioner/operators
680: */
681: struct _p_MatNullSpace {
682: PETSCHEADER(int);
683: PetscBool has_cnst;
684: PetscInt n;
685: Vec* vecs;
686: PetscScalar* alpha; /* for projections */
687: PetscErrorCode (*remove)(MatNullSpace,Vec,void*); /* for user provided removal function */
688: void* rmctx; /* context for remove() function */
689: };
691: /*
692: Checking zero pivot for LU, ILU preconditioners.
693: */
694: typedef struct {
695: PetscInt nshift,nshift_max;
696: PetscReal shift_amount,shift_lo,shift_hi,shift_top,shift_fraction;
697: PetscBool newshift;
698: PetscReal rs; /* active row sum of abs(offdiagonals) */
699: PetscScalar pv; /* pivot of the active row */
700: } FactorShiftCtx;
702: /*
703: Used by MatCreateSubMatrices_MPIXAIJ_Local()
704: */
705: #include <petscctable.h>
706: typedef struct { /* used by MatCreateSubMatrices_MPIAIJ_SingleIS_Local() and MatCreateSubMatrices_MPIAIJ_Local */
707: PetscInt id; /* index of submats, only submats[0] is responsible for deleting some arrays below */
708: PetscInt nrqs,nrqr;
709: PetscInt **rbuf1,**rbuf2,**rbuf3,**sbuf1,**sbuf2;
710: PetscInt **ptr;
711: PetscInt *tmp;
712: PetscInt *ctr;
713: PetscInt *pa; /* proc array */
714: PetscInt *req_size,*req_source1,*req_source2;
715: PetscBool allcolumns,allrows;
716: PetscBool singleis;
717: PetscInt *row2proc; /* row to proc map */
718: PetscInt nstages;
719: #if defined(PETSC_USE_CTABLE)
720: PetscTable cmap,rmap;
721: PetscInt *cmap_loc,*rmap_loc;
722: #else
723: PetscInt *cmap,*rmap;
724: #endif
726: PetscErrorCode (*destroy)(Mat);
727: } Mat_SubSppt;
729: PETSC_EXTERN PetscErrorCode MatFactorDumpMatrix(Mat);
730: PETSC_INTERN PetscErrorCode MatShift_Basic(Mat,PetscScalar);
731: PETSC_INTERN PetscErrorCode MatSetBlockSizes_Default(Mat,PetscInt,PetscInt);
733: PETSC_STATIC_INLINE PetscErrorCode MatPivotCheck_nz(Mat mat,const MatFactorInfo *info,FactorShiftCtx *sctx,PetscInt row)
734: {
735: PetscReal _rs = sctx->rs;
736: PetscReal _zero = info->zeropivot*_rs;
739: if (PetscAbsScalar(sctx->pv) <= _zero && !PetscIsNanScalar(sctx->pv)) {
740: /* force |diag| > zeropivot*rs */
741: if (!sctx->nshift) sctx->shift_amount = info->shiftamount;
742: else sctx->shift_amount *= 2.0;
743: sctx->newshift = PETSC_TRUE;
744: (sctx->nshift)++;
745: } else {
746: sctx->newshift = PETSC_FALSE;
747: }
748: return(0);
749: }
751: PETSC_STATIC_INLINE PetscErrorCode MatPivotCheck_pd(Mat mat,const MatFactorInfo *info,FactorShiftCtx *sctx,PetscInt row)
752: {
753: PetscReal _rs = sctx->rs;
754: PetscReal _zero = info->zeropivot*_rs;
757: if (PetscRealPart(sctx->pv) <= _zero && !PetscIsNanScalar(sctx->pv)) {
758: /* force matfactor to be diagonally dominant */
759: if (sctx->nshift == sctx->nshift_max) {
760: sctx->shift_fraction = sctx->shift_hi;
761: } else {
762: sctx->shift_lo = sctx->shift_fraction;
763: sctx->shift_fraction = (sctx->shift_hi+sctx->shift_lo)/2.;
764: }
765: sctx->shift_amount = sctx->shift_fraction * sctx->shift_top;
766: sctx->nshift++;
767: sctx->newshift = PETSC_TRUE;
768: } else {
769: sctx->newshift = PETSC_FALSE;
770: }
771: return(0);
772: }
774: PETSC_STATIC_INLINE PetscErrorCode MatPivotCheck_inblocks(Mat mat,const MatFactorInfo *info,FactorShiftCtx *sctx,PetscInt row)
775: {
776: PetscReal _zero = info->zeropivot;
779: if (PetscAbsScalar(sctx->pv) <= _zero && !PetscIsNanScalar(sctx->pv)) {
780: sctx->pv += info->shiftamount;
781: sctx->shift_amount = 0.0;
782: sctx->nshift++;
783: }
784: sctx->newshift = PETSC_FALSE;
785: return(0);
786: }
788: PETSC_STATIC_INLINE PetscErrorCode MatPivotCheck_none(Mat fact,Mat mat,const MatFactorInfo *info,FactorShiftCtx *sctx,PetscInt row)
789: {
790: PetscReal _zero = info->zeropivot;
794: sctx->newshift = PETSC_FALSE;
795: if (PetscAbsScalar(sctx->pv) <= _zero && !PetscIsNanScalar(sctx->pv)) {
796: if (!mat->erroriffailure) {
797: PetscInfo3(mat,"Detected zero pivot in factorization in row %D value %g tolerance %g\n",row,(double)PetscAbsScalar(sctx->pv),(double)_zero);
798: fact->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
799: fact->factorerror_zeropivot_value = PetscAbsScalar(sctx->pv);
800: fact->factorerror_zeropivot_row = row;
801: } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %g tolerance %g\n",row,(double)PetscAbsScalar(sctx->pv),(double)_zero);
802: }
803: return(0);
804: }
806: PETSC_STATIC_INLINE PetscErrorCode MatPivotCheck(Mat fact,Mat mat,const MatFactorInfo *info,FactorShiftCtx *sctx,PetscInt row)
807: {
811: if (info->shifttype == (PetscReal) MAT_SHIFT_NONZERO) {
812: MatPivotCheck_nz(mat,info,sctx,row);
813: } else if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) {
814: MatPivotCheck_pd(mat,info,sctx,row);
815: } else if (info->shifttype == (PetscReal) MAT_SHIFT_INBLOCKS) {
816: MatPivotCheck_inblocks(mat,info,sctx,row);
817: } else {
818: MatPivotCheck_none(fact,mat,info,sctx,row);
819: }
820: return(0);
821: }
823: /*
824: Create and initialize a linked list
825: Input Parameters:
826: idx_start - starting index of the list
827: lnk_max - max value of lnk indicating the end of the list
828: nlnk - max length of the list
829: Output Parameters:
830: lnk - list initialized
831: bt - PetscBT (bitarray) with all bits set to false
832: lnk_empty - flg indicating the list is empty
833: */
834: #define PetscLLCreate(idx_start,lnk_max,nlnk,lnk,bt) \
835: (PetscMalloc1(nlnk,&lnk) || PetscBTCreate(nlnk,&(bt)) || (lnk[idx_start] = lnk_max,0))
837: #define PetscLLCreate_new(idx_start,lnk_max,nlnk,lnk,bt,lnk_empty)\
838: (PetscMalloc1(nlnk,&lnk) || PetscBTCreate(nlnk,&(bt)) || (lnk_empty = PETSC_TRUE,0) ||(lnk[idx_start] = lnk_max,0))
840: /*
841: Add an index set into a sorted linked list
842: Input Parameters:
843: nidx - number of input indices
844: indices - integer array
845: idx_start - starting index of the list
846: lnk - linked list(an integer array) that is created
847: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
848: output Parameters:
849: nlnk - number of newly added indices
850: lnk - the sorted(increasing order) linked list containing new and non-redundate entries from indices
851: bt - updated PetscBT (bitarray)
852: */
853: #define PetscLLAdd(nidx,indices,idx_start,nlnk,lnk,bt) 0;\
854: {\
855: PetscInt _k,_entry,_location,_lnkdata;\
856: nlnk = 0;\
857: _lnkdata = idx_start;\
858: for (_k=0; _k<nidx; _k++) {\
859: _entry = indices[_k];\
860: if (!PetscBTLookupSet(bt,_entry)) { /* new entry */\
861: /* search for insertion location */\
862: /* start from the beginning if _entry < previous _entry */\
863: if (_k && _entry < _lnkdata) _lnkdata = idx_start;\
864: do {\
865: _location = _lnkdata;\
866: _lnkdata = lnk[_location];\
867: } while (_entry > _lnkdata);\
868: /* insertion location is found, add entry into lnk */\
869: lnk[_location] = _entry;\
870: lnk[_entry] = _lnkdata;\
871: nlnk++;\
872: _lnkdata = _entry; /* next search starts from here if next_entry > _entry */\
873: }\
874: }\
875: }
877: /*
878: Add a permuted index set into a sorted linked list
879: Input Parameters:
880: nidx - number of input indices
881: indices - integer array
882: perm - permutation of indices
883: idx_start - starting index of the list
884: lnk - linked list(an integer array) that is created
885: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
886: output Parameters:
887: nlnk - number of newly added indices
888: lnk - the sorted(increasing order) linked list containing new and non-redundate entries from indices
889: bt - updated PetscBT (bitarray)
890: */
891: #define PetscLLAddPerm(nidx,indices,perm,idx_start,nlnk,lnk,bt) 0;\
892: {\
893: PetscInt _k,_entry,_location,_lnkdata;\
894: nlnk = 0;\
895: _lnkdata = idx_start;\
896: for (_k=0; _k<nidx; _k++) {\
897: _entry = perm[indices[_k]];\
898: if (!PetscBTLookupSet(bt,_entry)) { /* new entry */\
899: /* search for insertion location */\
900: /* start from the beginning if _entry < previous _entry */\
901: if (_k && _entry < _lnkdata) _lnkdata = idx_start;\
902: do {\
903: _location = _lnkdata;\
904: _lnkdata = lnk[_location];\
905: } while (_entry > _lnkdata);\
906: /* insertion location is found, add entry into lnk */\
907: lnk[_location] = _entry;\
908: lnk[_entry] = _lnkdata;\
909: nlnk++;\
910: _lnkdata = _entry; /* next search starts from here if next_entry > _entry */\
911: }\
912: }\
913: }
915: /*
916: Add a SORTED ascending index set into a sorted linked list - same as PetscLLAdd() bus skip 'if (_k && _entry < _lnkdata) _lnkdata = idx_start;'
917: Input Parameters:
918: nidx - number of input indices
919: indices - sorted integer array
920: idx_start - starting index of the list
921: lnk - linked list(an integer array) that is created
922: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
923: output Parameters:
924: nlnk - number of newly added indices
925: lnk - the sorted(increasing order) linked list containing new and non-redundate entries from indices
926: bt - updated PetscBT (bitarray)
927: */
928: #define PetscLLAddSorted(nidx,indices,idx_start,nlnk,lnk,bt) 0;\
929: {\
930: PetscInt _k,_entry,_location,_lnkdata;\
931: nlnk = 0;\
932: _lnkdata = idx_start;\
933: for (_k=0; _k<nidx; _k++) {\
934: _entry = indices[_k];\
935: if (!PetscBTLookupSet(bt,_entry)) { /* new entry */\
936: /* search for insertion location */\
937: do {\
938: _location = _lnkdata;\
939: _lnkdata = lnk[_location];\
940: } while (_entry > _lnkdata);\
941: /* insertion location is found, add entry into lnk */\
942: lnk[_location] = _entry;\
943: lnk[_entry] = _lnkdata;\
944: nlnk++;\
945: _lnkdata = _entry; /* next search starts from here */\
946: }\
947: }\
948: }
950: #define PetscLLAddSorted_new(nidx,indices,idx_start,lnk_empty,nlnk,lnk,bt) 0; \
951: {\
952: PetscInt _k,_entry,_location,_lnkdata;\
953: if (lnk_empty) {\
954: _lnkdata = idx_start; \
955: for (_k=0; _k<nidx; _k++) { \
956: _entry = indices[_k]; \
957: PetscBTSet(bt,_entry); /* mark the new entry */ \
958: _location = _lnkdata; \
959: _lnkdata = lnk[_location]; \
960: /* insertion location is found, add entry into lnk */ \
961: lnk[_location] = _entry; \
962: lnk[_entry] = _lnkdata; \
963: _lnkdata = _entry; /* next search starts from here */ \
964: } \
965: /*\
966: lnk[indices[nidx-1]] = lnk[idx_start];\
967: lnk[idx_start] = indices[0];\
968: PetscBTSet(bt,indices[0]); \
969: for (_k=1; _k<nidx; _k++) { \
970: PetscBTSet(bt,indices[_k]); \
971: lnk[indices[_k-1]] = indices[_k]; \
972: } \
973: */\
974: nlnk = nidx;\
975: lnk_empty = PETSC_FALSE;\
976: } else {\
977: nlnk = 0; \
978: _lnkdata = idx_start; \
979: for (_k=0; _k<nidx; _k++) { \
980: _entry = indices[_k]; \
981: if (!PetscBTLookupSet(bt,_entry)) { /* new entry */ \
982: /* search for insertion location */ \
983: do { \
984: _location = _lnkdata; \
985: _lnkdata = lnk[_location]; \
986: } while (_entry > _lnkdata); \
987: /* insertion location is found, add entry into lnk */ \
988: lnk[_location] = _entry; \
989: lnk[_entry] = _lnkdata; \
990: nlnk++; \
991: _lnkdata = _entry; /* next search starts from here */ \
992: } \
993: } \
994: } \
995: }
997: /*
998: Add a SORTED index set into a sorted linked list used for LUFactorSymbolic()
999: Same as PetscLLAddSorted() with an additional operation:
1000: count the number of input indices that are no larger than 'diag'
1001: Input Parameters:
1002: indices - sorted integer array
1003: idx_start - starting index of the list, index of pivot row
1004: lnk - linked list(an integer array) that is created
1005: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1006: diag - index of the active row in LUFactorSymbolic
1007: nzbd - number of input indices with indices <= idx_start
1008: im - im[idx_start] is initialized as num of nonzero entries in row=idx_start
1009: output Parameters:
1010: nlnk - number of newly added indices
1011: lnk - the sorted(increasing order) linked list containing new and non-redundate entries from indices
1012: bt - updated PetscBT (bitarray)
1013: im - im[idx_start]: unchanged if diag is not an entry
1014: : num of entries with indices <= diag if diag is an entry
1015: */
1016: #define PetscLLAddSortedLU(indices,idx_start,nlnk,lnk,bt,diag,nzbd,im) 0;\
1017: {\
1018: PetscInt _k,_entry,_location,_lnkdata,_nidx;\
1019: nlnk = 0;\
1020: _lnkdata = idx_start;\
1021: _nidx = im[idx_start] - nzbd; /* num of entries with idx_start < index <= diag */\
1022: for (_k=0; _k<_nidx; _k++) {\
1023: _entry = indices[_k];\
1024: nzbd++;\
1025: if (_entry== diag) im[idx_start] = nzbd;\
1026: if (!PetscBTLookupSet(bt,_entry)) { /* new entry */\
1027: /* search for insertion location */\
1028: do {\
1029: _location = _lnkdata;\
1030: _lnkdata = lnk[_location];\
1031: } while (_entry > _lnkdata);\
1032: /* insertion location is found, add entry into lnk */\
1033: lnk[_location] = _entry;\
1034: lnk[_entry] = _lnkdata;\
1035: nlnk++;\
1036: _lnkdata = _entry; /* next search starts from here */\
1037: }\
1038: }\
1039: }
1041: /*
1042: Copy data on the list into an array, then initialize the list
1043: Input Parameters:
1044: idx_start - starting index of the list
1045: lnk_max - max value of lnk indicating the end of the list
1046: nlnk - number of data on the list to be copied
1047: lnk - linked list
1048: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1049: output Parameters:
1050: indices - array that contains the copied data
1051: lnk - linked list that is cleaned and initialize
1052: bt - PetscBT (bitarray) with all bits set to false
1053: */
1054: #define PetscLLClean(idx_start,lnk_max,nlnk,lnk,indices,bt) 0;\
1055: {\
1056: PetscInt _j,_idx=idx_start;\
1057: for (_j=0; _j<nlnk; _j++) {\
1058: _idx = lnk[_idx];\
1059: indices[_j] = _idx;\
1060: PetscBTClear(bt,_idx);\
1061: }\
1062: lnk[idx_start] = lnk_max;\
1063: }
1064: /*
1065: Free memories used by the list
1066: */
1067: #define PetscLLDestroy(lnk,bt) (PetscFree(lnk) || PetscBTDestroy(&(bt)))
1069: /* Routines below are used for incomplete matrix factorization */
1070: /*
1071: Create and initialize a linked list and its levels
1072: Input Parameters:
1073: idx_start - starting index of the list
1074: lnk_max - max value of lnk indicating the end of the list
1075: nlnk - max length of the list
1076: Output Parameters:
1077: lnk - list initialized
1078: lnk_lvl - array of size nlnk for storing levels of lnk
1079: bt - PetscBT (bitarray) with all bits set to false
1080: */
1081: #define PetscIncompleteLLCreate(idx_start,lnk_max,nlnk,lnk,lnk_lvl,bt)\
1082: (PetscIntMultError(2,nlnk,NULL) || PetscMalloc1(2*nlnk,&lnk) || PetscBTCreate(nlnk,&(bt)) || (lnk[idx_start] = lnk_max,lnk_lvl = lnk + nlnk,0))
1084: /*
1085: Initialize a sorted linked list used for ILU and ICC
1086: Input Parameters:
1087: nidx - number of input idx
1088: idx - integer array used for storing column indices
1089: idx_start - starting index of the list
1090: perm - indices of an IS
1091: lnk - linked list(an integer array) that is created
1092: lnklvl - levels of lnk
1093: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1094: output Parameters:
1095: nlnk - number of newly added idx
1096: lnk - the sorted(increasing order) linked list containing new and non-redundate entries from idx
1097: lnklvl - levels of lnk
1098: bt - updated PetscBT (bitarray)
1099: */
1100: #define PetscIncompleteLLInit(nidx,idx,idx_start,perm,nlnk,lnk,lnklvl,bt) 0;\
1101: {\
1102: PetscInt _k,_entry,_location,_lnkdata;\
1103: nlnk = 0;\
1104: _lnkdata = idx_start;\
1105: for (_k=0; _k<nidx; _k++) {\
1106: _entry = perm[idx[_k]];\
1107: if (!PetscBTLookupSet(bt,_entry)) { /* new entry */\
1108: /* search for insertion location */\
1109: if (_k && _entry < _lnkdata) _lnkdata = idx_start;\
1110: do {\
1111: _location = _lnkdata;\
1112: _lnkdata = lnk[_location];\
1113: } while (_entry > _lnkdata);\
1114: /* insertion location is found, add entry into lnk */\
1115: lnk[_location] = _entry;\
1116: lnk[_entry] = _lnkdata;\
1117: lnklvl[_entry] = 0;\
1118: nlnk++;\
1119: _lnkdata = _entry; /* next search starts from here if next_entry > _entry */\
1120: }\
1121: }\
1122: }
1124: /*
1125: Add a SORTED index set into a sorted linked list for ILU
1126: Input Parameters:
1127: nidx - number of input indices
1128: idx - sorted integer array used for storing column indices
1129: level - level of fill, e.g., ICC(level)
1130: idxlvl - level of idx
1131: idx_start - starting index of the list
1132: lnk - linked list(an integer array) that is created
1133: lnklvl - levels of lnk
1134: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1135: prow - the row number of idx
1136: output Parameters:
1137: nlnk - number of newly added idx
1138: lnk - the sorted(increasing order) linked list containing new and non-redundate entries from idx
1139: lnklvl - levels of lnk
1140: bt - updated PetscBT (bitarray)
1142: Note: the level of factor(i,j) is set as lvl(i,j) = min{ lvl(i,j), lvl(i,prow)+lvl(prow,j)+1)
1143: where idx = non-zero columns of U(prow,prow+1:n-1), prow<i
1144: */
1145: #define PetscILULLAddSorted(nidx,idx,level,idxlvl,idx_start,nlnk,lnk,lnklvl,bt,lnklvl_prow) 0;\
1146: {\
1147: PetscInt _k,_entry,_location,_lnkdata,_incrlev,_lnklvl_prow=lnklvl[prow];\
1148: nlnk = 0;\
1149: _lnkdata = idx_start;\
1150: for (_k=0; _k<nidx; _k++) {\
1151: _incrlev = idxlvl[_k] + _lnklvl_prow + 1;\
1152: if (_incrlev > level) continue;\
1153: _entry = idx[_k];\
1154: if (!PetscBTLookupSet(bt,_entry)) { /* new entry */\
1155: /* search for insertion location */\
1156: do {\
1157: _location = _lnkdata;\
1158: _lnkdata = lnk[_location];\
1159: } while (_entry > _lnkdata);\
1160: /* insertion location is found, add entry into lnk */\
1161: lnk[_location] = _entry;\
1162: lnk[_entry] = _lnkdata;\
1163: lnklvl[_entry] = _incrlev;\
1164: nlnk++;\
1165: _lnkdata = _entry; /* next search starts from here if next_entry > _entry */\
1166: } else { /* existing entry: update lnklvl */\
1167: if (lnklvl[_entry] > _incrlev) lnklvl[_entry] = _incrlev;\
1168: }\
1169: }\
1170: }
1172: /*
1173: Add a index set into a sorted linked list
1174: Input Parameters:
1175: nidx - number of input idx
1176: idx - integer array used for storing column indices
1177: level - level of fill, e.g., ICC(level)
1178: idxlvl - level of idx
1179: idx_start - starting index of the list
1180: lnk - linked list(an integer array) that is created
1181: lnklvl - levels of lnk
1182: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1183: output Parameters:
1184: nlnk - number of newly added idx
1185: lnk - the sorted(increasing order) linked list containing new and non-redundate entries from idx
1186: lnklvl - levels of lnk
1187: bt - updated PetscBT (bitarray)
1188: */
1189: #define PetscIncompleteLLAdd(nidx,idx,level,idxlvl,idx_start,nlnk,lnk,lnklvl,bt) 0;\
1190: {\
1191: PetscInt _k,_entry,_location,_lnkdata,_incrlev;\
1192: nlnk = 0;\
1193: _lnkdata = idx_start;\
1194: for (_k=0; _k<nidx; _k++) {\
1195: _incrlev = idxlvl[_k] + 1;\
1196: if (_incrlev > level) continue;\
1197: _entry = idx[_k];\
1198: if (!PetscBTLookupSet(bt,_entry)) { /* new entry */\
1199: /* search for insertion location */\
1200: if (_k && _entry < _lnkdata) _lnkdata = idx_start;\
1201: do {\
1202: _location = _lnkdata;\
1203: _lnkdata = lnk[_location];\
1204: } while (_entry > _lnkdata);\
1205: /* insertion location is found, add entry into lnk */\
1206: lnk[_location] = _entry;\
1207: lnk[_entry] = _lnkdata;\
1208: lnklvl[_entry] = _incrlev;\
1209: nlnk++;\
1210: _lnkdata = _entry; /* next search starts from here if next_entry > _entry */\
1211: } else { /* existing entry: update lnklvl */\
1212: if (lnklvl[_entry] > _incrlev) lnklvl[_entry] = _incrlev;\
1213: }\
1214: }\
1215: }
1217: /*
1218: Add a SORTED index set into a sorted linked list
1219: Input Parameters:
1220: nidx - number of input indices
1221: idx - sorted integer array used for storing column indices
1222: level - level of fill, e.g., ICC(level)
1223: idxlvl - level of idx
1224: idx_start - starting index of the list
1225: lnk - linked list(an integer array) that is created
1226: lnklvl - levels of lnk
1227: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1228: output Parameters:
1229: nlnk - number of newly added idx
1230: lnk - the sorted(increasing order) linked list containing new and non-redundate entries from idx
1231: lnklvl - levels of lnk
1232: bt - updated PetscBT (bitarray)
1233: */
1234: #define PetscIncompleteLLAddSorted(nidx,idx,level,idxlvl,idx_start,nlnk,lnk,lnklvl,bt) 0;\
1235: {\
1236: PetscInt _k,_entry,_location,_lnkdata,_incrlev;\
1237: nlnk = 0;\
1238: _lnkdata = idx_start;\
1239: for (_k=0; _k<nidx; _k++) {\
1240: _incrlev = idxlvl[_k] + 1;\
1241: if (_incrlev > level) continue;\
1242: _entry = idx[_k];\
1243: if (!PetscBTLookupSet(bt,_entry)) { /* new entry */\
1244: /* search for insertion location */\
1245: do {\
1246: _location = _lnkdata;\
1247: _lnkdata = lnk[_location];\
1248: } while (_entry > _lnkdata);\
1249: /* insertion location is found, add entry into lnk */\
1250: lnk[_location] = _entry;\
1251: lnk[_entry] = _lnkdata;\
1252: lnklvl[_entry] = _incrlev;\
1253: nlnk++;\
1254: _lnkdata = _entry; /* next search starts from here */\
1255: } else { /* existing entry: update lnklvl */\
1256: if (lnklvl[_entry] > _incrlev) lnklvl[_entry] = _incrlev;\
1257: }\
1258: }\
1259: }
1261: /*
1262: Add a SORTED index set into a sorted linked list for ICC
1263: Input Parameters:
1264: nidx - number of input indices
1265: idx - sorted integer array used for storing column indices
1266: level - level of fill, e.g., ICC(level)
1267: idxlvl - level of idx
1268: idx_start - starting index of the list
1269: lnk - linked list(an integer array) that is created
1270: lnklvl - levels of lnk
1271: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1272: idxlvl_prow - idxlvl[prow], where prow is the row number of the idx
1273: output Parameters:
1274: nlnk - number of newly added indices
1275: lnk - the sorted(increasing order) linked list containing new and non-redundate entries from idx
1276: lnklvl - levels of lnk
1277: bt - updated PetscBT (bitarray)
1278: Note: the level of U(i,j) is set as lvl(i,j) = min{ lvl(i,j), lvl(prow,i)+lvl(prow,j)+1)
1279: where idx = non-zero columns of U(prow,prow+1:n-1), prow<i
1280: */
1281: #define PetscICCLLAddSorted(nidx,idx,level,idxlvl,idx_start,nlnk,lnk,lnklvl,bt,idxlvl_prow) 0;\
1282: {\
1283: PetscInt _k,_entry,_location,_lnkdata,_incrlev;\
1284: nlnk = 0;\
1285: _lnkdata = idx_start;\
1286: for (_k=0; _k<nidx; _k++) {\
1287: _incrlev = idxlvl[_k] + idxlvl_prow + 1;\
1288: if (_incrlev > level) continue;\
1289: _entry = idx[_k];\
1290: if (!PetscBTLookupSet(bt,_entry)) { /* new entry */\
1291: /* search for insertion location */\
1292: do {\
1293: _location = _lnkdata;\
1294: _lnkdata = lnk[_location];\
1295: } while (_entry > _lnkdata);\
1296: /* insertion location is found, add entry into lnk */\
1297: lnk[_location] = _entry;\
1298: lnk[_entry] = _lnkdata;\
1299: lnklvl[_entry] = _incrlev;\
1300: nlnk++;\
1301: _lnkdata = _entry; /* next search starts from here */\
1302: } else { /* existing entry: update lnklvl */\
1303: if (lnklvl[_entry] > _incrlev) lnklvl[_entry] = _incrlev;\
1304: }\
1305: }\
1306: }
1308: /*
1309: Copy data on the list into an array, then initialize the list
1310: Input Parameters:
1311: idx_start - starting index of the list
1312: lnk_max - max value of lnk indicating the end of the list
1313: nlnk - number of data on the list to be copied
1314: lnk - linked list
1315: lnklvl - level of lnk
1316: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1317: output Parameters:
1318: indices - array that contains the copied data
1319: lnk - linked list that is cleaned and initialize
1320: lnklvl - level of lnk that is reinitialized
1321: bt - PetscBT (bitarray) with all bits set to false
1322: */
1323: #define PetscIncompleteLLClean(idx_start,lnk_max,nlnk,lnk,lnklvl,indices,indiceslvl,bt) 0;\
1324: do {\
1325: PetscInt _j,_idx=idx_start;\
1326: for (_j=0; _j<nlnk; _j++) {\
1327: _idx = lnk[_idx];\
1328: *(indices+_j) = _idx;\
1329: *(indiceslvl+_j) = lnklvl[_idx];\
1330: lnklvl[_idx] = -1;\
1331: PetscBTClear(bt,_idx);\
1332: }\
1333: lnk[idx_start] = lnk_max;\
1334: } while (0)
1335: /*
1336: Free memories used by the list
1337: */
1338: #define PetscIncompleteLLDestroy(lnk,bt) (PetscFree(lnk) || PetscBTDestroy(&(bt)))
1340: #if !defined(PETSC_CLANG_STATIC_ANALYZER)
1341: #define MatCheckSameLocalSize(A,ar1,B,ar2) do { \
1343: if ((A->rmap->n != B->rmap->n) || (A->cmap->n != B->cmap->n)) SETERRQ6(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Incompatible matrix local sizes: parameter # %d (%D x %D) != parameter # %d (%D x %D)",ar1,A->rmap->n,A->cmap->n,ar2,B->rmap->n,B->cmap->n);} while (0)
1344: #define MatCheckSameSize(A,ar1,B,ar2) do { \
1345: if ((A->rmap->N != B->rmap->N) || (A->cmap->N != B->cmap->N)) SETERRQ6(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Incompatible matrix global sizes: parameter # %d (%D x %D) != parameter # %d (%D x %D)",ar1,A->rmap->N,A->cmap->N,ar2,B->rmap->N,B->cmap->N);\
1346: MatCheckSameLocalSize(A,ar1,B,ar2);} while (0)
1347: #else
1348: template <typename Tm>
1349: void MatCheckSameLocalSize(Tm,int,Tm,int);
1350: template <typename Tm>
1351: void MatCheckSameSize(Tm,int,Tm,int);
1352: #endif
1354: #define VecCheckMatCompatible(M,x,ar1,b,ar2) do { \
1355: if (M->cmap->N != x->map->N) SETERRQ3(PetscObjectComm((PetscObject)M),PETSC_ERR_ARG_SIZ,"Vector global length incompatible with matrix: parameter # %d global size %D != matrix column global size %D",ar1,x->map->N,M->cmap->N); \
1356: if (M->rmap->N != b->map->N) SETERRQ3(PetscObjectComm((PetscObject)M),PETSC_ERR_ARG_SIZ,"Vector global length incompatible with matrix: parameter # %d global size %D != matrix row global size %D",ar2,b->map->N,M->rmap->N);} while (0)
1358: /* -------------------------------------------------------------------------------------------------------*/
1359: #include <petscbt.h>
1360: /*
1361: Create and initialize a condensed linked list -
1362: same as PetscLLCreate(), but uses a scalable array 'lnk' with size of max number of entries, not O(N).
1363: Barry suggested this approach (Dec. 6, 2011):
1364: I've thought of an alternative way of representing a linked list that is efficient but doesn't have the O(N) scaling issue
1365: (it may be faster than the O(N) even sequentially due to less crazy memory access).
1367: Instead of having some like a 2 -> 4 -> 11 -> 22 list that uses slot 2 4 11 and 22 in a big array use a small array with two slots
1368: for each entry for example [ 2 1 | 4 3 | 22 -1 | 11 2] so the first number (of the pair) is the value while the second tells you where
1369: in the list the next entry is. Inserting a new link means just append another pair at the end. For example say we want to insert 13 into the
1370: list it would then become [2 1 | 4 3 | 22 -1 | 11 4 | 13 2 ] you just add a pair at the end and fix the point for the one that points to it.
1371: That is 11 use to point to the 2 slot, after the change 11 points to the 4th slot which has the value 13. Note that values are always next
1372: to each other so memory access is much better than using the big array.
1374: Example:
1375: nlnk_max=5, lnk_max=36:
1376: Initial list: [0, 0 | 36, 2 | 0, 0 | 0, 0 | 0, 0 | 0, 0 | 0, 0]
1377: here, head_node has index 2 with value lnk[2]=lnk_max=36,
1378: 0-th entry is used to store the number of entries in the list,
1379: The initial lnk represents head -> tail(marked by 36) with number of entries = lnk[0]=0.
1381: Now adding a sorted set {2,4}, the list becomes
1382: [2, 0 | 36, 4 |2, 6 | 4, 2 | 0, 0 | 0, 0 | 0, 0 ]
1383: represents head -> 2 -> 4 -> tail with number of entries = lnk[0]=2.
1385: Then adding a sorted set {0,3,35}, the list
1386: [5, 0 | 36, 8 | 2, 10 | 4, 12 | 0, 4 | 3, 6 | 35, 2 ]
1387: represents head -> 0 -> 2 -> 3 -> 4 -> 35 -> tail with number of entries = lnk[0]=5.
1389: Input Parameters:
1390: nlnk_max - max length of the list
1391: lnk_max - max value of the entries
1392: Output Parameters:
1393: lnk - list created and initialized
1394: bt - PetscBT (bitarray) with all bits set to false. Note: bt has size lnk_max, not nln_max!
1395: */
1396: PETSC_STATIC_INLINE PetscErrorCode PetscLLCondensedCreate(PetscInt nlnk_max,PetscInt lnk_max,PetscInt **lnk,PetscBT *bt)
1397: {
1399: PetscInt *llnk,lsize = 0;
1402: PetscIntMultError(2,nlnk_max+2,&lsize);
1403: PetscMalloc1(lsize,lnk);
1404: PetscBTCreate(lnk_max,bt);
1405: llnk = *lnk;
1406: llnk[0] = 0; /* number of entries on the list */
1407: llnk[2] = lnk_max; /* value in the head node */
1408: llnk[3] = 2; /* next for the head node */
1409: return(0);
1410: }
1412: /*
1413: Add a SORTED ascending index set into a sorted linked list. See PetscLLCondensedCreate() for detailed description.
1414: Input Parameters:
1415: nidx - number of input indices
1416: indices - sorted integer array
1417: lnk - condensed linked list(an integer array) that is created
1418: bt - PetscBT (bitarray), bt[idx]=true marks idx is in lnk
1419: output Parameters:
1420: lnk - the sorted(increasing order) linked list containing previous and newly added non-redundate indices
1421: bt - updated PetscBT (bitarray)
1422: */
1423: PETSC_STATIC_INLINE PetscErrorCode PetscLLCondensedAddSorted(PetscInt nidx,const PetscInt indices[],PetscInt lnk[],PetscBT bt)
1424: {
1425: PetscInt _k,_entry,_location,_next,_lnkdata,_nlnk,_newnode;
1428: _nlnk = lnk[0]; /* num of entries on the input lnk */
1429: _location = 2; /* head */
1430: for (_k=0; _k<nidx; _k++) {
1431: _entry = indices[_k];
1432: if (!PetscBTLookupSet(bt,_entry)) { /* new entry */
1433: /* search for insertion location */
1434: do {
1435: _next = _location + 1; /* link from previous node to next node */
1436: _location = lnk[_next]; /* idx of next node */
1437: _lnkdata = lnk[_location];/* value of next node */
1438: } while (_entry > _lnkdata);
1439: /* insertion location is found, add entry into lnk */
1440: _newnode = 2*(_nlnk+2); /* index for this new node */
1441: lnk[_next] = _newnode; /* connect previous node to the new node */
1442: lnk[_newnode] = _entry; /* set value of the new node */
1443: lnk[_newnode+1] = _location; /* connect new node to next node */
1444: _location = _newnode; /* next search starts from the new node */
1445: _nlnk++;
1446: } \
1447: }\
1448: lnk[0] = _nlnk; /* number of entries in the list */
1449: return(0);
1450: }
1452: PETSC_STATIC_INLINE PetscErrorCode PetscLLCondensedClean(PetscInt lnk_max,PetscInt nidx,PetscInt *indices,PetscInt lnk[],PetscBT bt)
1453: {
1455: PetscInt _k,_next,_nlnk;
1458: _next = lnk[3]; /* head node */
1459: _nlnk = lnk[0]; /* num of entries on the list */
1460: for (_k=0; _k<_nlnk; _k++) {
1461: indices[_k] = lnk[_next];
1462: _next = lnk[_next + 1];
1463: PetscBTClear(bt,indices[_k]);
1464: }
1465: lnk[0] = 0; /* num of entries on the list */
1466: lnk[2] = lnk_max; /* initialize head node */
1467: lnk[3] = 2; /* head node */
1468: return(0);
1469: }
1471: PETSC_STATIC_INLINE PetscErrorCode PetscLLCondensedView(PetscInt *lnk)
1472: {
1474: PetscInt k;
1477: PetscPrintf(PETSC_COMM_SELF,"LLCondensed of size %D, (val, next)\n",lnk[0]);
1478: for (k=2; k< lnk[0]+2; k++) {
1479: PetscPrintf(PETSC_COMM_SELF," %D: (%D, %D)\n",2*k,lnk[2*k],lnk[2*k+1]);
1480: }
1481: return(0);
1482: }
1484: /*
1485: Free memories used by the list
1486: */
1487: PETSC_STATIC_INLINE PetscErrorCode PetscLLCondensedDestroy(PetscInt *lnk,PetscBT bt)
1488: {
1492: PetscFree(lnk);
1493: PetscBTDestroy(&bt);
1494: return(0);
1495: }
1497: /* -------------------------------------------------------------------------------------------------------*/
1498: /*
1499: Same as PetscLLCondensedCreate(), but does not use non-scalable O(lnk_max) bitarray
1500: Input Parameters:
1501: nlnk_max - max length of the list
1502: Output Parameters:
1503: lnk - list created and initialized
1504: */
1505: PETSC_STATIC_INLINE PetscErrorCode PetscLLCondensedCreate_Scalable(PetscInt nlnk_max,PetscInt **lnk)
1506: {
1508: PetscInt *llnk,lsize = 0;
1511: PetscIntMultError(2,nlnk_max+2,&lsize);
1512: PetscMalloc1(lsize,lnk);
1513: llnk = *lnk;
1514: llnk[0] = 0; /* number of entries on the list */
1515: llnk[2] = PETSC_MAX_INT; /* value in the head node */
1516: llnk[3] = 2; /* next for the head node */
1517: return(0);
1518: }
1520: PETSC_STATIC_INLINE PetscErrorCode PetscLLCondensedExpand_Scalable(PetscInt nlnk_max,PetscInt **lnk)
1521: {
1523: PetscInt lsize = 0;
1526: PetscIntMultError(2,nlnk_max+2,&lsize);
1527: PetscRealloc(lsize*sizeof(PetscInt),lnk);
1528: return(0);
1529: }
1531: PETSC_STATIC_INLINE PetscErrorCode PetscLLCondensedAddSorted_Scalable(PetscInt nidx,const PetscInt indices[],PetscInt lnk[])
1532: {
1533: PetscInt _k,_entry,_location,_next,_lnkdata,_nlnk,_newnode;
1534: _nlnk = lnk[0]; /* num of entries on the input lnk */
1535: _location = 2; /* head */ \
1536: for (_k=0; _k<nidx; _k++) {
1537: _entry = indices[_k];
1538: /* search for insertion location */
1539: do {
1540: _next = _location + 1; /* link from previous node to next node */
1541: _location = lnk[_next]; /* idx of next node */
1542: _lnkdata = lnk[_location];/* value of next node */
1543: } while (_entry > _lnkdata);
1544: if (_entry < _lnkdata) {
1545: /* insertion location is found, add entry into lnk */
1546: _newnode = 2*(_nlnk+2); /* index for this new node */
1547: lnk[_next] = _newnode; /* connect previous node to the new node */
1548: lnk[_newnode] = _entry; /* set value of the new node */
1549: lnk[_newnode+1] = _location; /* connect new node to next node */
1550: _location = _newnode; /* next search starts from the new node */
1551: _nlnk++;
1552: }
1553: }
1554: lnk[0] = _nlnk; /* number of entries in the list */
1555: return 0;
1556: }
1558: PETSC_STATIC_INLINE PetscErrorCode PetscLLCondensedClean_Scalable(PetscInt nidx,PetscInt *indices,PetscInt *lnk)
1559: {
1560: PetscInt _k,_next,_nlnk;
1561: _next = lnk[3]; /* head node */
1562: _nlnk = lnk[0];
1563: for (_k=0; _k<_nlnk; _k++) {
1564: indices[_k] = lnk[_next];
1565: _next = lnk[_next + 1];
1566: }
1567: lnk[0] = 0; /* num of entries on the list */
1568: lnk[3] = 2; /* head node */
1569: return 0;
1570: }
1572: PETSC_STATIC_INLINE PetscErrorCode PetscLLCondensedDestroy_Scalable(PetscInt *lnk)
1573: {
1574: return PetscFree(lnk);
1575: }
1577: /* -------------------------------------------------------------------------------------------------------*/
1578: /*
1579: lnk[0] number of links
1580: lnk[1] number of entries
1581: lnk[3n] value
1582: lnk[3n+1] len
1583: lnk[3n+2] link to next value
1585: The next three are always the first link
1587: lnk[3] PETSC_MIN_INT+1
1588: lnk[4] 1
1589: lnk[5] link to first real entry
1591: The next three are always the last link
1593: lnk[6] PETSC_MAX_INT - 1
1594: lnk[7] 1
1595: lnk[8] next valid link (this is the same as lnk[0] but without the decreases)
1596: */
1598: PETSC_STATIC_INLINE PetscErrorCode PetscLLCondensedCreate_fast(PetscInt nlnk_max,PetscInt **lnk)
1599: {
1601: PetscInt *llnk,lsize = 0;
1604: PetscIntMultError(3,nlnk_max+3,&lsize);
1605: PetscMalloc1(lsize,lnk);
1606: llnk = *lnk;
1607: llnk[0] = 0; /* nlnk: number of entries on the list */
1608: llnk[1] = 0; /* number of integer entries represented in list */
1609: llnk[3] = PETSC_MIN_INT+1; /* value in the first node */
1610: llnk[4] = 1; /* count for the first node */
1611: llnk[5] = 6; /* next for the first node */
1612: llnk[6] = PETSC_MAX_INT-1; /* value in the last node */
1613: llnk[7] = 1; /* count for the last node */
1614: llnk[8] = 0; /* next valid node to be used */
1615: return(0);
1616: }
1618: PETSC_STATIC_INLINE PetscErrorCode PetscLLCondensedAddSorted_fast(PetscInt nidx,const PetscInt indices[],PetscInt lnk[])
1619: {
1620: PetscInt k,entry,prev,next;
1621: prev = 3; /* first value */
1622: next = lnk[prev+2];
1623: for (k=0; k<nidx; k++) {
1624: entry = indices[k];
1625: /* search for insertion location */
1626: while (entry >= lnk[next]) {
1627: prev = next;
1628: next = lnk[next+2];
1629: }
1630: /* entry is in range of previous list */
1631: if (entry < lnk[prev]+lnk[prev+1]) continue;
1632: lnk[1]++;
1633: /* entry is right after previous list */
1634: if (entry == lnk[prev]+lnk[prev+1]) {
1635: lnk[prev+1]++;
1636: if (lnk[next] == entry+1) { /* combine two contiguous strings */
1637: lnk[prev+1] += lnk[next+1];
1638: lnk[prev+2] = lnk[next+2];
1639: next = lnk[next+2];
1640: lnk[0]--;
1641: }
1642: continue;
1643: }
1644: /* entry is right before next list */
1645: if (entry == lnk[next]-1) {
1646: lnk[next]--;
1647: lnk[next+1]++;
1648: prev = next;
1649: next = lnk[prev+2];
1650: continue;
1651: }
1652: /* add entry into lnk */
1653: lnk[prev+2] = 3*((lnk[8]++)+3); /* connect previous node to the new node */
1654: prev = lnk[prev+2];
1655: lnk[prev] = entry; /* set value of the new node */
1656: lnk[prev+1] = 1; /* number of values in contiguous string is one to start */
1657: lnk[prev+2] = next; /* connect new node to next node */
1658: lnk[0]++;
1659: }
1660: return 0;
1661: }
1663: PETSC_STATIC_INLINE PetscErrorCode PetscLLCondensedClean_fast(PetscInt nidx,PetscInt *indices,PetscInt *lnk)
1664: {
1665: PetscInt _k,_next,_nlnk,cnt,j;
1666: _next = lnk[5]; /* first node */
1667: _nlnk = lnk[0];
1668: cnt = 0;
1669: for (_k=0; _k<_nlnk; _k++) {
1670: for (j=0; j<lnk[_next+1]; j++) {
1671: indices[cnt++] = lnk[_next] + j;
1672: }
1673: _next = lnk[_next + 2];
1674: }
1675: lnk[0] = 0; /* nlnk: number of links */
1676: lnk[1] = 0; /* number of integer entries represented in list */
1677: lnk[3] = PETSC_MIN_INT+1; /* value in the first node */
1678: lnk[4] = 1; /* count for the first node */
1679: lnk[5] = 6; /* next for the first node */
1680: lnk[6] = PETSC_MAX_INT-1; /* value in the last node */
1681: lnk[7] = 1; /* count for the last node */
1682: lnk[8] = 0; /* next valid location to make link */
1683: return 0;
1684: }
1686: PETSC_STATIC_INLINE PetscErrorCode PetscLLCondensedView_fast(PetscInt *lnk)
1687: {
1688: PetscInt k,next,nlnk;
1689: next = lnk[5]; /* first node */
1690: nlnk = lnk[0];
1691: for (k=0; k<nlnk; k++) {
1692: #if 0 /* Debugging code */
1693: printf("%d value %d len %d next %d\n",next,lnk[next],lnk[next+1],lnk[next+2]);
1694: #endif
1695: next = lnk[next + 2];
1696: }
1697: return 0;
1698: }
1700: PETSC_STATIC_INLINE PetscErrorCode PetscLLCondensedDestroy_fast(PetscInt *lnk)
1701: {
1702: return PetscFree(lnk);
1703: }
1705: /* this is extern because it is used in MatFDColoringUseDM() which is in the DM library */
1706: PETSC_EXTERN PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,void*);
1708: PETSC_EXTERN PetscLogEvent MAT_Mult;
1709: PETSC_EXTERN PetscLogEvent MAT_MultMatrixFree;
1710: PETSC_EXTERN PetscLogEvent MAT_Mults;
1711: PETSC_EXTERN PetscLogEvent MAT_MultConstrained;
1712: PETSC_EXTERN PetscLogEvent MAT_MultAdd;
1713: PETSC_EXTERN PetscLogEvent MAT_MultTranspose;
1714: PETSC_EXTERN PetscLogEvent MAT_MultTransposeConstrained;
1715: PETSC_EXTERN PetscLogEvent MAT_MultTransposeAdd;
1716: PETSC_EXTERN PetscLogEvent MAT_Solve;
1717: PETSC_EXTERN PetscLogEvent MAT_Solves;
1718: PETSC_EXTERN PetscLogEvent MAT_SolveAdd;
1719: PETSC_EXTERN PetscLogEvent MAT_SolveTranspose;
1720: PETSC_EXTERN PetscLogEvent MAT_SolveTransposeAdd;
1721: PETSC_EXTERN PetscLogEvent MAT_SOR;
1722: PETSC_EXTERN PetscLogEvent MAT_ForwardSolve;
1723: PETSC_EXTERN PetscLogEvent MAT_BackwardSolve;
1724: PETSC_EXTERN PetscLogEvent MAT_LUFactor;
1725: PETSC_EXTERN PetscLogEvent MAT_LUFactorSymbolic;
1726: PETSC_EXTERN PetscLogEvent MAT_LUFactorNumeric;
1727: PETSC_EXTERN PetscLogEvent MAT_QRFactor;
1728: PETSC_EXTERN PetscLogEvent MAT_QRFactorSymbolic;
1729: PETSC_EXTERN PetscLogEvent MAT_QRFactorNumeric;
1730: PETSC_EXTERN PetscLogEvent MAT_CholeskyFactor;
1731: PETSC_EXTERN PetscLogEvent MAT_CholeskyFactorSymbolic;
1732: PETSC_EXTERN PetscLogEvent MAT_CholeskyFactorNumeric;
1733: PETSC_EXTERN PetscLogEvent MAT_ILUFactor;
1734: PETSC_EXTERN PetscLogEvent MAT_ILUFactorSymbolic;
1735: PETSC_EXTERN PetscLogEvent MAT_ICCFactorSymbolic;
1736: PETSC_EXTERN PetscLogEvent MAT_Copy;
1737: PETSC_EXTERN PetscLogEvent MAT_Convert;
1738: PETSC_EXTERN PetscLogEvent MAT_Scale;
1739: PETSC_EXTERN PetscLogEvent MAT_AssemblyBegin;
1740: PETSC_EXTERN PetscLogEvent MAT_AssemblyEnd;
1741: PETSC_EXTERN PetscLogEvent MAT_SetValues;
1742: PETSC_EXTERN PetscLogEvent MAT_GetValues;
1743: PETSC_EXTERN PetscLogEvent MAT_GetRow;
1744: PETSC_EXTERN PetscLogEvent MAT_GetRowIJ;
1745: PETSC_EXTERN PetscLogEvent MAT_CreateSubMats;
1746: PETSC_EXTERN PetscLogEvent MAT_GetColoring;
1747: PETSC_EXTERN PetscLogEvent MAT_GetOrdering;
1748: PETSC_EXTERN PetscLogEvent MAT_RedundantMat;
1749: PETSC_EXTERN PetscLogEvent MAT_IncreaseOverlap;
1750: PETSC_EXTERN PetscLogEvent MAT_Partitioning;
1751: PETSC_EXTERN PetscLogEvent MAT_PartitioningND;
1752: PETSC_EXTERN PetscLogEvent MAT_Coarsen;
1753: PETSC_EXTERN PetscLogEvent MAT_ZeroEntries;
1754: PETSC_EXTERN PetscLogEvent MAT_Load;
1755: PETSC_EXTERN PetscLogEvent MAT_View;
1756: PETSC_EXTERN PetscLogEvent MAT_AXPY;
1757: PETSC_EXTERN PetscLogEvent MAT_FDColoringCreate;
1758: PETSC_EXTERN PetscLogEvent MAT_TransposeColoringCreate;
1759: PETSC_EXTERN PetscLogEvent MAT_FDColoringSetUp;
1760: PETSC_EXTERN PetscLogEvent MAT_FDColoringApply;
1761: PETSC_EXTERN PetscLogEvent MAT_Transpose;
1762: PETSC_EXTERN PetscLogEvent MAT_FDColoringFunction;
1763: PETSC_EXTERN PetscLogEvent MAT_CreateSubMat;
1764: PETSC_EXTERN PetscLogEvent MAT_MatSolve;
1765: PETSC_EXTERN PetscLogEvent MAT_MatTrSolve;
1766: PETSC_EXTERN PetscLogEvent MAT_MatMultSymbolic;
1767: PETSC_EXTERN PetscLogEvent MAT_MatMultNumeric;
1768: PETSC_EXTERN PetscLogEvent MAT_Getlocalmatcondensed;
1769: PETSC_EXTERN PetscLogEvent MAT_GetBrowsOfAcols;
1770: PETSC_EXTERN PetscLogEvent MAT_GetBrowsOfAocols;
1771: PETSC_EXTERN PetscLogEvent MAT_PtAPSymbolic;
1772: PETSC_EXTERN PetscLogEvent MAT_PtAPNumeric;
1773: PETSC_EXTERN PetscLogEvent MAT_Seqstompinum;
1774: PETSC_EXTERN PetscLogEvent MAT_Seqstompisym;
1775: PETSC_EXTERN PetscLogEvent MAT_Seqstompi;
1776: PETSC_EXTERN PetscLogEvent MAT_Getlocalmat;
1777: PETSC_EXTERN PetscLogEvent MAT_RARtSymbolic;
1778: PETSC_EXTERN PetscLogEvent MAT_RARtNumeric;
1779: PETSC_EXTERN PetscLogEvent MAT_MatTransposeMultSymbolic;
1780: PETSC_EXTERN PetscLogEvent MAT_MatTransposeMultNumeric;
1781: PETSC_EXTERN PetscLogEvent MAT_TransposeMatMultSymbolic;
1782: PETSC_EXTERN PetscLogEvent MAT_TransposeMatMultNumeric;
1783: PETSC_EXTERN PetscLogEvent MAT_MatMatMultSymbolic;
1784: PETSC_EXTERN PetscLogEvent MAT_MatMatMultNumeric;
1785: PETSC_EXTERN PetscLogEvent MAT_Applypapt;
1786: PETSC_EXTERN PetscLogEvent MAT_Applypapt_symbolic;
1787: PETSC_EXTERN PetscLogEvent MAT_Applypapt_numeric;
1788: PETSC_EXTERN PetscLogEvent MAT_Getsymtranspose;
1789: PETSC_EXTERN PetscLogEvent MAT_Getsymtransreduced;
1790: PETSC_EXTERN PetscLogEvent MAT_GetSequentialNonzeroStructure;
1791: PETSC_EXTERN PetscLogEvent MATMFFD_Mult;
1792: PETSC_EXTERN PetscLogEvent MAT_GetMultiProcBlock;
1793: PETSC_EXTERN PetscLogEvent MAT_CUSPARSECopyToGPU;
1794: PETSC_EXTERN PetscLogEvent MAT_CUSPARSECopyFromGPU;
1795: PETSC_EXTERN PetscLogEvent MAT_CUSPARSEGenerateTranspose;
1796: PETSC_EXTERN PetscLogEvent MAT_CUSPARSESolveAnalysis;
1797: PETSC_EXTERN PetscLogEvent MAT_SetValuesBatch;
1798: PETSC_EXTERN PetscLogEvent MAT_ViennaCLCopyToGPU;
1799: PETSC_EXTERN PetscLogEvent MAT_DenseCopyToGPU;
1800: PETSC_EXTERN PetscLogEvent MAT_DenseCopyFromGPU;
1801: PETSC_EXTERN PetscLogEvent MAT_Merge;
1802: PETSC_EXTERN PetscLogEvent MAT_Residual;
1803: PETSC_EXTERN PetscLogEvent MAT_SetRandom;
1804: PETSC_EXTERN PetscLogEvent MAT_FactorFactS;
1805: PETSC_EXTERN PetscLogEvent MAT_FactorInvS;
1806: PETSC_EXTERN PetscLogEvent MAT_PreallCOO;
1807: PETSC_EXTERN PetscLogEvent MAT_SetVCOO;
1808: PETSC_EXTERN PetscLogEvent MATCOLORING_Apply;
1809: PETSC_EXTERN PetscLogEvent MATCOLORING_Comm;
1810: PETSC_EXTERN PetscLogEvent MATCOLORING_Local;
1811: PETSC_EXTERN PetscLogEvent MATCOLORING_ISCreate;
1812: PETSC_EXTERN PetscLogEvent MATCOLORING_SetUp;
1813: PETSC_EXTERN PetscLogEvent MATCOLORING_Weights;
1814: PETSC_EXTERN PetscLogEvent MAT_H2Opus_Build;
1815: PETSC_EXTERN PetscLogEvent MAT_H2Opus_Compress;
1816: PETSC_EXTERN PetscLogEvent MAT_H2Opus_Orthog;
1818: #endif