Actual source code: xxt.c
/*************************************xxt.c************************************
Module Name: xxt
Module Info:

author:  Henry M. Tufo III
e-mail:  hmt@asci.uchicago.edu
contact:
+--------------------------------+--------------------------------+
|MCS Division - Building 221     |Department of Computer Science  |
|Argonne National Laboratory     |Ryerson 152                     |
|9700 S. Cass Avenue             |The University of Chicago       |
|Argonne, IL 60439               |Chicago, IL 60637               |
|(630) 252-5354/5986 ph/fx       |(773) 702-6019/8487 ph/fx       |
+--------------------------------+--------------------------------+

Last Modification: 3.20.01
**************************************xxt.c***********************************/
#include <../src/ksp/pc/impls/tfs/tfs.h>

#define LEFT  -1
#define RIGHT  1
#define BOTH   0

typedef struct xxt_solver_info {
  PetscInt    n, m, n_global, m_global;
  PetscInt    nnz, max_nnz, msg_buf_sz;
  PetscInt    *nsep, *lnsep, *fo, nfo, *stages;
  PetscInt    *col_sz, *col_indices;
  PetscScalar **col_vals, *x, *solve_uu, *solve_w;
  PetscInt    nsolves;
  PetscScalar tot_solve_time;
} xxt_info;

typedef struct matvec_info {
  PetscInt       n, m, n_global, m_global;
  PetscInt       *local2global;
  PCTFS_gs_ADT   PCTFS_gs_handle;
  PetscErrorCode (*matvec)(struct matvec_info*,PetscScalar*,PetscScalar*);
  void           *grid_data;
} mv_info;

struct xxt_CDT {
  PetscInt id;
  PetscInt ns;
  PetscInt level;
  xxt_info *info;
  mv_info  *mvi;
};

static PetscInt n_xxt        =0;
static PetscInt n_xxt_handles=0;

/* prototypes */
static PetscErrorCode do_xxt_solve(xxt_ADT xxt_handle, PetscScalar *rhs);
static PetscErrorCode check_handle(xxt_ADT xxt_handle);
static PetscErrorCode det_separators(xxt_ADT xxt_handle);
static PetscErrorCode do_matvec(mv_info *A, PetscScalar *v, PetscScalar *u);
static PetscErrorCode xxt_generate(xxt_ADT xxt_handle);
static PetscErrorCode do_xxt_factor(xxt_ADT xxt_handle);
static mv_info *set_mvi(PetscInt *local2global, PetscInt n, PetscInt m, PetscErrorCode (*matvec)(mv_info*,PetscScalar*,PetscScalar*), void *grid_data);

/**************************************xxt.c***********************************/
xxt_ADT XXT_new(void)
{
  xxt_ADT xxt_handle;

  /* rolling count on n_xxt ... pot. problem here */
  n_xxt_handles++;
  xxt_handle       = (xxt_ADT)malloc(sizeof(struct xxt_CDT));
  xxt_handle->id   = ++n_xxt;
  xxt_handle->info = NULL; xxt_handle->mvi = NULL;

  return(xxt_handle);
}

/**************************************xxt.c***********************************/
PetscErrorCode XXT_factor(xxt_ADT xxt_handle,     /* prev. allocated xxt handle */
                          PetscInt *local2global, /* global column mapping */
                          PetscInt n,             /* local num rows */
                          PetscInt m,             /* local num cols */
                          PetscErrorCode (*matvec)(void*,PetscScalar*,PetscScalar*), /* b_loc=A_local.x_loc */
                          void *grid_data)        /* grid data for matvec */
{
  PCTFS_comm_init();
  check_handle(xxt_handle);

  /* only 2^k for now and all nodes participating */

  /* space for X info */
  xxt_handle->info = (xxt_info*)malloc(sizeof(xxt_info));

  /* set up matvec handles */
  xxt_handle->mvi = set_mvi(local2global, n, m, (PetscErrorCode (*)(mv_info*,PetscScalar*,PetscScalar*))matvec, grid_data);

  /* matrix is assumed to be of full rank */
  /* LATER we can reset to indicate rank def. */
  xxt_handle->ns=0;

  /* determine separators and generate firing order - NB xxt info set here */
  det_separators(xxt_handle);

  return(do_xxt_factor(xxt_handle));
}
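
/* Illustrative usage sketch (annotation, not part of the original source): a typical
   caller builds the handle once, factors, then solves repeatedly.  The identifiers
   my_matvec, my_grid_data, local2global, n, m, x, and b below stand for hypothetical
   caller-supplied data.  Per the comment above, the solver currently assumes a
   power-of-two number of participating processors; XXT_solve copies b into x
   internally, so b may be passed as NULL when the right-hand side is already in x.

     xxt_ADT handle = XXT_new();
     XXT_factor(handle, local2global, n, m, my_matvec, my_grid_data);
     XXT_solve(handle, x, b);
     XXT_stats(handle);
     XXT_free(handle);
*/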

/**************************************xxt.c***********************************/
PetscErrorCode XXT_solve(xxt_ADT xxt_handle, PetscScalar *x, PetscScalar *b)
{
  PCTFS_comm_init();
  check_handle(xxt_handle);

  /* need to copy b into x? */
  if (b) PCTFS_rvec_copy(x,b,xxt_handle->mvi->n);
  return do_xxt_solve(xxt_handle,x);
}

/**************************************xxt.c***********************************/
PetscInt XXT_free(xxt_ADT xxt_handle)
{
  PCTFS_comm_init();
  check_handle(xxt_handle);
  n_xxt_handles--;

  free(xxt_handle->info->nsep);
  free(xxt_handle->info->lnsep);
  free(xxt_handle->info->fo);
  free(xxt_handle->info->stages);
  free(xxt_handle->info->solve_uu);
  free(xxt_handle->info->solve_w);
  free(xxt_handle->info->x);
  free(xxt_handle->info->col_vals);
  free(xxt_handle->info->col_sz);
  free(xxt_handle->info->col_indices);
  free(xxt_handle->info);
  free(xxt_handle->mvi->local2global);
  PCTFS_gs_free(xxt_handle->mvi->PCTFS_gs_handle);
  free(xxt_handle->mvi);
  free(xxt_handle);

  /* if the check fails we nuke */
  /* if NULL pointer passed to free we nuke */
  /* if the calls to free fail that's not my problem */
  return(0);
}

/**************************************xxt.c***********************************/
PetscErrorCode XXT_stats(xxt_ADT xxt_handle)
{
  PetscInt    op[]  = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_ADD};
  PetscInt    fop[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD};
  PetscInt    vals[9],  work[9];
  PetscScalar fvals[3], fwork[3];

  PCTFS_comm_init();
  check_handle(xxt_handle);

  /* if factorization not done there are no stats */
  if (!xxt_handle->info||!xxt_handle->mvi) {
    if (!PCTFS_my_id) PetscPrintf(PETSC_COMM_WORLD,"XXT_stats() :: no stats available!\n");
    return 1;
  }

  vals[0]=vals[1]=vals[2]=xxt_handle->info->nnz;
  vals[3]=vals[4]=vals[5]=xxt_handle->mvi->n;
  vals[6]=vals[7]=vals[8]=xxt_handle->info->msg_buf_sz;
  PCTFS_giop(vals,work,sizeof(op)/sizeof(op[0])-1,op);

  fvals[0]=fvals[1]=fvals[2]=xxt_handle->info->tot_solve_time/xxt_handle->info->nsolves++;
  PCTFS_grop(fvals,fwork,sizeof(fop)/sizeof(fop[0])-1,fop);

  if (!PCTFS_my_id) {
    PetscPrintf(PETSC_COMM_WORLD,"%D :: min xxt_nnz=%D\n",PCTFS_my_id,vals[0]);
    PetscPrintf(PETSC_COMM_WORLD,"%D :: max xxt_nnz=%D\n",PCTFS_my_id,vals[1]);
    PetscPrintf(PETSC_COMM_WORLD,"%D :: avg xxt_nnz=%g\n",PCTFS_my_id,1.0*vals[2]/PCTFS_num_nodes);
    PetscPrintf(PETSC_COMM_WORLD,"%D :: tot xxt_nnz=%D\n",PCTFS_my_id,vals[2]);
    PetscPrintf(PETSC_COMM_WORLD,"%D :: xxt C(2d) =%g\n",PCTFS_my_id,vals[2]/(PetscPowReal(1.0*vals[5],1.5)));
    PetscPrintf(PETSC_COMM_WORLD,"%D :: xxt C(3d) =%g\n",PCTFS_my_id,vals[2]/(PetscPowReal(1.0*vals[5],1.6667)));
    PetscPrintf(PETSC_COMM_WORLD,"%D :: min xxt_n  =%D\n",PCTFS_my_id,vals[3]);
    PetscPrintf(PETSC_COMM_WORLD,"%D :: max xxt_n  =%D\n",PCTFS_my_id,vals[4]);
    PetscPrintf(PETSC_COMM_WORLD,"%D :: avg xxt_n  =%g\n",PCTFS_my_id,1.0*vals[5]/PCTFS_num_nodes);
    PetscPrintf(PETSC_COMM_WORLD,"%D :: tot xxt_n  =%D\n",PCTFS_my_id,vals[5]);
    PetscPrintf(PETSC_COMM_WORLD,"%D :: min xxt_buf=%D\n",PCTFS_my_id,vals[6]);
    PetscPrintf(PETSC_COMM_WORLD,"%D :: max xxt_buf=%D\n",PCTFS_my_id,vals[7]);
    PetscPrintf(PETSC_COMM_WORLD,"%D :: avg xxt_buf=%g\n",PCTFS_my_id,1.0*vals[8]/PCTFS_num_nodes);
    PetscPrintf(PETSC_COMM_WORLD,"%D :: min xxt_slv=%g\n",PCTFS_my_id,fvals[0]);
    PetscPrintf(PETSC_COMM_WORLD,"%D :: max xxt_slv=%g\n",PCTFS_my_id,fvals[1]);
    PetscPrintf(PETSC_COMM_WORLD,"%D :: avg xxt_slv=%g\n",PCTFS_my_id,fvals[2]/PCTFS_num_nodes);
  }

  return(0);
}

/*************************************xxt.c************************************

Description: get A_local, local portion of global coarse matrix which
is a row dist. nxm matrix w/ n<m.
   o my_ml holds address of ML struct associated w/A_local and coarse grid
   o local2global holds global number of column i (i=0,...,m-1)
   o local2global holds global number of row    i (i=0,...,n-1)
   o mylocmatvec performs A_local . vec_local (note that gs is performed using
     PCTFS_gs_init/gop).

mylocmatvec = my_ml->Amat[grid_tag].matvec->external;
mylocmatvec (void :: void *data, double *in, double *out)
**************************************xxt.c***********************************/
static PetscErrorCode do_xxt_factor(xxt_ADT xxt_handle)
{
  return xxt_generate(xxt_handle);
}
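
/* Sketch of a caller-supplied local matvec matching the signature documented above
   (void *data, PetscScalar *in, PetscScalar *out).  Illustration only: my_grid and
   my_local_matvec are hypothetical names, and the diagonal loop merely stands in for
   out = A_local.in, which a real implementation would compute from its own grid data.

     typedef struct { PetscInt n; PetscScalar *diag; } my_grid;

     static PetscErrorCode my_local_matvec(void *data, PetscScalar *in, PetscScalar *out)
     {
       my_grid  *g = (my_grid*)data;
       PetscInt i;
       for (i=0; i<g->n; i++) out[i] = g->diag[i]*in[i];
       return 0;
     }

   Such a function would be passed to XXT_factor() as the matvec argument, with a
   pointer to the my_grid instance as grid_data.
*/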

/**************************************xxt.c***********************************/
static PetscErrorCode xxt_generate(xxt_ADT xxt_handle)
{
  PetscInt     i,j,k,idex;
  PetscInt     dim, col;
  PetscScalar  *u, *uu, *v, *z, *w, alpha, alpha_w;
  PetscInt     *segs;
  PetscInt     op[] = {GL_ADD,0};
  PetscInt     off, len;
  PetscScalar  *x_ptr;
  PetscInt     *iptr, flag;
  PetscInt     start=0, end, work;
  PetscInt     op2[] = {GL_MIN,0};
  PCTFS_gs_ADT PCTFS_gs_handle;
  PetscInt     *nsep, *lnsep, *fo;
  PetscInt     a_n            =xxt_handle->mvi->n;
  PetscInt     a_m            =xxt_handle->mvi->m;
  PetscInt     *a_local2global=xxt_handle->mvi->local2global;
  PetscInt     level;
  PetscInt     xxt_nnz=0, xxt_max_nnz=0;
  PetscInt     n, m;
  PetscInt     *col_sz, *col_indices, *stages;
  PetscScalar  **col_vals, *x;
  PetscInt     n_global;
  PetscInt     xxt_zero_nnz  =0;
  PetscInt     xxt_zero_nnz_0=0;
  PetscBLASInt i1 = 1,dlen;
  PetscScalar  dm1 = -1.0;

  n               = xxt_handle->mvi->n;
  nsep            = xxt_handle->info->nsep;
  lnsep           = xxt_handle->info->lnsep;
  fo              = xxt_handle->info->fo;
  end             = lnsep[0];
  level           = xxt_handle->level;
  PCTFS_gs_handle = xxt_handle->mvi->PCTFS_gs_handle;

  /* is there a null space? */
  /* LATER add in ability to detect null space by checking alpha */
  for (i=0, j=0; i<=level; i++) j+=nsep[i];

  m = j-xxt_handle->ns;
  if (m!=j) {
    PetscPrintf(PETSC_COMM_WORLD,"xxt_generate() :: null space exists %D %D %D\n",m,j,xxt_handle->ns);
  }

  /* get and initialize storage for x local         */
  /* note that x local is nxm and stored by columns */
  col_sz      = (PetscInt*) malloc(m*sizeof(PetscInt));
  col_indices = (PetscInt*) malloc((2*m+1)*sizeof(PetscInt));
  col_vals    = (PetscScalar**) malloc(m*sizeof(PetscScalar*));
  for (i=j=0; i<m; i++, j+=2) {
    col_indices[j]=col_indices[j+1]=col_sz[i]=-1;
    col_vals[i] = NULL;
  }
  col_indices[j]=-1;

  /* size of separators for each sub-hc working from bottom of tree to top */
  /* this looks like nsep[]=segments */
  stages = (PetscInt*) malloc((level+1)*sizeof(PetscInt));
  segs   = (PetscInt*) malloc((level+1)*sizeof(PetscInt));
  PCTFS_ivec_zero(stages,level+1);
  PCTFS_ivec_copy(segs,nsep,level+1);
  for (i=0; i<level; i++) segs[i+1] += segs[i];
  stages[0] = segs[0];

  /* temporary vectors */
  u  = (PetscScalar*) malloc(n*sizeof(PetscScalar));
  z  = (PetscScalar*) malloc(n*sizeof(PetscScalar));
  v  = (PetscScalar*) malloc(a_m*sizeof(PetscScalar));
  uu = (PetscScalar*) malloc(m*sizeof(PetscScalar));
  w  = (PetscScalar*) malloc(m*sizeof(PetscScalar));

  /* extra nnz due to replication of vertices across separators */
  for (i=1, j=0; i<=level; i++) j+=nsep[i];

  /* storage for sparse x values */
  n_global    = xxt_handle->info->n_global;
  xxt_max_nnz = (PetscInt)(2.5*PetscPowReal(1.0*n_global,1.6667) + j*n/2)/PCTFS_num_nodes;
  x           = (PetscScalar*) malloc(xxt_max_nnz*sizeof(PetscScalar));
  xxt_nnz     = 0;

  /* LATER - can embed next sep to fire in gs */
  /* time to make the donuts - generate X factor */
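  /* Outline of the loop below: each pass selects the next separator column to fire
     (global min over fo[] across the active sub-hypercube), sets v = e_col, subtracts
     the projection X.(X^T.A.e_col) built from the columns generated so far, normalizes
     by sqrt(v^T.A.v), and appends the nonzero segment of v as a new column of X,
     recording its offset and length in col_indices[]. */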
  for (dim=i=j=0; i<m; i++) {
    /* time to move to the next level? */
    while (i==segs[dim]) {

      stages[dim++]=i;
      end         +=lnsep[dim];
    }
    stages[dim]=i;

    /* which column are we firing? */
    /* i.e. set v_l */
    /* use new seps and do global min across hc to determine which one to fire */
    (start<end) ? (col=fo[start]) : (col=INT_MAX);
    PCTFS_giop_hc(&col,&work,1,op2,dim);

    /* shouldn't need this */
    if (col==INT_MAX) {
      PetscInfo(0,"hey ... col==INT_MAX??\n");
      continue;
    }

    /* do I own it? I should */
    PCTFS_rvec_zero(v,a_m);
    if (col==fo[start]) {
      start++;
      idex=PCTFS_ivec_linear_search(col, a_local2global, a_n);
      if (idex!=-1) {
        v[idex] = 1.0; j++;
      } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"NOT FOUND!");
    } else {
      idex=PCTFS_ivec_linear_search(col, a_local2global, a_m);
      if (idex!=-1) v[idex] = 1.0;
    }

    /* perform u = A.v_l */
    PCTFS_rvec_zero(u,n);
    do_matvec(xxt_handle->mvi,v,u);

    /* uu = X^T.u_l (local portion) */
    /* technically only need to zero out first i entries */
    /* later turn this into an XXT_solve call ? */
    PCTFS_rvec_zero(uu,m);
    x_ptr=x;
    iptr = col_indices;
    for (k=0; k<i; k++) {
      off = *iptr++;
      len = *iptr++;
      PetscBLASIntCast(len,&dlen);
      PetscStackCallBLAS("BLASdot",uu[k] = BLASdot_(&dlen,u+off,&i1,x_ptr,&i1));
      x_ptr+=len;
    }

    /* uu = X^T.u_l (comm portion) */
    PCTFS_ssgl_radd(uu, w, dim, stages);

    /* z = X.uu */
    PCTFS_rvec_zero(z,n);
    x_ptr=x;
    iptr = col_indices;
    for (k=0; k<i; k++) {
      off = *iptr++;
      len = *iptr++;
      PetscBLASIntCast(len,&dlen);
      PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&dlen,&uu[k],x_ptr,&i1,z+off,&i1));
      x_ptr+=len;
    }

    /* compute v_l = v_l - z */
    PCTFS_rvec_zero(v+a_n,a_m-a_n);
    PetscBLASIntCast(n,&dlen);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&dlen,&dm1,z,&i1,v,&i1));

    /* compute u_l = A.v_l */
    if (a_n!=a_m) PCTFS_gs_gop_hc(PCTFS_gs_handle,v,"+\0",dim);
    PCTFS_rvec_zero(u,n);
    do_matvec(xxt_handle->mvi,v,u);

    /* compute sqrt(alpha) = sqrt(v_l^T.u_l) - local portion */
    PetscBLASIntCast(n,&dlen);
    PetscStackCallBLAS("BLASdot",alpha = BLASdot_(&dlen,u,&i1,v,&i1));
    /* compute sqrt(alpha) = sqrt(v_l^T.u_l) - comm portion */
    PCTFS_grop_hc(&alpha, &alpha_w, 1, op, dim);

    alpha = (PetscScalar) PetscSqrtReal((PetscReal)alpha);

    /* check for small alpha */
    /* LATER use this to detect and determine null space */

    /* compute v_l = v_l/sqrt(alpha) */
    PCTFS_rvec_scale(v,1.0/alpha,n);

    /* add newly generated column, v_l, to X */
    flag = 1;
    off=len=0;
    for (k=0; k<n; k++) {
      if (v[k]!=0.0) {
        len=k;
        if (flag) { off=k; flag=0; }
      }
    }

    len -= (off-1);

    if (len>0) {
      if ((xxt_nnz+len)>xxt_max_nnz) {
        PetscInfo(0,"increasing space for X by 2x!\n");
        xxt_max_nnz *= 2;
        x_ptr        = (PetscScalar*) malloc(xxt_max_nnz*sizeof(PetscScalar));
        PCTFS_rvec_copy(x_ptr,x,xxt_nnz);
        free(x);
        x     = x_ptr;
        x_ptr+=xxt_nnz;
      }
      xxt_nnz += len;
      PCTFS_rvec_copy(x_ptr,v+off,len);

      /* keep track of number of zeros */
      if (dim) {
        for (k=0; k<len; k++) {
          if (x_ptr[k]==0.0) xxt_zero_nnz++;
        }
      } else {
        for (k=0; k<len; k++) {
          if (x_ptr[k]==0.0) xxt_zero_nnz_0++;
        }
      }
      col_indices[2*i] = off;
      col_sz[i] = col_indices[2*i+1] = len;
      col_vals[i] = x_ptr;
    } else {
      col_indices[2*i] = 0;
      col_sz[i] = col_indices[2*i+1] = 0;
      col_vals[i] = x_ptr;
    }
  }

  /* close off stages for execution phase */
  while (dim!=level) {
    stages[dim++] = i;
    PetscInfo(0,"disconnected!!! dim(%D)!=level(%D)\n",dim,level);
  }
  stages[dim]=i;

  xxt_handle->info->n              = xxt_handle->mvi->n;
  xxt_handle->info->m              = m;
  xxt_handle->info->nnz            = xxt_nnz;
  xxt_handle->info->max_nnz        = xxt_max_nnz;
  xxt_handle->info->msg_buf_sz     = stages[level]-stages[0];
  xxt_handle->info->solve_uu       = (PetscScalar*) malloc(m*sizeof(PetscScalar));
  xxt_handle->info->solve_w        = (PetscScalar*) malloc(m*sizeof(PetscScalar));
  xxt_handle->info->x              = x;
  xxt_handle->info->col_vals       = col_vals;
  xxt_handle->info->col_sz         = col_sz;
  xxt_handle->info->col_indices    = col_indices;
  xxt_handle->info->stages         = stages;
  xxt_handle->info->nsolves        = 0;
  xxt_handle->info->tot_solve_time = 0.0;

  free(segs);
  free(u);
  free(v);
  free(uu);
  free(z);
  free(w);

  return(0);
}

/**************************************xxt.c***********************************/
static PetscErrorCode do_xxt_solve(xxt_ADT xxt_handle, PetscScalar *uc)
{
  PetscInt     off, len, *iptr;
  PetscInt     level        = xxt_handle->level;
  PetscInt     n            = xxt_handle->info->n;
  PetscInt     m            = xxt_handle->info->m;
  PetscInt     *stages      = xxt_handle->info->stages;
  PetscInt     *col_indices = xxt_handle->info->col_indices;
  PetscScalar  *x_ptr, *uu_ptr;
  PetscScalar  *solve_uu    = xxt_handle->info->solve_uu;
  PetscScalar  *solve_w     = xxt_handle->info->solve_w;
  PetscScalar  *x           = xxt_handle->info->x;
  PetscBLASInt i1           = 1,dlen;
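
  /* Applies uc <- X.(X^T.uc): the first loop forms the local pieces of X^T.uc with BLAS
     dot products over each stored column segment, PCTFS_ssgl_radd completes the sums
     across the processor stages, and the second loop accumulates X times the result
     back into uc with BLAS axpy calls. */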

  uu_ptr=solve_uu;
  PCTFS_rvec_zero(uu_ptr,m);

  /* x  = X.Y^T.b */
  /* uu = Y^T.b   */
  for (x_ptr=x,iptr=col_indices; *iptr!=-1; x_ptr+=len) {
    off =*iptr++;
    len =*iptr++;
    PetscBLASIntCast(len,&dlen);
    PetscStackCallBLAS("BLASdot",*uu_ptr++ = BLASdot_(&dlen,uc+off,&i1,x_ptr,&i1));
  }

  /* communication of beta */
  uu_ptr=solve_uu;
  if (level) PCTFS_ssgl_radd(uu_ptr, solve_w, level, stages);

  PCTFS_rvec_zero(uc,n);

  /* x = X.uu */
  for (x_ptr=x,iptr=col_indices; *iptr!=-1; x_ptr+=len) {
    off =*iptr++;
    len =*iptr++;
    PetscBLASIntCast(len,&dlen);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&dlen,uu_ptr++,x_ptr,&i1,uc+off,&i1));
  }
  return 0;
}

/**************************************xxt.c***********************************/
static PetscErrorCode check_handle(xxt_ADT xxt_handle)
{
  PetscInt vals[2], work[2], op[] = {NON_UNIFORM,GL_MIN,GL_MAX};

  vals[0]=vals[1]=xxt_handle->id;
  PCTFS_giop(vals,work,sizeof(op)/sizeof(op[0])-1,op);

  return 0;
}

/**************************************xxt.c***********************************/
static PetscErrorCode det_separators(xxt_ADT xxt_handle)
{
  PetscInt     i, ct, id;
  PetscInt     mask, edge, *iptr;
  PetscInt     *dir, *used;
  PetscInt     sum[4], w[4];
  PetscScalar  rsum[4], rw[4];
  PetscInt     op[] = {GL_ADD,0};
  PetscScalar  *lhs, *rhs;
  PetscInt     *nsep, *lnsep, *fo, nfo=0;
  PCTFS_gs_ADT PCTFS_gs_handle = xxt_handle->mvi->PCTFS_gs_handle;
  PetscInt     *local2global   = xxt_handle->mvi->local2global;
  PetscInt     n               = xxt_handle->mvi->n;
  PetscInt     m               = xxt_handle->mvi->m;
  PetscInt     level           = xxt_handle->level;
  PetscInt     shared          = 0;

  dir  = (PetscInt*)malloc(sizeof(PetscInt)*(level+1));
  nsep = (PetscInt*)malloc(sizeof(PetscInt)*(level+1));
  lnsep= (PetscInt*)malloc(sizeof(PetscInt)*(level+1));
  fo   = (PetscInt*)malloc(sizeof(PetscInt)*(n+1));
  used = (PetscInt*)malloc(sizeof(PetscInt)*n);

  PCTFS_ivec_zero(dir,level+1);
  PCTFS_ivec_zero(nsep,level+1);
  PCTFS_ivec_zero(lnsep,level+1);
  PCTFS_ivec_set (fo,-1,n+1);
  PCTFS_ivec_zero(used,n);

  lhs = (PetscScalar*)malloc(sizeof(PetscScalar)*m);
  rhs = (PetscScalar*)malloc(sizeof(PetscScalar)*m);

  /* determine the # of unique dof */
  PCTFS_rvec_zero(lhs,m);
  PCTFS_rvec_set(lhs,1.0,n);
  PCTFS_gs_gop_hc(PCTFS_gs_handle,lhs,"+\0",level);
  PCTFS_rvec_zero(rsum,2);
  for (i=0; i<n; i++) {
    if (lhs[i]!=0.0) {
      rsum[0]+=1.0/lhs[i]; rsum[1]+=lhs[i];
    }
  }
  PCTFS_grop_hc(rsum,rw,2,op,level);
  rsum[0]+=0.1;
  rsum[1]+=0.1;

  if (PetscAbsScalar(rsum[0]-rsum[1])>EPS) shared=1;

  xxt_handle->info->n_global=xxt_handle->info->m_global=(PetscInt) rsum[0];
  xxt_handle->mvi->n_global =xxt_handle->mvi->m_global =(PetscInt) rsum[0];

  /* determine separator sets top down */
  if (shared) {
    for (iptr=fo+n,id=PCTFS_my_id,mask=PCTFS_num_nodes>>1,edge=level;edge>0;edge--,mask>>=1) {

      /* set rsh of hc, fire, and collect lhs responses */
      (id<mask) ? PCTFS_rvec_zero(lhs,m) : PCTFS_rvec_set(lhs,1.0,m);
      PCTFS_gs_gop_hc(PCTFS_gs_handle,lhs,"+\0",edge);

      /* set lsh of hc, fire, and collect rhs responses */
      (id<mask) ? PCTFS_rvec_set(rhs,1.0,m) : PCTFS_rvec_zero(rhs,m);
      PCTFS_gs_gop_hc(PCTFS_gs_handle,rhs,"+\0",edge);

      for (i=0;i<n;i++) {
        if (id< mask) {
          if (lhs[i]!=0.0) lhs[i]=1.0;
        }
        if (id>=mask) {
          if (rhs[i]!=0.0) rhs[i]=1.0;
        }
      }

      if (id< mask) PCTFS_gs_gop_hc(PCTFS_gs_handle,lhs,"+\0",edge-1);
      else          PCTFS_gs_gop_hc(PCTFS_gs_handle,rhs,"+\0",edge-1);

      /* count number of dofs I own that have signal and not in sep set */
      PCTFS_rvec_zero(rsum,4);
      for (PCTFS_ivec_zero(sum,4),ct=i=0;i<n;i++) {
        if (!used[i]) {
          /* number of unmarked dofs on node */
          ct++;
          /* number of dofs to be marked on lhs hc */
          if (id< mask) {
            if (lhs[i]!=0.0) { sum[0]++; rsum[0]+=1.0/lhs[i]; }
          }
          /* number of dofs to be marked on rhs hc */
          if (id>=mask) {
            if (rhs[i]!=0.0) { sum[1]++; rsum[1]+=1.0/rhs[i]; }
          }
        }
      }

      /* go for load balance - choose half with most unmarked dofs, bias LHS */
      (id<mask) ? (sum[2]=ct) : (sum[3]=ct);
      (id<mask) ? (rsum[2]=ct) : (rsum[3]=ct);
      PCTFS_giop_hc(sum,w,4,op,edge);
      PCTFS_grop_hc(rsum,rw,4,op,edge);
      rsum[0]+=0.1; rsum[1]+=0.1; rsum[2]+=0.1; rsum[3]+=0.1;

      if (id<mask) {
        /* mark dofs I own that have signal and not in sep set */
        for (ct=i=0;i<n;i++) {
          if ((!used[i])&&(lhs[i]!=0.0)) {
            ct++; nfo++;

            *--iptr = local2global[i];
            used[i] = edge;
          }
        }
        if (ct>1) PCTFS_ivec_sort(iptr,ct);

        lnsep[edge]=ct;
        nsep[edge] =(PetscInt) rsum[0];
        dir [edge] =LEFT;
      }

      if (id>=mask) {
        /* mark dofs I own that have signal and not in sep set */
        for (ct=i=0;i<n;i++) {
          if ((!used[i])&&(rhs[i]!=0.0)) {
            ct++; nfo++;

            *--iptr = local2global[i];
            used[i] = edge;
          }
        }
        if (ct>1) PCTFS_ivec_sort(iptr,ct);

        lnsep[edge] = ct;
        nsep[edge]  = (PetscInt) rsum[1];
        dir [edge]  = RIGHT;
      }

      /* LATER or we can recur on these to order seps at this level */
      /* do we need full set of separators for this? */

      /* fold rhs hc into lower */
      if (id>=mask) id-=mask;
    }
  } else {
    for (iptr=fo+n,id=PCTFS_my_id,mask=PCTFS_num_nodes>>1,edge=level;edge>0;edge--,mask>>=1) {
      /* set rsh of hc, fire, and collect lhs responses */
      (id<mask) ? PCTFS_rvec_zero(lhs,m) : PCTFS_rvec_set(lhs,1.0,m);
      PCTFS_gs_gop_hc(PCTFS_gs_handle,lhs,"+\0",edge);

      /* set lsh of hc, fire, and collect rhs responses */
      (id<mask) ? PCTFS_rvec_set(rhs,1.0,m) : PCTFS_rvec_zero(rhs,m);
      PCTFS_gs_gop_hc(PCTFS_gs_handle,rhs,"+\0",edge);

      /* count number of dofs I own that have signal and not in sep set */
      for (PCTFS_ivec_zero(sum,4),ct=i=0;i<n;i++) {
        if (!used[i]) {
          /* number of unmarked dofs on node */
          ct++;
          /* number of dofs to be marked on lhs hc */
          if ((id< mask)&&(lhs[i]!=0.0)) sum[0]++;
          /* number of dofs to be marked on rhs hc */
          if ((id>=mask)&&(rhs[i]!=0.0)) sum[1]++;
        }
      }

      /* go for load balance - choose half with most unmarked dofs, bias LHS */
      (id<mask) ? (sum[2]=ct) : (sum[3]=ct);
      PCTFS_giop_hc(sum,w,4,op,edge);

      /* lhs hc wins */
      if (sum[2]>=sum[3]) {
        if (id<mask) {
          /* mark dofs I own that have signal and not in sep set */
          for (ct=i=0;i<n;i++) {
            if ((!used[i])&&(lhs[i]!=0.0)) {
              ct++; nfo++;
              *--iptr = local2global[i];
              used[i]=edge;
            }
          }
          if (ct>1) PCTFS_ivec_sort(iptr,ct);
          lnsep[edge]=ct;
        }
        nsep[edge]=sum[0];
        dir [edge]=LEFT;
      } else { /* rhs hc wins */
        if (id>=mask) {
          /* mark dofs I own that have signal and not in sep set */
          for (ct=i=0;i<n;i++) {
            if ((!used[i])&&(rhs[i]!=0.0)) {
              ct++; nfo++;
              *--iptr = local2global[i];
              used[i]=edge;
            }
          }
          if (ct>1) PCTFS_ivec_sort(iptr,ct);
          lnsep[edge]=ct;
        }
        nsep[edge]=sum[1];
        dir [edge]=RIGHT;
      }
      /* LATER or we can recur on these to order seps at this level */
      /* do we need full set of separators for this? */

      /* fold rhs hc into lower */
      if (id>=mask) id-=mask;
    }
  }

  /* level 0 is on processor case - so mark the remainder */
  for (ct=i=0; i<n; i++) {
    if (!used[i]) {
      ct++; nfo++;
      *--iptr = local2global[i];
      used[i] = edge;
    }
  }
  if (ct>1) PCTFS_ivec_sort(iptr,ct);
  lnsep[edge]=ct;
  nsep [edge]=ct;
  dir  [edge]=LEFT;

  xxt_handle->info->nsep  = nsep;
  xxt_handle->info->lnsep = lnsep;
  xxt_handle->info->fo    = fo;
  xxt_handle->info->nfo   = nfo;

  free(dir);
  free(lhs);
  free(rhs);
  free(used);
  return 0;
}

/**************************************xxt.c***********************************/
static mv_info *set_mvi(PetscInt *local2global,PetscInt n,PetscInt m,PetscErrorCode (*matvec)(mv_info*,PetscScalar*,PetscScalar*),void *grid_data)
{
  mv_info *mvi;

  mvi               = (mv_info*)malloc(sizeof(mv_info));
  mvi->n            = n;
  mvi->m            = m;
  mvi->n_global     = -1;
  mvi->m_global     = -1;
  mvi->local2global = (PetscInt*)malloc((m+1)*sizeof(PetscInt));
  PCTFS_ivec_copy(mvi->local2global,local2global,m);
  mvi->local2global[m] = INT_MAX;
  mvi->matvec          = matvec;
  mvi->grid_data       = grid_data;

  /* set xxt communication handle to perform restricted matvec */
  mvi->PCTFS_gs_handle = PCTFS_gs_init(local2global, m, PCTFS_num_nodes);

  return(mvi);
}

/**************************************xxt.c***********************************/
static PetscErrorCode do_matvec(mv_info *A, PetscScalar *v, PetscScalar *u)
{
  A->matvec((mv_info*)A->grid_data,v,u);
  return 0;
}