     MatGetLocalSize(A,&ai,&aj); MatGetLocalSize(B,&bi,&bj);
     printf("MtM mpi %dx%d times %dx%d\n",ai,aj,bi,bj); */
  }
  {
    Mat *sub,tmp1,tmp2;
    IS  A_total,B_total,IS_A[2],IS_B[2];

    /* index sets in A and B */
    ierr = ISCreateStride(comm,A_locali,0,1,&A_total); CHKERRQ(ierr);
    ierr = ISCreateStride(comm,B_global,0,1,&B_total); CHKERRQ(ierr);
    IS_A[0] = A_total;
    IS_B[0] = B_total;

    /* get the B block corresponding to our part of A */
    ierr = MatGetSubMatrices(B,1,IS_A,IS_B,MAT_INITIAL_MATRIX,&sub); CHKERRQ(ierr);
    ierr = ISDestroy(A_total); CHKERRQ(ierr);
    ierr = ISDestroy(B_total); CHKERRQ(ierr);

    /* multiply diag and off-diag block of A with relevant parts of B */
    ierr = MatTMatMult_AIJ(Aij->A,sub[0],&tmp1); CHKERRQ(ierr);
    /*{int i,j; MatGetLocalSize(tmp1,&i,&j);
      printf("tmp1 is %d,%d; about to get %d\n",i,j,A_locali);}*/
    ierr = MatTMatMult_AIJ(Aij->B,sub[0],&tmp2); CHKERRQ(ierr);
    /*{ int i,j; MatGetSize(tmp1,&i,&j);
      if (i!=A_global) SETERRQ(1,0,"tmp1 weirdness\n"); }*/
    if (lsize==PETSC_DECIDE) {
      ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,A_global,B_global,5,0,5,0,&res); CHKERRQ(ierr);
    } else {
      if (A_global!=B_global) SETERRQ(1,1,"Only specified lsize for square matrices");
      if (lsize!=A_localj) {
        printf("Ouch! <%d,%d>\n",lsize,A_localj);
        ierr = MatCreateMPIAIJ(comm,lsize,lsize,PETSC_DECIDE,PETSC_DECIDE,5,0,5,0,&res); CHKERRQ(ierr);
      } else {
        Vec bandv;
        int iRow;
        ierr = VecCreateMPI(comm,lsize,PETSC_DECIDE,&bandv); CHKERRQ(ierr);
        for (iRow=0; iRow<A_global; iRow++) {
          int    Row=iRow,ncols;
          Scalar colsv;
          ierr = MatGetRow(tmp1,iRow,&ncols,PETSC_NULL,PETSC_NULL); CHKERRQ(ierr);
          colsv = (Scalar) ncols;
          ierr = VecSetValues(bandv,1,&Row,&colsv,ADD_VALUES); CHKERRQ(ierr);
          ierr = MatRestoreRow(tmp1,iRow,&ncols,PETSC_NULL,PETSC_NULL); CHKERRQ(ierr);
        }
        for (iRow=0; iRow<A_off; iRow++) {
          int    Row=iRow+A_localj,ncols;
          Scalar colsv;
          ierr = MatGetRow(tmp2,iRow,&ncols,PETSC_NULL,PETSC_NULL); CHKERRQ(ierr);
          colsv = (Scalar) ncols;
          ierr = VecSetValues(bandv,1,&Row,&colsv,ADD_VALUES); CHKERRQ(ierr);
          ierr = MatRestoreRow(tmp2,iRow,&ncols,PETSC_NULL,PETSC_NULL); CHKERRQ(ierr);
        }
        ierr = VecAssemblyBegin(bandv); CHKERRQ(ierr);
        ierr = VecAssemblyEnd(bandv); CHKERRQ(ierr);
        /*VecView(bandv,0);*/
        {
          Scalar *band;
          int    *bandw,vsize,i;
          ierr = VecGetLocalSize(bandv,&vsize); CHKERRQ(ierr);
          bandw = (int*) PetscMalloc((vsize+1)*sizeof(int)); CHKPTRQ(bandw);
          ierr = VecGetArray(bandv,&band); CHKERRQ(ierr);
          for (i=0; i<vsize; i++) { bandw[i] = (int)band[i]; }
          ierr = VecRestoreArray(bandv,&band); CHKERRQ(ierr);
          ierr = VecDestroy(bandv); CHKERRQ(ierr);
          ierr = MatCreateMPIAIJ(comm,lsize,lsize,PETSC_DECIDE,PETSC_DECIDE,
                                 0,bandw,0,bandw,/*5,0,5,0,*/ &res); CHKERRQ(ierr);
          PetscFree(bandw);
        }
      }
    }

    {
      int iRow;
      for (iRow=0; iRow<A_global/*A_localj*/; iRow++) {
        int    Row=iRow,ncols,*cols;
        Scalar *vals;
        ierr = MatGetRow(tmp1,iRow,&ncols,&cols,&vals); CHKERRQ(ierr);
        ierr = MatSetValues(res,1,&Row,ncols,cols,vals,ADD_VALUES); CHKERRQ(ierr);
        ierr = MatRestoreRow(tmp1,iRow,&ncols,&cols,&vals); CHKERRQ(ierr);
      }
      for (iRow=0; iRow<A_off; iRow++) {
        int    Row=iRow+A_localj,ncols,*cols;
        Scalar *vals;
        ierr = MatGetRow(tmp2,iRow,&ncols,&cols,&vals); CHKERRQ(ierr);
        ierr = MatSetValues(res,1,&Row,ncols,cols,vals,ADD_VALUES); CHKERRQ(ierr);
        ierr = MatRestoreRow(tmp2,iRow,&ncols,&cols,&vals); CHKERRQ(ierr);
      }
    }
    ierr = MatAssemblyBegin(res,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    ierr = MatDestroy(sub[0]); CHKERRQ(ierr);
    PetscFree(sub);
    ierr = MatDestroy(tmp1); CHKERRQ(ierr);
    ierr = MatDestroy(tmp2); CHKERRQ(ierr);
    ierr = MatAssemblyEnd(res,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  }

*C = res;

  PetscFunctionReturn(0);
}

#undef __FUNC__ #define __FUNC__ "MatrixAij2MpiAbut" int MatrixAij2MpiAbut(MPI_Comm comm,int part,Mat in,int jconform,Mat *out) { Mat res; int *band,m,n,irow,rstart,rend,ierr;

  PetscFunctionBegin;
  /* get the bandwidth of the matrix to be embedded;
     specifying this as d_nnz/o_nnz is an overestimate */
  ierr = MatGetLocalSize(in,&m,&n); CHKERRQ(ierr);
  band = (int *) PetscMalloc((m+1)*sizeof(int)); CHKPTRQ(band);
  for (irow=0; irow<m; irow++) {
    ierr = MatGetRow(in,irow,band+irow,PETSC_NULL,PETSC_NULL); CHKERRQ(ierr);
    ierr = MatRestoreRow(in,irow,band+irow,PETSC_NULL,PETSC_NULL); CHKERRQ(ierr);
  }

  {
    int isize,jsize;
    MPI_Allreduce(&m,&isize,1,MPI_INT,MPI_SUM,comm);
    MPI_Allreduce(&part,&jsize,1,MPI_INT,MPI_SUM,comm);
    if (jconform /* isize==jsize */) {
      ierr = MatCreateMPIAIJ(comm,m,m,PETSC_DECIDE,PETSC_DECIDE,
                             PETSC_NULL,band,PETSC_NULL,band,&res); CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(comm,m,part,PETSC_DECIDE,PETSC_DECIDE,
                             PETSC_NULL,band,PETSC_NULL,band,&res); CHKERRQ(ierr);
    }
  }
  PetscFree(band);

  ierr = MatGetOwnershipRange(res,&rstart,&rend); CHKERRQ(ierr);
  for (irow=0; irow<m; irow++) {
    int    row=rstart+irow,ncols,*cols;
    Scalar *vals;
    ierr = MatGetRow(in,irow,&ncols,&cols,&vals); CHKERRQ(ierr);
    ierr = MatSetValues(res,1,&row,ncols,cols,vals,INSERT_VALUES); CHKERRQ(ierr);
    ierr = MatRestoreRow(in,irow,&ncols,&cols,&vals); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(res,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(res,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);

  *out = res;
  PetscFunctionReturn(0);
}
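
/* A minimal calling sketch for the routine above (an illustration only, not code
   from this file; the names seq_block/par_mat and the argument choices are
   assumptions): each process passes its locally assembled sequential AIJ block,
   here with jconform nonzero so the column layout follows the row layout.

     Mat seq_block,par_mat;
     int ierr;
     ... assemble the MATSEQAIJ matrix seq_block on each process ...
     ierr = MatrixAij2MpiAbut(PETSC_COMM_WORLD,0,seq_block,1,&par_mat); CHKERRQ(ierr);
     ... use par_mat ...
     ierr = MatDestroy(par_mat); CHKERRQ(ierr);
*/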

#undef __FUNC__ #define __FUNC__ "MatrixAij2MpiAdd" int MatrixAij2MpiAdd(int m,int n,int M,int N,Mat in,MPI_Comm comm,Mat *out) { Mat res; int row,mat_m,mat_n,ierr;

  PetscFunctionBegin;
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,5,0,3,0,&res); CHKERRQ(ierr);
  ierr = MatGetLocalSize(in,&mat_m,&mat_n); CHKERRQ(ierr);
  {
    int c_m,c_n;
    ierr = MatGetSize(res,&c_m,&c_n); CHKERRQ(ierr);
    if ( (c_m!=mat_m) || (c_n!=mat_n) ) {
      printf("Mismatch res mat=%d,%d; in mat=%d,%d\n",c_m,c_n,mat_m,mat_n);
      SETERRQ(1,0,"Parameter mismatch");
    }
  }
  for (row=0; row<mat_m; row++) {
    int    ncols,*cols;
    Scalar *vals;
    ierr = MatGetRow(in,row,&ncols,&cols,&vals); CHKERRQ(ierr);
    ierr = MatSetValues(res,1,&row,ncols,cols,vals,ADD_VALUES); CHKERRQ(ierr);
    ierr = MatRestoreRow(in,row,&ncols,&cols,&vals); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(res,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(res,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);

  *out = res;
  PetscFunctionReturn(0);
}

#undef __FUNC__ #define __FUNC__ "MatrixAij2MpiIndex" int MatrixAij2MpiIndex(MPI_Comm comm,IS iptr,IS jptr,int m,int n,int M,int N, Mat in,InsertMode addv,Mat *out) { Mat res; int n_in,m_in,irow,ierr, nidx,njdx,*idx=0,*jdx=0;

PetscFunctionBegin; /*printf(">> Using unoptimised routine Aij2MpiIndex <<\n");*/ ierr = MatGetLocalSize(in,&m_in,&n_in); CHKERRQ(ierr);

  if (iptr!=PETSC_NULL) {
    ierr = ISGetSize(iptr,&nidx); CHKERRQ(ierr);
    if (m_in!=nidx) {
      printf("In matrix size: %dx%d; I index set size=%d\n",m_in,n_in,nidx);
      SETERRQ(1,0,"MatrixAij2MpiIndex: size mismatch mat/IS_i");
    }
  }
  if (jptr!=PETSC_NULL) {
    ierr = ISGetSize(jptr,&njdx); CHKERRQ(ierr);
    if (n_in!=njdx) {
      printf("In matrix size: %dx%d; J index set size=%d\n",m_in,n_in,njdx);
      SETERRQ(1,0,"MatrixAij2MpiIndex: size mismatch mat/IS_j");
    }
  }

ierr = MatCreateMPIAIJ(comm,m,n,M,N,5,PETSC_NULL,3,PETSC_NULL,&res); CHKERRQ(ierr);

  if (iptr) {ierr = ISGetIndices(iptr,&idx); CHKERRQ(ierr);}
  if (jptr) {ierr = ISGetIndices(jptr,&jdx); CHKERRQ(ierr);}

  for (irow=0; irow<m_in; irow++) {
    int    row,ncols,*cols,icol;
    Scalar *vals;
    if (iptr) row=idx[irow]; else row=irow;
    ierr = MatGetRow(in,irow,&ncols,&cols,&vals); CHKERRQ(ierr);
    if (jptr) {
      for (icol=0; icol<ncols; icol++) {
        int col=jdx[cols[icol]]; /* map the local column number through the J index set */
        ierr = MatSetValues(res,1,&row,1,&col,vals+icol,addv); CHKERRQ(ierr);
      }
    } else {
      ierr = MatSetValues(res,1,&row,ncols,cols,vals,addv); CHKERRQ(ierr);
    }
    ierr = MatRestoreRow(in,irow,&ncols,&cols,&vals); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(res,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (iptr) {ierr = ISRestoreIndices(iptr,&idx); CHKERRQ(ierr);}
  if (jptr) {ierr = ISRestoreIndices(jptr,&jdx); CHKERRQ(ierr);}
  ierr = MatAssemblyEnd(res,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);

  *out = res;
  PetscFunctionReturn(0);
}

#undef __FUNC__ #define __FUNC__ "MatSolveMat_MPIAIJ" int MatSolveMat_MPIAIJ(SLES solve,Mat g12,Vec v2,Vec w2,Mat *c12) { Scalar *a,zero = 0.; int isize,jsize; int *idx,col,idum,ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(g12,&isize,&jsize); CHKERRQ(ierr);
  {
    int istart,iend;
    ierr = MatGetOwnershipRange(g12,&istart,&iend); CHKERRQ(ierr);
    idx = (int *) PetscMalloc((isize+1)*sizeof(int)); CHKPTRQ(idx);
    for (idum=0; idum<isize; idum++) idx[idum]=istart+idum;
  }
  {
    MPI_Comm comm;
    ierr = PetscObjectGetComm((PetscObject)g12,&comm); CHKERRQ(ierr);
    ierr = MatCreateMPIAIJ(comm,isize,jsize,PETSC_DECIDE,PETSC_DECIDE,
                           0,0,0,0,c12); CHKERRQ(ierr);
  }
  ierr = MatGetSize(g12,&idum,&jsize); CHKERRQ(ierr);
  for (col=0; col<jsize; col++) {
    int row;
    ierr = MatGetColumnVector(g12,v2,col); CHKERRQ(ierr);
    ierr = SLESSolve(solve,v2,w2,&idum); CHKERRQ(ierr);
    ierr = VecGetArray(w2,&a); CHKERRQ(ierr);
    for (row=0; row<isize; row++) {
      if (a[row]!=zero) {
        ierr = MatSetValues(*c12,1,idx+row,1,&col,a+row,INSERT_VALUES); CHKERRQ(ierr);
      }
    }
    ierr = VecRestoreArray(w2,&a); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*c12,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*c12,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFree(idx);

  PetscFunctionReturn(0);
}
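
/* A minimal calling sketch for MatSolveMat_MPIAIJ (an illustration only; a11, g12
   and the solver options are assumptions, not taken from this file): the routine
   forms c12 = A11^{-1} G12 column by column, so the SLES must be set up for the
   diagonal block a11 and the work vectors must match the row layout of g12.

     SLES sles;
     Vec  v2,w2;
     Mat  c12;
     int  m,n,ierr;
     ierr = MatGetLocalSize(g12,&m,&n); CHKERRQ(ierr);
     ierr = VecCreateMPI(comm,m,PETSC_DECIDE,&v2); CHKERRQ(ierr);
     ierr = VecDuplicate(v2,&w2); CHKERRQ(ierr);
     ierr = SLESCreate(comm,&sles); CHKERRQ(ierr);
     ierr = SLESSetOperators(sles,a11,a11,DIFFERENT_NONZERO_PATTERN); CHKERRQ(ierr);
     ierr = SLESSetFromOptions(sles); CHKERRQ(ierr);
     ierr = MatSolveMat_MPIAIJ(sles,g12,v2,w2,&c12); CHKERRQ(ierr);
*/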

/****************************************************************
 *                   Matrix Scatter Pipelines                   *
 ****************************************************************/
#undef __FUNC__
#define __FUNC__ "MatCreateScatterPipeline"
int MatCreateScatterPipeline(Mat base_mat,PipelineType pipe_type,PetscObject pipe_obj,
                             Vec *border_vec,VecPipeline *main_pipe)
{
  Mat_MPIAIJ  *Aij = (Mat_MPIAIJ *) base_mat->data;
  MPI_Comm    comm = base_mat->comm;
  VecPipeline pipe;
  Vec         tmp_g,bvec;
  IS          is_g,is_l;
  int         local_size,low_border,local_border,local_domain;
  int         dum,ierr;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(base_mat,&local_domain,&dum); CHKERRQ(ierr);
  local_size = dum-local_domain;
  ierr = VecCreateMPI(comm,local_size,PETSC_DECIDE,&tmp_g); CHKERRQ(ierr);
  ierr = VecAssemblyBegin(tmp_g); CHKERRA(ierr);
  ierr = VecAssemblyEnd(tmp_g); CHKERRA(ierr);
  local_border = ((Mat_SeqAIJ *)(Aij->B)->data)->n;
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_border,Aij->garray,&is_g); CHKERRQ(ierr);

  ierr = VecCreateMPI(comm,local_border,PETSC_DECIDE,&bvec); CHKERRQ(ierr);
  ierr = VecAssemblyBegin(bvec); CHKERRA(ierr);
  ierr = VecAssemblyEnd(bvec); CHKERRA(ierr);
  ierr = VecGetOwnershipRange(bvec,&low_border,&dum); CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,local_border,low_border,1,&is_l); CHKERRQ(ierr);

  ierr = VecPipelineCreate(comm,tmp_g,is_g,bvec,is_l,&pipe); CHKERRQ(ierr);
  /* !!! there was base_mat here. */
  ierr = VecPipelineSetType(pipe,pipe_type,pipe_obj); CHKERRQ(ierr);
  ierr = VecPipelineSetup(pipe); CHKERRQ(ierr);

  ierr = VecDestroy(tmp_g); CHKERRQ(ierr);
  ierr = ISDestroy(is_g); CHKERRQ(ierr);
  ierr = ISDestroy(is_l); CHKERRQ(ierr);

  *border_vec = bvec;
  *main_pipe  = pipe;

  PetscFunctionReturn(0);
}

/* VE from here on down probably superfluous */
#define IBLOCK_TAG 11
#define RBLOCK_TAG 12
#define LBLOCK_TAG 13
#define MBLOCK_TAG 14

/* Collect the rows whose (global) numbers are in "wanted"
 * into a sequential matrix. There are separate creation
 * and collection routines. The case where the index set "wanted"
 * is null on one or more processors is handled correctly.
 */
#undef __FUNC__
#define __FUNC__ "MatGatherCtxCreate"
int MatGatherCtxCreate(Mat mat,IS wanted,MatGatherCtx *rgs)
/* check header, cookie = MATMPIAIJ */
{
  Mat_MPIAIJ   *Aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm     comm = mat->comm;
  int          numtids,mytid,*owners = Aij->rowners;
  int          *request_rows,request_nrows_tot;
  int          nrecvs_all,nsends_all;
  int          *recv_procs_all,*send_procs_all;
  int          *request_nrows,*request_row_ptrs;
  int          *send_rows_ptrs,*send_rows,*send_nrows;
  int          i,ierr,p,send_nrows_tot;
  MatGatherCtx gs;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&numtids);
  MPI_Comm_rank(comm,&mytid);

  if (wanted) {
    int *request_tmp;

    ierr = ISGetSize(wanted,&request_nrows_tot); CHKERRQ(ierr);
    if (request_nrows_tot) {
      request_rows = (int *) PetscMalloc( request_nrows_tot*sizeof(int) ); CHKPTRQ(request_rows);
      ierr = ISGetIndices(wanted,&request_tmp); CHKERRQ(ierr);
      PetscMemcpy(request_rows,request_tmp,request_nrows_tot*sizeof(int));
      ierr = ISRestoreIndices(wanted,&request_tmp); CHKERRQ(ierr);
    } else request_rows = 0;
  } else {
    request_rows      = 0;
    request_nrows_tot = 0;
  }

  request_row_ptrs = (int *) PetscMalloc( (numtids+1)*sizeof(int) ); CHKPTRQ(request_row_ptrs);
  request_nrows    = (int *) PetscMalloc( numtids*sizeof(int) ); CHKPTRQ(request_nrows);

  /* sort requested numbers, and establish pointers for processors */
  ierr = IntSort(request_rows,request_nrows_tot);
  p = 0;
  for (i=0; i<request_nrows_tot; i++) {
  loopback:
    if (request_rows[i]>=owners[p]) {
      request_row_ptrs[p++] = i;
      goto loopback;
    }
  }
  for (i=p; i<=numtids; i++) request_row_ptrs[i] = request_nrows_tot;
  for (p=0; p<numtids; p++) request_nrows[p] = request_row_ptrs[p+1]-request_row_ptrs[p];

/* printf("I am requesting: "); for (i=0; i<numtids; i++) {int j; printf("(%d:) ",i); for (j=request_row_ptrs[i];j<request_row_ptrs[i+1];j++) printf("%d ",request_rows[j]);} printf("\n"); */

  /* every processor gathers how much is wanted from it from each proc */
  send_nrows = (int *) PetscMalloc( (numtids+1)*sizeof(int) ); CHKPTRQ(send_nrows);
  PetscMemzero(send_nrows,(numtids+1)*sizeof(int));
  for (p=0; p<numtids; p++) {
    int len;
    len  = request_nrows[p];
    ierr = MPI_Gather((void *)(&len),1,MPI_INT,(void *)send_nrows,1,MPI_INT,p,comm); CHKERRQ(ierr);
  }

  nrecvs_all = nsends_all = 0;
  for (p=0; p<numtids; p++) {
    if (request_nrows[p]) nrecvs_all++;
    if (send_nrows[p])    nsends_all++;
  }
  recv_procs_all = (int *) PetscMalloc( (nrecvs_all+1)*sizeof(int) ); CHKPTRQ(recv_procs_all);
  send_procs_all = (int *) PetscMalloc( (nsends_all+1)*sizeof(int) ); CHKPTRQ(send_procs_all);
  {
    int nr=0,ns=0;
    for (p=0; p<numtids; p++) {
      if (request_nrows[p]) recv_procs_all[nr++] = p;
      if (send_nrows[p])    send_procs_all[ns++] = p;
    }
  }

  /* set up pointers to receive info on what exactly is wanted */
  send_nrows_tot = 0;
  for (p=0; p<numtids; p++) send_nrows_tot += send_nrows[p];
  send_rows      = (int *) PetscMalloc( (send_nrows_tot+1)*sizeof(int) ); CHKPTRQ(send_rows);
  send_rows_ptrs = (int *) PetscMalloc( (numtids+1)*sizeof(int) ); CHKPTRQ(send_rows_ptrs);
  send_rows_ptrs[0] = 0;
  for (p=0; p<numtids; p++) send_rows_ptrs[p+1] = send_rows_ptrs[p]+send_nrows[p];

  /* now actually receive that info */
  for (p=0; p<numtids; p++) {
    int len;
    /* if (p==mytid) len=0; else */
    len  = request_row_ptrs[p+1]-request_row_ptrs[p];
    ierr = MPI_Gatherv((void *)&(request_rows[request_row_ptrs[p]]),len,MPI_INT,
                       (void *)send_rows,send_nrows,send_rows_ptrs,MPI_INT,p,comm); CHKERRQ(ierr);
  }

/* printf("I am sending: "); for (i=0; i<numtids; i++) {int j; printf("(%d:) ",i); for (j=send_rows_ptrs[i];j<send_rows_ptrs[i+1];j++) printf("%d ",send_rows[j]);} printf("\n"); */

  /* save reusable information */
  gs = (MatGatherCtx) PetscMalloc(sizeof(struct _MatGatherCtx)); CHKPTRQ(gs);
  ierr = VecScatterCopy(Aij->Mvctx,(VecScatter*)&(gs->vs)); CHKERRQ(ierr);
  gs->comm               = comm;
  gs->N                  = Aij->N;
  gs->mytid              = mytid;
  gs->numtids            = numtids;
  gs->request_nrows_tot  = request_nrows_tot;
  gs->send_nrows_tot     = send_nrows_tot;
  gs->nsends_all         = nsends_all;
  gs->nrecvs_all         = nrecvs_all;
  gs->custom_sends       = gs->custom_recvs = 0;
  gs->send_procs_all     = send_procs_all;
  gs->recv_procs_all     = recv_procs_all;
  gs->send_rows          = send_rows;
  gs->send_rows_ptrs     = send_rows_ptrs;
  gs->request_row_ptrs   = request_row_ptrs;
  gs->nsends_outstanding = 0;
  *rgs = gs;

  /* free temporaries */
  PetscFree(send_nrows);
  PetscFree(request_nrows);

  PetscFunctionReturn(0);
}

#undef __FUNC__ #define __FUNC__ "MatGatherCtxDestroy" int MatGatherCtxDestroy(MatGatherCtx gs) { PetscFunctionBegin; PetscFree(gs->send_procs_all); PetscFree(gs->recv_procs_all); PetscFree(gs->send_rows); PetscFree(gs->send_rows_ptrs); PetscFree(gs->request_row_ptrs); PetscFunctionReturn(0); }

#undef __FUNC__ #define __FUNC__ "MatGatherRowsReceive" static int MatGatherRowsReceive (MatGatherCtx gs,PipelineFunction pf,PetscObject pl,Mat *return_mat) { Mat catch_mat; MPI_Comm comm = gs->comm; int ierr,count,ip; int *recv_procs,nrecvs;

  PetscFunctionBegin;
  if (pf) gs->custom_recvs = 1;
  else    gs->custom_recvs = 0;

/* printf("Selecting sender procs out of: "); for(ip=0;ip<gs->nrecvs_all;ip++)printf("%d, ",gs->recv_procs_all[ip]);printf("\n"); printf("With");if((int)pf);else printf("out");printf(" selection function\n"); */

  recv_procs = (int *) PetscMalloc( (gs->nrecvs_all+1)*sizeof(int) ); CHKPTRQ(recv_procs);
  nrecvs = 0;
  for (ip=0; ip<gs->nrecvs_all; ip++) {
    int p = gs->recv_procs_all[ip];
    /* if (p==gs->mytid) continue;*/
    if (pf) {
      if ((*pf)(p,pl)) recv_procs[nrecvs++] = p;
    } else {
      recv_procs[nrecvs++] = p;
    }
  }
  /*
  printf("Setting up to receive from: ");
  for(ip=0;ip<nrecvs;ip++)printf("%d, ",recv_procs[ip]);printf("\n");
  */
  ierr = MatCreateSeqAIJ(MPI_COMM_SELF,gs->request_nrows_tot,gs->N,0,0,&catch_mat); CHKERRQ(ierr);

  count = nrecvs;
  while (count) {
    MPI_Status recv_status;
    while (1)
      for (ip=0; ip<nrecvs; ip++) {
        int flag;
        ierr = MPI_Iprobe(recv_procs[ip],MBLOCK_TAG,comm,&flag,&recv_status); CHKERRQ(ierr);
        if (flag) goto found;
      }
  found:
    {
      int  p,buflen,nrows,irow,unpacloc=0;
      void *buf;

      p = recv_status.MPI_SOURCE;
      MPI_Get_count(&recv_status,MPI_BYTE,&buflen);
      buf = (void *) PetscMalloc( buflen*sizeof(MPI_BYTE) ); CHKPTRQ(buf);
      MPI_Recv(buf,buflen,MPI_PACKED,p,MBLOCK_TAG,comm,&recv_status);
      nrows = gs->request_row_ptrs[p+1]-gs->request_row_ptrs[p];
      {
        int nrows_test;
        MPI_Unpack(buf,buflen,&unpacloc,&nrows_test,1,MPI_INT,comm);
        if (!(nrows==nrows_test)) SETERRQ(p+1,0,"Nrows mismatch");
      }
      for (irow=0; irow<nrows; irow++) {
        int    rlen,*idx,row=gs->request_row_ptrs[p]+irow;
        Scalar *val;
        MPI_Unpack(buf,buflen,&unpacloc,&rlen,1,MPI_INT,comm);
        idx = (int *) PetscMalloc( (1+rlen)*sizeof(int) ); CHKPTRQ(idx);
        MPI_Unpack(buf,buflen,&unpacloc,idx,rlen,MPI_INT,comm);
        val = (Scalar *) PetscMalloc( (1+rlen)*sizeof(Scalar) ); CHKPTRQ(val);
        MPI_Unpack(buf,buflen,&unpacloc,val,rlen,MPI_DOUBLE,comm);
        ierr = MatSetValues(catch_mat,1,&row,rlen,idx,val,INSERT_VALUES); CHKERRQ(ierr);
        PetscFree(idx);
        PetscFree(val);
      }
      PetscFree(buf);
    }
    count--;
  }
  ierr = MatAssemblyBegin(catch_mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(catch_mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);

  if (gs->nsends_outstanding) {
    MPI_Status *send_status;
    send_status = (MPI_Status *) PetscMalloc(gs->nsends_outstanding*sizeof(MPI_Status)); CHKPTRQ(send_status);
    MPI_Waitall(gs->nsends_outstanding,gs->send_requests,send_status);
    PetscFree(send_status);
    for (ip=0; ip<gs->nsends_outstanding; ip++) PetscFree(gs->send_buffers[ip]);
    PetscFree(gs->send_buffers);
    gs->nsends_outstanding = 0;
  }

*return_mat = catch_mat;

PetscFree(recv_procs);

  PetscFunctionReturn(0);
}

#undef __FUNC__ #define __FUNC__ "MatGatherRowsSend" static int MatGatherRowsSend (MatGatherCtx gs,Mat mat,PipelineFunction pf,PetscObject pl) { MPI_Comm comm = gs->comm; int *send_procs,nsends; int ip;

  PetscFunctionBegin;
  if (!pf) gs->custom_sends = 0;
  else     gs->custom_sends = 1;

/* printf("Selecting receiver procs out of: "); for(ip=0;ip<gs->nsends_all;ip++)printf("%d, ",gs->send_procs_all[ip]);printf("\n"); printf("With");if((int)pf);else printf("out");printf(" selection function\n"); */

  /* Determine what processors we are going to send to */
  send_procs = (int *) PetscMalloc( (gs->nsends_all+1)*sizeof(int) ); CHKPTRQ(send_procs);
  nsends = 0;
  for (ip=0; ip<gs->nsends_all; ip++) {
    int p = gs->send_procs_all[ip];
    /* if (p==gs->mytid) continue;*/
    if (pf) {
      if ((*pf)(p,pl)) send_procs[nsends++] = p;
    } else {
      send_procs[nsends++] = p;
    }
  }

/* printf("Setting up to send to: "); for(ip=0;ip<nsends;ip++)printf("%d, ",send_procs[ip]);printf("\n"); */ if ((!gs->custom_sends) && nsends) { gs->nsends_outstanding = nsends; if (nsends) { gs->send_requests = (MPI_Request *) PetscMalloc (nsends*sizeof(MPI_Request)); CHKPTRQ(gs->send_requests); gs->send_buffers = (void **) PetscMalloc (nsends*sizeof(void*)); CHKPTRQ(gs->send_buffers); } } else { gs->nsends_outstanding = 0; }

  /* Loop over all processors, then all rows to be sent there,
   * to determine row sizes and cumulative buffer size.
   * The packed buffer contains 1 int (number of rows to follow),
   * and for each row 1 int (the length), the indices and values.
   */
  for (ip=0; ip<nsends; ip++) {
    void *packed_send_buffer;
    int  ierr,irow,nrows,p=send_procs[ip],s=0,ts,pacloc = 0;

    nrows = gs->send_rows_ptrs[p+1]-gs->send_rows_ptrs[p];
    MPI_Pack_size(1,MPI_INT,comm,&ts); s += ts;
    for (irow=0; irow<nrows; irow++) {
      int tlen,row = gs->send_rows[irow+gs->send_rows_ptrs[p]];
      ierr = MatGetRowLen_MPIAIJ(mat,row,&tlen); CHKERRQ(ierr);
      MPI_Pack_size(1,MPI_INT,comm,&ts);       s += ts;
      MPI_Pack_size(tlen,MPI_INT,comm,&ts);    s += ts;
      MPI_Pack_size(tlen,MPI_DOUBLE,comm,&ts); s += ts;
    }
    /*printf("expecting to pack %d\n",s);*/
    packed_send_buffer = (void *) PetscMalloc(s); CHKPTRQ(packed_send_buffer);

    MPI_Pack((void *)&nrows,1,MPI_INT,packed_send_buffer,s,&pacloc,comm);
    /*printf("Packing rows: ");*/
    for (irow=0; irow<nrows; irow++) {
      int    row,ncols,*cols;
      Scalar *tvals;
      row = gs->send_rows[irow+gs->send_rows_ptrs[p]];
      /*printf("%d ",row);*/
      ierr = MatGetRow(mat,row,&ncols,&cols,&tvals); CHKERRQ(ierr);
      /*printf("(#e=%d: ",ncols); {int i;for(i=0;i<ncols;i++)printf("%d,",cols[i]);}printf(")"); */
      MPI_Pack((void *)&ncols,1,MPI_INT,packed_send_buffer,s,&pacloc,comm);
      MPI_Pack((void *)cols,ncols,MPI_INT,packed_send_buffer,s,&pacloc,comm);
      MPI_Pack((void *)tvals,ncols,MPI_DOUBLE,packed_send_buffer,s,&pacloc,comm);
      ierr = MatRestoreRow(mat,row,&ncols,&cols,&tvals); CHKERRQ(ierr);
    }
    /*printf("\n");*/
    if (pacloc>s) SETERRQ(ip+1,0,"Pack overflow");

    if (gs->custom_sends) {
      MPI_Send(packed_send_buffer,pacloc,MPI_PACKED,p,MBLOCK_TAG,comm);
      PetscFree(packed_send_buffer);
    } else {
      MPI_Isend(packed_send_buffer,pacloc,MPI_PACKED,p,MBLOCK_TAG,comm,gs->send_requests+ip);
      gs->send_buffers[ip] = packed_send_buffer;
    }
  }

  PetscFree(send_procs);
  PetscFunctionReturn(0);
}
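
/* For reference, a sketch of the buffer layout produced by the packing loop above
   (this restates what the code packs; it is not a separate wire format):

     [ nrows | len_0 , cols_0[0..len_0-1] , vals_0[0..len_0-1]
             | len_1 , cols_1[...]        , vals_1[...]
             | ... ]

   Counts and column indices are packed as MPI_INT, values as MPI_DOUBLE, in the
   same order in which MatGatherRowsReceive() unpacks them. */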

#undef __FUNC__ #define __FUNC__ "MatGatherRows" int MatGatherRows(Mat mat,MatGatherCtx gs, Mat *res_mat) /* check header, cookie = MATMPIAIJ */ { int ierr;

  PetscFunctionBegin;
  ierr = MatGatherRowsSend(gs,mat,0,0); CHKERRQ(ierr);
  ierr = MatGatherRowsReceive(gs,0,0,res_mat); CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
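
/* A minimal usage sketch of the gather interface (an illustration only; "mat" is
   an assumed assembled MATMPIAIJ matrix and wanted_rows/nwanted are assumed arrays
   of global row numbers this process needs):

     IS           rows;
     MatGatherCtx gather;
     Mat          local_rows;
     int          ierr;
     ierr = ISCreateGeneral(MPI_COMM_SELF,nwanted,wanted_rows,&rows); CHKERRQ(ierr);
     ierr = MatGatherCtxCreate(mat,rows,&gather); CHKERRQ(ierr);
     ierr = MatGatherRows(mat,gather,&local_rows); CHKERRQ(ierr);
     ... use the sequential matrix local_rows ...
     ierr = MatDestroy(local_rows); CHKERRQ(ierr);
     ierr = MatGatherCtxDestroy(gather); CHKERRQ(ierr);
     ierr = ISDestroy(rows); CHKERRQ(ierr);
*/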

#undef __FUNC__ #define __FUNC__ "MatGatherRowsPipelineBegin" int MatGatherRowsPipelineBegin (Mat mat,PipelineDirection ptype, MatGatherCtx gs, Mat *ret_mat) { VecPipeline pipe = gs->vs; PipelineFunction pf; int ierr;

  PetscFunctionBegin;
  if (ptype == PIPELINE_UP) pf = pipe->dnfn;
  else                      pf = pipe->upfn;

  if (!(int)pf || IsProcYes((int)pf) ) {
    ierr = MatGatherRowsSend(gs,mat,0,0); CHKERRQ(ierr);
    ierr = MatGatherRowsReceive(gs,0,0,ret_mat); CHKERRQ(ierr);
  } else {
    PetscObject pl = pipe->custom_pipe_data;
    ierr = MatGatherRowsReceive(gs,pf,pl,ret_mat); CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__ #define __FUNC__ "MatGatherRowsPipelineEnd" int MatGatherRowsPipelineEnd (Mat mat,PipelineDirection ptype, MatGatherCtx gs, Mat *ret_mat) { VecPipeline pipe = gs->vs; PipelineFunction pf; PetscObject pl = pipe->custom_pipe_data; int ierr,flg;

  PetscFunctionBegin;
  if (ptype == PIPELINE_UP) pf = pipe->upfn;
  else                      pf = pipe->dnfn;

flg = IsProcYes((int)pf);

if (((int)pf) && (!flg)) {ierr = MatGatherRowsSend(gs,mat,pf,pl); CHKERRQ(ierr);}

  PetscFunctionReturn(0);
}

/* VE !!! not removed */
/*
int MatGetSubMatrix_MPIAIJ(Mat A,IS isrow,IS iscol,MatGetSubMatrixCall scall,Mat *B)
{
  MatGatherCtx get_strip;
  Mat          wide_mat;
  IS           all_rows;
  int          ierr,nrows;

  PetscFunctionBegin;
  ierr = MatGatherCtxCreate(A,isrow,&get_strip); CHKERRQ(ierr);
  ierr = MatGatherRows(A,get_strip,&wide_mat); CHKERRQ(ierr);

  if (isrow) {
    Mat *xmat;
    ierr = ISGetSize(isrow,&nrows); CHKERRQ(ierr);
    ierr = ISCreateStride(MPI_COMM_SELF,nrows,0,1,&all_rows); CHKERRQ(ierr);
    xmat = (Mat *) PetscMalloc(sizeof(Mat)); CHKPTRQ(xmat);
    ierr = MatGetSubMatrices(wide_mat,1,&all_rows,&iscol,MAT_INITIAL_MATRIX,&xmat); CHKERRQ(ierr);
    *B = *xmat;
    PetscFree(xmat);
    ierr = ISDestroy(all_rows); CHKERRQ(ierr);
  } else *B = 0;
  ierr = MatGatherCtxDestroy(get_strip); CHKERRQ(ierr);
  if (isrow) {
    ierr = MatDestroy(wide_mat); CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
*/

Location: src/mat/impls/aij/mpi/mpixtra.c

