program example2
      implicit none
*
*     Simple example showing how to generate a ScaLAPACK matrix.
*     Contribution from Ed d'Azevedo, ORNL, 2005.
*
*     Every process fills only the entries of the m-by-n global
*     matrix that fall in its own blocks of the 2D block-cyclic
*     layout described by descA.  Entry values are produced by
*     generate_Aij.  A small matrix is afterwards handed to
*     pdlaprnt for printing.
*
*     Index of each field inside an array descriptor such as descA.
*
      integer BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
     $        LLD_, MB_, M_, NB_, N_, RSRC_
      parameter ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
     $            CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $            RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
*
*
      integer descA(DLEN_)
*
*     work is the scratch array passed to pdlaprnt
*
      integer lwork
      parameter(lwork=10*1000)
      double precision work(lwork)
*
*     Asize is the fixed capacity of the local storage A
*
      integer Asize
      parameter(Asize=32*1000*1000)
*
*     unit number passed to pdlaprnt
*
      integer nout
      parameter(nout=16)
*
      double precision aij
      double precision A(Asize)
*
      integer m,n,mb,nb
      integer info, ierr(1)
      integer iam,nprocs
      integer icontext, myprow,mypcol,nprow,npcol
      integer rsrc,csrc,lld,Locp,Locq,Aneed
*
      integer ia,ja,irprnt,icprnt
      logical isroot, isok
*
      integer lrindx,lcindx,ipos
      logical do_print
*
      integer lroffset,lcoffset
      integer ia_first, ja_first
      integer iastart,iaend, jastart,jaend
*
*     iarow,iacol receive the owner coordinates returned by infog2l
*     idum is a placeholder for the unused RA/CA arrays of igamx2d
*
      integer iarow,iacol
      integer idum(1)
*
      double precision t1,t2
      double precision MPI_Wtime
      external MPI_Wtime
*
      integer numroc, indxg2p
      external numroc, indxg2p
      external infog2l, descinit
*
*     global matrix is m-by-n, distributed in mb-by-nb blocks
*
      mb = 50
      nb = 50
      m = 400
      n = 400
*
*     -----------------------
*     setup blacs environment
*     -----------------------
      call blacs_pinfo( iam,nprocs)
*
*     search downward from int(sqrt(nprocs))+1 for the first nprow
*     that divides nprocs exactly, so that nprow*npcol = nprocs
*
      do nprow=int( sqrt(real(nprocs)) )+1,1,-1
         npcol = nprocs/nprow
         if (nprow*npcol.eq.nprocs) goto 11
      enddo
 11   continue
*
      call blacs_get(-1,0,icontext)
      call blacs_gridinit( icontext, 'Col-major', nprow,npcol)

      call blacs_gridinfo( icontext, nprow,npcol, myprow,mypcol)
      isroot = (myprow.eq.0).and.(mypcol.eq.0)
*
      if (isroot) then
         write(*,*) 'nprow,npcol ', nprow,npcol
         write(*,*) 'm,n ', m,n
         write(*,*) 'mb,nb ', mb,nb
      endif
*
*     ---------------------------------------------------------
*     compute local extent and check storage for local piece
*     ---------------------------------------------------------
*
      csrc = 0
      rsrc = 0
*
      Locq = numroc(n,nb,mypcol,csrc,npcol)
      Locq = max(1,Locq)
      Locp = numroc(m,mb,myprow,rsrc,nprow)
*
      lld = max(Locp,1)
      Aneed = lld*Locq
*
*     Aneed differs between processes, so the decision to abort is
*     taken on the largest value over the whole grid.  Every process
*     then takes the same branch and the collective calls that
*     follow stay matched.  Aneed is at least 1, hence positive.
*
      ierr(1) = Aneed
      call igamx2d( icontext, 'All', ' ', 1,1, ierr,1,
     $              idum,idum, -1, -1,-1)
      isok = (ierr(1).le.Asize)
      if (.not.isok) then
         if (isroot) then
            write(*,*) 'increase Asize to ',ierr(1) + 1
         endif
         goto 999
      endif
*
*     ----------------
*     setup descriptor
*     ----------------
*
      call descinit(descA,m,n,mb,nb,rsrc,csrc,icontext,lld,info)
*
*     Sum abs(info) over the grid and test the reduced value, so a
*     failure on any process makes every process leave together.
*     The value printed below is the root's own info.
*
      ierr(1) = abs(info)
      call igsum2d( icontext, 'All', ' ',1,1,ierr,1,-1,-1)
      isok = (ierr(1).eq.0)
      if (.not.isok) then
         if (isroot) then
            write(*,*) 'descinit returns info = ',info
         endif
         goto 999
      endif
*
*     ---------------------------------------------
*     better method to setup matrix:
*     take advantage of the 2D block-cyclic format
*
*     (ia,ja) are global indices
*     ---------------------------------------------
*
      call blacs_barrier( icontext, 'All')
      t1 = MPI_Wtime()
*
*     -------------------------------------------------
*     compute the first global index owned by this
*     process in each dimension; the else branches wrap
*     around when the process lies before the source
*     -------------------------------------------------
*
      if (myprow.ge.descA(RSRC_)) then
         ia_first =  (myprow-descA(RSRC_))*descA(MB_) + 1
      else
         ia_first = (myprow + (nprow-descA(RSRC_)))*descA(MB_)+1
      endif
*
      if (mypcol.ge.descA(CSRC_)) then
         ja_first =  (mypcol-descA(CSRC_))*descA(NB_) + 1
      else
         ja_first = (mypcol + (npcol-descA(CSRC_)))*descA(NB_)+1
      endif
*
*     ----------------------------------
*     Note skip by npcol*nb and nprow*mb
*     ----------------------------------
*
      do jastart=ja_first,descA(N_), npcol*descA(NB_)
      do iastart=ia_first,descA(M_), nprow*descA(MB_)
*
*        clip the last block at the matrix boundary
*
         iaend = min( descA(M_), iastart + descA(MB_)-1)
         jaend = min( descA(N_), jastart + descA(NB_)-1)
*
*        ---------------------------------------------
*        block (iastart:iaend, jastart:jaend) is
*        within the same block on the local processor
*
*        Need to compute local array index for 1st entry
*        in the local block.  The owner coordinates go
*        to iarow,iacol so that rsrc,csrc keep the
*        values used to build the descriptor.
*        ---------------------------------------------
*
         ia = iastart
         ja = jastart
         call infog2l(ia,ja,descA,nprow,npcol,myprow,mypcol,
     &                  lroffset,lcoffset, iarow,iacol)
*
         do ja=jastart,jaend
         do ia=iastart,iaend
*
            call generate_Aij( m,n, ia,ja, aij )
*
            lrindx = lroffset + (ia-iastart)
            lcindx = lcoffset + (ja-jastart)
*
*           column-major position inside the local array
*
            ipos = lrindx + (lcindx-1)*descA(LLD_)
            A(ipos) = aij
*
         enddo
         enddo
*
      enddo
      enddo
*
      call blacs_barrier( icontext, 'All')
      t2 = MPI_Wtime()
      if (isroot) then
         write(*,*) 'time to build matrix is ',t2-t1,' sec'
      endif
*
*     --------------------------------------------
*     if matrix is not too big, print out content
*     for debugging; the call is skipped unless
*     the work array holds at least mb entries
*     --------------------------------------------
*
      do_print = ((m*n.le.200*1000).and.(lwork.ge.mb))
      if (do_print) then
*
         ia = 1
         ja = 1
         irprnt = 0
         icprnt = 0
         call pdlaprnt(m,n,A,ia,ja,descA,irprnt,icprnt,'A',nout,work)
*
      endif
*
999   continue
*
*     ---------------
*     prepare to exit
*     ---------------
*
      call blacs_barrier(icontext, 'All')
      call blacs_gridexit( icontext )
      call blacs_exit(0)
      stop
      end
*
      subroutine generate_Aij( m,n, ia,ja, aij )
*
*     Return in aij the value assigned to global entry (ia,ja):
*
*        aij = ia + (ja-1)*m
*
*     with every term converted to double precision before the
*     arithmetic is done.  n is accepted but not used.
*
      implicit none
      integer m,n, ia,ja
      double precision aij
      double precision colbase
*
*     contribution of the columns that precede column ja
*
      colbase = dble(m)*dble(ja-1)
      aij = colbase + dble(ia)
      return
      end
*