1 SUBROUTINE pdtrord( COMPQ, SELECT, PARA, N, T, IT, JT,
2 $ DESCT, Q, IQ, JQ, DESCQ, WR, WI, M, WORK, LWORK,
3 $ IWORK, LIWORK, INFO )
16 INTEGER INFO, LIWORK, LWORK, M, N,
21 INTEGER PARA( 6 ), DESCT( * ), DESCQ( * ), IWORK( * )
22 DOUBLE PRECISION Q( * ), T( * ), WI( * ), WORK( * ), WR( * )
299 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
300 $ lld_, mb_, m_, nb_, n_, rsrc_
301 DOUBLE PRECISION ZERO, ONE
302 PARAMETER ( TOP =
'1-Tree',
303 $ block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
304 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
305 $ rsrc_ = 7, csrc_ = 8, lld_ = 9,
306 $ zero = 0.0d+0, one = 1.0d+0 )
309 LOGICAL LQUERY, PAIR, SWAP, WANTQ,
310 $ ISHH, FIRST, SKIP1CR, BORDER, LASTWAIT
311 INTEGER NPROW, NPCOL, MYROW, MYCOL, NB, NPROCS,
312 $ IERR, DIM1, INDX, LLDT, TRSRC, TCSRC, ILOC1,
313 $ jloc1, myierr, ictxt,
314 $ rsrc1, csrc1, iloc3, jloc3, trsrc3,
315 $ tcsrc3, iloc, jloc, trsrc4, tcsrc4,
316 $ flops, i, ilo, ihi, j, k, kk, kks,
317 $ ks, liwmin, lwmin, mmult, n1, n2,
318 $ ncb, ndtraf, nitraf, nwin, numwin, pdtraf,
319 $ pitraf, pdw, wineig, winsiz, lldq,
320 $ rsrc, csrc, ililo, ilihi, ilsel, irsrc,
321 $ icsrc, ipiw, ipw1, ipw2, ipw3, tihi, tilo,
322 $ lihi, window, lilo, lsel, buffer,
323 $ nmwin2, bufflen, lrows, lcols, iloc2, jloc2,
324 $ wneicr, window0, rsrc4, csrc4, lihi4, rsrc3,
325 $ csrc3, rsrc2, csrc2, lihic, lihi1, ilen4,
326 $ seli4, ilen1, dim4, ipw4, qrows, trows,
327 $ tcols, ipw5, ipw6, ipw7, ipw8, jloc4,
328 $ east, west, iloc4, south, north, indxs,
329 $ itt, jtt, ilen, dlen, indxe, trsrc1, tcsrc1,
330 $ trsrc2, tcsrc2, ilos, dir, tlihi, tlilo, tlsel,
331 $ round, last, win0s, win0e, wine, mmax, mmin
332 DOUBLE PRECISION ELEM, ELEM1, ELEM2, ELEM3, ELEM4, SN, CS, TMP,
336 INTEGER IBUFF( 8 ), IDUM1( 1 ), IDUM2( 1 )
340 INTEGER NUMROC, INDXG2P, INDXG2L
341 EXTERNAL lsame, numroc, indxg2p, indxg2l
346 $
infog2l, dgsum2d, dgesd2d, dgerv2d, dgebs2d,
347 $ dgebr2d, igsum2d, blacs_gridinfo, igebs2d,
351 INTRINSIC abs,
max, sqrt,
min
360 ictxt = desct( ctxt_ )
361 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
367 IF( nprow.EQ.-1 )
THEN
373 lquery = lwork.EQ.-1 .OR. liwork.EQ.-1
378 CALL chk1mat( n, 5, n, 5, it, jt, desct, 9, info )
381 CALL chk1mat( n, 5, n, 5, iq, jq, descq, 13, info )
387 IF( desct( mb_ ).NE.desct( nb_ ) ) info = -(1000*9 + mb_)
390 IF( descq( mb_ ).NE.descq( nb_ ) ) info = -(1000*13 + mb_)
393 IF( desct( mb_ ).NE.descq( mb_ ) ) info = -(1000*9 + mb_)
399 IF( n.NE.desct( mb_ ) .AND. desct( mb_ ).LT.3 )
400 $ info = -(1000*9 + mb_)
401 IF( n.NE.descq( mb_ ) .AND. descq( mb_ ).LT.3 )
402 $ info = -(1000*13 + mb_)
409 IF( para(1).LT.1 .OR. para(1).GT.
min(nprow,npcol) )
410 $ info = -(1000 * 4 + 1)
411 IF( para(2).LT.1 .OR. para(2).GE.para(3) )
412 $ info = -(1000 * 4 + 2)
413 IF( para(3).LT.1 .OR. para(3).GT.nb )
414 $ info = -(1000 * 4 + 3)
415 IF( para(4).LT.0 .OR. para(4).GT.100 )
416 $ info = -(1000 * 4 + 4)
417 IF( para(5).LT.1 .OR. para(5).GT.nb )
418 $ info = -(1000 * 4 + 5)
419 IF( para(6).LT.1 .OR. para(6).GT.para(2) )
420 $ info = -(1000 * 4 + 6)
426 IF( it.NE.1 ) info = -6
427 IF( jt.NE.it ) info = -7
428 IF( iq.NE.1 ) info = -10
429 IF( jq.NE.iq ) info = -11
435 CALL pchk1mat( n, 5, n, 5, it, jt, desct, 9, 0, idum1,
439 CALL pchk1mat( n, 5, n, 5, iq, jq, descq, 13, 0, idum1,
443 CALL pchk2mat( n, 5, n, 5, it, jt, desct, 9, n, 5, n, 5,
444 $ iq, jq, descq, 13, 0, idum1, idum2, info )
449 IF( info.EQ.0 .OR. lquery )
THEN
451 wantq = lsame( compq,
'V' )
467 CALL infog2l( k+1, k, desct, nprow, npcol,
468 $ myrow, mycol, itt, jtt, trsrc, tcsrc )
469 IF( myrow.EQ.trsrc .AND. mycol.EQ.tcsrc )
THEN
470 elem = t( (jtt-1)*lldt + itt )
471 IF( elem.NE.zero )
THEN
472 IF(
SELECT(k).NE.0 .AND.
473 $
SELECT(k+1).EQ.0 )
THEN
476 ELSEIF(
SELECT(k).EQ.0 .AND.
477 $
SELECT(k+1).NE.0 )
THEN
484 IF(
SELECT(k).NE.0 ) m = m + 1
489 $
CALL igamx2d( ictxt,
'All', top, 1, 1, mmax, 1, -1,
492 $
CALL igamn2d( ictxt,
'All', top, 1, 1, mmin, 1, -1,
494 IF( mmax.GT.mmin )
THEN
497 $
CALL igamx2d( ictxt,
'All', top, n, 1,
SELECT, n,
498 $ -1, -1, -1, -1, -1 )
506 trows = numroc( n, nb, myrow, desct(rsrc_), nprow )
507 tcols = numroc( n, nb, mycol, desct(csrc_), npcol )
508 lwmin = n + 7*nb**2 + 2*trows*para( 3 ) + tcols*para( 3 ) +
509 $
max( trows*para( 3 ), tcols*para( 3 ) )
510 liwmin = 5*para( 1 ) + para( 2 )*para( 3 ) -
511 $ para( 2 ) * ( para( 2 ) + 1 ) / 2
513 IF( lwork.LT.lwmin .AND. .NOT.lquery )
THEN
515 ELSE IF( liwork.LT.liwmin .AND. .NOT.lquery )
THEN
524 $
CALL igamx2d( ictxt,
'All', top, 1, 1, info, 1, -1, -1, -1,
529 IF( info.NE.0 .AND. .NOT.lquery )
THEN
531 CALL pxerbla( ictxt,
'PDTRORD', -info )
533 ELSEIF( lquery )
THEN
534 work( 1 ) = dble(lwmin)
541 IF( m.EQ.n .OR. m.EQ.0 )
GO TO 545
546 wineig =
max( para( 2 ), 2 )
547 winsiz =
min(
max( para( 3 ), para( 2 )*2 ), nb )
564 ilihi = ililo + numwin
565 ilsel = ilihi + numwin
566 irsrc = ilsel + numwin
567 icsrc = irsrc + numwin
568 ipiw = icsrc + numwin
586 IF(
SELECT(ilo).NE.0 )
GO TO 40
609 IF(
SELECT(ilos).EQ.0 )
GO TO 52
611 IF(
SELECT(ilos+1).NE.0 .AND. mod(ilos,nb).EQ.0 )
THEN
612 CALL pdelget(
'All', top, elem, t, ilos+1, ilos, desct )
613 IF( elem.NE.zero )
GO TO 52
628 nmwin2 = (iceil(ihi,nb)*nb - (ilo-mod(ilo,nb)+1)+1) / nb
629 nmwin2 =
min(
min( numwin, nmwin2 ), iceil(n,nb) - j + 1 )
636 iwork( ilsel+k-1) = 0
637 iwork( ililo+k-1) =
max( ilo, (j-1)*nb+(k-1)*nb+1 )
638 lilo = iwork( ililo+k-1 )
640 IF(
SELECT(lilo).NE.0 .AND. lilo.LT.(j+k-1)*nb )
THEN
642 IF( lilo.LE.n )
GO TO 82
644 iwork( ililo+k-1 ) = lilo
649 lilo = iwork(ililo+k-1)
650 IF( lilo.GT.nb )
THEN
651 CALL pdelget(
'All', top, elem, t, lilo, lilo-1, desct )
652 IF( elem.NE.zero )
THEN
653 IF( lilo.LT.(j+k-1)*nb )
THEN
654 iwork(ililo+k-1) = iwork(ililo+k-1) + 1
656 iwork(ililo+k-1) = iwork(ililo+k-1) - 1
664 iwork( ilihi+k-1 ) = iwork( ililo+k-1 )
665 iwork( irsrc+k-1 ) = indxg2p( iwork(ililo+k-1), nb, myrow,
666 $ desct( rsrc_ ), nprow )
667 iwork( icsrc+k-1 ) = indxg2p( iwork(ililo+k-1), nb, mycol,
668 $ desct( csrc_ ), npcol )
669 tilo = iwork(ililo+k-1)
670 tihi =
min( n, iceil( tilo, nb ) * nb )
671 DO 90 kk = tihi, tilo, -1
672 IF(
SELECT(kk).NE.0 )
THEN
673 iwork(ilihi+k-1) =
max(iwork(ilihi+k-1) , kk )
674 iwork(ilsel+k-1) = iwork(ilsel+k-1) + 1
675 IF( iwork(ilsel+k-1).GT.wineig )
THEN
676 iwork(ilihi+k-1) = kk
690 lihi = iwork(ilihi+k-1)
692 CALL pdelget(
'All', top, elem, t, lihi+1, lihi, desct )
693 IF( elem.NE.zero )
THEN
694 IF( iceil( lihi, nb ) .NE. iceil( lihi+1, nb ) .OR.
695 $ iwork( ilsel+k-1 ).EQ.wineig )
THEN
696 iwork( ilihi+k-1 ) = iwork( ilihi+k-1 ) - 1
697 IF( iwork( ilsel+k-1 ).GT.2 )
698 $ iwork( ilsel+k-1 ) = iwork( ilsel+k-1 ) - 1
700 iwork( ilihi+k-1 ) = iwork( ilihi+k-1 ) + 1
701 IF(
SELECT(lihi+1).NE.0 )
702 $ iwork( ilsel+k-1 ) = iwork( ilsel+k-1 ) + 1
714 lilo = iwork( ililo + k - 1 )
715 lihi = iwork( ilihi + k - 1 )
716 lsel = iwork( ilsel + k - 1 )
717 IF( lsel.EQ.0 .OR. lilo.EQ.lihi )
THEN
718 lihi = iwork( ilihi + k - 1 )
719 iwork( ilihi + k - 1 ) = (iceil(lihi,nb)-1)*nb + 1
720 iwork( ililo + k - 1 ) = iwork( ilihi + k - 1 ) + 1
731 DO 95 window = 1, nmwin2
732 rsrc = iwork(irsrc+window-1)
733 csrc = iwork(icsrc+window-1)
734 IF( myrow.EQ.rsrc .OR. mycol.EQ.csrc )
THEN
735 tlilo = iwork( ililo + window - 1 )
736 tlihi = iwork( ilihi + window - 1 )
737 tlsel = iwork( ilsel + window - 1 )
738 IF( (.NOT. ( lihi .GE. lilo + lsel ) ) .AND.
739 $ ( (tlihi .GE. tlilo + tlsel) .OR. first ) )
THEN
740 IF( first ) first = .false.
754 IF( lilo.EQ.ihi .AND. lihi.EQ.ilo .AND. lsel.EQ.m )
766 IF( first .OR. ( lihi .GE. lilo + lsel ) )
THEN
773 DO 110 window = 1, nmwin2
774 rsrc = iwork(irsrc+window-1)
775 csrc = iwork(icsrc+window-1)
780 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc )
THEN
781 lilo = iwork(ililo+window-1)
782 lihi = iwork(ilihi+window-1)
783 lsel = iwork(ilsel+window-1)
787 i =
max( lilo, lihi - winsiz + 1 )
793 CALL infog2l( i, i-1, desct, nprow, npcol, myrow,
794 $ mycol, iloc, jloc, rsrc, csrc )
795 IF( t( lldt*(jloc-1) + iloc ).NE.zero )
801 CALL infog2l( i, i, desct, nprow, npcol,
802 $ myrow, mycol, iloc1, jloc1, rsrc, csrc )
818 swap =
SELECT( k ).NE.0
820 CALL infog2l( k+1, k, desct, nprow, npcol,
821 $ myrow, mycol, iloc, jloc, rsrc, csrc )
822 IF( t( lldt*(jloc-1) + iloc ).NE.zero )
834 nitraf = liwork - pitraf + 1
835 ndtraf = lwork - pdtraf + 1
837 $ t(lldt*(jloc1-1) + iloc1), lldt, kk,
838 $ kks, nitraf, iwork( pitraf ), ndtraf,
839 $ work( pdtraf ), work(ipw1), ierr )
840 pitraf = pitraf + nitraf
841 pdtraf = pdtraf + ndtraf
846 DO 150 j = i+kk-1, i+kks, -1
847 SELECT(j+1) =
SELECT(j-1)
852 DO 160 j = i+kk-1, i+kks, -1
853 SELECT(j) =
SELECT(j-1)
858 IF ( ierr.EQ.1 .OR. ierr.EQ.2 )
THEN
868 IF ( ierr.EQ.2 )
THEN
869 SELECT( i+kks-3 ) = 1
870 SELECT( i+kks-1 ) = 0
893 DO 175 window = 1, nmwin2
894 rsrc = iwork(irsrc+window-1)
895 csrc = iwork(icsrc+window-1)
896 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc )
THEN
924 DO 111 window = 1, nmwin2
925 rsrc = iwork(irsrc+window-1)
926 csrc = iwork(icsrc+window-1)
927 IF( myrow.EQ.rsrc .OR. mycol.EQ.csrc )
THEN
928 lilo = iwork(ililo+window-1)
929 lihi = iwork(ilihi+window-1)
930 lsel = iwork(ilsel+window-1)
932 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc )
THEN
933 IF( npcol.GT.1 .AND. dir.EQ.1 )
934 $
CALL igebs2d( ictxt,
'Row', top, 8, 1, ibuff, 8 )
935 IF( nprow.GT.1 .AND. dir.EQ.2 )
936 $
CALL igebs2d( ictxt,
'Col', top, 8, 1, ibuff, 8 )
937 ELSEIF( myrow.EQ.rsrc .OR. mycol.EQ.csrc )
THEN
938 IF( npcol.GT.1 .AND. dir.EQ.1 .AND. myrow.EQ.rsrc )
940 IF( first .OR. (lihi .GE. lilo + lsel) )
THEN
941 CALL igebr2d( ictxt,
'Row', top, 8, 1, ibuff, 8,
957 IF( nprow.GT.1 .AND. dir.EQ.2 .AND. mycol.EQ.csrc )
959 IF( first .OR. (lihi .GE. lilo + lsel) )
THEN
960 CALL igebr2d( ictxt,
'Col', top, 8, 1, ibuff, 8,
987 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc )
THEN
989 bufflen = dlen + ilen
990 IF( bufflen.NE.0 )
THEN
991 DO 180 indx = 1, ilen
992 work( buffer+indx-1 ) =
993 $ dble( iwork(ipiw+indx-1) )
995 CALL dlamov(
'All', dlen, 1, work( ipw2 ),
996 $ dlen, work(buffer+ilen), dlen )
997 IF( npcol.GT.1 .AND. dir.EQ.1 )
THEN
998 CALL dgebs2d( ictxt,
'Row', top, bufflen, 1,
999 $ work(buffer), bufflen )
1001 IF( nprow.GT.1 .AND. dir.EQ.2 )
THEN
1002 CALL dgebs2d( ictxt,
'Col', top, bufflen, 1,
1003 $ work(buffer), bufflen )
1006 ELSEIF( myrow.EQ.rsrc .OR. mycol.EQ.csrc )
THEN
1007 IF( npcol.GT.1 .AND. dir.EQ.1 .AND. myrow.EQ.rsrc )
1010 bufflen = dlen + ilen
1011 IF( bufflen.NE.0 )
THEN
1012 CALL dgebr2d( ictxt,
'Row', top, bufflen, 1,
1013 $ work(buffer), bufflen, rsrc, csrc )
1016 IF( nprow.GT.1 .AND. dir.EQ.2 .AND. mycol.EQ.csrc )
1019 bufflen = dlen + ilen
1020 IF( bufflen.NE.0 )
THEN
1021 CALL dgebr2d( ictxt,
'Col', top, bufflen, 1,
1022 $ work(buffer), bufflen, rsrc, csrc )
1025 IF((npcol.GT.1.AND.dir.EQ.1.AND.myrow.EQ.rsrc).OR.
1026 $ (nprow.GT.1.AND.dir.EQ.2.AND.mycol.EQ.csrc ) )
1028 IF( bufflen.NE.0 )
THEN
1029 DO 190 indx = 1, ilen
1030 iwork(ipiw+indx-1) =
1031 $ int(work( buffer+indx-1 ))
1033 CALL dlamov(
'All', dlen, 1,
1034 $ work( buffer+ilen ), dlen,
1035 $ work( ipw2 ), dlen )
1050 DO 112 window = 1, nmwin2
1051 rsrc = iwork(irsrc+window-1)
1052 csrc = iwork(icsrc+window-1)
1054 IF( (myrow.EQ.rsrc .AND. dir.EQ.1 ).OR.
1055 $ (mycol.EQ.csrc .AND. dir.EQ.2 ) )
THEN
1056 lilo = iwork(ililo+window-1)
1057 lihi = iwork(ilihi+window-1)
1058 lsel = iwork(ilsel+window-1)
1062 IF( bufflen.EQ.0 )
GO TO 295
1064 nitraf = pitraf - ipiw
1067 DO 200 k = 1, nitraf
1068 IF( iwork( ipiw + k - 1 ).LE.nwin )
THEN
1080 ipw3 = pdw + nwin*nwin
1085 IF( flops.NE.0 .AND.
1086 $ ( flops*100 ) / ( 2*nwin*nwin ) .GE. mmult )
THEN
1094 CALL dlaset(
'All', nwin, nwin, zero, one,
1095 $ work( pdw ), nwin )
1096 CALL bdlaapp( 1, nwin, nwin, ncb, work( pdw ), nwin,
1097 $ nitraf, iwork(ipiw), work( ipw2 ), work(ipw3) )
1109 DO 210 indx = 1, i-1, nb
1110 CALL infog2l( indx, i, desct, nprow, npcol,
1111 $ myrow, mycol, iloc, jloc, rsrc1, csrc1 )
1112 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
1114 lrows =
min(nb,i-indx)
1115 CALL dgemm(
'No transpose',
1116 $
'No transpose', lrows, nwin, nwin,
1117 $ one, t((jloc-1)*lldt+iloc), lldt,
1118 $ work( pdw ), nwin, zero,
1119 $ work(ipw3), lrows )
1120 CALL dlamov(
'All', lrows, nwin,
1121 $ work(ipw3), lrows,
1122 $ t((jloc-1)*lldt+iloc), lldt )
1126 DO 220 indx = 1, n, nb
1127 CALL infog2l( indx, i, descq, nprow,
1128 $ npcol, myrow, mycol, iloc, jloc,
1130 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 )
1132 lrows =
min(nb,n-indx+1)
1133 CALL dgemm(
'No transpose',
1134 $
'No transpose', lrows, nwin, nwin,
1135 $ one, q((jloc-1)*lldq+iloc), lldq,
1136 $ work( pdw ), nwin, zero,
1137 $ work(ipw3), lrows )
1138 CALL dlamov(
'All', lrows, nwin,
1139 $ work(ipw3), lrows,
1140 $ q((jloc-1)*lldq+iloc), lldq )
1149 IF( lihi.LT.n )
THEN
1150 IF( mod(lihi,nb).GT.0 )
THEN
1152 CALL infog2l( i, indx, desct, nprow,
1153 $ npcol, myrow, mycol, iloc, jloc,
1155 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 )
1157 lcols = mod(
min( nb-mod(lihi,nb),
1159 CALL dgemm(
'Transpose',
1160 $
'No Transpose', nwin, lcols, nwin,
1161 $ one, work( pdw ), nwin,
1162 $ t((jloc-1)*lldt+iloc), lldt, zero,
1163 $ work(ipw3), nwin )
1164 CALL dlamov(
'All', nwin, lcols,
1166 $ t((jloc-1)*lldt+iloc), lldt )
1169 indxs = iceil(lihi,nb)*nb + 1
1170 DO 230 indx = indxs, n, nb
1171 CALL infog2l( i, indx, desct, nprow,
1172 $ npcol, myrow, mycol, iloc, jloc,
1174 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 )
1176 lcols =
min( nb, n-indx+1 )
1177 CALL dgemm(
'Transpose',
1178 $
'No Transpose', nwin, lcols, nwin,
1179 $ one, work( pdw ), nwin,
1180 $ t((jloc-1)*lldt+iloc), lldt, zero,
1181 $ work(ipw3), nwin )
1182 CALL dlamov(
'All', nwin, lcols,
1184 $ t((jloc-1)*lldt+iloc), lldt )
1208 DO 240 indx = 1, i-1, nb
1209 CALL infog2l( indx, i, desct, nprow, npcol,
1210 $ myrow, mycol, iloc, jloc, rsrc1, csrc1 )
1211 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
1213 jloc1 = indxg2l( i+nwin-ks, nb, mycol,
1214 $ desct( csrc_ ), npcol )
1215 lrows =
min(nb,i-indx)
1216 CALL dlamov(
'All', lrows, ks,
1217 $ t((jloc1-1)*lldt+iloc ), lldt,
1218 $ work(ipw3), lrows )
1219 CALL dtrmm(
'Right',
'Upper',
1221 $
'Non-unit', lrows, ks, one,
1222 $ work( pdw+nwin-ks ), nwin,
1223 $ work(ipw3), lrows )
1224 CALL dgemm(
'No transpose',
1225 $
'No transpose', lrows, ks, nwin-ks,
1226 $ one, t((jloc-1)*lldt+iloc), lldt,
1227 $ work( pdw ), nwin, one, work(ipw3),
1232 CALL dlamov(
'All', lrows, nwin-ks,
1233 $ t((jloc-1)*lldt+iloc), lldt,
1234 $ work( ipw3+ks*lrows ), lrows )
1235 CALL dtrmm(
'Right',
'Lower',
1236 $
'No transpose',
'Non-unit',
1237 $ lrows, nwin-ks, one,
1238 $ work( pdw+nwin*ks ), nwin,
1239 $ work( ipw3+ks*lrows ), lrows )
1240 CALL dgemm(
'No transpose',
1241 $
'No transpose', lrows, nwin-ks, ks,
1242 $ one, t((jloc1-1)*lldt+iloc), lldt,
1243 $ work( pdw+nwin*ks+nwin-ks ), nwin,
1244 $ one, work( ipw3+ks*lrows ), lrows )
1248 CALL dlamov(
'All', lrows, nwin,
1249 $ work(ipw3), lrows,
1250 $ t((jloc-1)*lldt+iloc), lldt )
1257 DO 250 indx = 1, n, nb
1258 CALL infog2l( indx, i, descq, nprow,
1259 $ npcol, myrow, mycol, iloc, jloc,
1261 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 )
1263 jloc1 = indxg2l( i+nwin-ks, nb,
1264 $ mycol, descq( csrc_ ), npcol )
1265 lrows =
min(nb,n-indx+1)
1266 CALL dlamov(
'All', lrows, ks,
1267 $ q((jloc1-1)*lldq+iloc ), lldq,
1268 $ work(ipw3), lrows )
1269 CALL dtrmm(
'Right',
'Upper',
1270 $
'No transpose',
'Non-unit',
1272 $ work( pdw+nwin-ks ), nwin,
1273 $ work(ipw3), lrows )
1274 CALL dgemm(
'No transpose',
1275 $
'No transpose', lrows, ks,
1277 $ q((jloc-1)*lldq+iloc), lldq,
1278 $ work( pdw ), nwin, one,
1279 $ work(ipw3), lrows )
1283 CALL dlamov(
'All', lrows, nwin-ks,
1284 $ q((jloc-1)*lldq+iloc), lldq,
1285 $ work( ipw3+ks*lrows ), lrows)
1286 CALL dtrmm(
'Right',
'Lower',
1287 $
'No transpose',
'Non-unit',
1288 $ lrows, nwin-ks, one,
1289 $ work( pdw+nwin*ks ), nwin,
1290 $ work( ipw3+ks*lrows ), lrows)
1291 CALL dgemm(
'No transpose',
1292 $
'No transpose', lrows, nwin-ks,
1293 $ ks, one, q((jloc1-1)*lldq+iloc),
1294 $ lldq, work(pdw+nwin*ks+nwin-ks),
1295 $ nwin, one, work( ipw3+ks*lrows ),
1300 CALL dlamov(
'All', lrows, nwin,
1301 $ work(ipw3), lrows,
1302 $ q((jloc-1)*lldq+iloc), lldq )
1309 IF ( lihi.LT.n )
THEN
1313 IF( mod(lihi,nb).GT.0 )
THEN
1315 CALL infog2l( i, indx, desct, nprow,
1316 $ npcol, myrow, mycol, iloc, jloc,
1318 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 )
1320 iloc1 = indxg2l( i+nwin-ks, nb, myrow,
1321 $ desct( rsrc_ ), nprow )
1322 lcols = mod(
min( nb-mod(lihi,nb),
1324 CALL dlamov(
'All', ks, lcols,
1325 $ t((jloc-1)*lldt+iloc1), lldt,
1326 $ work(ipw3), nwin )
1327 CALL dtrmm(
'Left',
'Upper',
1328 $
'Transpose',
'Non-unit', ks,
1329 $ lcols, one, work( pdw+nwin-ks ),
1330 $ nwin, work(ipw3), nwin )
1331 CALL dgemm(
'Transpose',
1332 $
'No transpose', ks, lcols,
1333 $ nwin-ks, one, work(pdw), nwin,
1334 $ t((jloc-1)*lldt+iloc), lldt, one,
1335 $ work(ipw3), nwin )
1340 CALL dlamov(
'All', nwin-ks, lcols,
1341 $ t((jloc-1)*lldt+iloc), lldt,
1342 $ work( ipw3+ks ), nwin )
1343 CALL dtrmm(
'Left',
'Lower',
1344 $
'Transpose',
'Non-unit',
1345 $ nwin-ks, lcols, one,
1346 $ work( pdw+nwin*ks ), nwin,
1347 $ work( ipw3+ks ), nwin )
1348 CALL dgemm(
'Transpose',
1349 $
'No Transpose', nwin-ks, lcols,
1351 $ work( pdw+nwin*ks+nwin-ks ),
1352 $ nwin, t((jloc-1)*lldt+iloc1),
1353 $ lldt, one, work( ipw3+ks ),
1358 CALL dlamov(
'All', nwin, lcols,
1360 $ t((jloc-1)*lldt+iloc), lldt )
1363 indxs = iceil(lihi,nb)*nb + 1
1364 DO 260 indx = indxs, n, nb
1365 CALL infog2l( i, indx, desct, nprow,
1366 $ npcol, myrow, mycol, iloc, jloc,
1368 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 )
1374 iloc1 = indxg2l( i+nwin-ks, nb,
1375 $ myrow, desct( rsrc_ ), nprow )
1376 lcols =
min( nb, n-indx+1 )
1377 CALL dlamov(
'All', ks, lcols,
1378 $ t((jloc-1)*lldt+iloc1), lldt,
1379 $ work(ipw3), nwin )
1380 CALL dtrmm(
'Left',
'Upper',
1381 $
'Transpose',
'Non-unit', ks,
1383 $ work( pdw+nwin-ks ), nwin,
1384 $ work(ipw3), nwin )
1385 CALL dgemm(
'Transpose',
1386 $
'No transpose', ks, lcols,
1387 $ nwin-ks, one, work(pdw), nwin,
1388 $ t((jloc-1)*lldt+iloc), lldt, one,
1389 $ work(ipw3), nwin )
1394 CALL dlamov(
'All', nwin-ks, lcols,
1395 $ t((jloc-1)*lldt+iloc), lldt,
1396 $ work( ipw3+ks ), nwin )
1397 CALL dtrmm(
'Left',
'Lower',
1398 $
'Transpose',
'Non-unit',
1399 $ nwin-ks, lcols, one,
1400 $ work( pdw+nwin*ks ), nwin,
1401 $ work( ipw3+ks ), nwin )
1402 CALL dgemm(
'Transpose',
1403 $
'No Transpose', nwin-ks, lcols,
1405 $ work( pdw+nwin*ks+nwin-ks ),
1406 $ nwin, t((jloc-1)*lldt+iloc1),
1407 $ lldt, one, work(ipw3+ks), nwin )
1411 CALL dlamov(
'All', nwin, lcols,
1413 $ t((jloc-1)*lldt+iloc), lldt )
1419 ELSEIF( flops.NE.0 )
THEN
1425 DO 270 indx = 1, i-1, nb
1426 CALL infog2l( indx, i, desct, nprow, npcol,
1427 $ myrow, mycol, iloc, jloc, rsrc1, csrc1 )
1428 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
THEN
1429 lrows =
min(nb,i-indx)
1430 CALL bdlaapp( 1, lrows, nwin, ncb,
1431 $ t((jloc-1)*lldt+iloc ), lldt, nitraf,
1432 $ iwork(ipiw), work( ipw2 ),
1437 DO 280 indx = 1, n, nb
1438 CALL infog2l( indx, i, descq, nprow, npcol,
1439 $ myrow, mycol, iloc, jloc, rsrc1, csrc1 )
1440 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
1442 lrows =
min(nb,n-indx+1)
1443 CALL bdlaapp( 1, lrows, nwin, ncb,
1444 $ q((jloc-1)*lldq+iloc), lldq, nitraf,
1445 $ iwork(ipiw), work( ipw2 ),
1452 IF( lihi.LT.n )
THEN
1453 IF( mod(lihi,nb).GT.0 )
THEN
1455 CALL infog2l( i, indx, desct, nprow, npcol,
1456 $ myrow, mycol, iloc, jloc, rsrc1, csrc1 )
1457 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
1459 lcols = mod(
min( nb-mod(lihi,nb),
1461 CALL bdlaapp( 0, nwin, lcols, ncb,
1462 $ t((jloc-1)*lldt+iloc), lldt, nitraf,
1463 $ iwork(ipiw), work( ipw2 ),
1467 indxs = iceil(lihi,nb)*nb + 1
1468 DO 290 indx = indxs, n, nb
1469 CALL infog2l( i, indx, desct, nprow, npcol,
1470 $ myrow, mycol, iloc, jloc, rsrc1, csrc1 )
1471 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
1473 lcols =
min( nb, n-indx+1 )
1474 CALL bdlaapp( 0, nwin, lcols, ncb,
1475 $ t((jloc-1)*lldt+iloc), lldt, nitraf,
1476 $ iwork(ipiw), work( ipw2 ),
1507 IF( myrow.EQ.rsrc.AND.mycol.EQ.csrc )
THEN
1509 IF( bufflen.NE.0 .OR. ks.EQ.0 .OR.
1510 $ ( bufflen.EQ.0 .AND. ks.GT.0 ) )
1512 iwork( ilihi+window-1 ) = lihi
1513 IF( .NOT. lihi.GE.lilo+lsel )
THEN
1515 iwork( ililo+window-1 ) = lilo
1518 ELSEIF( myrow.EQ.rsrc .AND. dir.EQ.1 )
THEN
1519 IF( bufflen.NE.0 .OR. ks.EQ.0 .OR.
1520 $ ( bufflen.EQ.0 .AND. ks.GT.0 ) )
1522 iwork( ilihi+window-1 ) = lihi
1523 IF( .NOT. lihi.GE.lilo+lsel )
THEN
1525 iwork( ililo+window-1 ) = lilo
1527 ELSEIF( mycol.EQ.csrc .AND. dir.EQ.2 )
THEN
1528 IF( bufflen.NE.0 .OR. ks.EQ.0 .OR.
1529 $ ( bufflen.EQ.0 .AND. ks.GT.0 ) )
1531 iwork( ilihi+window-1 ) = lihi
1532 IF( .NOT. lihi.GE.lilo+lsel )
THEN
1534 iwork( ililo+window-1 ) = lilo
1552 DO 113 window = 1, nmwin2
1553 rsrc = iwork( irsrc + window - 1 )
1554 IF( myrow.EQ.rsrc .AND. (.NOT. lihi.GE.lilo+lsel ) )
THEN
1555 lilo = iwork( ililo + window - 1 )
1556 lihi = iwork( ilihi + window - 1 )
1557 lsel = iwork( ilsel + window - 1 )
1563 IF( first ) first = .false.
1573 CALL blacs_barrier( ictxt,
'All' )
1580 $
CALL igamx2d( ictxt,
'All', top, 1, 1, ierr, 1, -1,
1583 IF( ierr.NE.0 )
THEN
1588 IF( myierr.NE.0 ) info =
max(1,i+kks-1)
1590 $
CALL igamx2d( ictxt,
'All', top, 1, 1, info, 1, -1,
1646 lastwait = nmwin2.GT.1 .AND. mod(nmwin2,2).EQ.1 .AND.
1647 $ nmwin2.EQ.
min(nprow,npcol)
1652 IF( last.EQ.0 )
THEN
1666 DO 310 window0 = win0s, win0e
1667 DO 320 window = window0, wine, 2
1672 rsrc4 = iwork(irsrc+window-1)
1673 csrc4 = iwork(icsrc+window-1)
1678 csrc3 = mod( csrc4 - 1 + npcol, npcol )
1679 rsrc2 = mod( rsrc4 - 1 + nprow, nprow )
1683 IF( ( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) .OR.
1684 $ ( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 ) .OR.
1685 $ ( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 ) .OR.
1686 $ ( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) )
THEN
1698 IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 )
THEN
1699 lihi4 = ( iwork( ililo + window - 1 ) +
1700 $ iwork( ilihi + window - 1 ) ) / 2
1701 lihic =
min(lihi4,(iceil(lihi4,nb)-1)*nb+wneicr)
1707 IF( (.NOT. lihic.LE.nb) .AND. lihic.LT.n )
THEN
1708 iloc = indxg2l( lihic+1, nb, myrow,
1709 $ desct( rsrc_ ), nprow )
1710 jloc = indxg2l( lihic, nb, mycol,
1711 $ desct( csrc_ ), npcol )
1712 IF( t( (jloc-1)*lldt+iloc ).NE.zero )
THEN
1713 IF( mod( lihic, nb ).EQ.1 .OR.
1714 $ ( mod( lihic, nb ).EQ.2 .AND.
1715 $
SELECT(lihic-2).EQ.0 ) )
1723 IF( rsrc4.NE.rsrc1 .OR. csrc4.NE.csrc1 )
1724 $
CALL igesd2d( ictxt, 1, 1, lihic, 1, rsrc1,
1726 IF( rsrc4.NE.rsrc2 .OR. csrc4.NE.csrc2 )
1727 $
CALL igesd2d( ictxt, 1, 1, lihic, 1, rsrc2,
1729 IF( rsrc4.NE.rsrc3 .OR. csrc4.NE.csrc3 )
1730 $
CALL igesd2d( ictxt, 1, 1, lihic, 1, rsrc3,
1733 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
THEN
1734 IF( rsrc4.NE.rsrc1 .OR. csrc4.NE.csrc1 )
1735 $
CALL igerv2d( ictxt, 1, 1, lihic, 1, rsrc4,
1738 IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 )
THEN
1739 IF( rsrc4.NE.rsrc2 .OR. csrc4.NE.csrc2 )
1740 $
CALL igerv2d( ictxt, 1, 1, lihic, 1, rsrc4,
1743 IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 )
THEN
1744 IF( rsrc4.NE.rsrc3 .OR. csrc4.NE.csrc3 )
1745 $
CALL igerv2d( ictxt, 1, 1, lihic, 1, rsrc4,
1754 skip1cr = window.EQ.1 .AND.
1755 $ iceil(lihic,nb).LE.iceil(ilo,nb)
1771 IF( .NOT. skip1cr )
THEN
1772 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
THEN
1773 IF( window.EQ.1 )
THEN
1776 lihi1 = iwork( ilihi + window - 2 )
1779 $
min( lihic-2*mod(lihic,nb) + 1,
1780 $ (iceil(lihic,nb)-1)*nb - 1 ) )
1781 iloc = indxg2l( i, nb, myrow, desct( rsrc_ ),
1783 jloc = indxg2l( i-1, nb, mycol, desct( csrc_ ),
1785 IF( t( (jloc-1)*lldt+iloc ).NE.zero )
1787 IF( rsrc1.NE.rsrc4 .OR. csrc1.NE.csrc4 )
1788 $
CALL igesd2d( ictxt, 1, 1, i, 1, rsrc4,
1790 IF( rsrc1.NE.rsrc2 .OR. csrc1.NE.csrc2 )
1791 $
CALL igesd2d( ictxt, 1, 1, i, 1, rsrc2,
1793 IF( rsrc1.NE.rsrc3 .OR. csrc1.NE.csrc3 )
1794 $
CALL igesd2d( ictxt, 1, 1, i, 1, rsrc3,
1797 IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 )
THEN
1798 IF( rsrc1.NE.rsrc2 .OR. csrc1.NE.csrc2 )
1799 $
CALL igerv2d( ictxt, 1, 1, i, 1, rsrc1,
1802 IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 )
THEN
1803 IF( rsrc1.NE.rsrc3 .OR. csrc1.NE.csrc3 )
1804 $
CALL igerv2d( ictxt, 1, 1, i, 1, rsrc1,
1807 IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 )
THEN
1808 IF( rsrc1.NE.rsrc4 .OR. csrc1.NE.csrc4 )
1809 $
CALL igerv2d( ictxt, 1, 1, i, 1, rsrc1,
1819 nwin = lihic - i + 1
1824 IF( skip1cr )
GO TO 360
1830 CALL dlaset(
'All', nwin, nwin, zero, zero,
1831 $ work( ipw2 ), nwin )
1834 ipw3 = ipw2 + nwin*nwin
1841 IF( rsrc1.NE.rsrc4 .OR. csrc1.NE.csrc4 )
THEN
1842 ilen4 = mod(lihic,nb)
1843 seli4 = iceil(i,nb)*nb+1
1844 ilen1 = nwin - ilen4
1845 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
THEN
1846 CALL igesd2d( ictxt, ilen1, 1,
SELECT(i),
1847 $ ilen1, rsrc4, csrc4 )
1848 CALL igerv2d( ictxt, ilen4, 1,
SELECT(seli4),
1849 $ ilen4, rsrc4, csrc4 )
1851 IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 )
THEN
1852 CALL igesd2d( ictxt, ilen4, 1,
SELECT(seli4),
1853 $ ilen4, rsrc1, csrc1 )
1854 CALL igerv2d( ictxt, ilen1, 1,
SELECT(i),
1855 $ ilen1, rsrc1, csrc1 )
1862 dim1 = nb - mod(i-1,nb)
1864 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
THEN
1865 iloc = indxg2l( i, nb, myrow, desct( rsrc_ ),
1867 jloc = indxg2l( i, nb, mycol, desct( csrc_ ),
1869 CALL dlamov(
'All', dim1, dim1,
1870 $ t((jloc-1)*lldt+iloc), lldt, work(ipw2),
1872 IF( rsrc1.NE.rsrc4 .OR. csrc1.NE.csrc4 )
THEN
1873 CALL dgesd2d( ictxt, dim1, dim1,
1874 $ work(ipw2), nwin, rsrc4, csrc4 )
1875 CALL dgerv2d( ictxt, dim4, dim4,
1876 $ work(ipw2+dim1*nwin+dim1), nwin, rsrc4,
1880 IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 )
THEN
1881 iloc = indxg2l( i+dim1, nb, myrow, desct( rsrc_ ),
1883 jloc = indxg2l( i+dim1, nb, mycol, desct( csrc_ ),
1885 CALL dlamov(
'All', dim4, dim4,
1886 $ t((jloc-1)*lldt+iloc), lldt,
1887 $ work(ipw2+dim1*nwin+dim1), nwin )
1888 IF( rsrc4.NE.rsrc1 .OR. csrc4.NE.csrc1 )
THEN
1889 CALL dgesd2d( ictxt, dim4, dim4,
1890 $ work(ipw2+dim1*nwin+dim1), nwin, rsrc1,
1892 CALL dgerv2d( ictxt, dim1, dim1,
1893 $ work(ipw2), nwin, rsrc1, csrc1 )
1896 IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 )
THEN
1897 iloc = indxg2l( i, nb, myrow, desct( rsrc_ ),
1899 jloc = indxg2l( i+dim1, nb, mycol, desct( csrc_ ),
1901 CALL dlamov(
'All', dim1, dim4,
1902 $ t((jloc-1)*lldt+iloc), lldt,
1903 $ work(ipw2+dim1*nwin), nwin )
1904 IF( rsrc2.NE.rsrc1 .OR. csrc2.NE.csrc1 )
THEN
1905 CALL dgesd2d( ictxt, dim1, dim4,
1906 $ work(ipw2+dim1*nwin), nwin, rsrc1, csrc1 )
1909 IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 )
THEN
1910 IF( rsrc2.NE.rsrc4 .OR. csrc2.NE.csrc4 )
THEN
1911 CALL dgesd2d( ictxt, dim1, dim4,
1912 $ work(ipw2+dim1*nwin), nwin, rsrc4, csrc4 )
1915 IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 )
THEN
1916 iloc = indxg2l( i+dim1, nb, myrow, desct( rsrc_ ),
1918 jloc = indxg2l( i+dim1-1, nb, mycol,
1919 $ desct( csrc_ ), npcol )
1920 CALL dlamov(
'All', 1, 1,
1921 $ t((jloc-1)*lldt+iloc), lldt,
1922 $ work(ipw2+(dim1-1)*nwin+dim1), nwin )
1923 IF( rsrc3.NE.rsrc1 .OR. csrc3.NE.csrc1 )
THEN
1924 CALL dgesd2d( ictxt, 1, 1,
1925 $ work(ipw2+(dim1-1)*nwin+dim1), nwin,
1929 IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 )
THEN
1930 IF( rsrc3.NE.rsrc4 .OR. csrc3.NE.csrc4 )
THEN
1931 CALL dgesd2d( ictxt, 1, 1,
1932 $ work(ipw2+(dim1-1)*nwin+dim1), nwin,
1936 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
THEN
1937 IF( rsrc1.NE.rsrc2 .OR. csrc1.NE.csrc2 )
THEN
1938 CALL dgerv2d( ictxt, dim1, dim4,
1939 $ work(ipw2+dim1*nwin), nwin, rsrc2,
1942 IF( rsrc1.NE.rsrc3 .OR. csrc1.NE.csrc3 )
THEN
1943 CALL dgerv2d( ictxt, 1, 1,
1944 $ work(ipw2+(dim1-1)*nwin+dim1), nwin,
1948 IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 )
THEN
1949 IF( rsrc4.NE.rsrc2 .OR. csrc4.NE.csrc2 )
THEN
1950 CALL dgerv2d( ictxt, dim1, dim4,
1951 $ work(ipw2+dim1*nwin), nwin, rsrc2,
1954 IF( rsrc4.NE.rsrc3 .OR. csrc4.NE.csrc3 )
THEN
1955 CALL dgerv2d( ictxt, 1, 1,
1956 $ work(ipw2+(dim1-1)*nwin+dim1), nwin,
1965 IF( ( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) .OR.
1966 $ ( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) )
THEN
1972 swap =
SELECT( k ).NE.0
1973 IF( k.LT.lihic )
THEN
1974 elem = work(ipw2+(k-i)*nwin+k-i+1)
1987 nitraf = liwork - pitraf + 1
1988 ndtraf = lwork - pdtraf + 1
1989 CALL bdtrexc( nwin, work(ipw2), nwin,
1990 $ kk, kks, nitraf, iwork( pitraf ),
1991 $ ndtraf, work( pdtraf ),
1992 $ work(ipw1), ierr )
1993 pitraf = pitraf + nitraf
1994 pdtraf = pdtraf + ndtraf
1999 DO 340 j = i+kk-1, i+kks, -1
2000 SELECT(j+1) =
SELECT(j-1)
2005 DO 350 j = i+kk-1, i+kks, -1
2006 SELECT(j) =
SELECT(j-1)
2011 IF ( ierr.EQ.1 .OR. ierr.EQ.2 )
THEN
2013 IF ( ierr.EQ.2 )
THEN
2014 SELECT( i+kks-3 ) = 1
2015 SELECT( i+kks-1 ) = 0
2033 IF( ( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) .OR.
2034 $ ( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) )
THEN
2041 ilen = pitraf - ipiw + 1
2042 dlen = pdtraf - ipw3 + 1
2049 IF( .NOT. skip1cr )
THEN
2050 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
THEN
2051 iloc = indxg2l( i, nb, myrow, desct( rsrc_ ),
2053 jloc = indxg2l( i, nb, mycol, desct( csrc_ ),
2055 CALL dlamov(
'All', dim1, dim1, work(ipw2),
2056 $ nwin, t((jloc-1)*lldt+iloc), lldt )
2058 IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 )
THEN
2059 iloc = indxg2l( i+dim1, nb, myrow,
2060 $ desct( rsrc_ ), nprow )
2061 jloc = indxg2l( i+dim1, nb, mycol,
2062 $ desct( csrc_ ), npcol )
2063 CALL dlamov(
'All', dim4, dim4,
2064 $ work(ipw2+dim1*nwin+dim1), nwin,
2065 $ t((jloc-1)*lldt+iloc), lldt )
2076 IF( window.EQ.1 .AND. skip1cr )
GO TO 325
2080 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
THEN
2081 IF( rsrc1.NE.rsrc3 .OR. csrc1.NE.csrc3 )
THEN
2082 CALL dgesd2d( ictxt, 1, 1,
2083 $ work( ipw2+(dim1-1)*nwin+dim1 ), nwin,
2087 IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 )
THEN
2088 IF( rsrc4.NE.rsrc2 .OR. csrc4.NE.csrc2 )
THEN
2089 CALL dgesd2d( ictxt, dim1, dim4,
2090 $ work( ipw2+dim1*nwin), nwin, rsrc2,
2094 IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 )
THEN
2095 iloc = indxg2l( i, nb, myrow, desct( rsrc_ ),
2097 jloc = indxg2l( i+dim1, nb, mycol,
2098 $ desct( csrc_ ), npcol )
2099 IF( rsrc2.NE.rsrc4 .OR. csrc2.NE.csrc4 )
THEN
2100 CALL dgerv2d( ictxt, dim1, dim4,
2101 $ work(ipw2+dim1*nwin), nwin, rsrc4, csrc4 )
2103 CALL dlamov(
'All', dim1, dim4,
2104 $ work( ipw2+dim1*nwin ), nwin,
2105 $ t((jloc-1)*lldt+iloc), lldt )
2107 IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 )
THEN
2108 iloc = indxg2l( i+dim1, nb, myrow,
2109 $ desct( rsrc_ ), nprow )
2110 jloc = indxg2l( i+dim1-1, nb, mycol,
2111 $ desct( csrc_ ), npcol )
2112 IF( rsrc3.NE.rsrc1 .OR. csrc3.NE.csrc1 )
THEN
2113 CALL dgerv2d( ictxt, 1, 1,
2114 $ work( ipw2+(dim1-1)*nwin+dim1 ), nwin,
2117 t((jloc-1)*lldt+iloc) =
2118 $ work( ipw2+(dim1-1)*nwin+dim1 )
2133 DO 321 window = window0, wine, 2
2134 rsrc4 = iwork(irsrc+window-1)
2135 csrc4 = iwork(icsrc+window-1)
2136 rsrc1 = mod( rsrc4 - 1 + nprow, nprow )
2137 csrc1 = mod( csrc4 - 1 + npcol, npcol )
2138 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
THEN
2139 IF( npcol.GT.1 .AND. dir.EQ.1 )
2140 $
CALL igebs2d( ictxt,
'Row', top, 8, 1,
2142 IF( nprow.GT.1 .AND. dir.EQ.2 )
2143 $
CALL igebs2d( ictxt,
'Col', top, 8, 1,
2145 skip1cr = window.EQ.1 .AND.
2146 $ iceil(lihic,nb).LE.iceil(ilo,nb)
2147 ELSEIF( myrow.EQ.rsrc1 .OR. mycol.EQ.csrc1 )
THEN
2148 IF( npcol.GT.1 .AND. dir.EQ.1 .AND.
2149 $ myrow.EQ.rsrc1 )
THEN
2150 CALL igebr2d( ictxt,
'Row', top, 8, 1,
2151 $ ibuff, 8, rsrc1, csrc1 )
2160 bufflen = ilen + dlen
2161 ipw3 = ipw2 + nwin*nwin
2162 dim1 = nb - mod(i-1,nb)
2164 lihic = nwin + i - 1
2165 skip1cr = window.EQ.1 .AND.
2166 $ iceil(lihic,nb).LE.iceil(ilo,nb)
2168 IF( nprow.GT.1 .AND. dir.EQ.2 .AND.
2169 $ mycol.EQ.csrc1 )
THEN
2170 CALL igebr2d( ictxt,
'Col', top, 8, 1,
2171 $ ibuff, 8, rsrc1, csrc1 )
2180 bufflen = ilen + dlen
2181 ipw3 = ipw2 + nwin*nwin
2182 dim1 = nb - mod(i-1,nb)
2184 lihic = nwin + i - 1
2185 skip1cr = window.EQ.1 .AND.
2186 $ iceil(lihic,nb).LE.iceil(ilo,nb)
2189 IF( rsrc1.NE.rsrc4 )
THEN
2190 IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 )
THEN
2191 IF( npcol.GT.1 .AND. dir.EQ.1 )
2192 $
CALL igebs2d( ictxt,
'Row', top, 8, 1,
2194 skip1cr = window.EQ.1 .AND.
2195 $ iceil(lihic,nb).LE.iceil(ilo,nb)
2196 ELSEIF( myrow.EQ.rsrc4 )
THEN
2197 IF( npcol.GT.1 .AND. dir.EQ.1 )
THEN
2198 CALL igebr2d( ictxt,
'Row', top, 8, 1,
2199 $ ibuff, 8, rsrc4, csrc4 )
2208 bufflen = ilen + dlen
2209 ipw3 = ipw2 + nwin*nwin
2210 dim1 = nb - mod(i-1,nb)
2212 lihic = nwin + i - 1
2213 skip1cr = window.EQ.1 .AND.
2214 $ iceil(lihic,nb).LE.iceil(ilo,nb)
2218 IF( csrc1.NE.csrc4 )
THEN
2219 IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 )
THEN
2220 IF( nprow.GT.1 .AND. dir.EQ.2 )
2221 $
CALL igebs2d( ictxt,
'Col', top, 8, 1,
2223 skip1cr = window.EQ.1 .AND.
2224 $ iceil(lihic,nb).LE.iceil(ilo,nb)
2225 ELSEIF( mycol.EQ.csrc4 )
THEN
2226 IF( nprow.GT.1 .AND. dir.EQ.2 )
THEN
2227 CALL igebr2d( ictxt,
'Col', top, 8, 1,
2228 $ ibuff, 8, rsrc4, csrc4 )
2237 bufflen = ilen + dlen
2238 ipw3 = ipw2 + nwin*nwin
2239 dim1 = nb - mod(i-1,nb)
2241 lihic = nwin + i - 1
2242 skip1cr = window.EQ.1 .AND.
2243 $ iceil(lihic,nb).LE.iceil(ilo,nb)
2250 IF( skip1cr )
GO TO 326
2254 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
THEN
2256 bufflen = dlen + ilen
2257 IF( (nprow.GT.1 .AND. dir.EQ.2) .OR.
2258 $ (npcol.GT.1 .AND. dir.EQ.1) )
THEN
2259 DO 370 indx = 1, ilen
2260 work( buffer+indx-1 ) =
2261 $ dble( iwork(ipiw+indx-1) )
2263 CALL dlamov(
'All', dlen, 1, work( ipw3 ),
2264 $ dlen, work(buffer+ilen), dlen )
2266 IF( npcol.GT.1 .AND. dir.EQ.1 )
THEN
2267 CALL dgebs2d( ictxt,
'Row', top, bufflen, 1,
2268 $ work(buffer), bufflen )
2270 IF( nprow.GT.1 .AND. dir.EQ.2 )
THEN
2271 CALL dgebs2d( ictxt,
'Col', top, bufflen, 1,
2272 $ work(buffer), bufflen )
2274 ELSEIF( myrow.EQ.rsrc1 .OR. mycol.EQ.csrc1 )
THEN
2275 IF( npcol.GT.1 .AND. dir.EQ.1 .AND.
2276 $ myrow.EQ.rsrc1 )
THEN
2278 bufflen = dlen + ilen
2279 CALL dgebr2d( ictxt,
'Row', top, bufflen, 1,
2280 $ work(buffer), bufflen, rsrc1, csrc1 )
2282 IF( nprow.GT.1 .AND. dir.EQ.2 .AND.
2283 $ mycol.EQ.csrc1 )
THEN
2285 bufflen = dlen + ilen
2286 CALL dgebr2d( ictxt,
'Col', top, bufflen, 1,
2287 $ work(buffer), bufflen, rsrc1, csrc1 )
2289 IF( (npcol.GT.1.AND.dir.EQ.1.AND.myrow.EQ.rsrc1)
2290 $ .OR. (nprow.GT.1.AND.dir.EQ.2.AND.
2291 $ mycol.EQ.csrc1) )
THEN
2292 DO 380 indx = 1, ilen
2293 iwork(ipiw+indx-1) =
2294 $ int( work( buffer+indx-1 ) )
2296 CALL dlamov(
'All', dlen, 1,
2297 $ work( buffer+ilen ), dlen,
2298 $ work( ipw3 ), dlen )
2301 IF( rsrc1.NE.rsrc4 )
THEN
2302 IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 )
THEN
2304 bufflen = dlen + ilen
2305 IF( npcol.GT.1 .AND. dir.EQ.1 )
THEN
2306 DO 390 indx = 1, ilen
2307 work( buffer+indx-1 ) =
2308 $ dble( iwork(ipiw+indx-1) )
2310 CALL dlamov(
'All', dlen, 1, work( ipw3 ),
2311 $ dlen, work(buffer+ilen), dlen )
2312 CALL dgebs2d( ictxt,
'Row', top, bufflen,
2313 $ 1, work(buffer), bufflen )
2315 ELSEIF( myrow.EQ.rsrc4 .AND. dir.EQ.1 .AND.
2318 bufflen = dlen + ilen
2319 CALL dgebr2d( ictxt,
'Row', top, bufflen,
2320 $ 1, work(buffer), bufflen, rsrc4, csrc4 )
2321 DO 400 indx = 1, ilen
2322 iwork(ipiw+indx-1) =
2323 $ int( work( buffer+indx-1 ) )
2325 CALL dlamov(
'All', dlen, 1,
2326 $ work( buffer+ilen ), dlen,
2327 $ work( ipw3 ), dlen )
2330 IF( csrc1.NE.csrc4 )
THEN
2331 IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 )
THEN
2333 bufflen = dlen + ilen
2334 IF( nprow.GT.1 .AND. dir.EQ.2 )
THEN
2335 DO 395 indx = 1, ilen
2336 work( buffer+indx-1 ) =
2337 $ dble( iwork(ipiw+indx-1) )
2339 CALL dlamov(
'All', dlen, 1, work( ipw3 ),
2340 $ dlen, work(buffer+ilen), dlen )
2341 CALL dgebs2d( ictxt,
'Col', top, bufflen,
2342 $ 1, work(buffer), bufflen )
2344 ELSEIF( mycol.EQ.csrc4 .AND. dir.EQ.2 .AND.
2347 bufflen = dlen + ilen
2348 CALL dgebr2d( ictxt,
'Col', top, bufflen, 1,
2349 $ work(buffer), bufflen, rsrc4, csrc4 )
2350 DO 402 indx = 1, ilen
2351 iwork(ipiw+indx-1) =
2352 $ int( work( buffer+indx-1 ) )
2354 CALL dlamov(
'All', dlen, 1,
2355 $ work( buffer+ilen ), dlen,
2356 $ work( ipw3 ), dlen )
2366 DO 322 window = window0, wine, 2
2367 IF( window.EQ.1 .AND. skip1cr )
GO TO 327
2368 rsrc4 = iwork(irsrc+window-1)
2369 csrc4 = iwork(icsrc+window-1)
2370 rsrc1 = mod( rsrc4 - 1 + nprow, nprow )
2371 csrc1 = mod( csrc4 - 1 + npcol, npcol )
2383 IF( ((mycol.EQ.csrc1.OR.mycol.EQ.csrc4).AND.dir.EQ.2)
2384 $ .OR. ((myrow.EQ.rsrc1.OR.myrow.EQ.rsrc4).AND.
2389 qrows = numroc( n, nb, myrow, descq( rsrc_ ),
2394 trows = numroc( i-1, nb, myrow, desct( rsrc_ ),
2401 tcols = numroc( n - (i+dim1-1), nb, mycol,
2403 IF( mycol.EQ.csrc4 ) tcols = tcols - dim4
2407 ipw5 = ipw4 + nwin*nwin
2408 ipw6 = ipw5 + trows * nwin
2410 ipw7 = ipw6 + nwin * tcols
2411 ipw8 = ipw7 + qrows * nwin
2413 ipw8 = ipw6 + nwin * tcols
2421 IF( mycol.EQ.csrc1 .OR. mycol.EQ.csrc4 )
THEN
2422 DO 410 indx = 1, nprow
2423 IF( mycol.EQ.csrc1 )
THEN
2424 CALL infog2l( 1+(indx-1)*nb, i, desct,
2425 $ nprow, npcol, myrow, mycol, iloc,
2426 $ jloc1, rsrc, csrc1 )
2427 IF( myrow.EQ.rsrc )
THEN
2428 CALL dlamov(
'All', trows, dim1,
2429 $ t((jloc1-1)*lldt+iloc), lldt,
2430 $ work(ipw5), trows )
2431 IF( npcol.GT.1 )
THEN
2432 east = mod( mycol + 1, npcol )
2433 CALL dgesd2d( ictxt, trows, dim1,
2434 $ work(ipw5), trows, rsrc,
2436 CALL dgerv2d( ictxt, trows, dim4,
2437 $ work(ipw5+trows*dim1), trows,
2442 IF( mycol.EQ.csrc4 )
THEN
2443 CALL infog2l( 1+(indx-1)*nb, i+dim1,
2444 $ desct, nprow, npcol, myrow, mycol,
2445 $ iloc, jloc4, rsrc, csrc4 )
2446 IF( myrow.EQ.rsrc )
THEN
2447 CALL dlamov(
'All', trows, dim4,
2448 $ t((jloc4-1)*lldt+iloc), lldt,
2449 $ work(ipw5+trows*dim1), trows )
2450 IF( npcol.GT.1 )
THEN
2451 west = mod( mycol-1+npcol, npcol )
2452 CALL dgesd2d( ictxt, trows, dim4,
2453 $ work(ipw5+trows*dim1), trows,
2455 CALL dgerv2d( ictxt, trows, dim1,
2456 $ work(ipw5), trows, rsrc,
2466 IF( myrow.EQ.rsrc1 .OR. myrow.EQ.rsrc4 )
THEN
2467 DO 420 indx = 1, npcol
2468 IF( myrow.EQ.rsrc1 )
THEN
2469 IF( indx.EQ.1 )
THEN
2470 CALL infog2l( i, lihic+1, desct, nprow,
2471 $ npcol, myrow, mycol, iloc1, jloc,
2475 $ (iceil(lihic,nb)+(indx-2))*nb+1,
2476 $ desct, nprow, npcol, myrow, mycol,
2477 $ iloc1, jloc, rsrc1, csrc )
2479 IF( mycol.EQ.csrc )
THEN
2480 CALL dlamov(
'All', dim1, tcols,
2481 $ t((jloc-1)*lldt+iloc1), lldt,
2482 $ work(ipw6), nwin )
2483 IF( nprow.GT.1 )
THEN
2484 south = mod( myrow + 1, nprow )
2485 CALL dgesd2d( ictxt, dim1, tcols,
2486 $ work(ipw6), nwin, south,
2488 CALL dgerv2d( ictxt, dim4, tcols,
2489 $ work(ipw6+dim1), nwin, south,
2494 IF( myrow.EQ.rsrc4 )
THEN
2495 IF( indx.EQ.1 )
THEN
2496 CALL infog2l( i+dim1, lihic+1, desct,
2497 $ nprow, npcol, myrow, mycol, iloc4,
2498 $ jloc, rsrc4, csrc )
2501 $ (iceil(lihic,nb)+(indx-2))*nb+1,
2502 $ desct, nprow, npcol, myrow, mycol,
2503 $ iloc4, jloc, rsrc4, csrc )
2505 IF( mycol.EQ.csrc )
THEN
2506 CALL dlamov(
'All', dim4, tcols,
2507 $ t((jloc-1)*lldt+iloc4), lldt,
2508 $ work(ipw6+dim1), nwin )
2509 IF( nprow.GT.1 )
THEN
2510 north = mod( myrow-1+nprow, nprow )
2511 CALL dgesd2d( ictxt, dim4, tcols,
2512 $ work(ipw6+dim1), nwin, north,
2514 CALL dgerv2d( ictxt, dim1, tcols,
2515 $ work(ipw6), nwin, north,
2526 IF( mycol.EQ.csrc1 .OR. mycol.EQ.csrc4 )
THEN
2527 DO 430 indx = 1, nprow
2528 IF( mycol.EQ.csrc1 )
THEN
2529 CALL infog2l( 1+(indx-1)*nb, i, descq,
2530 $ nprow, npcol, myrow, mycol, iloc,
2531 $ jloc1, rsrc, csrc1 )
2532 IF( myrow.EQ.rsrc )
THEN
2533 CALL dlamov(
'All', qrows, dim1,
2534 $ q((jloc1-1)*lldq+iloc), lldq,
2535 $ work(ipw7), qrows )
2536 IF( npcol.GT.1 )
THEN
2537 east = mod( mycol + 1, npcol )
2538 CALL dgesd2d( ictxt, qrows, dim1,
2539 $ work(ipw7), qrows, rsrc,
2541 CALL dgerv2d( ictxt, qrows, dim4,
2542 $ work(ipw7+qrows*dim1),
2543 $ qrows, rsrc, east )
2547 IF( mycol.EQ.csrc4 )
THEN
2548 CALL infog2l( 1+(indx-1)*nb, i+dim1,
2549 $ descq, nprow, npcol, myrow, mycol,
2550 $ iloc, jloc4, rsrc, csrc4 )
2551 IF( myrow.EQ.rsrc )
THEN
2552 CALL dlamov(
'All', qrows, dim4,
2553 $ q((jloc4-1)*lldq+iloc), lldq,
2554 $ work(ipw7+qrows*dim1), qrows )
2555 IF( npcol.GT.1 )
THEN
2556 west = mod( mycol-1+npcol,
2558 CALL dgesd2d( ictxt, qrows, dim4,
2559 $ work(ipw7+qrows*dim1),
2560 $ qrows, rsrc, west )
2561 CALL dgerv2d( ictxt, qrows, dim1,
2562 $ work(ipw7), qrows, rsrc,
2576 DO 323 window = window0, wine, 2
2577 rsrc4 = iwork(irsrc+window-1)
2578 csrc4 = iwork(icsrc+window-1)
2579 rsrc1 = mod( rsrc4 - 1 + nprow, nprow )
2580 csrc1 = mod( csrc4 - 1 + npcol, npcol )
2582 IF( ((mycol.EQ.csrc1.OR.mycol.EQ.csrc4).AND.dir.EQ.2)
2583 $ .OR. ((myrow.EQ.rsrc1.OR.myrow.EQ.rsrc4).AND.
2588 IF( window.EQ.1 .AND. skip1cr )
GO TO 328
2594 nitraf = pitraf - ipiw
2596 DO 405 k = 1, nitraf
2597 IF( iwork( ipiw + k - 1 ).LE.nwin )
THEN
2607 IF( flops.NE.0 .AND.
2608 $ ( 2*flops*100 )/( 2*nwin*nwin ) .GE. mmult )
2611 CALL dlaset(
'All', nwin, nwin, zero, one,
2612 $ work( ipw4 ), nwin )
2613 work(ipw8) = dble(myrow)
2614 work(ipw8+1) = dble(mycol)
2615 CALL bdlaapp( 1, nwin, nwin, ncb, work( ipw4 ),
2616 $ nwin, nitraf, iwork(ipiw), work( ipw3 ),
2622 IF( ishh .OR. dim1.NE.ks .OR. dim4.NE.ks )
THEN
2628 DO 440 indx = 1,
min(i-1,1+(nprow-1)*nb),
2630 IF( mycol.EQ.csrc1 )
THEN
2631 CALL infog2l( indx, i, desct, nprow,
2632 $ npcol, myrow, mycol, iloc,
2633 $ jloc, rsrc, csrc1 )
2634 IF( myrow.EQ.rsrc )
THEN
2635 CALL dgemm(
'No transpose',
2636 $
'No transpose', trows, dim1,
2637 $ nwin, one, work( ipw5 ),
2638 $ trows, work( ipw4 ), nwin,
2639 $ zero, work(ipw8), trows )
2640 CALL dlamov(
'All', trows, dim1,
2641 $ work(ipw8), trows,
2642 $ t((jloc-1)*lldt+iloc),
2646 IF( mycol.EQ.csrc4 )
THEN
2647 CALL infog2l( indx, i+dim1, desct,
2648 $ nprow, npcol, myrow, mycol,
2649 $ iloc, jloc, rsrc, csrc4 )
2650 IF( myrow.EQ.rsrc )
THEN
2651 CALL dgemm(
'No transpose',
2652 $
'No transpose', trows, dim4,
2653 $ nwin, one, work( ipw5 ),
2655 $ work( ipw4+nwin*dim1 ),
2656 $ nwin, zero, work(ipw8),
2658 CALL dlamov(
'All', trows, dim4,
2659 $ work(ipw8), trows,
2660 $ t((jloc-1)*lldt+iloc),
2667 DO 450 indx = 1,
min(n,1+(nprow-1)*nb),
2669 IF( mycol.EQ.csrc1 )
THEN
2671 $ nprow, npcol, myrow, mycol,
2672 $ iloc, jloc, rsrc, csrc1 )
2673 IF( myrow.EQ.rsrc )
THEN
2674 CALL dgemm(
'No transpose',
2675 $
'No transpose', qrows,
2677 $ work( ipw7 ), qrows,
2678 $ work( ipw4 ), nwin,
2681 CALL dlamov(
'All', qrows,
2682 $ dim1, work(ipw8), qrows,
2683 $ q((jloc-1)*lldq+iloc),
2687 IF( mycol.EQ.csrc4 )
THEN
2689 $ descq, nprow, npcol, myrow,
2690 $ mycol, iloc, jloc, rsrc,
2692 IF( myrow.EQ.rsrc )
THEN
2693 CALL dgemm(
'No transpose',
2694 $
'No transpose', qrows,
2696 $ work( ipw7 ), qrows,
2697 $ work( ipw4+nwin*dim1 ),
2698 $ nwin, zero, work(ipw8),
2700 CALL dlamov(
'All', qrows,
2701 $ dim4, work(ipw8), qrows,
2702 $ q((jloc-1)*lldq+iloc),
2714 IF ( lihic.LT.n )
THEN
2715 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc4
2716 $ .AND.mod(lihic,nb).NE.0 )
THEN
2718 CALL infog2l( i, indx, desct, nprow,
2719 $ npcol, myrow, mycol, iloc,
2720 $ jloc, rsrc1, csrc4 )
2721 CALL dgemm(
'Transpose',
2722 $
'No Transpose', dim1, tcols,
2723 $ nwin, one, work(ipw4), nwin,
2724 $ work( ipw6 ), nwin, zero,
2725 $ work(ipw8), dim1 )
2726 CALL dlamov(
'All', dim1, tcols,
2728 $ t((jloc-1)*lldt+iloc), lldt )
2730 IF( myrow.EQ.rsrc4.AND.mycol.EQ.csrc4
2731 $ .AND.mod(lihic,nb).NE.0 )
THEN
2733 CALL infog2l( i+dim1, indx, desct,
2734 $ nprow, npcol, myrow, mycol,
2735 $ iloc, jloc, rsrc4, csrc4 )
2736 CALL dgemm(
'Transpose',
2737 $
'No Transpose', dim4, tcols,
2739 $ work( ipw4+dim1*nwin ), nwin,
2740 $ work( ipw6), nwin, zero,
2741 $ work(ipw8), dim4 )
2742 CALL dlamov(
'All', dim4, tcols,
2744 $ t((jloc-1)*lldt+iloc), lldt )
2746 indxs = iceil(lihic,nb)*nb + 1
2747 indxe =
min(n,indxs+(npcol-2)*nb)
2748 DO 460 indx = indxs, indxe, nb
2749 IF( myrow.EQ.rsrc1 )
THEN
2751 $ nprow, npcol, myrow, mycol,
2752 $ iloc, jloc, rsrc1, csrc )
2753 IF( mycol.EQ.csrc )
THEN
2754 CALL dgemm(
'Transpose',
2755 $
'No Transpose', dim1,
2757 $ work( ipw4 ), nwin,
2758 $ work( ipw6 ), nwin,
2759 $ zero, work(ipw8), dim1 )
2760 CALL dlamov(
'All', dim1,
2761 $ tcols, work(ipw8), dim1,
2762 $ t((jloc-1)*lldt+iloc),
2766 IF( myrow.EQ.rsrc4 )
THEN
2768 $ desct, nprow, npcol, myrow,
2769 $ mycol, iloc, jloc, rsrc4,
2771 IF( mycol.EQ.csrc )
THEN
2772 CALL dgemm(
'Transpose',
2773 $
'No Transpose', dim4,
2775 $ work( ipw4+nwin*dim1 ),
2776 $ nwin, work( ipw6 ),
2777 $ nwin, zero, work(ipw8),
2779 CALL dlamov(
'All', dim4,
2780 $ tcols, work(ipw8), dim4,
2781 $ t((jloc-1)*lldt+iloc),
2822 indxe =
min(i-1,1+(nprow-1)*nb)
2823 DO 470 indx = 1, indxe, nb
2824 IF( mycol.EQ.csrc1 )
THEN
2825 CALL infog2l( indx, i, desct, nprow,
2826 $ npcol, myrow, mycol, iloc,
2827 $ jloc, rsrc, csrc1 )
2828 IF( myrow.EQ.rsrc )
THEN
2829 CALL dlamov(
'All', trows, ks,
2830 $ work( ipw5+trows*dim4),
2831 $ trows, work(ipw8), trows )
2832 CALL dtrmm(
'Right',
'Upper',
2834 $
'Non-unit', trows, ks,
2835 $ one, work( ipw4+dim4 ),
2836 $ nwin, work(ipw8), trows )
2837 CALL dgemm(
'No transpose',
2838 $
'No transpose', trows, ks,
2839 $ dim4, one, work( ipw5 ),
2840 $ trows, work( ipw4 ), nwin,
2841 $ one, work(ipw8), trows )
2842 CALL dlamov(
'All', trows, ks,
2843 $ work(ipw8), trows,
2844 $ t((jloc-1)*lldt+iloc),
2852 IF( mycol.EQ.csrc4 )
THEN
2853 CALL infog2l( indx, i+dim1, desct,
2854 $ nprow, npcol, myrow, mycol,
2855 $ iloc, jloc, rsrc, csrc4 )
2856 IF( myrow.EQ.rsrc )
THEN
2857 CALL dlamov(
'All', trows, dim4,
2858 $ work(ipw5), trows,
2859 $ work( ipw8 ), trows )
2860 CALL dtrmm(
'Right',
'Lower',
2862 $
'Non-unit', trows, dim4,
2863 $ one, work( ipw4+nwin*ks ),
2864 $ nwin, work( ipw8 ), trows )
2865 CALL dgemm(
'No transpose',
2866 $
'No transpose', trows, dim4,
2868 $ work( ipw5+trows*dim4),
2870 $ work( ipw4+nwin*ks+dim4 ),
2871 $ nwin, one, work( ipw8 ),
2873 CALL dlamov(
'All', trows, dim4,
2874 $ work(ipw8), trows,
2875 $ t((jloc-1)*lldt+iloc),
2885 indxe =
min(n,1+(nprow-1)*nb)
2886 DO 480 indx = 1, indxe, nb
2887 IF( mycol.EQ.csrc1 )
THEN
2889 $ nprow, npcol, myrow, mycol,
2890 $ iloc, jloc, rsrc, csrc1 )
2891 IF( myrow.EQ.rsrc )
THEN
2892 CALL dlamov(
'All', qrows, ks,
2893 $ work( ipw7+qrows*dim4),
2894 $ qrows, work(ipw8),
2896 CALL dtrmm(
'Right',
'Upper',
2898 $
'Non-unit', qrows,
2900 $ work( ipw4+dim4 ), nwin,
2901 $ work(ipw8), qrows )
2902 CALL dgemm(
'No transpose',
2903 $
'No transpose', qrows,
2905 $ work( ipw7 ), qrows,
2906 $ work( ipw4 ), nwin, one,
2907 $ work(ipw8), qrows )
2908 CALL dlamov(
'All', qrows, ks,
2909 $ work(ipw8), qrows,
2910 $ q((jloc-1)*lldq+iloc),
2918 IF( mycol.EQ.csrc4 )
THEN
2920 $ descq, nprow, npcol, myrow,
2921 $ mycol, iloc, jloc, rsrc,
2923 IF( myrow.EQ.rsrc )
THEN
2924 CALL dlamov(
'All', qrows,
2925 $ dim4, work(ipw7), qrows,
2926 $ work( ipw8 ), qrows )
2927 CALL dtrmm(
'Right',
'Lower',
2929 $
'Non-unit', qrows,
2931 $ work( ipw4+nwin*ks ),
2932 $ nwin, work( ipw8 ),
2934 CALL dgemm(
'No transpose',
2935 $
'No transpose', qrows,
2937 $ work(ipw7+qrows*(dim4)),
2939 $ work(ipw4+nwin*ks+dim4),
2940 $ nwin, one, work( ipw8 ),
2942 CALL dlamov(
'All', qrows,
2943 $ dim4, work(ipw8), qrows,
2944 $ q((jloc-1)*lldq+iloc),
2953 IF ( lihic.LT.n )
THEN
2958 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc4
2959 $ .AND.mod(lihic,nb).NE.0 )
THEN
2961 CALL infog2l( i, indx, desct, nprow,
2962 $ npcol, myrow, mycol, iloc,
2963 $ jloc, rsrc1, csrc4 )
2964 CALL dlamov(
'All', ks, tcols,
2965 $ work( ipw6+dim4 ), nwin,
2967 CALL dtrmm(
'Left',
'Upper',
2968 $
'Transpose',
'Non-unit',
2970 $ work( ipw4+dim4 ), nwin,
2972 CALL dgemm(
'Transpose',
2973 $
'No transpose', ks, tcols,
2974 $ dim4, one, work(ipw4), nwin,
2975 $ work(ipw6), nwin, one,
2977 CALL dlamov(
'All', ks, tcols,
2979 $ t((jloc-1)*lldt+iloc), lldt )
2985 IF( myrow.EQ.rsrc4.AND.mycol.EQ.csrc4
2986 $ .AND.mod(lihic,nb).NE.0 )
THEN
2988 CALL infog2l( i+dim1, indx, desct,
2989 $ nprow, npcol, myrow, mycol,
2990 $ iloc, jloc, rsrc4, csrc4 )
2991 CALL dlamov(
'All', dim4, tcols,
2992 $ work( ipw6 ), nwin,
2993 $ work( ipw8 ), dim4 )
2994 CALL dtrmm(
'Left',
'Lower',
2995 $
'Transpose',
'Non-unit',
2997 $ work( ipw4+nwin*ks ), nwin,
2998 $ work( ipw8 ), dim4 )
2999 CALL dgemm(
'Transpose',
3000 $
'No Transpose', dim4, tcols,
3002 $ work( ipw4+nwin*ks+dim4 ),
3003 $ nwin, work( ipw6+dim1 ), nwin,
3004 $ one, work( ipw8), dim4 )
3005 CALL dlamov(
'All', dim4, tcols,
3007 $ t((jloc-1)*lldt+iloc), lldt )
3013 indxs = iceil(lihic,nb)*nb+1
3014 indxe =
min(n,indxs+(npcol-2)*nb)
3015 DO 490 indx = indxs, indxe, nb
3016 IF( myrow.EQ.rsrc1 )
THEN
3018 $ nprow, npcol, myrow, mycol,
3019 $ iloc, jloc, rsrc1, csrc )
3020 IF( mycol.EQ.csrc )
THEN
3021 CALL dlamov(
'All', ks, tcols,
3022 $ work( ipw6+dim4 ), nwin,
3024 CALL dtrmm(
'Left',
'Upper',
3028 $ work( ipw4+dim4 ), nwin,
3030 CALL dgemm(
'Transpose',
3031 $
'No transpose', ks,
3034 $ work(ipw6), nwin, one,
3036 CALL dlamov(
'All', ks, tcols,
3038 $ t((jloc-1)*lldt+iloc),
3046 IF( myrow.EQ.rsrc4 )
THEN
3048 $ desct, nprow, npcol, myrow,
3049 $ mycol, iloc, jloc, rsrc4,
3051 IF( mycol.EQ.csrc )
THEN
3052 CALL dlamov(
'All', dim4,
3053 $ tcols, work( ipw6 ),
3054 $ nwin, work( ipw8 ),
3056 CALL dtrmm(
'Left',
'Lower',
3060 $ work( ipw4+nwin*ks ),
3061 $ nwin, work( ipw8 ),
3063 CALL dgemm(
'Transpose',
3064 $
'No Transpose', dim4,
3066 $ work(ipw4+nwin*ks+dim4),
3067 $ nwin, work( ipw6+dim1 ),
3068 $ nwin, one, work( ipw8),
3070 CALL dlamov(
'All', dim4,
3071 $ tcols, work(ipw8), dim4,
3072 $ t((jloc-1)*lldt+iloc),
3080 ELSEIF( flops.NE.0 )
THEN
3095 indxe =
min(i-1,1+(nprow-1)*nb)
3096 DO 500 indx = 1, indxe, nb
3097 CALL infog2l( indx, i, desct, nprow,
3098 $ npcol, myrow, mycol, iloc, jloc,
3100 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc )
3102 CALL bdlaapp( 1, trows, nwin, ncb,
3103 $ work(ipw5), trows, nitraf,
3104 $ iwork(ipiw), work( ipw3 ),
3106 CALL dlamov(
'All', trows, dim1,
3107 $ work(ipw5), trows,
3108 $ t((jloc-1)*lldt+iloc ), lldt )
3110 CALL infog2l( indx, i+dim1, desct, nprow,
3111 $ npcol, myrow, mycol, iloc, jloc,
3113 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc )
3116 $
CALL bdlaapp( 1, trows, nwin, ncb,
3117 $ work(ipw5), trows, nitraf,
3118 $ iwork(ipiw), work( ipw3 ),
3120 CALL dlamov(
'All', trows, dim4,
3121 $ work(ipw5+trows*dim1), trows,
3122 $ t((jloc-1)*lldt+iloc ), lldt )
3126 indxe =
min(n,1+(nprow-1)*nb)
3127 DO 510 indx = 1, indxe, nb
3128 CALL infog2l( indx, i, descq, nprow,
3129 $ npcol, myrow, mycol, iloc, jloc,
3131 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc )
3133 CALL bdlaapp( 1, qrows, nwin, ncb,
3134 $ work(ipw7), qrows, nitraf,
3135 $ iwork(ipiw), work( ipw3 ),
3137 CALL dlamov(
'All', qrows, dim1,
3138 $ work(ipw7), qrows,
3139 $ q((jloc-1)*lldq+iloc ), lldq )
3141 CALL infog2l( indx, i+dim1, descq,
3142 $ nprow, npcol, myrow, mycol, iloc,
3143 $ jloc, rsrc, csrc )
3144 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc )
3147 $
CALL bdlaapp( 1, qrows, nwin,
3148 $ ncb, work(ipw7), qrows,
3149 $ nitraf, iwork(ipiw),
3150 $ work( ipw3 ), work(ipw8) )
3151 CALL dlamov(
'All', qrows, dim4,
3152 $ work(ipw7+qrows*dim1), qrows,
3153 $ q((jloc-1)*lldq+iloc ), lldq )
3160 IF( lihic.LT.n )
THEN
3162 CALL infog2l( i, indx, desct, nprow,
3163 $ npcol, myrow, mycol, iloc, jloc,
3165 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc.AND.
3166 $ mod(lihic,nb).NE.0 )
THEN
3167 CALL bdlaapp( 0, nwin, tcols, ncb,
3168 $ work( ipw6 ), nwin, nitraf,
3169 $ iwork(ipiw), work( ipw3 ),
3171 CALL dlamov(
'All', dim1, tcols,
3172 $ work( ipw6 ), nwin,
3173 $ t((jloc-1)*lldt+iloc), lldt )
3175 CALL infog2l( i+dim1, indx, desct, nprow,
3176 $ npcol, myrow, mycol, iloc, jloc,
3178 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc.AND.
3179 $ mod(lihic,nb).NE.0 )
THEN
3181 $
CALL bdlaapp( 0, nwin, tcols, ncb,
3182 $ work( ipw6 ), nwin, nitraf,
3183 $ iwork(ipiw), work( ipw3 ),
3185 CALL dlamov(
'All', dim4, tcols,
3186 $ work( ipw6+dim1 ), nwin,
3187 $ t((jloc-1)*lldt+iloc), lldt )
3189 indxs = iceil(lihic,nb)*nb + 1
3190 indxe =
min(n,indxs+(npcol-2)*nb)
3191 DO 520 indx = indxs, indxe, nb
3192 CALL infog2l( i, indx, desct, nprow,
3193 $ npcol, myrow, mycol, iloc, jloc,
3195 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc )
3197 CALL bdlaapp( 0, nwin, tcols, ncb,
3198 $ work(ipw6), nwin, nitraf,
3199 $ iwork(ipiw), work( ipw3 ),
3201 CALL dlamov(
'All', dim1, tcols,
3202 $ work( ipw6 ), nwin,
3203 $ t((jloc-1)*lldt+iloc), lldt )
3205 CALL infog2l( i+dim1, indx, desct,
3206 $ nprow, npcol, myrow, mycol, iloc,
3207 $ jloc, rsrc, csrc )
3208 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc )
3211 $
CALL bdlaapp( 0, nwin, tcols,
3212 $ ncb, work(ipw6), nwin, nitraf,
3213 $ iwork(ipiw), work( ipw3 ),
3215 CALL dlamov(
'All', dim4, tcols,
3216 $ work( ipw6+dim1 ), nwin,
3217 $ t((jloc-1)*lldt+iloc), lldt )
3238 IF( lastwait .AND. last.LT.2 )
GO TO 308
3242 CALL blacs_barrier( ictxt,
'All' )
3249 $
CALL igamx2d( ictxt,
'All', top, 1, 1, ierr, 1, -1,
3252 IF( ierr.NE.0 )
THEN
3257 IF( myierr.NE.0 ) info =
max(1,i+kks-1)
3259 $
CALL igamx2d( ictxt,
'All', top, 1, 1, info, 1, -1,
3267 rsrc = indxg2p( k, nb, myrow, desct( rsrc_ ), nprow )
3268 csrc = indxg2p( k, nb, mycol, desct( csrc_ ), npcol )
3269 IF( myrow.NE.rsrc .OR. mycol.NE.csrc )
3273 $
CALL igsum2d( ictxt,
'All', top, n, 1,
SELECT, n, -1, -1 )
3281 IF(
SELECT(ilo).NE.0 )
GO TO 523
3287 IF(
SELECT(ihi).EQ.0 )
GO TO 527
3299 IF( info.NE.0 )
THEN
3301 rsrc = indxg2p( k, nb, myrow, desct( rsrc_ ), nprow )
3302 csrc = indxg2p( k, nb, mycol, desct( csrc_ ), npcol )
3303 IF( myrow.NE.rsrc .OR. mycol.NE.csrc )
3307 $
CALL igsum2d( ictxt,
'All', top, n, 1,
SELECT, n, -1, -1 )
3332 IF( .NOT. pair )
THEN
3333 border = ( k.NE.n .AND. mod( k, nb ).EQ.0 ) .OR.
3334 % ( k.NE.1 .AND. mod( k, nb ).EQ.1 )
3335 IF( .NOT. border )
THEN
3336 CALL infog2l( k, k, desct, nprow, npcol, myrow, mycol,
3337 $ iloc1, jloc1, trsrc1, tcsrc1 )
3338 IF( myrow.EQ.trsrc1 .AND. mycol.EQ.tcsrc1 )
THEN
3339 elem1 = t((jloc1-1)*lldt+iloc1)
3341 elem3 = t((jloc1-1)*lldt+iloc1+1)
3345 IF( elem3.NE.zero )
THEN
3346 elem2 = t((jloc1)*lldt+iloc1)
3347 elem4 = t((jloc1)*lldt+iloc1+1)
3348 CALL dlanv2( elem1, elem2, elem3, elem4,
3349 $ wr( k ), wi( k ), wr( k+1 ), wi( k+1 ), sn,
3354 tmp = t((jloc1-2)*lldt+iloc1)
3355 IF( tmp.NE.zero )
THEN
3356 elem1 = t((jloc1-2)*lldt+iloc1-1)
3357 elem2 = t((jloc1-1)*lldt+iloc1-1)
3358 elem3 = t((jloc1-2)*lldt+iloc1)
3359 elem4 = t((jloc1-1)*lldt+iloc1)
3360 CALL dlanv2( elem1, elem2, elem3, elem4,
3361 $ wr( k-1 ), wi( k-1 ), wr( k ),
3385 DO 570 k = nb, n-1, nb
3386 CALL infog2l( k, k, desct, nprow, npcol, myrow, mycol,
3387 $ iloc1, jloc1, trsrc1, tcsrc1 )
3388 CALL infog2l( k, k+1, desct, nprow, npcol, myrow, mycol,
3389 $ iloc2, jloc2, trsrc2, tcsrc2 )
3390 CALL infog2l( k+1, k, desct, nprow, npcol, myrow, mycol,
3391 $ iloc3, jloc3, trsrc3, tcsrc3 )
3392 CALL infog2l( k+1, k+1, desct, nprow, npcol, myrow, mycol,
3393 $ iloc4, jloc4, trsrc4, tcsrc4 )
3394 IF( myrow.EQ.trsrc2 .AND. mycol.EQ.tcsrc2 )
THEN
3395 elem2 = t((jloc2-1)*lldt+iloc2)
3396 IF( trsrc1.NE.trsrc2 .OR. tcsrc1.NE.tcsrc2 )
3397 $
CALL dgesd2d( ictxt, 1, 1, elem2, 1, trsrc1, tcsrc1 )
3399 IF( myrow.EQ.trsrc3 .AND. mycol.EQ.tcsrc3 )
THEN
3400 elem3 = t((jloc3-1)*lldt+iloc3)
3401 IF( trsrc1.NE.trsrc3 .OR. tcsrc1.NE.tcsrc3 )
3402 $
CALL dgesd2d( ictxt, 1, 1, elem3, 1, trsrc1, tcsrc1 )
3404 IF( myrow.EQ.trsrc4 .AND. mycol.EQ.tcsrc4 )
THEN
3405 work(1) = t((jloc4-1)*lldt+iloc4)
3407 work(2) = t((jloc4-1)*lldt+iloc4+1)
3411 IF( trsrc1.NE.trsrc4 .OR. tcsrc1.NE.tcsrc4 )
3412 $
CALL dgesd2d( ictxt, 2, 1, work, 2, trsrc1, tcsrc1 )
3414 IF( myrow.EQ.trsrc1 .AND. mycol.EQ.tcsrc1 )
THEN
3415 elem1 = t((jloc1-1)*lldt+iloc1)
3416 IF( trsrc1.NE.trsrc2 .OR. tcsrc1.NE.tcsrc2 )
3417 $
CALL dgerv2d( ictxt, 1, 1, elem2, 1, trsrc2, tcsrc2 )
3418 IF( trsrc1.NE.trsrc3 .OR. tcsrc1.NE.tcsrc3 )
3419 $
CALL dgerv2d( ictxt, 1, 1, elem3, 1, trsrc3, tcsrc3 )
3420 IF( trsrc1.NE.trsrc4 .OR. tcsrc1.NE.tcsrc4 )
3421 $
CALL dgerv2d( ictxt, 2, 1, work, 2, trsrc4, tcsrc4 )
3424 IF( elem5.EQ.zero )
THEN
3425 IF( wr( k ).EQ.zero .AND. wi( k ).EQ.zero )
THEN
3426 CALL dlanv2( elem1, elem2, elem3, elem4, wr( k ),
3427 $ wi( k ), wr( k+1 ), wi( k+1 ), sn, cs )
3428 ELSEIF( wr( k+1 ).EQ.zero .AND. wi( k+1 ).EQ.zero )
THEN
3431 ELSEIF( wr( k ).EQ.zero .AND. wi( k ).EQ.zero )
THEN
3437 IF( nprocs.GT.1 )
THEN
3438 CALL dgsum2d( ictxt,
'All', top, n, 1, wr, n, -1, -1 )
3439 CALL dgsum2d( ictxt,
'All', top, n, 1, wi, n, -1, -1 )
3444 work( 1 ) = dble(lwmin)