1 SUBROUTINE pstrord( COMPQ, SELECT, PARA, N, T, IT, JT,
2 $ DESCT, Q, IQ, JQ, DESCQ, WR, WI, M, WORK, LWORK,
3 $ IWORK, LIWORK, INFO )
16 INTEGER INFO, LIWORK, LWORK, M, N,
21 INTEGER PARA( 6 ), DESCT( * ), DESCQ( * ), IWORK( * )
22 REAL Q( * ), T( * ), WI( * ), WORK( * ), WR( * )
299 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
300 $ lld_, mb_, m_, nb_, n_, rsrc_
302 PARAMETER ( TOP =
'1-Tree',
303 $ block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
304 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
305 $ rsrc_ = 7, csrc_ = 8, lld_ = 9,
306 $ zero = 0.0, one = 1.0 )
309 LOGICAL LQUERY, PAIR, SWAP, WANTQ,
310 $ ISHH, FIRST, SKIP1CR, BORDER, LASTWAIT
311 INTEGER NPROW, NPCOL, MYROW, MYCOL, NB, NPROCS,
312 $ IERR, DIM1, INDX, LLDT, TRSRC, TCSRC, ILOC1,
313 $ jloc1, myierr, ictxt,
314 $ rsrc1, csrc1, iloc3, jloc3, trsrc3,
315 $ tcsrc3, iloc, jloc, trsrc4, tcsrc4,
316 $ flops, i, ilo, ihi, j, k, kk, kks,
317 $ ks, liwmin, lwmin, mmult, n1, n2,
318 $ ncb, ndtraf, nitraf, nwin, numwin, pdtraf,
319 $ pitraf, pdw, wineig, winsiz, lldq,
320 $ rsrc, csrc, ililo, ilihi, ilsel, irsrc,
321 $ icsrc, ipiw, ipw1, ipw2, ipw3, tihi, tilo,
322 $ lihi, window, lilo, lsel, buffer,
323 $ nmwin2, bufflen, lrows, lcols, iloc2, jloc2,
324 $ wneicr, window0, rsrc4, csrc4, lihi4, rsrc3,
325 $ csrc3, rsrc2, csrc2, lihic, lihi1, ilen4,
326 $ seli4, ilen1, dim4, ipw4, qrows, trows,
327 $ tcols, ipw5, ipw6, ipw7, ipw8, jloc4,
328 $ east, west, iloc4, south, north, indxs,
329 $ itt, jtt, ilen, dlen, indxe, trsrc1, tcsrc1,
330 $ trsrc2, tcsrc2, ilos, dir, tlihi, tlilo, tlsel,
331 $ round, last, win0s, win0e, wine
332 REAL ELEM, ELEM1, ELEM2, ELEM3, ELEM4, SN, CS, TMP,
336 INTEGER IBUFF( 8 ), IDUM1( 1 ), IDUM2( 1 ), MMAX( 1 ),
341 INTEGER NUMROC, INDXG2P, INDXG2L
342 EXTERNAL lsame, numroc, indxg2p, indxg2l
347 $
infog2l, dgsum2d, sgesd2d, sgerv2d, sgebs2d,
348 $ sgebr2d, igsum2d, blacs_gridinfo, igebs2d,
352 INTRINSIC abs,
max, sqrt,
min
361 ictxt = desct( ctxt_ )
362 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
368 IF( nprow.EQ.-1 )
THEN
374 lquery = lwork.EQ.-1 .OR. liwork.EQ.-1
379 CALL chk1mat( n, 5, n, 5, it, jt, desct, 9, info )
382 CALL chk1mat( n, 5, n, 5, iq, jq, descq, 13, info )
388 IF( desct( mb_ ).NE.desct( nb_ ) ) info = -(1000*9 + mb_)
391 IF( descq( mb_ ).NE.descq( nb_ ) ) info = -(1000*13 + mb_)
394 IF( desct( mb_ ).NE.descq( mb_ ) ) info = -(1000*9 + mb_)
400 IF( n.NE.desct( mb_ ) .AND. desct( mb_ ).LT.3 )
401 $ info = -(1000*9 + mb_)
402 IF( n.NE.descq( mb_ ) .AND. descq( mb_ ).LT.3 )
403 $ info = -(1000*13 + mb_)
410 IF( para(1).LT.1 .OR. para(1).GT.
min(nprow,npcol) )
411 $ info = -(1000 * 4 + 1)
412 IF( para(2).LT.1 .OR. para(2).GE.para(3) )
413 $ info = -(1000 * 4 + 2)
414 IF( para(3).LT.1 .OR. para(3).GT.nb )
415 $ info = -(1000 * 4 + 3)
416 IF( para(4).LT.0 .OR. para(4).GT.100 )
417 $ info = -(1000 * 4 + 4)
418 IF( para(5).LT.1 .OR. para(5).GT.nb )
419 $ info = -(1000 * 4 + 5)
420 IF( para(6).LT.1 .OR. para(6).GT.para(2) )
421 $ info = -(1000 * 4 + 6)
427 IF( it.NE.1 ) info = -6
428 IF( jt.NE.it ) info = -7
429 IF( iq.NE.1 ) info = -10
430 IF( jq.NE.iq ) info = -11
436 CALL pchk1mat( n, 5, n, 5, it, jt, desct, 9, 0, idum1,
440 CALL pchk1mat( n, 5, n, 5, iq, jq, descq, 13, 0, idum1,
444 CALL pchk2mat( n, 5, n, 5, it, jt, desct, 9, n, 5, n, 5,
445 $ iq, jq, descq, 13, 0, idum1, idum2, info )
450 IF( info.EQ.0 .OR. lquery )
THEN
452 wantq = lsame( compq,
'V' )
468 CALL infog2l( k+1, k, desct, nprow, npcol,
469 $ myrow, mycol, itt, jtt, trsrc, tcsrc )
470 IF( myrow.EQ.trsrc .AND. mycol.EQ.tcsrc )
THEN
471 elem = t( (jtt-1)*lldt + itt )
472 IF( elem.NE.zero )
THEN
473 IF(
SELECT(k).NE.0 .AND.
474 $
SELECT(k+1).EQ.0 )
THEN
477 ELSEIF(
SELECT(k).EQ.0 .AND.
478 $
SELECT(k+1).NE.0 )
THEN
485 IF(
SELECT(k).NE.0 ) m = m + 1
490 $
CALL igamx2d( ictxt,
'All', top, 1, 1, mmax( 1 ), 1, -1,
493 $
CALL igamn2d( ictxt,
'All', top, 1, 1, mmin( 1 ), 1, -1,
495 IF( mmax( 1 ).GT.mmin( 1 ) )
THEN
498 $
CALL igamx2d( ictxt,
'All', top, n, 1,
SELECT, n,
499 $ -1, -1, -1, -1, -1 )
507 trows = numroc( n, nb, myrow, desct(rsrc_), nprow )
508 tcols = numroc( n, nb, mycol, desct(csrc_), npcol )
509 lwmin = n + 7*nb**2 + 2*trows*para( 3 ) + tcols*para( 3 ) +
510 $
max( trows*para( 3 ), tcols*para( 3 ) )
511 liwmin = 5*para( 1 ) + para( 2 )*para( 3 ) -
512 $ para( 2 ) * ( para( 2 ) + 1 ) / 2
514 IF( lwork.LT.lwmin .AND. .NOT.lquery )
THEN
516 ELSE IF( liwork.LT.liwmin .AND. .NOT.lquery )
THEN
524 IF( nprocs.GT.1 )
THEN
525 CALL igamx2d( ictxt,
'All', top, 1, 1, info, 1, -1, -1,
531 IF( info.NE.0 .AND. .NOT.lquery )
THEN
533 CALL pxerbla( ictxt,
'PSTRORD', -info )
535 ELSEIF( lquery )
THEN
536 work( 1 ) = float(lwmin)
543 IF( m.EQ.n .OR. m.EQ.0 )
GO TO 545
548 wineig =
max( para( 2 ), 2 )
549 winsiz =
min(
max( para( 3 ), para( 2 )*2 ), nb )
566 ilihi = ililo + numwin
567 ilsel = ilihi + numwin
568 irsrc = ilsel + numwin
569 icsrc = irsrc + numwin
570 ipiw = icsrc + numwin
588 IF(
SELECT(ilo).NE.0 )
GO TO 40
611 IF(
SELECT(ilos).EQ.0 )
GO TO 52
613 IF(
SELECT(ilos+1).NE.0 .AND. mod(ilos,nb).EQ.0 )
THEN
614 CALL pselget(
'All', top, elem, t, ilos+1, ilos, desct )
615 IF( elem.NE.zero )
GO TO 52
630 nmwin2 = (iceil(ihi,nb)*nb - (ilo-mod(ilo,nb)+1)+1) / nb
631 nmwin2 =
min(
min( numwin, nmwin2 ), iceil(n,nb) - j + 1 )
638 iwork( ilsel+k-1) = 0
639 iwork( ililo+k-1) =
max( ilo, (j-1)*nb+(k-1)*nb+1 )
640 lilo = iwork( ililo+k-1 )
642 IF(
SELECT(lilo).NE.0 .AND. lilo.LT.(j+k-1)*nb )
THEN
644 IF( lilo.LE.n )
GO TO 82
646 iwork( ililo+k-1 ) = lilo
651 lilo = iwork(ililo+k-1)
652 IF( lilo.GT.nb )
THEN
653 CALL pselget(
'All', top, elem, t, lilo, lilo-1, desct )
654 IF( elem.NE.zero )
THEN
655 IF( lilo.LT.(j+k-1)*nb )
THEN
656 iwork(ililo+k-1) = iwork(ililo+k-1) + 1
658 iwork(ililo+k-1) = iwork(ililo+k-1) - 1
666 iwork( ilihi+k-1 ) = iwork( ililo+k-1 )
667 iwork( irsrc+k-1 ) = indxg2p( iwork(ililo+k-1), nb, myrow,
668 $ desct( rsrc_ ), nprow )
669 iwork( icsrc+k-1 ) = indxg2p( iwork(ililo+k-1), nb, mycol,
670 $ desct( csrc_ ), npcol )
671 tilo = iwork(ililo+k-1)
672 tihi =
min( n, iceil( tilo, nb ) * nb )
673 DO 90 kk = tihi, tilo, -1
674 IF(
SELECT(kk).NE.0 )
THEN
675 iwork(ilihi+k-1) =
max(iwork(ilihi+k-1) , kk )
676 iwork(ilsel+k-1) = iwork(ilsel+k-1) + 1
677 IF( iwork(ilsel+k-1).GT.wineig )
THEN
678 iwork(ilihi+k-1) = kk
692 lihi = iwork(ilihi+k-1)
694 CALL pselget(
'All', top, elem, t, lihi+1, lihi, desct )
695 IF( elem.NE.zero )
THEN
696 IF( iceil( lihi, nb ) .NE. iceil( lihi+1, nb ) .OR.
697 $ iwork( ilsel+k-1 ).EQ.wineig )
THEN
698 iwork( ilihi+k-1 ) = iwork( ilihi+k-1 ) - 1
699 IF( iwork( ilsel+k-1 ).GT.2 )
700 $ iwork( ilsel+k-1 ) = iwork( ilsel+k-1 ) - 1
702 iwork( ilihi+k-1 ) = iwork( ilihi+k-1 ) + 1
703 IF(
SELECT(lihi+1).NE.0 )
704 $ iwork( ilsel+k-1 ) = iwork( ilsel+k-1 ) + 1
716 lilo = iwork( ililo + k - 1 )
717 lihi = iwork( ilihi + k - 1 )
718 lsel = iwork( ilsel + k - 1 )
719 IF( lsel.EQ.0 .OR. lilo.EQ.lihi )
THEN
720 lihi = iwork( ilihi + k - 1 )
721 iwork( ilihi + k - 1 ) = (iceil(lihi,nb)-1)*nb + 1
722 iwork( ililo + k - 1 ) = iwork( ilihi + k - 1 ) + 1
733 DO 95 window = 1, nmwin2
734 rsrc = iwork(irsrc+window-1)
735 csrc = iwork(icsrc+window-1)
736 IF( myrow.EQ.rsrc .OR. mycol.EQ.csrc )
THEN
737 tlilo = iwork( ililo + window - 1 )
738 tlihi = iwork( ilihi + window - 1 )
739 tlsel = iwork( ilsel + window - 1 )
740 IF( (.NOT. ( lihi .GE. lilo + lsel ) ) .AND.
741 $ ( (tlihi .GE. tlilo + tlsel) .OR. first ) )
THEN
742 IF( first ) first = .false.
756 IF( lilo.EQ.ihi .AND. lihi.EQ.ilo .AND. lsel.EQ.m )
768 IF( first .OR. ( lihi .GE. lilo + lsel ) )
THEN
775 DO 110 window = 1, nmwin2
776 rsrc = iwork(irsrc+window-1)
777 csrc = iwork(icsrc+window-1)
782 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc )
THEN
783 lilo = iwork(ililo+window-1)
784 lihi = iwork(ilihi+window-1)
785 lsel = iwork(ilsel+window-1)
789 i =
max( lilo, lihi - winsiz + 1 )
795 CALL infog2l( i, i-1, desct, nprow, npcol, myrow,
796 $ mycol, iloc, jloc, rsrc, csrc )
797 IF( t( lldt*(jloc-1) + iloc ).NE.zero )
803 CALL infog2l( i, i, desct, nprow, npcol,
804 $ myrow, mycol, iloc1, jloc1, rsrc, csrc )
820 swap =
SELECT( k ).NE.0
822 CALL infog2l( k+1, k, desct, nprow, npcol,
823 $ myrow, mycol, iloc, jloc, rsrc, csrc )
824 IF( t( lldt*(jloc-1) + iloc ).NE.zero )
836 nitraf = liwork - pitraf + 1
837 ndtraf = lwork - pdtraf + 1
839 $ t(lldt*(jloc1-1) + iloc1), lldt, kk,
840 $ kks, nitraf, iwork( pitraf ), ndtraf,
841 $ work( pdtraf ), work(ipw1), ierr )
842 pitraf = pitraf + nitraf
843 pdtraf = pdtraf + ndtraf
848 DO 150 j = i+kk-1, i+kks, -1
849 SELECT(j+1) =
SELECT(j-1)
854 DO 160 j = i+kk-1, i+kks, -1
855 SELECT(j) =
SELECT(j-1)
860 IF ( ierr.EQ.1 .OR. ierr.EQ.2 )
THEN
870 IF ( ierr.EQ.2 )
THEN
871 SELECT( i+kks-3 ) = 1
872 SELECT( i+kks-1 ) = 0
895 DO 175 window = 1, nmwin2
896 rsrc = iwork(irsrc+window-1)
897 csrc = iwork(icsrc+window-1)
898 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc )
THEN
926 DO 111 window = 1, nmwin2
927 rsrc = iwork(irsrc+window-1)
928 csrc = iwork(icsrc+window-1)
929 IF( myrow.EQ.rsrc .OR. mycol.EQ.csrc )
THEN
930 lilo = iwork(ililo+window-1)
931 lihi = iwork(ilihi+window-1)
932 lsel = iwork(ilsel+window-1)
934 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc )
THEN
935 IF( npcol.GT.1 .AND. dir.EQ.1 )
936 $
CALL igebs2d( ictxt,
'Row', top, 8, 1, ibuff, 8 )
937 IF( nprow.GT.1 .AND. dir.EQ.2 )
938 $
CALL igebs2d( ictxt,
'Col', top, 8, 1, ibuff, 8 )
939 ELSEIF( myrow.EQ.rsrc .OR. mycol.EQ.csrc )
THEN
940 IF( npcol.GT.1 .AND. dir.EQ.1 .AND. myrow.EQ.rsrc )
942 IF( first .OR. (lihi .GE. lilo + lsel) )
THEN
943 CALL igebr2d( ictxt,
'Row', top, 8, 1, ibuff, 8,
959 IF( nprow.GT.1 .AND. dir.EQ.2 .AND. mycol.EQ.csrc )
961 IF( first .OR. (lihi .GE. lilo + lsel) )
THEN
962 CALL igebr2d( ictxt,
'Col', top, 8, 1, ibuff, 8,
989 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc )
THEN
991 bufflen = dlen + ilen
992 IF( bufflen.NE.0 )
THEN
993 DO 180 indx = 1, ilen
994 work( buffer+indx-1 ) =
995 $ float( iwork(ipiw+indx-1) )
997 CALL slamov(
'All', dlen, 1, work( ipw2 ),
998 $ dlen, work(buffer+ilen), dlen )
999 IF( npcol.GT.1 .AND. dir.EQ.1 )
THEN
1000 CALL sgebs2d( ictxt,
'Row', top, bufflen, 1,
1001 $ work(buffer), bufflen )
1003 IF( nprow.GT.1 .AND. dir.EQ.2 )
THEN
1004 CALL sgebs2d( ictxt,
'Col', top, bufflen, 1,
1005 $ work(buffer), bufflen )
1008 ELSEIF( myrow.EQ.rsrc .OR. mycol.EQ.csrc )
THEN
1009 IF( npcol.GT.1 .AND. dir.EQ.1 .AND. myrow.EQ.rsrc )
1012 bufflen = dlen + ilen
1013 IF( bufflen.NE.0 )
THEN
1014 CALL sgebr2d( ictxt,
'Row', top, bufflen, 1,
1015 $ work(buffer), bufflen, rsrc, csrc )
1018 IF( nprow.GT.1 .AND. dir.EQ.2 .AND. mycol.EQ.csrc )
1021 bufflen = dlen + ilen
1022 IF( bufflen.NE.0 )
THEN
1023 CALL sgebr2d( ictxt,
'Col', top, bufflen, 1,
1024 $ work(buffer), bufflen, rsrc, csrc )
1027 IF((npcol.GT.1.AND.dir.EQ.1.AND.myrow.EQ.rsrc).OR.
1028 $ (nprow.GT.1.AND.dir.EQ.2.AND.mycol.EQ.csrc ) )
1030 IF( bufflen.NE.0 )
THEN
1031 DO 190 indx = 1, ilen
1032 iwork(ipiw+indx-1) =
1033 $ int(work( buffer+indx-1 ))
1035 CALL slamov(
'All', dlen, 1,
1036 $ work( buffer+ilen ), dlen,
1037 $ work( ipw2 ), dlen )
1052 DO 112 window = 1, nmwin2
1053 rsrc = iwork(irsrc+window-1)
1054 csrc = iwork(icsrc+window-1)
1056 IF( (myrow.EQ.rsrc .AND. dir.EQ.1 ).OR.
1057 $ (mycol.EQ.csrc .AND. dir.EQ.2 ) )
THEN
1058 lilo = iwork(ililo+window-1)
1059 lihi = iwork(ilihi+window-1)
1060 lsel = iwork(ilsel+window-1)
1064 IF( bufflen.EQ.0 )
GO TO 295
1066 nitraf = pitraf - ipiw
1069 DO 200 k = 1, nitraf
1070 IF( iwork( ipiw + k - 1 ).LE.nwin )
THEN
1082 ipw3 = pdw + nwin*nwin
1087 IF( flops.NE.0 .AND.
1088 $ ( flops*100 ) / ( 2*nwin*nwin ) .GE. mmult )
THEN
1096 CALL slaset(
'All', nwin, nwin, zero, one,
1097 $ work( pdw ), nwin )
1098 CALL bslaapp( 1, nwin, nwin, ncb, work( pdw ), nwin,
1099 $ nitraf, iwork(ipiw), work( ipw2 ), work(ipw3) )
1111 DO 210 indx = 1, i-1, nb
1112 CALL infog2l( indx, i, desct, nprow, npcol,
1113 $ myrow, mycol, iloc, jloc, rsrc1, csrc1 )
1114 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
1116 lrows =
min(nb,i-indx)
1117 CALL sgemm(
'No transpose',
1118 $
'No transpose', lrows, nwin, nwin,
1119 $ one, t((jloc-1)*lldt+iloc), lldt,
1120 $ work( pdw ), nwin, zero,
1121 $ work(ipw3), lrows )
1122 CALL slamov(
'All', lrows, nwin,
1123 $ work(ipw3), lrows,
1124 $ t((jloc-1)*lldt+iloc), lldt )
1128 DO 220 indx = 1, n, nb
1129 CALL infog2l( indx, i, descq, nprow,
1130 $ npcol, myrow, mycol, iloc, jloc,
1132 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 )
1134 lrows =
min(nb,n-indx+1)
1135 CALL sgemm(
'No transpose',
1136 $
'No transpose', lrows, nwin, nwin,
1137 $ one, q((jloc-1)*lldq+iloc), lldq,
1138 $ work( pdw ), nwin, zero,
1139 $ work(ipw3), lrows )
1140 CALL slamov(
'All', lrows, nwin,
1141 $ work(ipw3), lrows,
1142 $ q((jloc-1)*lldq+iloc), lldq )
1151 IF( lihi.LT.n )
THEN
1152 IF( mod(lihi,nb).GT.0 )
THEN
1154 CALL infog2l( i, indx, desct, nprow,
1155 $ npcol, myrow, mycol, iloc, jloc,
1157 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 )
1159 lcols = mod(
min( nb-mod(lihi,nb),
1161 CALL sgemm(
'Transpose',
1162 $
'No Transpose', nwin, lcols, nwin,
1163 $ one, work( pdw ), nwin,
1164 $ t((jloc-1)*lldt+iloc), lldt, zero,
1165 $ work(ipw3), nwin )
1166 CALL slamov(
'All', nwin, lcols,
1168 $ t((jloc-1)*lldt+iloc), lldt )
1171 indxs = iceil(lihi,nb)*nb + 1
1172 DO 230 indx = indxs, n, nb
1173 CALL infog2l( i, indx, desct, nprow,
1174 $ npcol, myrow, mycol, iloc, jloc,
1176 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 )
1178 lcols =
min( nb, n-indx+1 )
1179 CALL sgemm(
'Transpose',
1180 $
'No Transpose', nwin, lcols, nwin,
1181 $ one, work( pdw ), nwin,
1182 $ t((jloc-1)*lldt+iloc), lldt, zero,
1183 $ work(ipw3), nwin )
1184 CALL slamov(
'All', nwin, lcols,
1186 $ t((jloc-1)*lldt+iloc), lldt )
1210 DO 240 indx = 1, i-1, nb
1211 CALL infog2l( indx, i, desct, nprow, npcol,
1212 $ myrow, mycol, iloc, jloc, rsrc1, csrc1 )
1213 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
1215 jloc1 = indxg2l( i+nwin-ks, nb, mycol,
1216 $ desct( csrc_ ), npcol )
1217 lrows =
min(nb,i-indx)
1218 CALL slamov(
'All', lrows, ks,
1219 $ t((jloc1-1)*lldt+iloc ), lldt,
1220 $ work(ipw3), lrows )
1221 CALL strmm(
'Right',
'Upper',
1223 $
'Non-unit', lrows, ks, one,
1224 $ work( pdw+nwin-ks ), nwin,
1225 $ work(ipw3), lrows )
1226 CALL sgemm(
'No transpose',
1227 $
'No transpose', lrows, ks, nwin-ks,
1228 $ one, t((jloc-1)*lldt+iloc), lldt,
1229 $ work( pdw ), nwin, one, work(ipw3),
1234 CALL slamov(
'All', lrows, nwin-ks,
1235 $ t((jloc-1)*lldt+iloc), lldt,
1236 $ work( ipw3+ks*lrows ), lrows )
1237 CALL strmm(
'Right',
'Lower',
1238 $
'No transpose',
'Non-unit',
1239 $ lrows, nwin-ks, one,
1240 $ work( pdw+nwin*ks ), nwin,
1241 $ work( ipw3+ks*lrows ), lrows )
1242 CALL sgemm(
'No transpose',
1243 $
'No transpose', lrows, nwin-ks, ks,
1244 $ one, t((jloc1-1)*lldt+iloc), lldt,
1245 $ work( pdw+nwin*ks+nwin-ks ), nwin,
1246 $ one, work( ipw3+ks*lrows ), lrows )
1250 CALL slamov(
'All', lrows, nwin,
1251 $ work(ipw3), lrows,
1252 $ t((jloc-1)*lldt+iloc), lldt )
1259 DO 250 indx = 1, n, nb
1260 CALL infog2l( indx, i, descq, nprow,
1261 $ npcol, myrow, mycol, iloc, jloc,
1263 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 )
1265 jloc1 = indxg2l( i+nwin-ks, nb,
1266 $ mycol, descq( csrc_ ), npcol )
1267 lrows =
min(nb,n-indx+1)
1268 CALL slamov(
'All', lrows, ks,
1269 $ q((jloc1-1)*lldq+iloc ), lldq,
1270 $ work(ipw3), lrows )
1271 CALL strmm(
'Right',
'Upper',
1272 $
'No transpose',
'Non-unit',
1274 $ work( pdw+nwin-ks ), nwin,
1275 $ work(ipw3), lrows )
1276 CALL sgemm(
'No transpose',
1277 $
'No transpose', lrows, ks,
1279 $ q((jloc-1)*lldq+iloc), lldq,
1280 $ work( pdw ), nwin, one,
1281 $ work(ipw3), lrows )
1285 CALL slamov(
'All', lrows, nwin-ks,
1286 $ q((jloc-1)*lldq+iloc), lldq,
1287 $ work( ipw3+ks*lrows ), lrows)
1288 CALL strmm(
'Right',
'Lower',
1289 $
'No transpose',
'Non-unit',
1290 $ lrows, nwin-ks, one,
1291 $ work( pdw+nwin*ks ), nwin,
1292 $ work( ipw3+ks*lrows ), lrows)
1293 CALL sgemm(
'No transpose',
1294 $
'No transpose', lrows, nwin-ks,
1295 $ ks, one, q((jloc1-1)*lldq+iloc),
1296 $ lldq, work(pdw+nwin*ks+nwin-ks),
1297 $ nwin, one, work( ipw3+ks*lrows ),
1302 CALL slamov(
'All', lrows, nwin,
1303 $ work(ipw3), lrows,
1304 $ q((jloc-1)*lldq+iloc), lldq )
1311 IF ( lihi.LT.n )
THEN
1315 IF( mod(lihi,nb).GT.0 )
THEN
1317 CALL infog2l( i, indx, desct, nprow,
1318 $ npcol, myrow, mycol, iloc, jloc,
1320 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 )
1322 iloc1 = indxg2l( i+nwin-ks, nb, myrow,
1323 $ desct( rsrc_ ), nprow )
1324 lcols = mod(
min( nb-mod(lihi,nb),
1326 CALL slamov(
'All', ks, lcols,
1327 $ t((jloc-1)*lldt+iloc1), lldt,
1328 $ work(ipw3), nwin )
1329 CALL strmm(
'Left',
'Upper',
1330 $
'Transpose',
'Non-unit', ks,
1331 $ lcols, one, work( pdw+nwin-ks ),
1332 $ nwin, work(ipw3), nwin )
1333 CALL sgemm(
'Transpose',
1334 $
'No transpose', ks, lcols,
1335 $ nwin-ks, one, work(pdw), nwin,
1336 $ t((jloc-1)*lldt+iloc), lldt, one,
1337 $ work(ipw3), nwin )
1342 CALL slamov(
'All', nwin-ks, lcols,
1343 $ t((jloc-1)*lldt+iloc), lldt,
1344 $ work( ipw3+ks ), nwin )
1345 CALL strmm(
'Left',
'Lower',
1346 $
'Transpose',
'Non-unit',
1347 $ nwin-ks, lcols, one,
1348 $ work( pdw+nwin*ks ), nwin,
1349 $ work( ipw3+ks ), nwin )
1350 CALL sgemm(
'Transpose',
1351 $
'No Transpose', nwin-ks, lcols,
1353 $ work( pdw+nwin*ks+nwin-ks ),
1354 $ nwin, t((jloc-1)*lldt+iloc1),
1355 $ lldt, one, work( ipw3+ks ),
1360 CALL slamov(
'All', nwin, lcols,
1362 $ t((jloc-1)*lldt+iloc), lldt )
1365 indxs = iceil(lihi,nb)*nb + 1
1366 DO 260 indx = indxs, n, nb
1367 CALL infog2l( i, indx, desct, nprow,
1368 $ npcol, myrow, mycol, iloc, jloc,
1370 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 )
1376 iloc1 = indxg2l( i+nwin-ks, nb,
1377 $ myrow, desct( rsrc_ ), nprow )
1378 lcols =
min( nb, n-indx+1 )
1379 CALL slamov(
'All', ks, lcols,
1380 $ t((jloc-1)*lldt+iloc1), lldt,
1381 $ work(ipw3), nwin )
1382 CALL strmm(
'Left',
'Upper',
1383 $
'Transpose',
'Non-unit', ks,
1385 $ work( pdw+nwin-ks ), nwin,
1386 $ work(ipw3), nwin )
1387 CALL sgemm(
'Transpose',
1388 $
'No transpose', ks, lcols,
1389 $ nwin-ks, one, work(pdw), nwin,
1390 $ t((jloc-1)*lldt+iloc), lldt, one,
1391 $ work(ipw3), nwin )
1396 CALL slamov(
'All', nwin-ks, lcols,
1397 $ t((jloc-1)*lldt+iloc), lldt,
1398 $ work( ipw3+ks ), nwin )
1399 CALL strmm(
'Left',
'Lower',
1400 $
'Transpose',
'Non-unit',
1401 $ nwin-ks, lcols, one,
1402 $ work( pdw+nwin*ks ), nwin,
1403 $ work( ipw3+ks ), nwin )
1404 CALL sgemm(
'Transpose',
1405 $
'No Transpose', nwin-ks, lcols,
1407 $ work( pdw+nwin*ks+nwin-ks ),
1408 $ nwin, t((jloc-1)*lldt+iloc1),
1409 $ lldt, one, work(ipw3+ks), nwin )
1413 CALL slamov(
'All', nwin, lcols,
1415 $ t((jloc-1)*lldt+iloc), lldt )
1421 ELSEIF( flops.NE.0 )
THEN
1427 DO 270 indx = 1, i-1, nb
1428 CALL infog2l( indx, i, desct, nprow, npcol,
1429 $ myrow, mycol, iloc, jloc, rsrc1, csrc1 )
1430 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
THEN
1431 lrows =
min(nb,i-indx)
1432 CALL bslaapp( 1, lrows, nwin, ncb,
1433 $ t((jloc-1)*lldt+iloc ), lldt, nitraf,
1434 $ iwork(ipiw), work( ipw2 ),
1439 DO 280 indx = 1, n, nb
1440 CALL infog2l( indx, i, descq, nprow, npcol,
1441 $ myrow, mycol, iloc, jloc, rsrc1, csrc1 )
1442 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
1444 lrows =
min(nb,n-indx+1)
1445 CALL bslaapp( 1, lrows, nwin, ncb,
1446 $ q((jloc-1)*lldq+iloc), lldq, nitraf,
1447 $ iwork(ipiw), work( ipw2 ),
1454 IF( lihi.LT.n )
THEN
1455 IF( mod(lihi,nb).GT.0 )
THEN
1457 CALL infog2l( i, indx, desct, nprow, npcol,
1458 $ myrow, mycol, iloc, jloc, rsrc1, csrc1 )
1459 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
1461 lcols = mod(
min( nb-mod(lihi,nb),
1463 CALL bslaapp( 0, nwin, lcols, ncb,
1464 $ t((jloc-1)*lldt+iloc), lldt, nitraf,
1465 $ iwork(ipiw), work( ipw2 ),
1469 indxs = iceil(lihi,nb)*nb + 1
1470 DO 290 indx = indxs, n, nb
1471 CALL infog2l( i, indx, desct, nprow, npcol,
1472 $ myrow, mycol, iloc, jloc, rsrc1, csrc1 )
1473 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
1475 lcols =
min( nb, n-indx+1 )
1476 CALL bslaapp( 0, nwin, lcols, ncb,
1477 $ t((jloc-1)*lldt+iloc), lldt, nitraf,
1478 $ iwork(ipiw), work( ipw2 ),
1509 IF( myrow.EQ.rsrc.AND.mycol.EQ.csrc )
THEN
1511 IF( bufflen.NE.0 .OR. ks.EQ.0 .OR.
1512 $ ( bufflen.EQ.0 .AND. ks.GT.0 ) )
1514 iwork( ilihi+window-1 ) = lihi
1515 IF( .NOT. lihi.GE.lilo+lsel )
THEN
1517 iwork( ililo+window-1 ) = lilo
1520 ELSEIF( myrow.EQ.rsrc .AND. dir.EQ.1 )
THEN
1521 IF( bufflen.NE.0 .OR. ks.EQ.0 .OR.
1522 $ ( bufflen.EQ.0 .AND. ks.GT.0 ) )
1524 iwork( ilihi+window-1 ) = lihi
1525 IF( .NOT. lihi.GE.lilo+lsel )
THEN
1527 iwork( ililo+window-1 ) = lilo
1529 ELSEIF( mycol.EQ.csrc .AND. dir.EQ.2 )
THEN
1530 IF( bufflen.NE.0 .OR. ks.EQ.0 .OR.
1531 $ ( bufflen.EQ.0 .AND. ks.GT.0 ) )
1533 iwork( ilihi+window-1 ) = lihi
1534 IF( .NOT. lihi.GE.lilo+lsel )
THEN
1536 iwork( ililo+window-1 ) = lilo
1554 DO 113 window = 1, nmwin2
1555 rsrc = iwork( irsrc + window - 1 )
1556 IF( myrow.EQ.rsrc .AND. (.NOT. lihi.GE.lilo+lsel ) )
THEN
1557 lilo = iwork( ililo + window - 1 )
1558 lihi = iwork( ilihi + window - 1 )
1559 lsel = iwork( ilsel + window - 1 )
1565 IF( first ) first = .false.
1575 CALL blacs_barrier( ictxt,
'All' )
1581 IF( nprocs.GT.1 )
THEN
1582 CALL igamx2d( ictxt,
'All', top, 1, 1, ierr, 1, -1,
1586 IF( ierr.NE.0 )
THEN
1591 IF( myierr.NE.0 ) info =
max(1,i+kks-1)
1592 IF( nprocs.GT.1 )
THEN
1593 CALL igamx2d( ictxt,
'All', top, 1, 1, info, 1, -1,
1650 lastwait = nmwin2.GT.1 .AND. mod(nmwin2,2).EQ.1 .AND.
1651 $ nmwin2.EQ.
min(nprow,npcol)
1656 IF( last.EQ.0 )
THEN
1670 DO 310 window0 = win0s, win0e
1671 DO 320 window = window0, wine, 2
1676 rsrc4 = iwork(irsrc+window-1)
1677 csrc4 = iwork(icsrc+window-1)
1682 csrc3 = mod( csrc4 - 1 + npcol, npcol )
1683 rsrc2 = mod( rsrc4 - 1 + nprow, nprow )
1687 IF( ( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) .OR.
1688 $ ( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 ) .OR.
1689 $ ( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 ) .OR.
1690 $ ( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) )
THEN
1702 IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 )
THEN
1703 lihi4 = ( iwork( ililo + window - 1 ) +
1704 $ iwork( ilihi + window - 1 ) ) / 2
1705 lihic =
min(lihi4,(iceil(lihi4,nb)-1)*nb+wneicr)
1711 IF( (.NOT. lihic.LE.nb) .AND. lihic.LT.n )
THEN
1712 iloc = indxg2l( lihic+1, nb, myrow,
1713 $ desct( rsrc_ ), nprow )
1714 jloc = indxg2l( lihic, nb, mycol,
1715 $ desct( csrc_ ), npcol )
1716 IF( t( (jloc-1)*lldt+iloc ).NE.zero )
THEN
1717 IF( mod( lihic, nb ).EQ.1 .OR.
1718 $ ( mod( lihic, nb ).EQ.2 .AND.
1719 $
SELECT(lihic-2).EQ.0 ) )
1727 IF( rsrc4.NE.rsrc1 .OR. csrc4.NE.csrc1 )
1728 $
CALL igesd2d( ictxt, 1, 1, lihic, 1, rsrc1,
1730 IF( rsrc4.NE.rsrc2 .OR. csrc4.NE.csrc2 )
1731 $
CALL igesd2d( ictxt, 1, 1, lihic, 1, rsrc2,
1733 IF( rsrc4.NE.rsrc3 .OR. csrc4.NE.csrc3 )
1734 $
CALL igesd2d( ictxt, 1, 1, lihic, 1, rsrc3,
1737 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
THEN
1738 IF( rsrc4.NE.rsrc1 .OR. csrc4.NE.csrc1 )
1739 $
CALL igerv2d( ictxt, 1, 1, lihic, 1, rsrc4,
1742 IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 )
THEN
1743 IF( rsrc4.NE.rsrc2 .OR. csrc4.NE.csrc2 )
1744 $
CALL igerv2d( ictxt, 1, 1, lihic, 1, rsrc4,
1747 IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 )
THEN
1748 IF( rsrc4.NE.rsrc3 .OR. csrc4.NE.csrc3 )
1749 $
CALL igerv2d( ictxt, 1, 1, lihic, 1, rsrc4,
1758 skip1cr = window.EQ.1 .AND.
1759 $ iceil(lihic,nb).LE.iceil(ilo,nb)
1775 IF( .NOT. skip1cr )
THEN
1776 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
THEN
1777 IF( window.EQ.1 )
THEN
1780 lihi1 = iwork( ilihi + window - 2 )
1783 $
min( lihic-2*mod(lihic,nb) + 1,
1784 $ (iceil(lihic,nb)-1)*nb - 1 ) )
1785 iloc = indxg2l( i, nb, myrow, desct( rsrc_ ),
1787 jloc = indxg2l( i-1, nb, mycol, desct( csrc_ ),
1789 IF( t( (jloc-1)*lldt+iloc ).NE.zero )
1791 IF( rsrc1.NE.rsrc4 .OR. csrc1.NE.csrc4 )
1792 $
CALL igesd2d( ictxt, 1, 1, i, 1, rsrc4,
1794 IF( rsrc1.NE.rsrc2 .OR. csrc1.NE.csrc2 )
1795 $
CALL igesd2d( ictxt, 1, 1, i, 1, rsrc2,
1797 IF( rsrc1.NE.rsrc3 .OR. csrc1.NE.csrc3 )
1798 $
CALL igesd2d( ictxt, 1, 1, i, 1, rsrc3,
1801 IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 )
THEN
1802 IF( rsrc1.NE.rsrc2 .OR. csrc1.NE.csrc2 )
1803 $
CALL igerv2d( ictxt, 1, 1, i, 1, rsrc1,
1806 IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 )
THEN
1807 IF( rsrc1.NE.rsrc3 .OR. csrc1.NE.csrc3 )
1808 $
CALL igerv2d( ictxt, 1, 1, i, 1, rsrc1,
1811 IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 )
THEN
1812 IF( rsrc1.NE.rsrc4 .OR. csrc1.NE.csrc4 )
1813 $
CALL igerv2d( ictxt, 1, 1, i, 1, rsrc1,
1823 nwin = lihic - i + 1
1828 IF( skip1cr )
GO TO 360
1834 CALL slaset(
'All', nwin, nwin, zero, zero,
1835 $ work( ipw2 ), nwin )
1838 ipw3 = ipw2 + nwin*nwin
1845 IF( rsrc1.NE.rsrc4 .OR. csrc1.NE.csrc4 )
THEN
1846 ilen4 = mod(lihic,nb)
1847 seli4 = iceil(i,nb)*nb+1
1848 ilen1 = nwin - ilen4
1849 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
THEN
1850 CALL igesd2d( ictxt, ilen1, 1,
SELECT(i),
1851 $ ilen1, rsrc4, csrc4 )
1852 CALL igerv2d( ictxt, ilen4, 1,
SELECT(seli4),
1853 $ ilen4, rsrc4, csrc4 )
1855 IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 )
THEN
1856 CALL igesd2d( ictxt, ilen4, 1,
SELECT(seli4),
1857 $ ilen4, rsrc1, csrc1 )
1858 CALL igerv2d( ictxt, ilen1, 1,
SELECT(i),
1859 $ ilen1, rsrc1, csrc1 )
1866 dim1 = nb - mod(i-1,nb)
1868 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
THEN
1869 iloc = indxg2l( i, nb, myrow, desct( rsrc_ ),
1871 jloc = indxg2l( i, nb, mycol, desct( csrc_ ),
1873 CALL slamov(
'All', dim1, dim1,
1874 $ t((jloc-1)*lldt+iloc), lldt, work(ipw2),
1876 IF( rsrc1.NE.rsrc4 .OR. csrc1.NE.csrc4 )
THEN
1877 CALL sgesd2d( ictxt, dim1, dim1,
1878 $ work(ipw2), nwin, rsrc4, csrc4 )
1879 CALL sgerv2d( ictxt, dim4, dim4,
1880 $ work(ipw2+dim1*nwin+dim1), nwin, rsrc4,
1884 IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 )
THEN
1885 iloc = indxg2l( i+dim1, nb, myrow, desct( rsrc_ ),
1887 jloc = indxg2l( i+dim1, nb, mycol, desct( csrc_ ),
1889 CALL slamov(
'All', dim4, dim4,
1890 $ t((jloc-1)*lldt+iloc), lldt,
1891 $ work(ipw2+dim1*nwin+dim1), nwin )
1892 IF( rsrc4.NE.rsrc1 .OR. csrc4.NE.csrc1 )
THEN
1893 CALL sgesd2d( ictxt, dim4, dim4,
1894 $ work(ipw2+dim1*nwin+dim1), nwin, rsrc1,
1896 CALL sgerv2d( ictxt, dim1, dim1,
1897 $ work(ipw2), nwin, rsrc1, csrc1 )
1900 IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 )
THEN
1901 iloc = indxg2l( i, nb, myrow, desct( rsrc_ ),
1903 jloc = indxg2l( i+dim1, nb, mycol, desct( csrc_ ),
1905 CALL slamov(
'All', dim1, dim4,
1906 $ t((jloc-1)*lldt+iloc), lldt,
1907 $ work(ipw2+dim1*nwin), nwin )
1908 IF( rsrc2.NE.rsrc1 .OR. csrc2.NE.csrc1 )
THEN
1909 CALL sgesd2d( ictxt, dim1, dim4,
1910 $ work(ipw2+dim1*nwin), nwin, rsrc1, csrc1 )
1913 IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 )
THEN
1914 IF( rsrc2.NE.rsrc4 .OR. csrc2.NE.csrc4 )
THEN
1915 CALL sgesd2d( ictxt, dim1, dim4,
1916 $ work(ipw2+dim1*nwin), nwin, rsrc4, csrc4 )
1919 IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 )
THEN
1920 iloc = indxg2l( i+dim1, nb, myrow, desct( rsrc_ ),
1922 jloc = indxg2l( i+dim1-1, nb, mycol,
1923 $ desct( csrc_ ), npcol )
1924 CALL slamov(
'All', 1, 1,
1925 $ t((jloc-1)*lldt+iloc), lldt,
1926 $ work(ipw2+(dim1-1)*nwin+dim1), nwin )
1927 IF( rsrc3.NE.rsrc1 .OR. csrc3.NE.csrc1 )
THEN
1928 CALL sgesd2d( ictxt, 1, 1,
1929 $ work(ipw2+(dim1-1)*nwin+dim1), nwin,
1933 IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 )
THEN
1934 IF( rsrc3.NE.rsrc4 .OR. csrc3.NE.csrc4 )
THEN
1935 CALL sgesd2d( ictxt, 1, 1,
1936 $ work(ipw2+(dim1-1)*nwin+dim1), nwin,
1940 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
THEN
1941 IF( rsrc1.NE.rsrc2 .OR. csrc1.NE.csrc2 )
THEN
1942 CALL sgerv2d( ictxt, dim1, dim4,
1943 $ work(ipw2+dim1*nwin), nwin, rsrc2,
1946 IF( rsrc1.NE.rsrc3 .OR. csrc1.NE.csrc3 )
THEN
1947 CALL sgerv2d( ictxt, 1, 1,
1948 $ work(ipw2+(dim1-1)*nwin+dim1), nwin,
1952 IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 )
THEN
1953 IF( rsrc4.NE.rsrc2 .OR. csrc4.NE.csrc2 )
THEN
1954 CALL sgerv2d( ictxt, dim1, dim4,
1955 $ work(ipw2+dim1*nwin), nwin, rsrc2,
1958 IF( rsrc4.NE.rsrc3 .OR. csrc4.NE.csrc3 )
THEN
1959 CALL sgerv2d( ictxt, 1, 1,
1960 $ work(ipw2+(dim1-1)*nwin+dim1), nwin,
1969 IF( ( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) .OR.
1970 $ ( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) )
THEN
1976 swap =
SELECT( k ).NE.0
1977 IF( k.LT.lihic )
THEN
1978 elem = work(ipw2+(k-i)*nwin+k-i+1)
1991 nitraf = liwork - pitraf + 1
1992 ndtraf = lwork - pdtraf + 1
1993 CALL bstrexc( nwin, work(ipw2), nwin,
1994 $ kk, kks, nitraf, iwork( pitraf ),
1995 $ ndtraf, work( pdtraf ),
1996 $ work(ipw1), ierr )
1997 pitraf = pitraf + nitraf
1998 pdtraf = pdtraf + ndtraf
2003 DO 340 j = i+kk-1, i+kks, -1
2004 SELECT(j+1) =
SELECT(j-1)
2009 DO 350 j = i+kk-1, i+kks, -1
2010 SELECT(j) =
SELECT(j-1)
2015 IF ( ierr.EQ.1 .OR. ierr.EQ.2 )
THEN
2017 IF ( ierr.EQ.2 )
THEN
2018 SELECT( i+kks-3 ) = 1
2019 SELECT( i+kks-1 ) = 0
2037 IF( ( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) .OR.
2038 $ ( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) )
THEN
2045 ilen = pitraf - ipiw + 1
2046 dlen = pdtraf - ipw3 + 1
2053 IF( .NOT. skip1cr )
THEN
2054 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
THEN
2055 iloc = indxg2l( i, nb, myrow, desct( rsrc_ ),
2057 jloc = indxg2l( i, nb, mycol, desct( csrc_ ),
2059 CALL slamov(
'All', dim1, dim1, work(ipw2),
2060 $ nwin, t((jloc-1)*lldt+iloc), lldt )
2062 IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 )
THEN
2063 iloc = indxg2l( i+dim1, nb, myrow,
2064 $ desct( rsrc_ ), nprow )
2065 jloc = indxg2l( i+dim1, nb, mycol,
2066 $ desct( csrc_ ), npcol )
2067 CALL slamov(
'All', dim4, dim4,
2068 $ work(ipw2+dim1*nwin+dim1), nwin,
2069 $ t((jloc-1)*lldt+iloc), lldt )
2080 IF( window.EQ.1 .AND. skip1cr )
GO TO 325
2084 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
THEN
2085 IF( rsrc1.NE.rsrc3 .OR. csrc1.NE.csrc3 )
THEN
2086 CALL sgesd2d( ictxt, 1, 1,
2087 $ work( ipw2+(dim1-1)*nwin+dim1 ), nwin,
2091 IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 )
THEN
2092 IF( rsrc4.NE.rsrc2 .OR. csrc4.NE.csrc2 )
THEN
2093 CALL sgesd2d( ictxt, dim1, dim4,
2094 $ work( ipw2+dim1*nwin), nwin, rsrc2,
2098 IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 )
THEN
2099 iloc = indxg2l( i, nb, myrow, desct( rsrc_ ),
2101 jloc = indxg2l( i+dim1, nb, mycol,
2102 $ desct( csrc_ ), npcol )
2103 IF( rsrc2.NE.rsrc4 .OR. csrc2.NE.csrc4 )
THEN
2104 CALL sgerv2d( ictxt, dim1, dim4,
2105 $ work(ipw2+dim1*nwin), nwin, rsrc4, csrc4 )
2107 CALL slamov(
'All', dim1, dim4,
2108 $ work( ipw2+dim1*nwin ), nwin,
2109 $ t((jloc-1)*lldt+iloc), lldt )
2111 IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 )
THEN
2112 iloc = indxg2l( i+dim1, nb, myrow,
2113 $ desct( rsrc_ ), nprow )
2114 jloc = indxg2l( i+dim1-1, nb, mycol,
2115 $ desct( csrc_ ), npcol )
2116 IF( rsrc3.NE.rsrc1 .OR. csrc3.NE.csrc1 )
THEN
2117 CALL sgerv2d( ictxt, 1, 1,
2118 $ work( ipw2+(dim1-1)*nwin+dim1 ), nwin,
2121 t((jloc-1)*lldt+iloc) =
2122 $ work( ipw2+(dim1-1)*nwin+dim1 )
2137 DO 321 window = window0, wine, 2
2138 rsrc4 = iwork(irsrc+window-1)
2139 csrc4 = iwork(icsrc+window-1)
2140 rsrc1 = mod( rsrc4 - 1 + nprow, nprow )
2141 csrc1 = mod( csrc4 - 1 + npcol, npcol )
2142 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
THEN
2143 IF( npcol.GT.1 .AND. dir.EQ.1 )
2144 $
CALL igebs2d( ictxt,
'Row', top, 8, 1,
2146 IF( nprow.GT.1 .AND. dir.EQ.2 )
2147 $
CALL igebs2d( ictxt,
'Col', top, 8, 1,
2149 skip1cr = window.EQ.1 .AND.
2150 $ iceil(lihic,nb).LE.iceil(ilo,nb)
2151 ELSEIF( myrow.EQ.rsrc1 .OR. mycol.EQ.csrc1 )
THEN
2152 IF( npcol.GT.1 .AND. dir.EQ.1 .AND.
2153 $ myrow.EQ.rsrc1 )
THEN
2154 CALL igebr2d( ictxt,
'Row', top, 8, 1,
2155 $ ibuff, 8, rsrc1, csrc1 )
2164 bufflen = ilen + dlen
2165 ipw3 = ipw2 + nwin*nwin
2166 dim1 = nb - mod(i-1,nb)
2168 lihic = nwin + i - 1
2169 skip1cr = window.EQ.1 .AND.
2170 $ iceil(lihic,nb).LE.iceil(ilo,nb)
2172 IF( nprow.GT.1 .AND. dir.EQ.2 .AND.
2173 $ mycol.EQ.csrc1 )
THEN
2174 CALL igebr2d( ictxt,
'Col', top, 8, 1,
2175 $ ibuff, 8, rsrc1, csrc1 )
2184 bufflen = ilen + dlen
2185 ipw3 = ipw2 + nwin*nwin
2186 dim1 = nb - mod(i-1,nb)
2188 lihic = nwin + i - 1
2189 skip1cr = window.EQ.1 .AND.
2190 $ iceil(lihic,nb).LE.iceil(ilo,nb)
2193 IF( rsrc1.NE.rsrc4 )
THEN
2194 IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 )
THEN
2195 IF( npcol.GT.1 .AND. dir.EQ.1 )
2196 $
CALL igebs2d( ictxt,
'Row', top, 8, 1,
2198 skip1cr = window.EQ.1 .AND.
2199 $ iceil(lihic,nb).LE.iceil(ilo,nb)
2200 ELSEIF( myrow.EQ.rsrc4 )
THEN
2201 IF( npcol.GT.1 .AND. dir.EQ.1 )
THEN
2202 CALL igebr2d( ictxt,
'Row', top, 8, 1,
2203 $ ibuff, 8, rsrc4, csrc4 )
2212 bufflen = ilen + dlen
2213 ipw3 = ipw2 + nwin*nwin
2214 dim1 = nb - mod(i-1,nb)
2216 lihic = nwin + i - 1
2217 skip1cr = window.EQ.1 .AND.
2218 $ iceil(lihic,nb).LE.iceil(ilo,nb)
2222 IF( csrc1.NE.csrc4 )
THEN
2223 IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 )
THEN
2224 IF( nprow.GT.1 .AND. dir.EQ.2 )
2225 $
CALL igebs2d( ictxt,
'Col', top, 8, 1,
2227 skip1cr = window.EQ.1 .AND.
2228 $ iceil(lihic,nb).LE.iceil(ilo,nb)
2229 ELSEIF( mycol.EQ.csrc4 )
THEN
2230 IF( nprow.GT.1 .AND. dir.EQ.2 )
THEN
2231 CALL igebr2d( ictxt,
'Col', top, 8, 1,
2232 $ ibuff, 8, rsrc4, csrc4 )
2241 bufflen = ilen + dlen
2242 ipw3 = ipw2 + nwin*nwin
2243 dim1 = nb - mod(i-1,nb)
2245 lihic = nwin + i - 1
2246 skip1cr = window.EQ.1 .AND.
2247 $ iceil(lihic,nb).LE.iceil(ilo,nb)
2254 IF( skip1cr )
GO TO 326
2258 IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )
THEN
2260 bufflen = dlen + ilen
2261 IF( (nprow.GT.1 .AND. dir.EQ.2) .OR.
2262 $ (npcol.GT.1 .AND. dir.EQ.1) )
THEN
2263 DO 370 indx = 1, ilen
2264 work( buffer+indx-1 ) =
2265 $ float( iwork(ipiw+indx-1) )
2267 CALL slamov(
'All', dlen, 1, work( ipw3 ),
2268 $ dlen, work(buffer+ilen), dlen )
2270 IF( npcol.GT.1 .AND. dir.EQ.1 )
THEN
2271 CALL sgebs2d( ictxt,
'Row', top, bufflen, 1,
2272 $ work(buffer), bufflen )
2274 IF( nprow.GT.1 .AND. dir.EQ.2 )
THEN
2275 CALL sgebs2d( ictxt,
'Col', top, bufflen, 1,
2276 $ work(buffer), bufflen )
2278 ELSEIF( myrow.EQ.rsrc1 .OR. mycol.EQ.csrc1 )
THEN
2279 IF( npcol.GT.1 .AND. dir.EQ.1 .AND.
2280 $ myrow.EQ.rsrc1 )
THEN
2282 bufflen = dlen + ilen
2283 CALL sgebr2d( ictxt,
'Row', top, bufflen, 1,
2284 $ work(buffer), bufflen, rsrc1, csrc1 )
2286 IF( nprow.GT.1 .AND. dir.EQ.2 .AND.
2287 $ mycol.EQ.csrc1 )
THEN
2289 bufflen = dlen + ilen
2290 CALL sgebr2d( ictxt,
'Col', top, bufflen, 1,
2291 $ work(buffer), bufflen, rsrc1, csrc1 )
2293 IF( (npcol.GT.1.AND.dir.EQ.1.AND.myrow.EQ.rsrc1)
2294 $ .OR. (nprow.GT.1.AND.dir.EQ.2.AND.
2295 $ mycol.EQ.csrc1) )
THEN
2296 DO 380 indx = 1, ilen
2297 iwork(ipiw+indx-1) =
2298 $ int( work( buffer+indx-1 ) )
2300 CALL slamov(
'All', dlen, 1,
2301 $ work( buffer+ilen ), dlen,
2302 $ work( ipw3 ), dlen )
2305 IF( rsrc1.NE.rsrc4 )
THEN
2306 IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 )
THEN
2308 bufflen = dlen + ilen
2309 IF( npcol.GT.1 .AND. dir.EQ.1 )
THEN
2310 DO 390 indx = 1, ilen
2311 work( buffer+indx-1 ) =
2312 $ float( iwork(ipiw+indx-1) )
2314 CALL slamov(
'All', dlen, 1, work( ipw3 ),
2315 $ dlen, work(buffer+ilen), dlen )
2316 CALL sgebs2d( ictxt,
'Row', top, bufflen,
2317 $ 1, work(buffer), bufflen )
2319 ELSEIF( myrow.EQ.rsrc4 .AND. dir.EQ.1 .AND.
2322 bufflen = dlen + ilen
2323 CALL sgebr2d( ictxt,
'Row', top, bufflen,
2324 $ 1, work(buffer), bufflen, rsrc4, csrc4 )
2325 DO 400 indx = 1, ilen
2326 iwork(ipiw+indx-1) =
2327 $ int( work( buffer+indx-1 ) )
2329 CALL slamov(
'All', dlen, 1,
2330 $ work( buffer+ilen ), dlen,
2331 $ work( ipw3 ), dlen )
2334 IF( csrc1.NE.csrc4 )
THEN
2335 IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 )
THEN
2337 bufflen = dlen + ilen
2338 IF( nprow.GT.1 .AND. dir.EQ.2 )
THEN
2339 DO 395 indx = 1, ilen
2340 work( buffer+indx-1 ) =
2341 $ float( iwork(ipiw+indx-1) )
2343 CALL slamov(
'All', dlen, 1, work( ipw3 ),
2344 $ dlen, work(buffer+ilen), dlen )
2345 CALL sgebs2d( ictxt,
'Col', top, bufflen,
2346 $ 1, work(buffer), bufflen )
2348 ELSEIF( mycol.EQ.csrc4 .AND. dir.EQ.2 .AND.
2351 bufflen = dlen + ilen
2352 CALL sgebr2d( ictxt,
'Col', top, bufflen, 1,
2353 $ work(buffer), bufflen, rsrc4, csrc4 )
2354 DO 402 indx = 1, ilen
2355 iwork(ipiw+indx-1) =
2356 $ int( work( buffer+indx-1 ) )
2358 CALL slamov(
'All', dlen, 1,
2359 $ work( buffer+ilen ), dlen,
2360 $ work( ipw3 ), dlen )
2370 DO 322 window = window0, wine, 2
2371 IF( window.EQ.1 .AND. skip1cr )
GO TO 327
2372 rsrc4 = iwork(irsrc+window-1)
2373 csrc4 = iwork(icsrc+window-1)
2374 rsrc1 = mod( rsrc4 - 1 + nprow, nprow )
2375 csrc1 = mod( csrc4 - 1 + npcol, npcol )
2387 IF( ((mycol.EQ.csrc1.OR.mycol.EQ.csrc4).AND.dir.EQ.2)
2388 $ .OR. ((myrow.EQ.rsrc1.OR.myrow.EQ.rsrc4).AND.
2393 qrows = numroc( n, nb, myrow, descq( rsrc_ ),
2398 trows = numroc( i-1, nb, myrow, desct( rsrc_ ),
2405 tcols = numroc( n - (i+dim1-1), nb, mycol,
2407 IF( mycol.EQ.csrc4 ) tcols = tcols - dim4
2411 ipw5 = ipw4 + nwin*nwin
2412 ipw6 = ipw5 + trows * nwin
2414 ipw7 = ipw6 + nwin * tcols
2415 ipw8 = ipw7 + qrows * nwin
2417 ipw8 = ipw6 + nwin * tcols
2425 IF( mycol.EQ.csrc1 .OR. mycol.EQ.csrc4 )
THEN
2426 DO 410 indx = 1, nprow
2427 IF( mycol.EQ.csrc1 )
THEN
2428 CALL infog2l( 1+(indx-1)*nb, i, desct,
2429 $ nprow, npcol, myrow, mycol, iloc,
2430 $ jloc1, rsrc, csrc1 )
2431 IF( myrow.EQ.rsrc )
THEN
2432 CALL slamov(
'All', trows, dim1,
2433 $ t((jloc1-1)*lldt+iloc), lldt,
2434 $ work(ipw5), trows )
2435 IF( npcol.GT.1 )
THEN
2436 east = mod( mycol + 1, npcol )
2437 CALL sgesd2d( ictxt, trows, dim1,
2438 $ work(ipw5), trows, rsrc,
2440 CALL sgerv2d( ictxt, trows, dim4,
2441 $ work(ipw5+trows*dim1), trows,
2446 IF( mycol.EQ.csrc4 )
THEN
2447 CALL infog2l( 1+(indx-1)*nb, i+dim1,
2448 $ desct, nprow, npcol, myrow, mycol,
2449 $ iloc, jloc4, rsrc, csrc4 )
2450 IF( myrow.EQ.rsrc )
THEN
2451 CALL slamov(
'All', trows, dim4,
2452 $ t((jloc4-1)*lldt+iloc), lldt,
2453 $ work(ipw5+trows*dim1), trows )
2454 IF( npcol.GT.1 )
THEN
2455 west = mod( mycol-1+npcol, npcol )
2456 CALL sgesd2d( ictxt, trows, dim4,
2457 $ work(ipw5+trows*dim1), trows,
2459 CALL sgerv2d( ictxt, trows, dim1,
2460 $ work(ipw5), trows, rsrc,
2470 IF( myrow.EQ.rsrc1 .OR. myrow.EQ.rsrc4 )
THEN
2471 DO 420 indx = 1, npcol
2472 IF( myrow.EQ.rsrc1 )
THEN
2473 IF( indx.EQ.1 )
THEN
2474 CALL infog2l( i, lihic+1, desct, nprow,
2475 $ npcol, myrow, mycol, iloc1, jloc,
2479 $ (iceil(lihic,nb)+(indx-2))*nb+1,
2480 $ desct, nprow, npcol, myrow, mycol,
2481 $ iloc1, jloc, rsrc1, csrc )
2483 IF( mycol.EQ.csrc )
THEN
2484 CALL slamov(
'All', dim1, tcols,
2485 $ t((jloc-1)*lldt+iloc1), lldt,
2486 $ work(ipw6), nwin )
2487 IF( nprow.GT.1 )
THEN
2488 south = mod( myrow + 1, nprow )
2489 CALL sgesd2d( ictxt, dim1, tcols,
2490 $ work(ipw6), nwin, south,
2492 CALL sgerv2d( ictxt, dim4, tcols,
2493 $ work(ipw6+dim1), nwin, south,
2498 IF( myrow.EQ.rsrc4 )
THEN
2499 IF( indx.EQ.1 )
THEN
2500 CALL infog2l( i+dim1, lihic+1, desct,
2501 $ nprow, npcol, myrow, mycol, iloc4,
2502 $ jloc, rsrc4, csrc )
2505 $ (iceil(lihic,nb)+(indx-2))*nb+1,
2506 $ desct, nprow, npcol, myrow, mycol,
2507 $ iloc4, jloc, rsrc4, csrc )
2509 IF( mycol.EQ.csrc )
THEN
2510 CALL slamov(
'All', dim4, tcols,
2511 $ t((jloc-1)*lldt+iloc4), lldt,
2512 $ work(ipw6+dim1), nwin )
2513 IF( nprow.GT.1 )
THEN
2514 north = mod( myrow-1+nprow, nprow )
2515 CALL sgesd2d( ictxt, dim4, tcols,
2516 $ work(ipw6+dim1), nwin, north,
2518 CALL sgerv2d( ictxt, dim1, tcols,
2519 $ work(ipw6), nwin, north,
2530 IF( mycol.EQ.csrc1 .OR. mycol.EQ.csrc4 )
THEN
2531 DO 430 indx = 1, nprow
2532 IF( mycol.EQ.csrc1 )
THEN
2533 CALL infog2l( 1+(indx-1)*nb, i, descq,
2534 $ nprow, npcol, myrow, mycol, iloc,
2535 $ jloc1, rsrc, csrc1 )
2536 IF( myrow.EQ.rsrc )
THEN
2537 CALL slamov(
'All', qrows, dim1,
2538 $ q((jloc1-1)*lldq+iloc), lldq,
2539 $ work(ipw7), qrows )
2540 IF( npcol.GT.1 )
THEN
2541 east = mod( mycol + 1, npcol )
2542 CALL sgesd2d( ictxt, qrows, dim1,
2543 $ work(ipw7), qrows, rsrc,
2545 CALL sgerv2d( ictxt, qrows, dim4,
2546 $ work(ipw7+qrows*dim1),
2547 $ qrows, rsrc, east )
2551 IF( mycol.EQ.csrc4 )
THEN
2552 CALL infog2l( 1+(indx-1)*nb, i+dim1,
2553 $ descq, nprow, npcol, myrow, mycol,
2554 $ iloc, jloc4, rsrc, csrc4 )
2555 IF( myrow.EQ.rsrc )
THEN
2556 CALL slamov(
'All', qrows, dim4,
2557 $ q((jloc4-1)*lldq+iloc), lldq,
2558 $ work(ipw7+qrows*dim1), qrows )
2559 IF( npcol.GT.1 )
THEN
2560 west = mod( mycol-1+npcol,
2562 CALL sgesd2d( ictxt, qrows, dim4,
2563 $ work(ipw7+qrows*dim1),
2564 $ qrows, rsrc, west )
2565 CALL sgerv2d( ictxt, qrows, dim1,
2566 $ work(ipw7), qrows, rsrc,
2580 DO 323 window = window0, wine, 2
2581 rsrc4 = iwork(irsrc+window-1)
2582 csrc4 = iwork(icsrc+window-1)
2583 rsrc1 = mod( rsrc4 - 1 + nprow, nprow )
2584 csrc1 = mod( csrc4 - 1 + npcol, npcol )
2586 IF( ((mycol.EQ.csrc1.OR.mycol.EQ.csrc4).AND.dir.EQ.2)
2587 $ .OR. ((myrow.EQ.rsrc1.OR.myrow.EQ.rsrc4).AND.
2592 IF( window.EQ.1 .AND. skip1cr )
GO TO 328
2598 nitraf = pitraf - ipiw
2600 DO 405 k = 1, nitraf
2601 IF( iwork( ipiw + k - 1 ).LE.nwin )
THEN
2611 IF( flops.NE.0 .AND.
2612 $ ( 2*flops*100 )/( 2*nwin*nwin ) .GE. mmult )
2615 CALL slaset(
'All', nwin, nwin, zero, one,
2616 $ work( ipw4 ), nwin )
2617 work(ipw8) = float(myrow)
2618 work(ipw8+1) = float(mycol)
2619 CALL bslaapp( 1, nwin, nwin, ncb, work( ipw4 ),
2620 $ nwin, nitraf, iwork(ipiw), work( ipw3 ),
2626 IF( ishh .OR. dim1.NE.ks .OR. dim4.NE.ks )
THEN
2632 DO 440 indx = 1,
min(i-1,1+(nprow-1)*nb),
2634 IF( mycol.EQ.csrc1 )
THEN
2635 CALL infog2l( indx, i, desct, nprow,
2636 $ npcol, myrow, mycol, iloc,
2637 $ jloc, rsrc, csrc1 )
2638 IF( myrow.EQ.rsrc )
THEN
2639 CALL sgemm(
'No transpose',
2640 $
'No transpose', trows, dim1,
2641 $ nwin, one, work( ipw5 ),
2642 $ trows, work( ipw4 ), nwin,
2643 $ zero, work(ipw8), trows )
2644 CALL slamov(
'All', trows, dim1,
2645 $ work(ipw8), trows,
2646 $ t((jloc-1)*lldt+iloc),
2650 IF( mycol.EQ.csrc4 )
THEN
2651 CALL infog2l( indx, i+dim1, desct,
2652 $ nprow, npcol, myrow, mycol,
2653 $ iloc, jloc, rsrc, csrc4 )
2654 IF( myrow.EQ.rsrc )
THEN
2655 CALL sgemm(
'No transpose',
2656 $
'No transpose', trows, dim4,
2657 $ nwin, one, work( ipw5 ),
2659 $ work( ipw4+nwin*dim1 ),
2660 $ nwin, zero, work(ipw8),
2662 CALL slamov(
'All', trows, dim4,
2663 $ work(ipw8), trows,
2664 $ t((jloc-1)*lldt+iloc),
2671 DO 450 indx = 1,
min(n,1+(nprow-1)*nb),
2673 IF( mycol.EQ.csrc1 )
THEN
2675 $ nprow, npcol, myrow, mycol,
2676 $ iloc, jloc, rsrc, csrc1 )
2677 IF( myrow.EQ.rsrc )
THEN
2678 CALL sgemm(
'No transpose',
2679 $
'No transpose', qrows,
2681 $ work( ipw7 ), qrows,
2682 $ work( ipw4 ), nwin,
2685 CALL slamov(
'All', qrows,
2686 $ dim1, work(ipw8), qrows,
2687 $ q((jloc-1)*lldq+iloc),
2691 IF( mycol.EQ.csrc4 )
THEN
2693 $ descq, nprow, npcol, myrow,
2694 $ mycol, iloc, jloc, rsrc,
2696 IF( myrow.EQ.rsrc )
THEN
2697 CALL sgemm(
'No transpose',
2698 $
'No transpose', qrows,
2700 $ work( ipw7 ), qrows,
2701 $ work( ipw4+nwin*dim1 ),
2702 $ nwin, zero, work(ipw8),
2704 CALL slamov(
'All', qrows,
2705 $ dim4, work(ipw8), qrows,
2706 $ q((jloc-1)*lldq+iloc),
2718 IF ( lihic.LT.n )
THEN
2719 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc4
2720 $ .AND.mod(lihic,nb).NE.0 )
THEN
2722 CALL infog2l( i, indx, desct, nprow,
2723 $ npcol, myrow, mycol, iloc,
2724 $ jloc, rsrc1, csrc4 )
2725 CALL sgemm(
'Transpose',
2726 $
'No Transpose', dim1, tcols,
2727 $ nwin, one, work(ipw4), nwin,
2728 $ work( ipw6 ), nwin, zero,
2729 $ work(ipw8), dim1 )
2730 CALL slamov(
'All', dim1, tcols,
2732 $ t((jloc-1)*lldt+iloc), lldt )
2734 IF( myrow.EQ.rsrc4.AND.mycol.EQ.csrc4
2735 $ .AND.mod(lihic,nb).NE.0 )
THEN
2737 CALL infog2l( i+dim1, indx, desct,
2738 $ nprow, npcol, myrow, mycol,
2739 $ iloc, jloc, rsrc4, csrc4 )
2740 CALL sgemm(
'Transpose',
2741 $
'No Transpose', dim4, tcols,
2743 $ work( ipw4+dim1*nwin ), nwin,
2744 $ work( ipw6), nwin, zero,
2745 $ work(ipw8), dim4 )
2746 CALL slamov(
'All', dim4, tcols,
2748 $ t((jloc-1)*lldt+iloc), lldt )
2750 indxs = iceil(lihic,nb)*nb + 1
2751 indxe =
min(n,indxs+(npcol-2)*nb)
2752 DO 460 indx = indxs, indxe, nb
2753 IF( myrow.EQ.rsrc1 )
THEN
2755 $ nprow, npcol, myrow, mycol,
2756 $ iloc, jloc, rsrc1, csrc )
2757 IF( mycol.EQ.csrc )
THEN
2758 CALL sgemm(
'Transpose',
2759 $
'No Transpose', dim1,
2761 $ work( ipw4 ), nwin,
2762 $ work( ipw6 ), nwin,
2763 $ zero, work(ipw8), dim1 )
2764 CALL slamov(
'All', dim1,
2765 $ tcols, work(ipw8), dim1,
2766 $ t((jloc-1)*lldt+iloc),
2770 IF( myrow.EQ.rsrc4 )
THEN
2772 $ desct, nprow, npcol, myrow,
2773 $ mycol, iloc, jloc, rsrc4,
2775 IF( mycol.EQ.csrc )
THEN
2776 CALL sgemm(
'Transpose',
2777 $
'No Transpose', dim4,
2779 $ work( ipw4+nwin*dim1 ),
2780 $ nwin, work( ipw6 ),
2781 $ nwin, zero, work(ipw8),
2783 CALL slamov(
'All', dim4,
2784 $ tcols, work(ipw8), dim4,
2785 $ t((jloc-1)*lldt+iloc),
2826 indxe =
min(i-1,1+(nprow-1)*nb)
2827 DO 470 indx = 1, indxe, nb
2828 IF( mycol.EQ.csrc1 )
THEN
2829 CALL infog2l( indx, i, desct, nprow,
2830 $ npcol, myrow, mycol, iloc,
2831 $ jloc, rsrc, csrc1 )
2832 IF( myrow.EQ.rsrc )
THEN
2833 CALL slamov(
'All', trows, ks,
2834 $ work( ipw5+trows*dim4),
2835 $ trows, work(ipw8), trows )
2836 CALL strmm(
'Right',
'Upper',
2838 $
'Non-unit', trows, ks,
2839 $ one, work( ipw4+dim4 ),
2840 $ nwin, work(ipw8), trows )
2841 CALL sgemm(
'No transpose',
2842 $
'No transpose', trows, ks,
2843 $ dim4, one, work( ipw5 ),
2844 $ trows, work( ipw4 ), nwin,
2845 $ one, work(ipw8), trows )
2846 CALL slamov(
'All', trows, ks,
2847 $ work(ipw8), trows,
2848 $ t((jloc-1)*lldt+iloc),
2856 IF( mycol.EQ.csrc4 )
THEN
2857 CALL infog2l( indx, i+dim1, desct,
2858 $ nprow, npcol, myrow, mycol,
2859 $ iloc, jloc, rsrc, csrc4 )
2860 IF( myrow.EQ.rsrc )
THEN
2861 CALL slamov(
'All', trows, dim4,
2862 $ work(ipw5), trows,
2863 $ work( ipw8 ), trows )
2864 CALL strmm(
'Right',
'Lower',
2866 $
'Non-unit', trows, dim4,
2867 $ one, work( ipw4+nwin*ks ),
2868 $ nwin, work( ipw8 ), trows )
2869 CALL sgemm(
'No transpose',
2870 $
'No transpose', trows, dim4,
2872 $ work( ipw5+trows*dim4),
2874 $ work( ipw4+nwin*ks+dim4 ),
2875 $ nwin, one, work( ipw8 ),
2877 CALL slamov(
'All', trows, dim4,
2878 $ work(ipw8), trows,
2879 $ t((jloc-1)*lldt+iloc),
2889 indxe =
min(n,1+(nprow-1)*nb)
2890 DO 480 indx = 1, indxe, nb
2891 IF( mycol.EQ.csrc1 )
THEN
2893 $ nprow, npcol, myrow, mycol,
2894 $ iloc, jloc, rsrc, csrc1 )
2895 IF( myrow.EQ.rsrc )
THEN
2896 CALL slamov(
'All', qrows, ks,
2897 $ work( ipw7+qrows*dim4),
2898 $ qrows, work(ipw8),
2900 CALL strmm(
'Right',
'Upper',
2902 $
'Non-unit', qrows,
2904 $ work( ipw4+dim4 ), nwin,
2905 $ work(ipw8), qrows )
2906 CALL sgemm(
'No transpose',
2907 $
'No transpose', qrows,
2909 $ work( ipw7 ), qrows,
2910 $ work( ipw4 ), nwin, one,
2911 $ work(ipw8), qrows )
2912 CALL slamov(
'All', qrows, ks,
2913 $ work(ipw8), qrows,
2914 $ q((jloc-1)*lldq+iloc),
2922 IF( mycol.EQ.csrc4 )
THEN
2924 $ descq, nprow, npcol, myrow,
2925 $ mycol, iloc, jloc, rsrc,
2927 IF( myrow.EQ.rsrc )
THEN
2928 CALL slamov(
'All', qrows,
2929 $ dim4, work(ipw7), qrows,
2930 $ work( ipw8 ), qrows )
2931 CALL strmm(
'Right',
'Lower',
2933 $
'Non-unit', qrows,
2935 $ work( ipw4+nwin*ks ),
2936 $ nwin, work( ipw8 ),
2938 CALL sgemm(
'No transpose',
2939 $
'No transpose', qrows,
2941 $ work(ipw7+qrows*(dim4)),
2943 $ work(ipw4+nwin*ks+dim4),
2944 $ nwin, one, work( ipw8 ),
2946 CALL slamov(
'All', qrows,
2947 $ dim4, work(ipw8), qrows,
2948 $ q((jloc-1)*lldq+iloc),
2957 IF ( lihic.LT.n )
THEN
2962 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc4
2963 $ .AND.mod(lihic,nb).NE.0 )
THEN
2965 CALL infog2l( i, indx, desct, nprow,
2966 $ npcol, myrow, mycol, iloc,
2967 $ jloc, rsrc1, csrc4 )
2968 CALL slamov(
'All', ks, tcols,
2969 $ work( ipw6+dim4 ), nwin,
2971 CALL strmm(
'Left',
'Upper',
2972 $
'Transpose',
'Non-unit',
2974 $ work( ipw4+dim4 ), nwin,
2976 CALL sgemm(
'Transpose',
2977 $
'No transpose', ks, tcols,
2978 $ dim4, one, work(ipw4), nwin,
2979 $ work(ipw6), nwin, one,
2981 CALL slamov(
'All', ks, tcols,
2983 $ t((jloc-1)*lldt+iloc), lldt )
2989 IF( myrow.EQ.rsrc4.AND.mycol.EQ.csrc4
2990 $ .AND.mod(lihic,nb).NE.0 )
THEN
2992 CALL infog2l( i+dim1, indx, desct,
2993 $ nprow, npcol, myrow, mycol,
2994 $ iloc, jloc, rsrc4, csrc4 )
2995 CALL slamov(
'All', dim4, tcols,
2996 $ work( ipw6 ), nwin,
2997 $ work( ipw8 ), dim4 )
2998 CALL strmm(
'Left',
'Lower',
2999 $
'Transpose',
'Non-unit',
3001 $ work( ipw4+nwin*ks ), nwin,
3002 $ work( ipw8 ), dim4 )
3003 CALL sgemm(
'Transpose',
3004 $
'No Transpose', dim4, tcols,
3006 $ work( ipw4+nwin*ks+dim4 ),
3007 $ nwin, work( ipw6+dim1 ), nwin,
3008 $ one, work( ipw8), dim4 )
3009 CALL slamov(
'All', dim4, tcols,
3011 $ t((jloc-1)*lldt+iloc), lldt )
3017 indxs = iceil(lihic,nb)*nb+1
3018 indxe =
min(n,indxs+(npcol-2)*nb)
3019 DO 490 indx = indxs, indxe, nb
3020 IF( myrow.EQ.rsrc1 )
THEN
3022 $ nprow, npcol, myrow, mycol,
3023 $ iloc, jloc, rsrc1, csrc )
3024 IF( mycol.EQ.csrc )
THEN
3025 CALL slamov(
'All', ks, tcols,
3026 $ work( ipw6+dim4 ), nwin,
3028 CALL strmm(
'Left',
'Upper',
3032 $ work( ipw4+dim4 ), nwin,
3034 CALL sgemm(
'Transpose',
3035 $
'No transpose', ks,
3038 $ work(ipw6), nwin, one,
3040 CALL slamov(
'All', ks, tcols,
3042 $ t((jloc-1)*lldt+iloc),
3050 IF( myrow.EQ.rsrc4 )
THEN
3052 $ desct, nprow, npcol, myrow,
3053 $ mycol, iloc, jloc, rsrc4,
3055 IF( mycol.EQ.csrc )
THEN
3056 CALL slamov(
'All', dim4,
3057 $ tcols, work( ipw6 ),
3058 $ nwin, work( ipw8 ),
3060 CALL strmm(
'Left',
'Lower',
3064 $ work( ipw4+nwin*ks ),
3065 $ nwin, work( ipw8 ),
3067 CALL sgemm(
'Transpose',
3068 $
'No Transpose', dim4,
3070 $ work(ipw4+nwin*ks+dim4),
3071 $ nwin, work( ipw6+dim1 ),
3072 $ nwin, one, work( ipw8),
3074 CALL slamov(
'All', dim4,
3075 $ tcols, work(ipw8), dim4,
3076 $ t((jloc-1)*lldt+iloc),
3084 ELSEIF( flops.NE.0 )
THEN
3099 indxe =
min(i-1,1+(nprow-1)*nb)
3100 DO 500 indx = 1, indxe, nb
3101 CALL infog2l( indx, i, desct, nprow,
3102 $ npcol, myrow, mycol, iloc, jloc,
3104 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc )
3106 CALL bslaapp( 1, trows, nwin, ncb,
3107 $ work(ipw5), trows, nitraf,
3108 $ iwork(ipiw), work( ipw3 ),
3110 CALL slamov(
'All', trows, dim1,
3111 $ work(ipw5), trows,
3112 $ t((jloc-1)*lldt+iloc ), lldt )
3114 CALL infog2l( indx, i+dim1, desct, nprow,
3115 $ npcol, myrow, mycol, iloc, jloc,
3117 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc )
3120 $
CALL bslaapp( 1, trows, nwin, ncb,
3121 $ work(ipw5), trows, nitraf,
3122 $ iwork(ipiw), work( ipw3 ),
3124 CALL slamov(
'All', trows, dim4,
3125 $ work(ipw5+trows*dim1), trows,
3126 $ t((jloc-1)*lldt+iloc ), lldt )
3130 indxe =
min(n,1+(nprow-1)*nb)
3131 DO 510 indx = 1, indxe, nb
3132 CALL infog2l( indx, i, descq, nprow,
3133 $ npcol, myrow, mycol, iloc, jloc,
3135 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc )
3137 CALL bslaapp( 1, qrows, nwin, ncb,
3138 $ work(ipw7), qrows, nitraf,
3139 $ iwork(ipiw), work( ipw3 ),
3141 CALL slamov(
'All', qrows, dim1,
3142 $ work(ipw7), qrows,
3143 $ q((jloc-1)*lldq+iloc ), lldq )
3145 CALL infog2l( indx, i+dim1, descq,
3146 $ nprow, npcol, myrow, mycol, iloc,
3147 $ jloc, rsrc, csrc )
3148 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc )
3151 $
CALL bslaapp( 1, qrows, nwin,
3152 $ ncb, work(ipw7), qrows,
3153 $ nitraf, iwork(ipiw),
3154 $ work( ipw3 ), work(ipw8) )
3155 CALL slamov(
'All', qrows, dim4,
3156 $ work(ipw7+qrows*dim1), qrows,
3157 $ q((jloc-1)*lldq+iloc ), lldq )
3164 IF( lihic.LT.n )
THEN
3166 CALL infog2l( i, indx, desct, nprow,
3167 $ npcol, myrow, mycol, iloc, jloc,
3169 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc.AND.
3170 $ mod(lihic,nb).NE.0 )
THEN
3171 CALL bslaapp( 0, nwin, tcols, ncb,
3172 $ work( ipw6 ), nwin, nitraf,
3173 $ iwork(ipiw), work( ipw3 ),
3175 CALL slamov(
'All', dim1, tcols,
3176 $ work( ipw6 ), nwin,
3177 $ t((jloc-1)*lldt+iloc), lldt )
3179 CALL infog2l( i+dim1, indx, desct, nprow,
3180 $ npcol, myrow, mycol, iloc, jloc,
3182 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc.AND.
3183 $ mod(lihic,nb).NE.0 )
THEN
3185 $
CALL bslaapp( 0, nwin, tcols, ncb,
3186 $ work( ipw6 ), nwin, nitraf,
3187 $ iwork(ipiw), work( ipw3 ),
3189 CALL slamov(
'All', dim4, tcols,
3190 $ work( ipw6+dim1 ), nwin,
3191 $ t((jloc-1)*lldt+iloc), lldt )
3193 indxs = iceil(lihic,nb)*nb + 1
3194 indxe =
min(n,indxs+(npcol-2)*nb)
3195 DO 520 indx = indxs, indxe, nb
3196 CALL infog2l( i, indx, desct, nprow,
3197 $ npcol, myrow, mycol, iloc, jloc,
3199 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc )
3201 CALL bslaapp( 0, nwin, tcols, ncb,
3202 $ work(ipw6), nwin, nitraf,
3203 $ iwork(ipiw), work( ipw3 ),
3205 CALL slamov(
'All', dim1, tcols,
3206 $ work( ipw6 ), nwin,
3207 $ t((jloc-1)*lldt+iloc), lldt )
3209 CALL infog2l( i+dim1, indx, desct,
3210 $ nprow, npcol, myrow, mycol, iloc,
3211 $ jloc, rsrc, csrc )
3212 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc )
3215 $
CALL bslaapp( 0, nwin, tcols,
3216 $ ncb, work(ipw6), nwin, nitraf,
3217 $ iwork(ipiw), work( ipw3 ),
3219 CALL slamov(
'All', dim4, tcols,
3220 $ work( ipw6+dim1 ), nwin,
3221 $ t((jloc-1)*lldt+iloc), lldt )
3242 IF( lastwait .AND. last.LT.2 )
GO TO 308
3246 CALL blacs_barrier( ictxt,
'All' )
3252 IF( nprocs.GT.1 )
THEN
3253 CALL igamx2d( ictxt,
'All', top, 1, 1, ierr, 1, -1,
3257 IF( ierr.NE.0 )
THEN
3262 IF( myierr.NE.0 ) info =
max(1,i+kks-1)
3263 IF( nprocs.GT.1 )
THEN
3264 CALL igamx2d( ictxt,
'All', top, 1, 1, info, 1, -1,
3273 rsrc = indxg2p( k, nb, myrow, desct( rsrc_ ), nprow )
3274 csrc = indxg2p( k, nb, mycol, desct( csrc_ ), npcol )
3275 IF( myrow.NE.rsrc .OR. mycol.NE.csrc )
3279 $
CALL igsum2d( ictxt,
'All', top, n, 1,
SELECT, n, -1, -1 )
3287 IF(
SELECT(ilo).NE.0 )
GO TO 523
3293 IF(
SELECT(ihi).EQ.0 )
GO TO 527
3305 IF( info.NE.0 )
THEN
3307 rsrc = indxg2p( k, nb, myrow, desct( rsrc_ ), nprow )
3308 csrc = indxg2p( k, nb, mycol, desct( csrc_ ), npcol )
3309 IF( myrow.NE.rsrc .OR. mycol.NE.csrc )
3313 $
CALL igsum2d( ictxt,
'All', top, n, 1,
SELECT, n, -1, -1 )
3338 IF( .NOT. pair )
THEN
3339 border = ( k.NE.n .AND. mod( k, nb ).EQ.0 ) .OR.
3340 % ( k.NE.1 .AND. mod( k, nb ).EQ.1 )
3341 IF( .NOT. border )
THEN
3342 CALL infog2l( k, k, desct, nprow, npcol, myrow, mycol,
3343 $ iloc1, jloc1, trsrc1, tcsrc1 )
3344 IF( myrow.EQ.trsrc1 .AND. mycol.EQ.tcsrc1 )
THEN
3345 elem1 = t((jloc1-1)*lldt+iloc1)
3347 elem3 = t((jloc1-1)*lldt+iloc1+1)
3351 IF( elem3.NE.zero )
THEN
3352 elem2 = t((jloc1)*lldt+iloc1)
3353 elem4 = t((jloc1)*lldt+iloc1+1)
3354 CALL slanv2( elem1, elem2, elem3, elem4,
3355 $ wr( k ), wi( k ), wr( k+1 ), wi( k+1 ), sn,
3360 tmp = t((jloc1-2)*lldt+iloc1)
3361 IF( tmp.NE.zero )
THEN
3362 elem1 = t((jloc1-2)*lldt+iloc1-1)
3363 elem2 = t((jloc1-1)*lldt+iloc1-1)
3364 elem3 = t((jloc1-2)*lldt+iloc1)
3365 elem4 = t((jloc1-1)*lldt+iloc1)
3366 CALL slanv2( elem1, elem2, elem3, elem4,
3367 $ wr( k-1 ), wi( k-1 ), wr( k ),
3391 DO 570 k = nb, n-1, nb
3392 CALL infog2l( k, k, desct, nprow, npcol, myrow, mycol,
3393 $ iloc1, jloc1, trsrc1, tcsrc1 )
3394 CALL infog2l( k, k+1, desct, nprow, npcol, myrow, mycol,
3395 $ iloc2, jloc2, trsrc2, tcsrc2 )
3396 CALL infog2l( k+1, k, desct, nprow, npcol, myrow, mycol,
3397 $ iloc3, jloc3, trsrc3, tcsrc3 )
3398 CALL infog2l( k+1, k+1, desct, nprow, npcol, myrow, mycol,
3399 $ iloc4, jloc4, trsrc4, tcsrc4 )
3400 IF( myrow.EQ.trsrc2 .AND. mycol.EQ.tcsrc2 )
THEN
3401 elem2 = t((jloc2-1)*lldt+iloc2)
3402 IF( trsrc1.NE.trsrc2 .OR. tcsrc1.NE.tcsrc2 )
3403 $
CALL sgesd2d( ictxt, 1, 1, elem2, 1, trsrc1, tcsrc1 )
3405 IF( myrow.EQ.trsrc3 .AND. mycol.EQ.tcsrc3 )
THEN
3406 elem3 = t((jloc3-1)*lldt+iloc3)
3407 IF( trsrc1.NE.trsrc3 .OR. tcsrc1.NE.tcsrc3 )
3408 $
CALL sgesd2d( ictxt, 1, 1, elem3, 1, trsrc1, tcsrc1 )
3410 IF( myrow.EQ.trsrc4 .AND. mycol.EQ.tcsrc4 )
THEN
3411 work(1) = t((jloc4-1)*lldt+iloc4)
3413 work(2) = t((jloc4-1)*lldt+iloc4+1)
3417 IF( trsrc1.NE.trsrc4 .OR. tcsrc1.NE.tcsrc4 )
3418 $
CALL sgesd2d( ictxt, 2, 1, work, 2, trsrc1, tcsrc1 )
3420 IF( myrow.EQ.trsrc1 .AND. mycol.EQ.tcsrc1 )
THEN
3421 elem1 = t((jloc1-1)*lldt+iloc1)
3422 IF( trsrc1.NE.trsrc2 .OR. tcsrc1.NE.tcsrc2 )
3423 $
CALL sgerv2d( ictxt, 1, 1, elem2, 1, trsrc2, tcsrc2 )
3424 IF( trsrc1.NE.trsrc3 .OR. tcsrc1.NE.tcsrc3 )
3425 $
CALL sgerv2d( ictxt, 1, 1, elem3, 1, trsrc3, tcsrc3 )
3426 IF( trsrc1.NE.trsrc4 .OR. tcsrc1.NE.tcsrc4 )
3427 $
CALL sgerv2d( ictxt, 2, 1, work, 2, trsrc4, tcsrc4 )
3430 IF( elem5.EQ.zero )
THEN
3431 IF( wr( k ).EQ.zero .AND. wi( k ).EQ.zero )
THEN
3432 CALL slanv2( elem1, elem2, elem3, elem4, wr( k ),
3433 $ wi( k ), wr( k+1 ), wi( k+1 ), sn, cs )
3434 ELSEIF( wr( k+1 ).EQ.zero .AND. wi( k+1 ).EQ.zero )
THEN
3437 ELSEIF( wr( k ).EQ.zero .AND. wi( k ).EQ.zero )
THEN
3443 IF( nprocs.GT.1 )
THEN
3444 CALL sgsum2d( ictxt,
'All', top, n, 1, wr, n, -1, -1 )
3445 CALL sgsum2d( ictxt,
'All', top, n, 1, wi, n, -1, -1 )
3450 work( 1 ) = float(lwmin)