LAPACK  3.6.1
LAPACK: Linear Algebra PACKage
subroutine cbbcsd ( character  JOBU1,
character  JOBU2,
character  JOBV1T,
character  JOBV2T,
character  TRANS,
integer  M,
integer  P,
integer  Q,
real, dimension( * )  THETA,
real, dimension( * )  PHI,
complex, dimension( ldu1, * )  U1,
integer  LDU1,
complex, dimension( ldu2, * )  U2,
integer  LDU2,
complex, dimension( ldv1t, * )  V1T,
integer  LDV1T,
complex, dimension( ldv2t, * )  V2T,
integer  LDV2T,
real, dimension( * )  B11D,
real, dimension( * )  B11E,
real, dimension( * )  B12D,
real, dimension( * )  B12E,
real, dimension( * )  B21D,
real, dimension( * )  B21E,
real, dimension( * )  B22D,
real, dimension( * )  B22E,
real, dimension( * )  RWORK,
integer  LRWORK,
integer  INFO 
)

CBBCSD

Download CBBCSD + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 CBBCSD computes the CS decomposition of a unitary matrix in
 bidiagonal-block form,


     [ B11 | B12 0  0 ]
     [  0  |  0 -I  0 ]
 X = [----------------]
     [ B21 | B22 0  0 ]
     [  0  |  0  0  I ]

                               [  C | -S  0  0 ]
                   [ U1 |    ] [  0 |  0 -I  0 ] [ V1 |    ]**H
                 = [---------] [---------------] [---------]   .
                   [    | U2 ] [  S |  C  0  0 ] [    | V2 ]
                               [  0 |  0  0  I ]

 X is M-by-M, its top-left block is P-by-Q, and Q must be no larger
 than P, M-P, or M-Q. (If Q is not the smallest index, then X must be
 transposed and/or permuted. This can be done in constant time using
 the TRANS and SIGNS options. See CUNCSD for details.)

 The bidiagonal matrices B11, B12, B21, and B22 are represented
 implicitly by angles THETA(1:Q) and PHI(1:Q-1).

 The unitary matrices U1, U2, V1T, and V2T are input/output.
 The input matrices are pre- or post-multiplied by the appropriate
 singular vector matrices.
Parameters
[in]JOBU1
          JOBU1 is CHARACTER
          = 'Y':      U1 is updated;
          otherwise:  U1 is not updated.
[in]JOBU2
          JOBU2 is CHARACTER
          = 'Y':      U2 is updated;
          otherwise:  U2 is not updated.
[in]JOBV1T
          JOBV1T is CHARACTER
          = 'Y':      V1T is updated;
          otherwise:  V1T is not updated.
[in]JOBV2T
          JOBV2T is CHARACTER
          = 'Y':      V2T is updated;
          otherwise:  V2T is not updated.
[in]TRANS
          TRANS is CHARACTER
          = 'T':      X, U1, U2, V1T, and V2T are stored in row-major
                      order;
          otherwise:  X, U1, U2, V1T, and V2T are stored in column-
                      major order.
[in]M
          M is INTEGER
          The number of rows and columns in X, the unitary matrix in
          bidiagonal-block form.
[in]P
          P is INTEGER
          The number of rows in the top-left block of X. 0 <= P <= M.
[in]Q
          Q is INTEGER
          The number of columns in the top-left block of X.
          0 <= Q <= MIN(P,M-P,M-Q).
[in,out]THETA
          THETA is REAL array, dimension (Q)
          On entry, the angles THETA(1),...,THETA(Q) that, along with
          PHI(1), ...,PHI(Q-1), define the matrix in bidiagonal-block
          form. On exit, the angles whose cosines and sines define the
          diagonal blocks in the CS decomposition.
[in,out]PHI
          PHI is REAL array, dimension (Q-1)
          The angles PHI(1),...,PHI(Q-1) that, along with THETA(1),...,
          THETA(Q), define the matrix in bidiagonal-block form.
[in,out]U1
          U1 is COMPLEX array, dimension (LDU1,P)
          On entry, a P-by-P matrix. On exit, U1 is postmultiplied
          by the left singular vector matrix common to [ B11 ; 0 ] and
          [ B12 0 0 ; 0 -I 0 0 ].
[in]LDU1
          LDU1 is INTEGER
          The leading dimension of the array U1, LDU1 >= MAX(1,P).
[in,out]U2
          U2 is COMPLEX array, dimension (LDU2,M-P)
          On entry, an (M-P)-by-(M-P) matrix. On exit, U2 is
          postmultiplied by the left singular vector matrix common to
          [ B21 ; 0 ] and [ B22 0 0 ; 0 0 I ].
[in]LDU2
          LDU2 is INTEGER
          The leading dimension of the array U2, LDU2 >= MAX(1,M-P).
[in,out]V1T
          V1T is COMPLEX array, dimension (LDV1T,Q)
          On entry, a Q-by-Q matrix. On exit, V1T is premultiplied
          by the conjugate transpose of the right singular vector
          matrix common to [ B11 ; 0 ] and [ B21 ; 0 ].
[in]LDV1T
          LDV1T is INTEGER
          The leading dimension of the array V1T, LDV1T >= MAX(1,Q).
[in,out]V2T
          V2T is COMPLEX array, dimenison (LDV2T,M-Q)
          On entry, an (M-Q)-by-(M-Q) matrix. On exit, V2T is
          premultiplied by the conjugate transpose of the right
          singular vector matrix common to [ B12 0 0 ; 0 -I 0 ] and
          [ B22 0 0 ; 0 0 I ].
[in]LDV2T
          LDV2T is INTEGER
          The leading dimension of the array V2T, LDV2T >= MAX(1,M-Q).
[out]B11D
          B11D is REAL array, dimension (Q)
          When CBBCSD converges, B11D contains the cosines of THETA(1),
          ..., THETA(Q). If CBBCSD fails to converge, then B11D
          contains the diagonal of the partially reduced top-left
          block.
[out]B11E
          B11E is REAL array, dimension (Q-1)
          When CBBCSD converges, B11E contains zeros. If CBBCSD fails
          to converge, then B11E contains the superdiagonal of the
          partially reduced top-left block.
[out]B12D
          B12D is REAL array, dimension (Q)
          When CBBCSD converges, B12D contains the negative sines of
          THETA(1), ..., THETA(Q). If CBBCSD fails to converge, then
          B12D contains the diagonal of the partially reduced top-right
          block.
[out]B12E
          B12E is REAL array, dimension (Q-1)
          When CBBCSD converges, B12E contains zeros. If CBBCSD fails
          to converge, then B12E contains the subdiagonal of the
          partially reduced top-right block.
[out]B21D
          B21D is REAL array, dimension (Q)
          When CBBCSD converges, B21D contains the negative sines of
          THETA(1), ..., THETA(Q). If CBBCSD fails to converge, then
          B21D contains the diagonal of the partially reduced bottom-left
          block.
[out]B21E
          B21E is REAL array, dimension (Q-1)
          When CBBCSD converges, B21E contains zeros. If CBBCSD fails
          to converge, then B21E contains the subdiagonal of the
          partially reduced bottom-left block.
[out]B22D
          B22D is REAL array, dimension (Q)
          When CBBCSD converges, B22D contains the negative sines of
          THETA(1), ..., THETA(Q). If CBBCSD fails to converge, then
          B22D contains the diagonal of the partially reduced bottom-right
          block.
[out]B22E
          B22E is REAL array, dimension (Q-1)
          When CBBCSD converges, B22E contains zeros. If CBBCSD fails
          to converge, then B22E contains the subdiagonal of the
          partially reduced bottom-right block.
[out]RWORK
          RWORK is REAL array, dimension (MAX(1,LWORK))
          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
[in]LRWORK
          LRWORK is INTEGER
          The dimension of the array RWORK. LRWORK >= MAX(1,8*Q).

          If LRWORK = -1, then a workspace query is assumed; the
          routine only calculates the optimal size of the RWORK array,
          returns this value as the first entry of the work array, and
          no error message related to LRWORK is issued by XERBLA.
[out]INFO
          INFO is INTEGER
          = 0:  successful exit.
          < 0:  if INFO = -i, the i-th argument had an illegal value.
          > 0:  if CBBCSD did not converge, INFO specifies the number
                of nonzero entries in PHI, and B11D, B11E, etc.,
                contain the partially reduced matrix.
Internal Parameters:
  TOLMUL  REAL, default = MAX(10,MIN(100,EPS**(-1/8)))
          TOLMUL controls the convergence criterion of the QR loop.
          Angles THETA(i), PHI(i) are rounded to 0 or PI/2 when they
          are within TOLMUL*EPS of either bound.
References:
[1] Brian D. Sutton. Computing the complete CS decomposition. Numer. Algorithms, 50(1):33-65, 2009.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Date
June 2016

Definition at line 334 of file cbbcsd.f.

334 *
335 * -- LAPACK computational routine (version 3.6.1) --
336 * -- LAPACK is a software package provided by Univ. of Tennessee, --
337 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
338 * June 2016
339 *
340 * .. Scalar Arguments ..
341  CHARACTER jobu1, jobu2, jobv1t, jobv2t, trans
342  INTEGER info, ldu1, ldu2, ldv1t, ldv2t, lrwork, m, p, q
343 * ..
344 * .. Array Arguments ..
345  REAL b11d( * ), b11e( * ), b12d( * ), b12e( * ),
346  $ b21d( * ), b21e( * ), b22d( * ), b22e( * ),
347  $ phi( * ), theta( * ), rwork( * )
348  COMPLEX u1( ldu1, * ), u2( ldu2, * ), v1t( ldv1t, * ),
349  $ v2t( ldv2t, * )
350 * ..
351 *
352 * ===================================================================
353 *
354 * .. Parameters ..
355  INTEGER maxitr
356  parameter ( maxitr = 6 )
357  REAL hundred, meighth, one, piover2, ten, zero
358  parameter ( hundred = 100.0e0, meighth = -0.125e0,
359  $ one = 1.0e0, piover2 = 1.57079632679489662e0,
360  $ ten = 10.0e0, zero = 0.0e0 )
361  COMPLEX negonecomplex
362  parameter ( negonecomplex = (-1.0e0,0.0e0) )
363 * ..
364 * .. Local Scalars ..
365  LOGICAL colmajor, lquery, restart11, restart12,
366  $ restart21, restart22, wantu1, wantu2, wantv1t,
367  $ wantv2t
368  INTEGER i, imin, imax, iter, iu1cs, iu1sn, iu2cs,
369  $ iu2sn, iv1tcs, iv1tsn, iv2tcs, iv2tsn, j,
370  $ lrworkmin, lrworkopt, maxit, mini
371  REAL b11bulge, b12bulge, b21bulge, b22bulge, dummy,
372  $ eps, mu, nu, r, sigma11, sigma21,
373  $ temp, thetamax, thetamin, thresh, tol, tolmul,
374  $ unfl, x1, x2, y1, y2
375 *
376 * .. External Subroutines ..
377  EXTERNAL clasr, cscal, cswap, slartgp, slartgs, slas2,
378  $ xerbla
379 * ..
380 * .. External Functions ..
381  REAL slamch
382  LOGICAL lsame
383  EXTERNAL lsame, slamch
384 * ..
385 * .. Intrinsic Functions ..
386  INTRINSIC abs, atan2, cos, max, min, sin, sqrt
387 * ..
388 * .. Executable Statements ..
389 *
390 * Test input arguments
391 *
392  info = 0
393  lquery = lrwork .EQ. -1
394  wantu1 = lsame( jobu1, 'Y' )
395  wantu2 = lsame( jobu2, 'Y' )
396  wantv1t = lsame( jobv1t, 'Y' )
397  wantv2t = lsame( jobv2t, 'Y' )
398  colmajor = .NOT. lsame( trans, 'T' )
399 *
400  IF( m .LT. 0 ) THEN
401  info = -6
402  ELSE IF( p .LT. 0 .OR. p .GT. m ) THEN
403  info = -7
404  ELSE IF( q .LT. 0 .OR. q .GT. m ) THEN
405  info = -8
406  ELSE IF( q .GT. p .OR. q .GT. m-p .OR. q .GT. m-q ) THEN
407  info = -8
408  ELSE IF( wantu1 .AND. ldu1 .LT. p ) THEN
409  info = -12
410  ELSE IF( wantu2 .AND. ldu2 .LT. m-p ) THEN
411  info = -14
412  ELSE IF( wantv1t .AND. ldv1t .LT. q ) THEN
413  info = -16
414  ELSE IF( wantv2t .AND. ldv2t .LT. m-q ) THEN
415  info = -18
416  END IF
417 *
418 * Quick return if Q = 0
419 *
420  IF( info .EQ. 0 .AND. q .EQ. 0 ) THEN
421  lrworkmin = 1
422  rwork(1) = lrworkmin
423  RETURN
424  END IF
425 *
426 * Compute workspace
427 *
428  IF( info .EQ. 0 ) THEN
429  iu1cs = 1
430  iu1sn = iu1cs + q
431  iu2cs = iu1sn + q
432  iu2sn = iu2cs + q
433  iv1tcs = iu2sn + q
434  iv1tsn = iv1tcs + q
435  iv2tcs = iv1tsn + q
436  iv2tsn = iv2tcs + q
437  lrworkopt = iv2tsn + q - 1
438  lrworkmin = lrworkopt
439  rwork(1) = lrworkopt
440  IF( lrwork .LT. lrworkmin .AND. .NOT. lquery ) THEN
441  info = -28
442  END IF
443  END IF
444 *
445  IF( info .NE. 0 ) THEN
446  CALL xerbla( 'CBBCSD', -info )
447  RETURN
448  ELSE IF( lquery ) THEN
449  RETURN
450  END IF
451 *
452 * Get machine constants
453 *
454  eps = slamch( 'Epsilon' )
455  unfl = slamch( 'Safe minimum' )
456  tolmul = max( ten, min( hundred, eps**meighth ) )
457  tol = tolmul*eps
458  thresh = max( tol, maxitr*q*q*unfl )
459 *
460 * Test for negligible sines or cosines
461 *
462  DO i = 1, q
463  IF( theta(i) .LT. thresh ) THEN
464  theta(i) = zero
465  ELSE IF( theta(i) .GT. piover2-thresh ) THEN
466  theta(i) = piover2
467  END IF
468  END DO
469  DO i = 1, q-1
470  IF( phi(i) .LT. thresh ) THEN
471  phi(i) = zero
472  ELSE IF( phi(i) .GT. piover2-thresh ) THEN
473  phi(i) = piover2
474  END IF
475  END DO
476 *
477 * Initial deflation
478 *
479  imax = q
480  DO WHILE( imax .GT. 1 )
481  IF( phi(imax-1) .NE. zero ) THEN
482  EXIT
483  END IF
484  imax = imax - 1
485  END DO
486  imin = imax - 1
487  IF ( imin .GT. 1 ) THEN
488  DO WHILE( phi(imin-1) .NE. zero )
489  imin = imin - 1
490  IF ( imin .LE. 1 ) EXIT
491  END DO
492  END IF
493 *
494 * Initialize iteration counter
495 *
496  maxit = maxitr*q*q
497  iter = 0
498 *
499 * Begin main iteration loop
500 *
501  DO WHILE( imax .GT. 1 )
502 *
503 * Compute the matrix entries
504 *
505  b11d(imin) = cos( theta(imin) )
506  b21d(imin) = -sin( theta(imin) )
507  DO i = imin, imax - 1
508  b11e(i) = -sin( theta(i) ) * sin( phi(i) )
509  b11d(i+1) = cos( theta(i+1) ) * cos( phi(i) )
510  b12d(i) = sin( theta(i) ) * cos( phi(i) )
511  b12e(i) = cos( theta(i+1) ) * sin( phi(i) )
512  b21e(i) = -cos( theta(i) ) * sin( phi(i) )
513  b21d(i+1) = -sin( theta(i+1) ) * cos( phi(i) )
514  b22d(i) = cos( theta(i) ) * cos( phi(i) )
515  b22e(i) = -sin( theta(i+1) ) * sin( phi(i) )
516  END DO
517  b12d(imax) = sin( theta(imax) )
518  b22d(imax) = cos( theta(imax) )
519 *
520 * Abort if not converging; otherwise, increment ITER
521 *
522  IF( iter .GT. maxit ) THEN
523  info = 0
524  DO i = 1, q
525  IF( phi(i) .NE. zero )
526  $ info = info + 1
527  END DO
528  RETURN
529  END IF
530 *
531  iter = iter + imax - imin
532 *
533 * Compute shifts
534 *
535  thetamax = theta(imin)
536  thetamin = theta(imin)
537  DO i = imin+1, imax
538  IF( theta(i) > thetamax )
539  $ thetamax = theta(i)
540  IF( theta(i) < thetamin )
541  $ thetamin = theta(i)
542  END DO
543 *
544  IF( thetamax .GT. piover2 - thresh ) THEN
545 *
546 * Zero on diagonals of B11 and B22; induce deflation with a
547 * zero shift
548 *
549  mu = zero
550  nu = one
551 *
552  ELSE IF( thetamin .LT. thresh ) THEN
553 *
554 * Zero on diagonals of B12 and B22; induce deflation with a
555 * zero shift
556 *
557  mu = one
558  nu = zero
559 *
560  ELSE
561 *
562 * Compute shifts for B11 and B21 and use the lesser
563 *
564  CALL slas2( b11d(imax-1), b11e(imax-1), b11d(imax), sigma11,
565  $ dummy )
566  CALL slas2( b21d(imax-1), b21e(imax-1), b21d(imax), sigma21,
567  $ dummy )
568 *
569  IF( sigma11 .LE. sigma21 ) THEN
570  mu = sigma11
571  nu = sqrt( one - mu**2 )
572  IF( mu .LT. thresh ) THEN
573  mu = zero
574  nu = one
575  END IF
576  ELSE
577  nu = sigma21
578  mu = sqrt( 1.0 - nu**2 )
579  IF( nu .LT. thresh ) THEN
580  mu = one
581  nu = zero
582  END IF
583  END IF
584  END IF
585 *
586 * Rotate to produce bulges in B11 and B21
587 *
588  IF( mu .LE. nu ) THEN
589  CALL slartgs( b11d(imin), b11e(imin), mu,
590  $ rwork(iv1tcs+imin-1), rwork(iv1tsn+imin-1) )
591  ELSE
592  CALL slartgs( b21d(imin), b21e(imin), nu,
593  $ rwork(iv1tcs+imin-1), rwork(iv1tsn+imin-1) )
594  END IF
595 *
596  temp = rwork(iv1tcs+imin-1)*b11d(imin) +
597  $ rwork(iv1tsn+imin-1)*b11e(imin)
598  b11e(imin) = rwork(iv1tcs+imin-1)*b11e(imin) -
599  $ rwork(iv1tsn+imin-1)*b11d(imin)
600  b11d(imin) = temp
601  b11bulge = rwork(iv1tsn+imin-1)*b11d(imin+1)
602  b11d(imin+1) = rwork(iv1tcs+imin-1)*b11d(imin+1)
603  temp = rwork(iv1tcs+imin-1)*b21d(imin) +
604  $ rwork(iv1tsn+imin-1)*b21e(imin)
605  b21e(imin) = rwork(iv1tcs+imin-1)*b21e(imin) -
606  $ rwork(iv1tsn+imin-1)*b21d(imin)
607  b21d(imin) = temp
608  b21bulge = rwork(iv1tsn+imin-1)*b21d(imin+1)
609  b21d(imin+1) = rwork(iv1tcs+imin-1)*b21d(imin+1)
610 *
611 * Compute THETA(IMIN)
612 *
613  theta( imin ) = atan2( sqrt( b21d(imin)**2+b21bulge**2 ),
614  $ sqrt( b11d(imin)**2+b11bulge**2 ) )
615 *
616 * Chase the bulges in B11(IMIN+1,IMIN) and B21(IMIN+1,IMIN)
617 *
618  IF( b11d(imin)**2+b11bulge**2 .GT. thresh**2 ) THEN
619  CALL slartgp( b11bulge, b11d(imin), rwork(iu1sn+imin-1),
620  $ rwork(iu1cs+imin-1), r )
621  ELSE IF( mu .LE. nu ) THEN
622  CALL slartgs( b11e( imin ), b11d( imin + 1 ), mu,
623  $ rwork(iu1cs+imin-1), rwork(iu1sn+imin-1) )
624  ELSE
625  CALL slartgs( b12d( imin ), b12e( imin ), nu,
626  $ rwork(iu1cs+imin-1), rwork(iu1sn+imin-1) )
627  END IF
628  IF( b21d(imin)**2+b21bulge**2 .GT. thresh**2 ) THEN
629  CALL slartgp( b21bulge, b21d(imin), rwork(iu2sn+imin-1),
630  $ rwork(iu2cs+imin-1), r )
631  ELSE IF( nu .LT. mu ) THEN
632  CALL slartgs( b21e( imin ), b21d( imin + 1 ), nu,
633  $ rwork(iu2cs+imin-1), rwork(iu2sn+imin-1) )
634  ELSE
635  CALL slartgs( b22d(imin), b22e(imin), mu,
636  $ rwork(iu2cs+imin-1), rwork(iu2sn+imin-1) )
637  END IF
638  rwork(iu2cs+imin-1) = -rwork(iu2cs+imin-1)
639  rwork(iu2sn+imin-1) = -rwork(iu2sn+imin-1)
640 *
641  temp = rwork(iu1cs+imin-1)*b11e(imin) +
642  $ rwork(iu1sn+imin-1)*b11d(imin+1)
643  b11d(imin+1) = rwork(iu1cs+imin-1)*b11d(imin+1) -
644  $ rwork(iu1sn+imin-1)*b11e(imin)
645  b11e(imin) = temp
646  IF( imax .GT. imin+1 ) THEN
647  b11bulge = rwork(iu1sn+imin-1)*b11e(imin+1)
648  b11e(imin+1) = rwork(iu1cs+imin-1)*b11e(imin+1)
649  END IF
650  temp = rwork(iu1cs+imin-1)*b12d(imin) +
651  $ rwork(iu1sn+imin-1)*b12e(imin)
652  b12e(imin) = rwork(iu1cs+imin-1)*b12e(imin) -
653  $ rwork(iu1sn+imin-1)*b12d(imin)
654  b12d(imin) = temp
655  b12bulge = rwork(iu1sn+imin-1)*b12d(imin+1)
656  b12d(imin+1) = rwork(iu1cs+imin-1)*b12d(imin+1)
657  temp = rwork(iu2cs+imin-1)*b21e(imin) +
658  $ rwork(iu2sn+imin-1)*b21d(imin+1)
659  b21d(imin+1) = rwork(iu2cs+imin-1)*b21d(imin+1) -
660  $ rwork(iu2sn+imin-1)*b21e(imin)
661  b21e(imin) = temp
662  IF( imax .GT. imin+1 ) THEN
663  b21bulge = rwork(iu2sn+imin-1)*b21e(imin+1)
664  b21e(imin+1) = rwork(iu2cs+imin-1)*b21e(imin+1)
665  END IF
666  temp = rwork(iu2cs+imin-1)*b22d(imin) +
667  $ rwork(iu2sn+imin-1)*b22e(imin)
668  b22e(imin) = rwork(iu2cs+imin-1)*b22e(imin) -
669  $ rwork(iu2sn+imin-1)*b22d(imin)
670  b22d(imin) = temp
671  b22bulge = rwork(iu2sn+imin-1)*b22d(imin+1)
672  b22d(imin+1) = rwork(iu2cs+imin-1)*b22d(imin+1)
673 *
674 * Inner loop: chase bulges from B11(IMIN,IMIN+2),
675 * B12(IMIN,IMIN+1), B21(IMIN,IMIN+2), and B22(IMIN,IMIN+1) to
676 * bottom-right
677 *
678  DO i = imin+1, imax-1
679 *
680 * Compute PHI(I-1)
681 *
682  x1 = sin(theta(i-1))*b11e(i-1) + cos(theta(i-1))*b21e(i-1)
683  x2 = sin(theta(i-1))*b11bulge + cos(theta(i-1))*b21bulge
684  y1 = sin(theta(i-1))*b12d(i-1) + cos(theta(i-1))*b22d(i-1)
685  y2 = sin(theta(i-1))*b12bulge + cos(theta(i-1))*b22bulge
686 *
687  phi(i-1) = atan2( sqrt(x1**2+x2**2), sqrt(y1**2+y2**2) )
688 *
689 * Determine if there are bulges to chase or if a new direct
690 * summand has been reached
691 *
692  restart11 = b11e(i-1)**2 + b11bulge**2 .LE. thresh**2
693  restart21 = b21e(i-1)**2 + b21bulge**2 .LE. thresh**2
694  restart12 = b12d(i-1)**2 + b12bulge**2 .LE. thresh**2
695  restart22 = b22d(i-1)**2 + b22bulge**2 .LE. thresh**2
696 *
697 * If possible, chase bulges from B11(I-1,I+1), B12(I-1,I),
698 * B21(I-1,I+1), and B22(I-1,I). If necessary, restart bulge-
699 * chasing by applying the original shift again.
700 *
701  IF( .NOT. restart11 .AND. .NOT. restart21 ) THEN
702  CALL slartgp( x2, x1, rwork(iv1tsn+i-1),
703  $ rwork(iv1tcs+i-1), r )
704  ELSE IF( .NOT. restart11 .AND. restart21 ) THEN
705  CALL slartgp( b11bulge, b11e(i-1), rwork(iv1tsn+i-1),
706  $ rwork(iv1tcs+i-1), r )
707  ELSE IF( restart11 .AND. .NOT. restart21 ) THEN
708  CALL slartgp( b21bulge, b21e(i-1), rwork(iv1tsn+i-1),
709  $ rwork(iv1tcs+i-1), r )
710  ELSE IF( mu .LE. nu ) THEN
711  CALL slartgs( b11d(i), b11e(i), mu, rwork(iv1tcs+i-1),
712  $ rwork(iv1tsn+i-1) )
713  ELSE
714  CALL slartgs( b21d(i), b21e(i), nu, rwork(iv1tcs+i-1),
715  $ rwork(iv1tsn+i-1) )
716  END IF
717  rwork(iv1tcs+i-1) = -rwork(iv1tcs+i-1)
718  rwork(iv1tsn+i-1) = -rwork(iv1tsn+i-1)
719  IF( .NOT. restart12 .AND. .NOT. restart22 ) THEN
720  CALL slartgp( y2, y1, rwork(iv2tsn+i-1-1),
721  $ rwork(iv2tcs+i-1-1), r )
722  ELSE IF( .NOT. restart12 .AND. restart22 ) THEN
723  CALL slartgp( b12bulge, b12d(i-1), rwork(iv2tsn+i-1-1),
724  $ rwork(iv2tcs+i-1-1), r )
725  ELSE IF( restart12 .AND. .NOT. restart22 ) THEN
726  CALL slartgp( b22bulge, b22d(i-1), rwork(iv2tsn+i-1-1),
727  $ rwork(iv2tcs+i-1-1), r )
728  ELSE IF( nu .LT. mu ) THEN
729  CALL slartgs( b12e(i-1), b12d(i), nu,
730  $ rwork(iv2tcs+i-1-1), rwork(iv2tsn+i-1-1) )
731  ELSE
732  CALL slartgs( b22e(i-1), b22d(i), mu,
733  $ rwork(iv2tcs+i-1-1), rwork(iv2tsn+i-1-1) )
734  END IF
735 *
736  temp = rwork(iv1tcs+i-1)*b11d(i) + rwork(iv1tsn+i-1)*b11e(i)
737  b11e(i) = rwork(iv1tcs+i-1)*b11e(i) -
738  $ rwork(iv1tsn+i-1)*b11d(i)
739  b11d(i) = temp
740  b11bulge = rwork(iv1tsn+i-1)*b11d(i+1)
741  b11d(i+1) = rwork(iv1tcs+i-1)*b11d(i+1)
742  temp = rwork(iv1tcs+i-1)*b21d(i) + rwork(iv1tsn+i-1)*b21e(i)
743  b21e(i) = rwork(iv1tcs+i-1)*b21e(i) -
744  $ rwork(iv1tsn+i-1)*b21d(i)
745  b21d(i) = temp
746  b21bulge = rwork(iv1tsn+i-1)*b21d(i+1)
747  b21d(i+1) = rwork(iv1tcs+i-1)*b21d(i+1)
748  temp = rwork(iv2tcs+i-1-1)*b12e(i-1) +
749  $ rwork(iv2tsn+i-1-1)*b12d(i)
750  b12d(i) = rwork(iv2tcs+i-1-1)*b12d(i) -
751  $ rwork(iv2tsn+i-1-1)*b12e(i-1)
752  b12e(i-1) = temp
753  b12bulge = rwork(iv2tsn+i-1-1)*b12e(i)
754  b12e(i) = rwork(iv2tcs+i-1-1)*b12e(i)
755  temp = rwork(iv2tcs+i-1-1)*b22e(i-1) +
756  $ rwork(iv2tsn+i-1-1)*b22d(i)
757  b22d(i) = rwork(iv2tcs+i-1-1)*b22d(i) -
758  $ rwork(iv2tsn+i-1-1)*b22e(i-1)
759  b22e(i-1) = temp
760  b22bulge = rwork(iv2tsn+i-1-1)*b22e(i)
761  b22e(i) = rwork(iv2tcs+i-1-1)*b22e(i)
762 *
763 * Compute THETA(I)
764 *
765  x1 = cos(phi(i-1))*b11d(i) + sin(phi(i-1))*b12e(i-1)
766  x2 = cos(phi(i-1))*b11bulge + sin(phi(i-1))*b12bulge
767  y1 = cos(phi(i-1))*b21d(i) + sin(phi(i-1))*b22e(i-1)
768  y2 = cos(phi(i-1))*b21bulge + sin(phi(i-1))*b22bulge
769 *
770  theta(i) = atan2( sqrt(y1**2+y2**2), sqrt(x1**2+x2**2) )
771 *
772 * Determine if there are bulges to chase or if a new direct
773 * summand has been reached
774 *
775  restart11 = b11d(i)**2 + b11bulge**2 .LE. thresh**2
776  restart12 = b12e(i-1)**2 + b12bulge**2 .LE. thresh**2
777  restart21 = b21d(i)**2 + b21bulge**2 .LE. thresh**2
778  restart22 = b22e(i-1)**2 + b22bulge**2 .LE. thresh**2
779 *
780 * If possible, chase bulges from B11(I+1,I), B12(I+1,I-1),
781 * B21(I+1,I), and B22(I+1,I-1). If necessary, restart bulge-
782 * chasing by applying the original shift again.
783 *
784  IF( .NOT. restart11 .AND. .NOT. restart12 ) THEN
785  CALL slartgp( x2, x1, rwork(iu1sn+i-1), rwork(iu1cs+i-1),
786  $ r )
787  ELSE IF( .NOT. restart11 .AND. restart12 ) THEN
788  CALL slartgp( b11bulge, b11d(i), rwork(iu1sn+i-1),
789  $ rwork(iu1cs+i-1), r )
790  ELSE IF( restart11 .AND. .NOT. restart12 ) THEN
791  CALL slartgp( b12bulge, b12e(i-1), rwork(iu1sn+i-1),
792  $ rwork(iu1cs+i-1), r )
793  ELSE IF( mu .LE. nu ) THEN
794  CALL slartgs( b11e(i), b11d(i+1), mu, rwork(iu1cs+i-1),
795  $ rwork(iu1sn+i-1) )
796  ELSE
797  CALL slartgs( b12d(i), b12e(i), nu, rwork(iu1cs+i-1),
798  $ rwork(iu1sn+i-1) )
799  END IF
800  IF( .NOT. restart21 .AND. .NOT. restart22 ) THEN
801  CALL slartgp( y2, y1, rwork(iu2sn+i-1), rwork(iu2cs+i-1),
802  $ r )
803  ELSE IF( .NOT. restart21 .AND. restart22 ) THEN
804  CALL slartgp( b21bulge, b21d(i), rwork(iu2sn+i-1),
805  $ rwork(iu2cs+i-1), r )
806  ELSE IF( restart21 .AND. .NOT. restart22 ) THEN
807  CALL slartgp( b22bulge, b22e(i-1), rwork(iu2sn+i-1),
808  $ rwork(iu2cs+i-1), r )
809  ELSE IF( nu .LT. mu ) THEN
810  CALL slartgs( b21e(i), b21e(i+1), nu, rwork(iu2cs+i-1),
811  $ rwork(iu2sn+i-1) )
812  ELSE
813  CALL slartgs( b22d(i), b22e(i), mu, rwork(iu2cs+i-1),
814  $ rwork(iu2sn+i-1) )
815  END IF
816  rwork(iu2cs+i-1) = -rwork(iu2cs+i-1)
817  rwork(iu2sn+i-1) = -rwork(iu2sn+i-1)
818 *
819  temp = rwork(iu1cs+i-1)*b11e(i) + rwork(iu1sn+i-1)*b11d(i+1)
820  b11d(i+1) = rwork(iu1cs+i-1)*b11d(i+1) -
821  $ rwork(iu1sn+i-1)*b11e(i)
822  b11e(i) = temp
823  IF( i .LT. imax - 1 ) THEN
824  b11bulge = rwork(iu1sn+i-1)*b11e(i+1)
825  b11e(i+1) = rwork(iu1cs+i-1)*b11e(i+1)
826  END IF
827  temp = rwork(iu2cs+i-1)*b21e(i) + rwork(iu2sn+i-1)*b21d(i+1)
828  b21d(i+1) = rwork(iu2cs+i-1)*b21d(i+1) -
829  $ rwork(iu2sn+i-1)*b21e(i)
830  b21e(i) = temp
831  IF( i .LT. imax - 1 ) THEN
832  b21bulge = rwork(iu2sn+i-1)*b21e(i+1)
833  b21e(i+1) = rwork(iu2cs+i-1)*b21e(i+1)
834  END IF
835  temp = rwork(iu1cs+i-1)*b12d(i) + rwork(iu1sn+i-1)*b12e(i)
836  b12e(i) = rwork(iu1cs+i-1)*b12e(i) -
837  $ rwork(iu1sn+i-1)*b12d(i)
838  b12d(i) = temp
839  b12bulge = rwork(iu1sn+i-1)*b12d(i+1)
840  b12d(i+1) = rwork(iu1cs+i-1)*b12d(i+1)
841  temp = rwork(iu2cs+i-1)*b22d(i) + rwork(iu2sn+i-1)*b22e(i)
842  b22e(i) = rwork(iu2cs+i-1)*b22e(i) -
843  $ rwork(iu2sn+i-1)*b22d(i)
844  b22d(i) = temp
845  b22bulge = rwork(iu2sn+i-1)*b22d(i+1)
846  b22d(i+1) = rwork(iu2cs+i-1)*b22d(i+1)
847 *
848  END DO
849 *
850 * Compute PHI(IMAX-1)
851 *
852  x1 = sin(theta(imax-1))*b11e(imax-1) +
853  $ cos(theta(imax-1))*b21e(imax-1)
854  y1 = sin(theta(imax-1))*b12d(imax-1) +
855  $ cos(theta(imax-1))*b22d(imax-1)
856  y2 = sin(theta(imax-1))*b12bulge + cos(theta(imax-1))*b22bulge
857 *
858  phi(imax-1) = atan2( abs(x1), sqrt(y1**2+y2**2) )
859 *
860 * Chase bulges from B12(IMAX-1,IMAX) and B22(IMAX-1,IMAX)
861 *
862  restart12 = b12d(imax-1)**2 + b12bulge**2 .LE. thresh**2
863  restart22 = b22d(imax-1)**2 + b22bulge**2 .LE. thresh**2
864 *
865  IF( .NOT. restart12 .AND. .NOT. restart22 ) THEN
866  CALL slartgp( y2, y1, rwork(iv2tsn+imax-1-1),
867  $ rwork(iv2tcs+imax-1-1), r )
868  ELSE IF( .NOT. restart12 .AND. restart22 ) THEN
869  CALL slartgp( b12bulge, b12d(imax-1),
870  $ rwork(iv2tsn+imax-1-1),
871  $ rwork(iv2tcs+imax-1-1), r )
872  ELSE IF( restart12 .AND. .NOT. restart22 ) THEN
873  CALL slartgp( b22bulge, b22d(imax-1),
874  $ rwork(iv2tsn+imax-1-1),
875  $ rwork(iv2tcs+imax-1-1), r )
876  ELSE IF( nu .LT. mu ) THEN
877  CALL slartgs( b12e(imax-1), b12d(imax), nu,
878  $ rwork(iv2tcs+imax-1-1),
879  $ rwork(iv2tsn+imax-1-1) )
880  ELSE
881  CALL slartgs( b22e(imax-1), b22d(imax), mu,
882  $ rwork(iv2tcs+imax-1-1),
883  $ rwork(iv2tsn+imax-1-1) )
884  END IF
885 *
886  temp = rwork(iv2tcs+imax-1-1)*b12e(imax-1) +
887  $ rwork(iv2tsn+imax-1-1)*b12d(imax)
888  b12d(imax) = rwork(iv2tcs+imax-1-1)*b12d(imax) -
889  $ rwork(iv2tsn+imax-1-1)*b12e(imax-1)
890  b12e(imax-1) = temp
891  temp = rwork(iv2tcs+imax-1-1)*b22e(imax-1) +
892  $ rwork(iv2tsn+imax-1-1)*b22d(imax)
893  b22d(imax) = rwork(iv2tcs+imax-1-1)*b22d(imax) -
894  $ rwork(iv2tsn+imax-1-1)*b22e(imax-1)
895  b22e(imax-1) = temp
896 *
897 * Update singular vectors
898 *
899  IF( wantu1 ) THEN
900  IF( colmajor ) THEN
901  CALL clasr( 'R', 'V', 'F', p, imax-imin+1,
902  $ rwork(iu1cs+imin-1), rwork(iu1sn+imin-1),
903  $ u1(1,imin), ldu1 )
904  ELSE
905  CALL clasr( 'L', 'V', 'F', imax-imin+1, p,
906  $ rwork(iu1cs+imin-1), rwork(iu1sn+imin-1),
907  $ u1(imin,1), ldu1 )
908  END IF
909  END IF
910  IF( wantu2 ) THEN
911  IF( colmajor ) THEN
912  CALL clasr( 'R', 'V', 'F', m-p, imax-imin+1,
913  $ rwork(iu2cs+imin-1), rwork(iu2sn+imin-1),
914  $ u2(1,imin), ldu2 )
915  ELSE
916  CALL clasr( 'L', 'V', 'F', imax-imin+1, m-p,
917  $ rwork(iu2cs+imin-1), rwork(iu2sn+imin-1),
918  $ u2(imin,1), ldu2 )
919  END IF
920  END IF
921  IF( wantv1t ) THEN
922  IF( colmajor ) THEN
923  CALL clasr( 'L', 'V', 'F', imax-imin+1, q,
924  $ rwork(iv1tcs+imin-1), rwork(iv1tsn+imin-1),
925  $ v1t(imin,1), ldv1t )
926  ELSE
927  CALL clasr( 'R', 'V', 'F', q, imax-imin+1,
928  $ rwork(iv1tcs+imin-1), rwork(iv1tsn+imin-1),
929  $ v1t(1,imin), ldv1t )
930  END IF
931  END IF
932  IF( wantv2t ) THEN
933  IF( colmajor ) THEN
934  CALL clasr( 'L', 'V', 'F', imax-imin+1, m-q,
935  $ rwork(iv2tcs+imin-1), rwork(iv2tsn+imin-1),
936  $ v2t(imin,1), ldv2t )
937  ELSE
938  CALL clasr( 'R', 'V', 'F', m-q, imax-imin+1,
939  $ rwork(iv2tcs+imin-1), rwork(iv2tsn+imin-1),
940  $ v2t(1,imin), ldv2t )
941  END IF
942  END IF
943 *
944 * Fix signs on B11(IMAX-1,IMAX) and B21(IMAX-1,IMAX)
945 *
946  IF( b11e(imax-1)+b21e(imax-1) .GT. 0 ) THEN
947  b11d(imax) = -b11d(imax)
948  b21d(imax) = -b21d(imax)
949  IF( wantv1t ) THEN
950  IF( colmajor ) THEN
951  CALL cscal( q, negonecomplex, v1t(imax,1), ldv1t )
952  ELSE
953  CALL cscal( q, negonecomplex, v1t(1,imax), 1 )
954  END IF
955  END IF
956  END IF
957 *
958 * Compute THETA(IMAX)
959 *
960  x1 = cos(phi(imax-1))*b11d(imax) +
961  $ sin(phi(imax-1))*b12e(imax-1)
962  y1 = cos(phi(imax-1))*b21d(imax) +
963  $ sin(phi(imax-1))*b22e(imax-1)
964 *
965  theta(imax) = atan2( abs(y1), abs(x1) )
966 *
967 * Fix signs on B11(IMAX,IMAX), B12(IMAX,IMAX-1), B21(IMAX,IMAX),
968 * and B22(IMAX,IMAX-1)
969 *
970  IF( b11d(imax)+b12e(imax-1) .LT. 0 ) THEN
971  b12d(imax) = -b12d(imax)
972  IF( wantu1 ) THEN
973  IF( colmajor ) THEN
974  CALL cscal( p, negonecomplex, u1(1,imax), 1 )
975  ELSE
976  CALL cscal( p, negonecomplex, u1(imax,1), ldu1 )
977  END IF
978  END IF
979  END IF
980  IF( b21d(imax)+b22e(imax-1) .GT. 0 ) THEN
981  b22d(imax) = -b22d(imax)
982  IF( wantu2 ) THEN
983  IF( colmajor ) THEN
984  CALL cscal( m-p, negonecomplex, u2(1,imax), 1 )
985  ELSE
986  CALL cscal( m-p, negonecomplex, u2(imax,1), ldu2 )
987  END IF
988  END IF
989  END IF
990 *
991 * Fix signs on B12(IMAX,IMAX) and B22(IMAX,IMAX)
992 *
993  IF( b12d(imax)+b22d(imax) .LT. 0 ) THEN
994  IF( wantv2t ) THEN
995  IF( colmajor ) THEN
996  CALL cscal( m-q, negonecomplex, v2t(imax,1), ldv2t )
997  ELSE
998  CALL cscal( m-q, negonecomplex, v2t(1,imax), 1 )
999  END IF
1000  END IF
1001  END IF
1002 *
1003 * Test for negligible sines or cosines
1004 *
1005  DO i = imin, imax
1006  IF( theta(i) .LT. thresh ) THEN
1007  theta(i) = zero
1008  ELSE IF( theta(i) .GT. piover2-thresh ) THEN
1009  theta(i) = piover2
1010  END IF
1011  END DO
1012  DO i = imin, imax-1
1013  IF( phi(i) .LT. thresh ) THEN
1014  phi(i) = zero
1015  ELSE IF( phi(i) .GT. piover2-thresh ) THEN
1016  phi(i) = piover2
1017  END IF
1018  END DO
1019 *
1020 * Deflate
1021 *
1022  IF (imax .GT. 1) THEN
1023  DO WHILE( phi(imax-1) .EQ. zero )
1024  imax = imax - 1
1025  IF (imax .LE. 1) EXIT
1026  END DO
1027  END IF
1028  IF( imin .GT. imax - 1 )
1029  $ imin = imax - 1
1030  IF (imin .GT. 1) THEN
1031  DO WHILE (phi(imin-1) .NE. zero)
1032  imin = imin - 1
1033  IF (imin .LE. 1) EXIT
1034  END DO
1035  END IF
1036 *
1037 * Repeat main iteration loop
1038 *
1039  END DO
1040 *
1041 * Postprocessing: order THETA from least to greatest
1042 *
1043  DO i = 1, q
1044 *
1045  mini = i
1046  thetamin = theta(i)
1047  DO j = i+1, q
1048  IF( theta(j) .LT. thetamin ) THEN
1049  mini = j
1050  thetamin = theta(j)
1051  END IF
1052  END DO
1053 *
1054  IF( mini .NE. i ) THEN
1055  theta(mini) = theta(i)
1056  theta(i) = thetamin
1057  IF( colmajor ) THEN
1058  IF( wantu1 )
1059  $ CALL cswap( p, u1(1,i), 1, u1(1,mini), 1 )
1060  IF( wantu2 )
1061  $ CALL cswap( m-p, u2(1,i), 1, u2(1,mini), 1 )
1062  IF( wantv1t )
1063  $ CALL cswap( q, v1t(i,1), ldv1t, v1t(mini,1), ldv1t )
1064  IF( wantv2t )
1065  $ CALL cswap( m-q, v2t(i,1), ldv2t, v2t(mini,1),
1066  $ ldv2t )
1067  ELSE
1068  IF( wantu1 )
1069  $ CALL cswap( p, u1(i,1), ldu1, u1(mini,1), ldu1 )
1070  IF( wantu2 )
1071  $ CALL cswap( m-p, u2(i,1), ldu2, u2(mini,1), ldu2 )
1072  IF( wantv1t )
1073  $ CALL cswap( q, v1t(1,i), 1, v1t(1,mini), 1 )
1074  IF( wantv2t )
1075  $ CALL cswap( m-q, v2t(1,i), 1, v2t(1,mini), 1 )
1076  END IF
1077  END IF
1078 *
1079  END DO
1080 *
1081  RETURN
1082 *
1083 * End of CBBCSD
1084 *
subroutine slartgs(X, Y, SIGMA, CS, SN)
SLARTGS generates a plane rotation designed to introduce a bulge in implicit QR iteration for the bid...
Definition: slartgs.f:92
subroutine slas2(F, G, H, SSMIN, SSMAX)
SLAS2 computes singular values of a 2-by-2 triangular matrix.
Definition: slas2.f:109
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
subroutine cscal(N, CA, CX, INCX)
CSCAL
Definition: cscal.f:54
subroutine clasr(SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA)
CLASR applies a sequence of plane rotations to a general rectangular matrix.
Definition: clasr.f:202
subroutine slartgp(F, G, CS, SN, R)
SLARTGP generates a plane rotation so that the diagonal is nonnegative.
Definition: slartgp.f:97
subroutine cswap(N, CX, INCX, CY, INCY)
CSWAP
Definition: cswap.f:52
real function slamch(CMACH)
SLAMCH
Definition: slamch.f:69
logical function lsame(CA, CB)
LSAME
Definition: lsame.f:55

Here is the call graph for this function:

Here is the caller graph for this function: