function [p,L,P,Q,gap_ratio] = lqlp(A,tol_gap,fixed_rank)
%  lqlp --> Pivoted QLP matrix approximation with interleaved factorizations.
%
%  <Synopsis>
%    [p,L,P,Q,gap_ratio] = lqlp(A)
%    [p,L,P,Q,gap_ratio] = lqlp(A,tol_gap)
%    [p,L,P,Q,gap_ratio] = lqlp(A,tol_gap,fixed_rank)
%
%  <Description>
%    Computes a rank-p pivoted QLP matrix approximation of an m-by-n matrix A
%    (m >= n) satisfying A*P = Q*L with a lower triangular p-by-p matrix L.
%    The rank (or stopping point) p is either fixed_rank or is dynamically
%    determined by tol_gap.  The absolute value of the diagonal elements of L
%    are approximations to the first p singular values of A, while the columns
%    of Q and P approximate the first p left and right singular vectors of A.
%
%  <Input Parameters>
%    1. A          --> m-by-n matrix (m >= n);
%    2. tol_gap    --> truncate the decomposition after compuing a rank-p
%                      approximation to A, where p is the smallest 
%                      integer such that abs(L(p,p)/L(p+1,p+1)) >= tol_gap;
%    3. fixed_rank --> ignore tol_gap and truncate the decomposition after
%                      computing an approximation to A of rank fixed_rank.
%
%    Defaults: tol_gap    = n/eps;
%              fixed_rank = n.
%
%  <Output parameters>
%    1. p         --> smallest integer such that abs(L(p,p)/L(p+1,p+1)) > tol_gap
%                     (or fixed_rank);
%    2. L         --> p-by-p lower triangular matrix whose diagonal elements,
%                     in absolute value, track the largest p singular values of A;
%    3-4. P,Q     --> matrices with p orthonormal columns;
%    5. gap_ratio --> abs(L(p+1,p+1)/L(p,p)), that is, the ratio of the first
%                     approximate singular value excluded to the last one included,
%                     empty if p = n or fixed_rank = n.
%
%  <Algorithm>
%    The first p rows and columns of the pivoted QR factorization of A are
%    computed, A*Pi_1 = Q*R.  Then the pivoted QR factorization of R' is computed,
%    R'*Pi_2 = P*L', where L' is p-by-p.  The rank p is either fixed_rank or is
%    determined dynamically by the following.  The computation of R is stopped
%    after k1 rows and columns, where abs(R(k1,k1)/R(k1-1,k1-1)) <= tol_gap.
%    Then rows and columns of L' are computed to see whether abs(L(j,j)/L(j-1,j-1))
%    <= tol_gap for any j <= k1.  If so, the algorithm halts after computing these
%    j rows and columns of L, and the final approximation to the SVD of A is rank j.
%    If not, the next k2 rows and columns of R are computed until tol_gap is
%    achieved, then corresponding rows and columns of L (k1 + j, 1 <= j <= k2) are
%    computed to see whether tol_gap holds at any j and the computation can be
%    halted.  If not, more rows and columns of R are computed, etc.  This process
%    is called interleaving.
%
%  <See Also>
%    hqlp --> high-rank gap-revealing QLP factorization.

%  <References>
%  [1] G.W. Stewart, "The QLP approximation to the singular value
%      decomposition," SIAM J. Sci. Comp., 20 (1999), pp. 1336-1348.
%  [2] D.A. Huckaby and T.F. Chan, "Stewart's pivoted QLP decomposition
%      for low-rank matrices," Tech. Report CAM-02-54, Dept. Mathematics,
%      UCLA, 2002.
%
%  <Revision>
%    D.A. Huckaby, Dept. of Mathematics, UCLA
%    Minor revisions by P.C. Hansen, IMM, Technical University of Denmark
%
%    Last revised: September 3, 2003.
%-----------------------------------------------------------------------

% Check the required input arguments.
if (nargin < 1)
  error('Not enough input arguments.')
end

[m,n] = size(A);
if (m*n == 0)
  error('Empty input matrix A not allowed.')
elseif (m < n)
  error('The system is underdetermined.')
end

% Check the optional input arguments, and set defaults.
if (nargin == 1)
  fixed_rank = n; fixed_rank_flag = 0; tol_gap = n/eps;
elseif (nargin == 2)
  if isempty(tol_gap)
     tol_gap = n*eps;
  elseif (tol_gap <= 1)
     error('Input parameter tol_gap must be greater than 1.'); 
  end
  fixed_rank = n; fixed_rank_flag = 0;
else
  if isempty(fixed_rank)
     fixed_rank = n;
  elseif (fixed_rank < 1 | fixed_rank > n)
     error('Input parameter fixed_rank must be between 1 and n.')
  end
  fixed_rank_flag = 1;
  p = fixed_rank;
end

% Check the number of output arguments.
qflag     = 1;
pflag     = 1;
% pi_1flag  = 1;
% pi_2flag  = 1;
gap_ratioflag = 1;
if (nargout <= 1)
  qflag = 0; Q = [];
  pflag = 0; P = [];
  gap_ratioflag = 0; gap_ratio = [];
elseif (nargout == 3)
  qflag = 0; Q = [];
  gap_ratioflag = 0; gap_ratio = [];
elseif (nargout == 4)
  gap_ratioflag = 0; gap_ratio = [];
end

% Choose between fixed-rank or interleaved approach.

if (fixed_rank_flag)
   % The following code computes a size fixed_rank pivoted QLP decomposition.

   if (fixed_rank == 0)
      gap_ratio = 0;
      L = []; Q = []; P = []; Pi_1 = []; Pi_2 = [];
      
   elseif (gap_ratioflag & fixed_rank < n)
      % QR factorizations.
      [R,U_1,betas_1,pvt_1] = housetriang(A,fixed_rank + 1);
      [L,U_2,betas_2,pvt_2] = housetriang(R',fixed_rank + 1);
      gap_ratio = ...
          abs( L(fixed_rank,fixed_rank) / L(fixed_rank+1,fixed_rank+1) );
   
      % Construct Q and P, if requested.  Also remove extra column that
      % was needed to determine gap_ratio.
      L = L(1:fixed_rank,1:fixed_rank)';
      if qflag
         Q = formQ(U_1,betas_1);
         Q(:,pvt_2) = Q;
         Q = Q(:,1:fixed_rank);
      end
      if pflag
         P = formQ(U_2,betas_2);
         P(pvt_1,:) = P;
         P = P(:,1:fixed_rank);
      end

   else
      % QR factorizations.
      [R,U_1,betas_1,pvt_1] = housetriang(A,fixed_rank);
      [L,U_2,betas_2,pvt_2] = housetriang(R',fixed_rank);
      L = L';
      if (gap_ratioflag), gap_ratio = []; end
   
      % Construct Q and P, if requested.
      if qflag
         Q = formQ(U_1,betas_1);
         Q(:,pvt_2) = Q;
      end
      if pflag
         P = formQ(U_2,betas_2);
         P(pvt_1,:) = P;
      end

   end

else
   % The following code computes a pivoted QLP decomposition truncated
   % between the first pair of L-values whose ratio is smaller than tol_gap.
   % This stopping point will be determined dynamically by interleaving
   % the computation, if necessary.
   
   tol_gap_attained = 0; entire_matrix_factorized = 0;
   previous_R_diag_elm = 0;  % For gap ratio testing purposes.
   previous_L_diag_elm = 0;  % See function housetriang_gap.
   sweeps = 0;
   prevsize = 0;
   currsize = 0;
   theU_1s = []; theU_2s = [];
   thebeta_1s = []; thebeta_2s = [];
   thepvt_1s = []; thepvt_2s = [];

   while (~tol_gap_attained & ~entire_matrix_factorized)
      sweeps = sweeps + 1;
      % Compute the sweeps-th sweep of R (and Q).
      [A,R,U_1,betas_1,pvt_1,tol_gap_attained] = ... 
                       housetriang_gap(A,tol_gap,previous_R_diag_elm);
      [R_sweep,dummy] = size(R);
      previous_R_diag_elm = R(R_sweep,R_sweep);
      prevsize = currsize;
      currsize = currsize + R_sweep;

      if (sweeps == 1)
         % Compute the first sweep of L.
         [dummy,L,U_2,betas_2,pvt_2,tol_gap_attained]... 
                      = housetriang_gap(R',tol_gap,previous_L_diag_elm);
         [L_sweep,dummy] = size(L);

      else
         % Apply to the new piece of R all the Householder transformations
         % that have been applied to the pieces of R from previous sweeps. 
         vertical_strip_R_t = ...
              apply_previous_house(R,theU_2s,thebeta_2s);

         % The portion of vertical_strip_R_t that would have been part
         % of L had were we not interleaving is now assigned to L. 
         % The rest (the bottom portion) is sent to housetriang_gap.
         % Then the pieces of L are put together. 
         L(1:prevsize,prevsize+1:currsize) = vertical_strip_R_t(1:prevsize,:);
         R_t = vertical_strip_R_t(prevsize+1:n,:);
         [dummy,lower_right_of_L,U_2,betas_2,pvt_2,tol_gap_attained] = ... 
                             housetriang_gap(R_t,tol_gap,previous_L_diag_elm);
         [L_sweep,dummy] = size(lower_right_of_L);
         lower_left_of_L = zeros(L_sweep,prevsize);
         L = [L; lower_left_of_L lower_right_of_L];
      end

      % Update the building blocks of Q and P, if needed.
      if qflag
         if (sweeps > 1)
            % The vectors of U_2 have fewer than n rows, and must be
            % padded on top with zeros to be correct.
            U_1 = [zeros(prevsize,R_sweep) ; U_1];
         end
         theU_1s = [theU_1s U_1];
         thebeta_1s = [thebeta_1s betas_1];
      end

      % The U_2s and thebeta_2s must in any case be updated, since
      % they are needed in subsequent sweeps for computing L.
      
      if (sweeps > 1)
         % The vectors of U_2 have fewer than n rows, and must be
         % padded on top with zeros to be correct.
         U_2 = [zeros(prevsize,L_sweep) ; U_2];
      end
      theU_2s = [theU_2s U_2];
      thebeta_2s = [thebeta_2s betas_2];

      % pvt_1 is computed with indices starting at 1 in
      % housetriang_gap, so it must be adjusted here.
      pvt_1 = pvt_1 + prevsize;
      thepvt_1s(prevsize+1:n) = pvt_1;
      % pvt_2 is computed with indices starting at 1 in
      % housetriang_gap, so it must be adjusted here.
      pvt_2 = pvt_2 + prevsize;
      thepvt_2s = [thepvt_2s pvt_2];
     
      if (~tol_gap_attained)
         previous_L_diag_elm = L(currsize,currsize);
      end
      entire_matrix_factorized = (currsize == n);

   end % while loop

   % Determine the final "rank" finalsize of the matrix.
   % That is, L will be finalsize-by-finalsize.
   if entire_matrix_factorized
      if tol_gap_attained
         finalsize = prevsize + L_sweep - 1;
      else
         finalsize = prevsize + L_sweep;
      end
   else
      finalsize = prevsize + L_sweep - 1;
   end

   % Compute gap_ratio, if requested as an output argument.
   if gap_ratioflag
      if entire_matrix_factorized
         if tol_gap_attained
            gap_ratio = abs(L(finalsize,finalsize)/L(finalsize+1,finalsize+1));
         else
            gap_ratio = [];
         end
      else
         gap_ratio = abs(L(finalsize,finalsize)/L(finalsize+1,finalsize+1));
      end
   end

   % Construct Q and P, if requested.  Also remove extra column if necessary.
   L = L(1:finalsize,1:finalsize)';
   if qflag
      Q = formQ(theU_1s,thebeta_1s);
      Q(:,thepvt_2s) = Q;
      Q = Q(:,1:finalsize);
   end
   if pflag
      P = formQ(theU_2s,thebeta_2s);
      P = P(:,1:finalsize);
      P(thepvt_1s,:) = P;
   end
   
   p = finalsize;

end % Fixed-rank/interleaved.

%-----------------------------------------------------------------------
% End of main procedure of function lqlp
%-----------------------------------------------------------------------
% Following are subfunctions used only by function lqlp.
%-----------------------------------------------------------------------

function [R,U,betas,pvt] = housetriang(X,dimen)
% [R,U,betas,pvt] = housetriang(X,dimen)
%
% This function computes the first dimen rows of R and the first dimen
% columns of Q of the pivoted QR factorization of X by using Householder
% transformations.  That is, X*Pi = Q*R.  The Householder transformations
% that constitute Q are stored in U and betas, while the pivoting
% information that defines Pi is stored in pvt.

[n,p] = size(X);
U = zeros(n,dimen); betas = zeros(dimen); R = zeros(dimen,p);
pvt = (1:p);

for k = 1:dimen
   % Find the column with max norm, then swap that colunm with the
   % current (k-th) column.
   [val,ind] = max( sum( X(k:n,k:p).*X(k:n,k:p) ) );
   pvtk = ind + k - 1;
   pvt([k pvtk]) = pvt([pvtk k]);
   X(k:n, [k pvtk]) = X(k:n, [pvtk k]);
   R(1:k-1, [k pvtk]) = R(1:k-1, [pvtk k]);

   % Generate the proper Householder transformation and apply it
   % to X(k:n,k+1:p).
   [betas(k),U(k:n,k),R(k,k)] = gen_hous( X(k:n,k) );
   X(k:n,k+1:p) = app_hous( X(k:n,k+1:p), betas(k), U(k:n,k) );
   R(k,k+1:p) = X(k,k+1:p);
end

%%%%%%%%%%%%% end housetriang %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function [newX,R,U,betas,pvt,tol_gap_attained] = ...
                          housetriang_gap(X,tol_gap,previous_diag_elm)
% [newX,R,U,betas,pvt,tol_gap_attained] =
%                          housetriang_gap(X,tol_gap,previous_diag_elm)
%
% This function computes the first k rows of R and the first k columns
% of Q of the pivoted QR factorization of X by using Householder trans-
% formations.  That is, X*Pi = Q*R.  The Householder transformations
% that constitute Q are stored in U and betas, while the pivoting
% information that defines Pi is stored in pvt.
%
% The "rank" k - 1 of X is determined dynamically by the condition that
% k is the lowest integer such that |R(k,k)|/|R(k-1,k-1)| <= tol_gap.
% If R here is R of the QLP decomposition, this will mark a suggested
% stopping point for the entire algorithm.  If R here is actually L
% of the QLP decomposition, this will mark the definite stopping point
% of the entire algorithm.  In either case, tol_gap_attained is set to 1.
% If the condition never occurs (that is, all of the gap ratios are
% greater than tol_gap), the tol_gap_attained is  returned as 0.
% newX is set to X(k+1:n,k+1:p) unless all of R has been factorized,
% in which case newX is set to [].
%
% previous_diag_elm is the last diagonal element that was computed on R
% in the previous sweep.  (Note again that R here could be either R or L
% in the QLP decomposition.)  It is needed here to determine whether there
% is a gap between the last diagonal element in the previous sweep and
% the first diagonal element in this sweep.  If this sweep is the first 
% sweep, previous_diag_elm is expected to be equal to zero on entrance
% into this function.

[n,p] = size(X);
U = zeros(n,p); betas = zeros(p); R = zeros(p,p);
pvt = (1:p); 

% Compute the first row of R
k = 1; tol_gap_attained = 0;

% Find the column with max norm, then swap that colunm with the current
% (1st) column.
[val,ind] = max( sum( X.*X ) );    
pvt([1 ind]) = pvt([ind k]);
X(1:n, [1 ind]) = X(1:n, [ind 1]);

% Generate the proper Householder transformation and apply it to X(1:n,2:p).
[betas(1),U(1:n,1),R(1,1)] = gen_hous( X(1:n,1) );             
X(1:n,2:p) = app_hous( X(1:n,2:p), betas(1), U(1:n,1) );   
R(1,2:p) = X(1,2:p);                                       

% The first row is computed.  Now set boolean constants for the while loop.
if ( abs(R(1,1)) == 0 )
   tol_gap_attained = 1;
elseif (previous_diag_elm == 0) % If true, then this is the first sweep.
            % A second row of R must be computed before there is any gap
                                           % ratio to compare to tol_gap.
   tol_gap_attained = 0;       
else
   tol_gap_attained = ( previous_diag_elm/abs(R(1,1)) >= tol_gap );
end
R_completely_factorized = (p == 1);

% Compute rows until either tol_gap is attained or all the rows of R
% have been computed.  (Once again keep in mind that in this function,
% R can be either R or L of the QLP decomposition.)

while ( ~tol_gap_attained & ~R_completely_factorized )
   k = k + 1;

   % Find the column with max norm, then swap that colunm with the
   % current (k-th) column.
   [val,ind] = max( sum( X(k:n,k:p).*X(k:n,k:p) ) );    
   pvtk = ind + k - 1;               
   pvt([k pvtk]) = pvt([pvtk k]);
   X(k:n, [k pvtk]) = X(k:n, [pvtk k]);                   
   R(1:k-1, [k pvtk]) = R(1:k-1, [pvtk k]);               

   % Generate the proper Householder transformation and apply it
   % to X(k:n,k+1:p).
   [betas(k),U(k:n,k),R(k,k)] = gen_hous( X(k:n,k) );             
   X(k:n,k+1:p) = app_hous( X(k:n,k+1:p), betas(k), U(k:n,k) );   
   R(k,k+1:p) = X(k,k+1:p);                                       

   warningstate = warning; warning off;  % Suppress div. by zero warning.
   tol_gap_attained = ( abs(R(k-1,k-1)/R(k,k)) >=  tol_gap );
   warning = warningstate;
   R_completely_factorized = (k == p);
end                                                               

% Set R, U, and betas to their proper sizes.  (All of pvt is
% needed if we are to constuct the permutation matrices.)
R = R(1:k,:); U = U(:,1:k); betas = betas(1:k);

if R_completely_factorized
   newX = [];
else
   newX = X(k+1:n,k+1:p);
end
 
%%%%%%%%%%%%% end housetriang_gap %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function vstrip_R_t = apply_previous_house(R,U,betas);
% vstrip_R_t = apply_previous_house(R,U,betas,block_sizes);
%
%  When we are interleaving, and it is time to triangularize a new 
%  portion of L (and we're on the second or higher sweep), we need
%  to apply the Householder transformations that have been applied 
%  in triangularizing all the previous portions of L.

%  The piece of R that enters this function is a horizontal strip
%  whose left portion is upper-triangular, the part of A that was 
%  upper-triangularized in the first half of the sweep.  This 
%  function applies the Householder transformations contained in 
%  U and betas (ie, all the transformations that have thus far 
%  contributed in triangularizing L) and returns a vertical strip.
%  The upper part of the strip is a rectangle, part of the matrix 
%  that would already have been part of L had we not been interleaving.
%  The lower part of the strip is also a rectangle, that portion of R'
%  which is ready to be triangularized to obtain the next triangular 
%  portion of L.

[n,p1] = size(U);       % p1 = sum of all previous sweep sizes
[p2,dummy] = size(R);   % p2 = current sweep size, dummy = n - p1
% We must pad R' with zeros on the top to make it the correct size.
vstrip_R_t = [ zeros(p1,p2) ; R'];

for k = 1:p1
   vstrip_R_t(k:n,:) = app_hous( vstrip_R_t(k:n,:), betas(k), U(k:n,k) );   
end

%%%%%%%%% end apply_previous_house %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function Q = formQ(U,betas)
% Q = formQ(U,betas)
%
% This function forms explicitly the orthogonal matrix represented
% by the Householder transformations stored in U and betas.

[rows,cols] = size(U);
Q = zeros(rows,cols);
Q(1:cols,:) = eye(cols);
for ind = cols:-1:1
   Q(ind:rows,ind:cols) = ...
         app_hous( Q(ind:rows,ind:cols), betas(ind), U(ind:rows,ind) );
end

%%%%%%%%%%% end formQ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%-----------------------------------------------------------------------
% End of file lqlp
%-----------------------------------------------------------------------