LAPACK  3.10.1
LAPACK: Linear Algebra PACKage
dsgesv.f
Go to the documentation of this file.
1 *> \brief <b> DSGESV computes the solution to system of linear equations A * X = B for GE matrices</b> (mixed precision with iterative refinement)
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download DSGESV + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsgesv.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsgesv.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsgesv.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE DSGESV( N, NRHS, A, LDA, IPIV, B, LDB, X, LDX, WORK,
22 * SWORK, ITER, INFO )
23 *
24 * .. Scalar Arguments ..
25 * INTEGER INFO, ITER, LDA, LDB, LDX, N, NRHS
26 * ..
27 * .. Array Arguments ..
28 * INTEGER IPIV( * )
29 * REAL SWORK( * )
30 * DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( N, * ),
31 * $ X( LDX, * )
32 * ..
33 *
34 *
35 *> \par Purpose:
36 * =============
37 *>
38 *> \verbatim
39 *>
40 *> DSGESV computes the solution to a real system of linear equations
41 *> A * X = B,
42 *> where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
43 *>
44 *> DSGESV first attempts to factorize the matrix in SINGLE PRECISION
45 *> and use this factorization within an iterative refinement procedure
46 *> to produce a solution with DOUBLE PRECISION normwise backward error
47 *> quality (see below). If the approach fails the method switches to a
48 *> DOUBLE PRECISION factorization and solve.
49 *>
50 *> The iterative refinement is not going to be a winning strategy if
51 *> the ratio SINGLE PRECISION performance over DOUBLE PRECISION
52 *> performance is too small. A reasonable strategy should take the
53 *> number of right-hand sides and the size of the matrix into account.
54 *> This might be done with a call to ILAENV in the future. Up to now, we
55 *> always try iterative refinement.
56 *>
57 *> The iterative refinement process is stopped if
58 *> ITER > ITERMAX
59 *> or for all the RHS we have:
60 *> RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX
61 *> where
62 *> o ITER is the number of the current iteration in the iterative
63 *> refinement process
64 *> o RNRM is the infinity-norm of the residual
65 *> o XNRM is the infinity-norm of the solution
66 *> o ANRM is the infinity-operator-norm of the matrix A
67 *> o EPS is the machine epsilon returned by DLAMCH('Epsilon')
68 *> The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00
69 *> respectively.
70 *> \endverbatim
71 *
72 * Arguments:
73 * ==========
74 *
75 *> \param[in] N
76 *> \verbatim
77 *> N is INTEGER
78 *> The number of linear equations, i.e., the order of the
79 *> matrix A. N >= 0.
80 *> \endverbatim
81 *>
82 *> \param[in] NRHS
83 *> \verbatim
84 *> NRHS is INTEGER
85 *> The number of right hand sides, i.e., the number of columns
86 *> of the matrix B. NRHS >= 0.
87 *> \endverbatim
88 *>
89 *> \param[in,out] A
90 *> \verbatim
91 *> A is DOUBLE PRECISION array,
92 *> dimension (LDA,N)
93 *> On entry, the N-by-N coefficient matrix A.
94 *> On exit, if iterative refinement has been successfully used
95 *> (INFO = 0 and ITER >= 0, see description below), then A is
96 *> unchanged, if double precision factorization has been used
97 *> (INFO = 0 and ITER < 0, see description below), then the
98 *> array A contains the factors L and U from the factorization
99 *> A = P*L*U; the unit diagonal elements of L are not stored.
100 *> \endverbatim
101 *>
102 *> \param[in] LDA
103 *> \verbatim
104 *> LDA is INTEGER
105 *> The leading dimension of the array A. LDA >= max(1,N).
106 *> \endverbatim
107 *>
108 *> \param[out] IPIV
109 *> \verbatim
110 *> IPIV is INTEGER array, dimension (N)
111 *> The pivot indices that define the permutation matrix P;
112 *> row i of the matrix was interchanged with row IPIV(i).
113 *> Corresponds either to the single precision factorization
114 *> (if INFO = 0 and ITER >= 0) or the double precision
115 *> factorization (if INFO = 0 and ITER < 0).
116 *> \endverbatim
117 *>
118 *> \param[in] B
119 *> \verbatim
120 *> B is DOUBLE PRECISION array, dimension (LDB,NRHS)
121 *> The N-by-NRHS right hand side matrix B.
122 *> \endverbatim
123 *>
124 *> \param[in] LDB
125 *> \verbatim
126 *> LDB is INTEGER
127 *> The leading dimension of the array B. LDB >= max(1,N).
128 *> \endverbatim
129 *>
130 *> \param[out] X
131 *> \verbatim
132 *> X is DOUBLE PRECISION array, dimension (LDX,NRHS)
133 *> If INFO = 0, the N-by-NRHS solution matrix X.
134 *> \endverbatim
135 *>
136 *> \param[in] LDX
137 *> \verbatim
138 *> LDX is INTEGER
139 *> The leading dimension of the array X. LDX >= max(1,N).
140 *> \endverbatim
141 *>
142 *> \param[out] WORK
143 *> \verbatim
144 *> WORK is DOUBLE PRECISION array, dimension (N,NRHS)
145 *> This array is used to hold the residual vectors.
146 *> \endverbatim
147 *>
148 *> \param[out] SWORK
149 *> \verbatim
150 *> SWORK is REAL array, dimension (N*(N+NRHS))
151 *> This array is used to use the single precision matrix and the
152 *> right-hand sides or solutions in single precision.
153 *> \endverbatim
154 *>
155 *> \param[out] ITER
156 *> \verbatim
157 *> ITER is INTEGER
158 *> < 0: iterative refinement has failed, double precision
159 *> factorization has been performed
160 *> -1 : the routine fell back to full precision for
161 *> implementation- or machine-specific reasons
162 *> -2 : narrowing the precision induced an overflow,
163 *> the routine fell back to full precision
164 *> -3 : failure of SGETRF
165 *> -31: stop the iterative refinement after the 30th
166 *> iterations
167 *> > 0: iterative refinement has been successfully used.
168 *> Returns the number of iterations
169 *> \endverbatim
170 *>
171 *> \param[out] INFO
172 *> \verbatim
173 *> INFO is INTEGER
174 *> = 0: successful exit
175 *> < 0: if INFO = -i, the i-th argument had an illegal value
176 *> > 0: if INFO = i, U(i,i) computed in DOUBLE PRECISION is
177 *> exactly zero. The factorization has been completed,
178 *> but the factor U is exactly singular, so the solution
179 *> could not be computed.
180 *> \endverbatim
181 *
182 * Authors:
183 * ========
184 *
185 *> \author Univ. of Tennessee
186 *> \author Univ. of California Berkeley
187 *> \author Univ. of Colorado Denver
188 *> \author NAG Ltd.
189 *
190 *> \ingroup doubleGEsolve
191 *
192 * =====================================================================
193  SUBROUTINE dsgesv( N, NRHS, A, LDA, IPIV, B, LDB, X, LDX, WORK,
194  $ SWORK, ITER, INFO )
195 *
196 * -- LAPACK driver routine --
197 * -- LAPACK is a software package provided by Univ. of Tennessee, --
198 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
199 *
200 * .. Scalar Arguments ..
201  INTEGER INFO, ITER, LDA, LDB, LDX, N, NRHS
202 * ..
203 * .. Array Arguments ..
204  INTEGER IPIV( * )
205  REAL SWORK( * )
206  DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( N, * ),
207  $ x( ldx, * )
208 * ..
209 *
210 * =====================================================================
211 *
212 * .. Parameters ..
213  LOGICAL DOITREF
214  parameter( doitref = .true. )
215 *
216  INTEGER ITERMAX
217  parameter( itermax = 30 )
218 *
219  DOUBLE PRECISION BWDMAX
220  parameter( bwdmax = 1.0e+00 )
221 *
222  DOUBLE PRECISION NEGONE, ONE
223  parameter( negone = -1.0d+0, one = 1.0d+0 )
224 *
225 * .. Local Scalars ..
226  INTEGER I, IITER, PTSA, PTSX
227  DOUBLE PRECISION ANRM, CTE, EPS, RNRM, XNRM
228 *
229 * .. External Subroutines ..
230  EXTERNAL daxpy, dgemm, dlacpy, dlag2s, dgetrf, dgetrs,
232 * ..
233 * .. External Functions ..
234  INTEGER IDAMAX
235  DOUBLE PRECISION DLAMCH, DLANGE
236  EXTERNAL idamax, dlamch, dlange
237 * ..
238 * .. Intrinsic Functions ..
239  INTRINSIC abs, dble, max, sqrt
240 * ..
241 * .. Executable Statements ..
242 *
243  info = 0
244  iter = 0
245 *
246 * Test the input parameters.
247 *
248  IF( n.LT.0 ) THEN
249  info = -1
250  ELSE IF( nrhs.LT.0 ) THEN
251  info = -2
252  ELSE IF( lda.LT.max( 1, n ) ) THEN
253  info = -4
254  ELSE IF( ldb.LT.max( 1, n ) ) THEN
255  info = -7
256  ELSE IF( ldx.LT.max( 1, n ) ) THEN
257  info = -9
258  END IF
259  IF( info.NE.0 ) THEN
260  CALL xerbla( 'DSGESV', -info )
261  RETURN
262  END IF
263 *
264 * Quick return if (N.EQ.0).
265 *
266  IF( n.EQ.0 )
267  $ RETURN
268 *
269 * Skip single precision iterative refinement if a priori slower
270 * than double precision factorization.
271 *
272  IF( .NOT.doitref ) THEN
273  iter = -1
274  GO TO 40
275  END IF
276 *
277 * Compute some constants.
278 *
279  anrm = dlange( 'I', n, n, a, lda, work )
280  eps = dlamch( 'Epsilon' )
281  cte = anrm*eps*sqrt( dble( n ) )*bwdmax
282 *
283 * Set the indices PTSA, PTSX for referencing SA and SX in SWORK.
284 *
285  ptsa = 1
286  ptsx = ptsa + n*n
287 *
288 * Convert B from double precision to single precision and store the
289 * result in SX.
290 *
291  CALL dlag2s( n, nrhs, b, ldb, swork( ptsx ), n, info )
292 *
293  IF( info.NE.0 ) THEN
294  iter = -2
295  GO TO 40
296  END IF
297 *
298 * Convert A from double precision to single precision and store the
299 * result in SA.
300 *
301  CALL dlag2s( n, n, a, lda, swork( ptsa ), n, info )
302 *
303  IF( info.NE.0 ) THEN
304  iter = -2
305  GO TO 40
306  END IF
307 *
308 * Compute the LU factorization of SA.
309 *
310  CALL sgetrf( n, n, swork( ptsa ), n, ipiv, info )
311 *
312  IF( info.NE.0 ) THEN
313  iter = -3
314  GO TO 40
315  END IF
316 *
317 * Solve the system SA*SX = SB.
318 *
319  CALL sgetrs( 'No transpose', n, nrhs, swork( ptsa ), n, ipiv,
320  $ swork( ptsx ), n, info )
321 *
322 * Convert SX back to double precision
323 *
324  CALL slag2d( n, nrhs, swork( ptsx ), n, x, ldx, info )
325 *
326 * Compute R = B - AX (R is WORK).
327 *
328  CALL dlacpy( 'All', n, nrhs, b, ldb, work, n )
329 *
330  CALL dgemm( 'No Transpose', 'No Transpose', n, nrhs, n, negone, a,
331  $ lda, x, ldx, one, work, n )
332 *
333 * Check whether the NRHS normwise backward errors satisfy the
334 * stopping criterion. If yes, set ITER=0 and return.
335 *
336  DO i = 1, nrhs
337  xnrm = abs( x( idamax( n, x( 1, i ), 1 ), i ) )
338  rnrm = abs( work( idamax( n, work( 1, i ), 1 ), i ) )
339  IF( rnrm.GT.xnrm*cte )
340  $ GO TO 10
341  END DO
342 *
343 * If we are here, the NRHS normwise backward errors satisfy the
344 * stopping criterion. We are good to exit.
345 *
346  iter = 0
347  RETURN
348 *
349  10 CONTINUE
350 *
351  DO 30 iiter = 1, itermax
352 *
353 * Convert R (in WORK) from double precision to single precision
354 * and store the result in SX.
355 *
356  CALL dlag2s( n, nrhs, work, n, swork( ptsx ), n, info )
357 *
358  IF( info.NE.0 ) THEN
359  iter = -2
360  GO TO 40
361  END IF
362 *
363 * Solve the system SA*SX = SR.
364 *
365  CALL sgetrs( 'No transpose', n, nrhs, swork( ptsa ), n, ipiv,
366  $ swork( ptsx ), n, info )
367 *
368 * Convert SX back to double precision and update the current
369 * iterate.
370 *
371  CALL slag2d( n, nrhs, swork( ptsx ), n, work, n, info )
372 *
373  DO i = 1, nrhs
374  CALL daxpy( n, one, work( 1, i ), 1, x( 1, i ), 1 )
375  END DO
376 *
377 * Compute R = B - AX (R is WORK).
378 *
379  CALL dlacpy( 'All', n, nrhs, b, ldb, work, n )
380 *
381  CALL dgemm( 'No Transpose', 'No Transpose', n, nrhs, n, negone,
382  $ a, lda, x, ldx, one, work, n )
383 *
384 * Check whether the NRHS normwise backward errors satisfy the
385 * stopping criterion. If yes, set ITER=IITER>0 and return.
386 *
387  DO i = 1, nrhs
388  xnrm = abs( x( idamax( n, x( 1, i ), 1 ), i ) )
389  rnrm = abs( work( idamax( n, work( 1, i ), 1 ), i ) )
390  IF( rnrm.GT.xnrm*cte )
391  $ GO TO 20
392  END DO
393 *
394 * If we are here, the NRHS normwise backward errors satisfy the
395 * stopping criterion, we are good to exit.
396 *
397  iter = iiter
398 *
399  RETURN
400 *
401  20 CONTINUE
402 *
403  30 CONTINUE
404 *
405 * If we are at this place of the code, this is because we have
406 * performed ITER=ITERMAX iterations and never satisfied the
407 * stopping criterion, set up the ITER flag accordingly and follow up
408 * on double precision routine.
409 *
410  iter = -itermax - 1
411 *
412  40 CONTINUE
413 *
414 * Single-precision iterative refinement failed to converge to a
415 * satisfactory solution, so we resort to double precision.
416 *
417  CALL dgetrf( n, n, a, lda, ipiv, info )
418 *
419  IF( info.NE.0 )
420  $ RETURN
421 *
422  CALL dlacpy( 'All', n, nrhs, b, ldb, x, ldx )
423  CALL dgetrs( 'No transpose', n, nrhs, a, lda, ipiv, x, ldx,
424  $ info )
425 *
426  RETURN
427 *
428 * End of DSGESV
429 *
430  END
subroutine dlacpy(UPLO, M, N, A, LDA, B, LDB)
DLACPY copies all or part of one two-dimensional array to another.
Definition: dlacpy.f:103
subroutine slag2d(M, N, SA, LDSA, A, LDA, INFO)
SLAG2D converts a single precision matrix to a double precision matrix.
Definition: slag2d.f:104
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:60
subroutine daxpy(N, DA, DX, INCX, DY, INCY)
DAXPY
Definition: daxpy.f:89
subroutine dgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
DGEMM
Definition: dgemm.f:187
subroutine dgetrf(M, N, A, LDA, IPIV, INFO)
DGETRF
Definition: dgetrf.f:108
subroutine dgetrs(TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO)
DGETRS
Definition: dgetrs.f:121
subroutine dsgesv(N, NRHS, A, LDA, IPIV, B, LDB, X, LDX, WORK, SWORK, ITER, INFO)
DSGESV computes the solution to system of linear equations A * X = B for GE matrices (mixed precision...
Definition: dsgesv.f:195
subroutine dlag2s(M, N, A, LDA, SA, LDSA, INFO)
DLAG2S converts a double precision matrix to a single precision matrix.
Definition: dlag2s.f:108
subroutine sgetrf(M, N, A, LDA, IPIV, INFO)
SGETRF
Definition: sgetrf.f:108
subroutine sgetrs(TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO)
SGETRS
Definition: sgetrs.f:121