LAPACK  3.10.0
LAPACK: Linear Algebra PACKage

◆ cgbtrf()

subroutine cgbtrf ( integer  M,
integer  N,
integer  KL,
integer  KU,
complex, dimension( ldab, * )  AB,
integer  LDAB,
integer, dimension( * )  IPIV,
integer  INFO 
)

CGBTRF

Download CGBTRF + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 CGBTRF computes an LU factorization of a complex m-by-n band matrix A
 using partial pivoting with row interchanges.

 This is the blocked version of the algorithm, calling Level 3 BLAS.
Parameters
[in]M
          M is INTEGER
          The number of rows of the matrix A.  M >= 0.
[in]N
          N is INTEGER
          The number of columns of the matrix A.  N >= 0.
[in]KL
          KL is INTEGER
          The number of subdiagonals within the band of A.  KL >= 0.
[in]KU
          KU is INTEGER
          The number of superdiagonals within the band of A.  KU >= 0.
[in,out]AB
          AB is COMPLEX array, dimension (LDAB,N)
          On entry, the matrix A in band storage, in rows KL+1 to
          2*KL+KU+1; rows 1 to KL of the array need not be set.
          The j-th column of A is stored in the j-th column of the
          array AB as follows:
          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)

          On exit, details of the factorization: U is stored as an
          upper triangular band matrix with KL+KU superdiagonals in
          rows 1 to KL+KU+1, and the multipliers used during the
          factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
          See below for further details.
[in]LDAB
          LDAB is INTEGER
          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
[out]IPIV
          IPIV is INTEGER array, dimension (min(M,N))
          The pivot indices; for 1 <= i <= min(M,N), row i of the
          matrix was interchanged with row IPIV(i).
[out]INFO
          INFO is INTEGER
          = 0: successful exit
          < 0: if INFO = -i, the i-th argument had an illegal value
          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
               has been completed, but the factor U is exactly
               singular, and division by zero will occur if it is used
               to solve a system of equations.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Further Details:
  The band storage scheme is illustrated by the following example, when
  M = N = 6, KL = 2, KU = 1:

  On entry:                       On exit:

      *    *    *    +    +    +       *    *    *   u14  u25  u36
      *    *    +    +    +    +       *    *   u13  u24  u35  u46
      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *

  Array elements marked * are not used by the routine; elements marked
  + need not be set on entry, but are required by the routine to store
  elements of U because of fill-in resulting from the row interchanges.

Definition at line 143 of file cgbtrf.f.

144 *
145 * -- LAPACK computational routine --
146 * -- LAPACK is a software package provided by Univ. of Tennessee, --
147 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
148 *
149 * .. Scalar Arguments ..
150  INTEGER INFO, KL, KU, LDAB, M, N
151 * ..
152 * .. Array Arguments ..
153  INTEGER IPIV( * )
154  COMPLEX AB( LDAB, * )
155 * ..
156 *
157 * =====================================================================
158 *
159 * .. Parameters ..
160  COMPLEX ONE, ZERO
161  parameter( one = ( 1.0e+0, 0.0e+0 ),
162  $ zero = ( 0.0e+0, 0.0e+0 ) )
163  INTEGER NBMAX, LDWORK
164  parameter( nbmax = 64, ldwork = nbmax+1 )
165 * ..
166 * .. Local Scalars ..
167  INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
168  $ JU, K2, KM, KV, NB, NW
169  COMPLEX TEMP
170 * ..
171 * .. Local Arrays ..
172  COMPLEX WORK13( LDWORK, NBMAX ),
173  $ WORK31( LDWORK, NBMAX )
174 * ..
175 * .. External Functions ..
176  INTEGER ICAMAX, ILAENV
177  EXTERNAL icamax, ilaenv
178 * ..
179 * .. External Subroutines ..
180  EXTERNAL ccopy, cgbtf2, cgemm, cgeru, claswp, cscal,
181  $ cswap, ctrsm, xerbla
182 * ..
183 * .. Intrinsic Functions ..
184  INTRINSIC max, min
185 * ..
186 * .. Executable Statements ..
187 *
188 * KV is the number of superdiagonals in the factor U, allowing for
189 * fill-in
190 *
191  kv = ku + kl
192 *
193 * Test the input parameters.
194 *
195  info = 0
196  IF( m.LT.0 ) THEN
197  info = -1
198  ELSE IF( n.LT.0 ) THEN
199  info = -2
200  ELSE IF( kl.LT.0 ) THEN
201  info = -3
202  ELSE IF( ku.LT.0 ) THEN
203  info = -4
204  ELSE IF( ldab.LT.kl+kv+1 ) THEN
205  info = -6
206  END IF
207  IF( info.NE.0 ) THEN
208  CALL xerbla( 'CGBTRF', -info )
209  RETURN
210  END IF
211 *
212 * Quick return if possible
213 *
214  IF( m.EQ.0 .OR. n.EQ.0 )
215  $ RETURN
216 *
217 * Determine the block size for this environment
218 *
219  nb = ilaenv( 1, 'CGBTRF', ' ', m, n, kl, ku )
220 *
221 * The block size must not exceed the limit set by the size of the
222 * local arrays WORK13 and WORK31.
223 *
224  nb = min( nb, nbmax )
225 *
226  IF( nb.LE.1 .OR. nb.GT.kl ) THEN
227 *
228 * Use unblocked code
229 *
230  CALL cgbtf2( m, n, kl, ku, ab, ldab, ipiv, info )
231  ELSE
232 *
233 * Use blocked code
234 *
235 * Zero the superdiagonal elements of the work array WORK13
236 *
237  DO 20 j = 1, nb
238  DO 10 i = 1, j - 1
239  work13( i, j ) = zero
240  10 CONTINUE
241  20 CONTINUE
242 *
243 * Zero the subdiagonal elements of the work array WORK31
244 *
245  DO 40 j = 1, nb
246  DO 30 i = j + 1, nb
247  work31( i, j ) = zero
248  30 CONTINUE
249  40 CONTINUE
250 *
251 * Gaussian elimination with partial pivoting
252 *
253 * Set fill-in elements in columns KU+2 to KV to zero
254 *
255  DO 60 j = ku + 2, min( kv, n )
256  DO 50 i = kv - j + 2, kl
257  ab( i, j ) = zero
258  50 CONTINUE
259  60 CONTINUE
260 *
261 * JU is the index of the last column affected by the current
262 * stage of the factorization
263 *
264  ju = 1
265 *
266  DO 180 j = 1, min( m, n ), nb
267  jb = min( nb, min( m, n )-j+1 )
268 *
269 * The active part of the matrix is partitioned
270 *
271 * A11 A12 A13
272 * A21 A22 A23
273 * A31 A32 A33
274 *
275 * Here A11, A21 and A31 denote the current block of JB columns
276 * which is about to be factorized. The number of rows in the
277 * partitioning are JB, I2, I3 respectively, and the numbers
278 * of columns are JB, J2, J3. The superdiagonal elements of A13
279 * and the subdiagonal elements of A31 lie outside the band.
280 *
281  i2 = min( kl-jb, m-j-jb+1 )
282  i3 = min( jb, m-j-kl+1 )
283 *
284 * J2 and J3 are computed after JU has been updated.
285 *
286 * Factorize the current block of JB columns
287 *
288  DO 80 jj = j, j + jb - 1
289 *
290 * Set fill-in elements in column JJ+KV to zero
291 *
292  IF( jj+kv.LE.n ) THEN
293  DO 70 i = 1, kl
294  ab( i, jj+kv ) = zero
295  70 CONTINUE
296  END IF
297 *
298 * Find pivot and test for singularity. KM is the number of
299 * subdiagonal elements in the current column.
300 *
301  km = min( kl, m-jj )
302  jp = icamax( km+1, ab( kv+1, jj ), 1 )
303  ipiv( jj ) = jp + jj - j
304  IF( ab( kv+jp, jj ).NE.zero ) THEN
305  ju = max( ju, min( jj+ku+jp-1, n ) )
306  IF( jp.NE.1 ) THEN
307 *
308 * Apply interchange to columns J to J+JB-1
309 *
310  IF( jp+jj-1.LT.j+kl ) THEN
311 *
312  CALL cswap( jb, ab( kv+1+jj-j, j ), ldab-1,
313  $ ab( kv+jp+jj-j, j ), ldab-1 )
314  ELSE
315 *
316 * The interchange affects columns J to JJ-1 of A31
317 * which are stored in the work array WORK31
318 *
319  CALL cswap( jj-j, ab( kv+1+jj-j, j ), ldab-1,
320  $ work31( jp+jj-j-kl, 1 ), ldwork )
321  CALL cswap( j+jb-jj, ab( kv+1, jj ), ldab-1,
322  $ ab( kv+jp, jj ), ldab-1 )
323  END IF
324  END IF
325 *
326 * Compute multipliers
327 *
328  CALL cscal( km, one / ab( kv+1, jj ), ab( kv+2, jj ),
329  $ 1 )
330 *
331 * Update trailing submatrix within the band and within
332 * the current block. JM is the index of the last column
333 * which needs to be updated.
334 *
335  jm = min( ju, j+jb-1 )
336  IF( jm.GT.jj )
337  $ CALL cgeru( km, jm-jj, -one, ab( kv+2, jj ), 1,
338  $ ab( kv, jj+1 ), ldab-1,
339  $ ab( kv+1, jj+1 ), ldab-1 )
340  ELSE
341 *
342 * If pivot is zero, set INFO to the index of the pivot
343 * unless a zero pivot has already been found.
344 *
345  IF( info.EQ.0 )
346  $ info = jj
347  END IF
348 *
349 * Copy current column of A31 into the work array WORK31
350 *
351  nw = min( jj-j+1, i3 )
352  IF( nw.GT.0 )
353  $ CALL ccopy( nw, ab( kv+kl+1-jj+j, jj ), 1,
354  $ work31( 1, jj-j+1 ), 1 )
355  80 CONTINUE
356  IF( j+jb.LE.n ) THEN
357 *
358 * Apply the row interchanges to the other blocks.
359 *
360  j2 = min( ju-j+1, kv ) - jb
361  j3 = max( 0, ju-j-kv+1 )
362 *
363 * Use CLASWP to apply the row interchanges to A12, A22, and
364 * A32.
365 *
366  CALL claswp( j2, ab( kv+1-jb, j+jb ), ldab-1, 1, jb,
367  $ ipiv( j ), 1 )
368 *
369 * Adjust the pivot indices.
370 *
371  DO 90 i = j, j + jb - 1
372  ipiv( i ) = ipiv( i ) + j - 1
373  90 CONTINUE
374 *
375 * Apply the row interchanges to A13, A23, and A33
376 * columnwise.
377 *
378  k2 = j - 1 + jb + j2
379  DO 110 i = 1, j3
380  jj = k2 + i
381  DO 100 ii = j + i - 1, j + jb - 1
382  ip = ipiv( ii )
383  IF( ip.NE.ii ) THEN
384  temp = ab( kv+1+ii-jj, jj )
385  ab( kv+1+ii-jj, jj ) = ab( kv+1+ip-jj, jj )
386  ab( kv+1+ip-jj, jj ) = temp
387  END IF
388  100 CONTINUE
389  110 CONTINUE
390 *
391 * Update the relevant part of the trailing submatrix
392 *
393  IF( j2.GT.0 ) THEN
394 *
395 * Update A12
396 *
397  CALL ctrsm( 'Left', 'Lower', 'No transpose', 'Unit',
398  $ jb, j2, one, ab( kv+1, j ), ldab-1,
399  $ ab( kv+1-jb, j+jb ), ldab-1 )
400 *
401  IF( i2.GT.0 ) THEN
402 *
403 * Update A22
404 *
405  CALL cgemm( 'No transpose', 'No transpose', i2, j2,
406  $ jb, -one, ab( kv+1+jb, j ), ldab-1,
407  $ ab( kv+1-jb, j+jb ), ldab-1, one,
408  $ ab( kv+1, j+jb ), ldab-1 )
409  END IF
410 *
411  IF( i3.GT.0 ) THEN
412 *
413 * Update A32
414 *
415  CALL cgemm( 'No transpose', 'No transpose', i3, j2,
416  $ jb, -one, work31, ldwork,
417  $ ab( kv+1-jb, j+jb ), ldab-1, one,
418  $ ab( kv+kl+1-jb, j+jb ), ldab-1 )
419  END IF
420  END IF
421 *
422  IF( j3.GT.0 ) THEN
423 *
424 * Copy the lower triangle of A13 into the work array
425 * WORK13
426 *
427  DO 130 jj = 1, j3
428  DO 120 ii = jj, jb
429  work13( ii, jj ) = ab( ii-jj+1, jj+j+kv-1 )
430  120 CONTINUE
431  130 CONTINUE
432 *
433 * Update A13 in the work array
434 *
435  CALL ctrsm( 'Left', 'Lower', 'No transpose', 'Unit',
436  $ jb, j3, one, ab( kv+1, j ), ldab-1,
437  $ work13, ldwork )
438 *
439  IF( i2.GT.0 ) THEN
440 *
441 * Update A23
442 *
443  CALL cgemm( 'No transpose', 'No transpose', i2, j3,
444  $ jb, -one, ab( kv+1+jb, j ), ldab-1,
445  $ work13, ldwork, one, ab( 1+jb, j+kv ),
446  $ ldab-1 )
447  END IF
448 *
449  IF( i3.GT.0 ) THEN
450 *
451 * Update A33
452 *
453  CALL cgemm( 'No transpose', 'No transpose', i3, j3,
454  $ jb, -one, work31, ldwork, work13,
455  $ ldwork, one, ab( 1+kl, j+kv ), ldab-1 )
456  END IF
457 *
458 * Copy the lower triangle of A13 back into place
459 *
460  DO 150 jj = 1, j3
461  DO 140 ii = jj, jb
462  ab( ii-jj+1, jj+j+kv-1 ) = work13( ii, jj )
463  140 CONTINUE
464  150 CONTINUE
465  END IF
466  ELSE
467 *
468 * Adjust the pivot indices.
469 *
470  DO 160 i = j, j + jb - 1
471  ipiv( i ) = ipiv( i ) + j - 1
472  160 CONTINUE
473  END IF
474 *
475 * Partially undo the interchanges in the current block to
476 * restore the upper triangular form of A31 and copy the upper
477 * triangle of A31 back into place
478 *
479  DO 170 jj = j + jb - 1, j, -1
480  jp = ipiv( jj ) - jj + 1
481  IF( jp.NE.1 ) THEN
482 *
483 * Apply interchange to columns J to JJ-1
484 *
485  IF( jp+jj-1.LT.j+kl ) THEN
486 *
487 * The interchange does not affect A31
488 *
489  CALL cswap( jj-j, ab( kv+1+jj-j, j ), ldab-1,
490  $ ab( kv+jp+jj-j, j ), ldab-1 )
491  ELSE
492 *
493 * The interchange does affect A31
494 *
495  CALL cswap( jj-j, ab( kv+1+jj-j, j ), ldab-1,
496  $ work31( jp+jj-j-kl, 1 ), ldwork )
497  END IF
498  END IF
499 *
500 * Copy the current column of A31 back into place
501 *
502  nw = min( i3, jj-j+1 )
503  IF( nw.GT.0 )
504  $ CALL ccopy( nw, work31( 1, jj-j+1 ), 1,
505  $ ab( kv+kl+1-jj+j, jj ), 1 )
506  170 CONTINUE
507  180 CONTINUE
508  END IF
509 *
510  RETURN
511 *
512 * End of CGBTRF
513 *
integer function ilaenv(ISPEC, NAME, OPTS, N1, N2, N3, N4)
ILAENV
Definition: ilaenv.f:162
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:60
integer function icamax(N, CX, INCX)
ICAMAX
Definition: icamax.f:71
subroutine ccopy(N, CX, INCX, CY, INCY)
CCOPY
Definition: ccopy.f:81
subroutine cswap(N, CX, INCX, CY, INCY)
CSWAP
Definition: cswap.f:81
subroutine cscal(N, CA, CX, INCX)
CSCAL
Definition: cscal.f:78
subroutine cgeru(M, N, ALPHA, X, INCX, Y, INCY, A, LDA)
CGERU
Definition: cgeru.f:130
subroutine cgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
CGEMM
Definition: cgemm.f:187
subroutine ctrsm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
CTRSM
Definition: ctrsm.f:180
subroutine cgbtf2(M, N, KL, KU, AB, LDAB, IPIV, INFO)
CGBTF2 computes the LU factorization of a general band matrix using the unblocked version of the algorithm.
Definition: cgbtf2.f:145
subroutine claswp(N, A, LDA, K1, K2, IPIV, INCX)
CLASWP performs a series of row interchanges on a general rectangular matrix.
Definition: claswp.f:115
Here is the call graph for this function:
Here is the caller graph for this function: