LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
ctrsyl3.f
Go to the documentation of this file.
1*> \brief \b CTRSYL3
2*
3* Definition:
4* ===========
5*
6*
7*> \par Purpose
8* =============
9*>
10*> \verbatim
11*>
12*> CTRSYL3 solves the complex Sylvester matrix equation:
13*>
14*> op(A)*X + X*op(B) = scale*C or
15*> op(A)*X - X*op(B) = scale*C,
16*>
17*> where op(A) = A or A**H, and A and B are both upper triangular. A is
18*> M-by-M and B is N-by-N; the right hand side C and the solution X are
19*> M-by-N; and scale is an output scale factor, set <= 1 to avoid
20*> overflow in X.
21*>
22*> This is the block version of the algorithm.
23*> \endverbatim
24*
25* Arguments
26* =========
27*
28*> \param[in] TRANA
29*> \verbatim
30*> TRANA is CHARACTER*1
31*> Specifies the option op(A):
32*> = 'N': op(A) = A (No transpose)
33*> = 'C': op(A) = A**H (Conjugate transpose)
34*> \endverbatim
35*>
36*> \param[in] TRANB
37*> \verbatim
38*> TRANB is CHARACTER*1
39*> Specifies the option op(B):
40*> = 'N': op(B) = B (No transpose)
41*> = 'C': op(B) = B**H (Conjugate transpose)
42*> \endverbatim
43*>
44*> \param[in] ISGN
45*> \verbatim
46*> ISGN is INTEGER
47*> Specifies the sign in the equation:
48*> = +1: solve op(A)*X + X*op(B) = scale*C
49*> = -1: solve op(A)*X - X*op(B) = scale*C
50*> \endverbatim
51*>
52*> \param[in] M
53*> \verbatim
54*> M is INTEGER
55*> The order of the matrix A, and the number of rows in the
56*> matrices X and C. M >= 0.
57*> \endverbatim
58*>
59*> \param[in] N
60*> \verbatim
61*> N is INTEGER
62*> The order of the matrix B, and the number of columns in the
63*> matrices X and C. N >= 0.
64*> \endverbatim
65*>
66*> \param[in] A
67*> \verbatim
68*> A is COMPLEX array, dimension (LDA,M)
69*> The upper triangular matrix A.
70*> \endverbatim
71*>
72*> \param[in] LDA
73*> \verbatim
74*> LDA is INTEGER
75*> The leading dimension of the array A. LDA >= max(1,M).
76*> \endverbatim
77*>
78*> \param[in] B
79*> \verbatim
80*> B is COMPLEX array, dimension (LDB,N)
81*> The upper triangular matrix B.
82*> \endverbatim
83*>
84*> \param[in] LDB
85*> \verbatim
86*> LDB is INTEGER
87*> The leading dimension of the array B. LDB >= max(1,N).
88*> \endverbatim
89*>
90*> \param[in,out] C
91*> \verbatim
92*> C is COMPLEX array, dimension (LDC,N)
93*> On entry, the M-by-N right hand side matrix C.
94*> On exit, C is overwritten by the solution matrix X.
95*> \endverbatim
96*>
97*> \param[in] LDC
98*> \verbatim
99*> LDC is INTEGER
100*> The leading dimension of the array C. LDC >= max(1,M)
101*> \endverbatim
102*>
103*> \param[out] SCALE
104*> \verbatim
105*> SCALE is REAL
106*> The scale factor, scale, set <= 1 to avoid overflow in X.
107*> \endverbatim
108*>
109*> \param[out] SWORK
110*> \verbatim
111*> SWORK is REAL array, dimension (MAX(2, ROWS), MAX(1,COLS)).
112*> On exit, if INFO = 0, SWORK(1) returns the optimal value ROWS
113*> and SWORK(2) returns the optimal COLS.
114*> \endverbatim
115*>
116*> \param[in] LDSWORK
117*> \verbatim
118*> LDSWORK is INTEGER
119*> LDSWORK >= MAX(2,ROWS), where ROWS = ((M + NB - 1) / NB + 1)
120*> and NB is the optimal block size.
121*>
122*> If LDSWORK = -1, then a workspace query is assumed; the routine
123*> only calculates the optimal dimensions of the SWORK matrix,
124*> returns these values as the first and second entry of the SWORK
125*> matrix, and no error message related LWORK is issued by XERBLA.
126*> \endverbatim
127*>
128*> \param[out] INFO
129*> \verbatim
130*> INFO is INTEGER
131*> = 0: successful exit
132*> < 0: if INFO = -i, the i-th argument had an illegal value
133*> = 1: A and B have common or very close eigenvalues; perturbed
134*> values were used to solve the equation (but the matrices
135*> A and B are unchanged).
136*> \endverbatim
137*
138*> \ingroup trsyl3
139*
140* =====================================================================
141* References:
142* E. S. Quintana-Orti and R. A. Van De Geijn (2003). Formal derivation of
143* algorithms: The triangular Sylvester equation, ACM Transactions
144* on Mathematical Software (TOMS), volume 29, pages 218--243.
145*
146* A. Schwarz and C. C. Kjelgaard Mikkelsen (2020). Robust Task-Parallel
147* Solution of the Triangular Sylvester Equation. Lecture Notes in
148* Computer Science, vol 12043, pages 82--92, Springer.
149*
150* Contributor:
151* Angelika Schwarz, Umea University, Sweden.
152*
153* =====================================================================
154 SUBROUTINE ctrsyl3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C,
155 $ LDC, SCALE, SWORK, LDSWORK, INFO )
156 IMPLICIT NONE
157*
158* .. Scalar Arguments ..
159 CHARACTER TRANA, TRANB
160 INTEGER INFO, ISGN, LDA, LDB, LDC, LDSWORK, M, N
161 REAL SCALE
162* ..
163* .. Array Arguments ..
164 COMPLEX A( LDA, * ), B( LDB, * ), C( LDC, * )
165 REAL SWORK( LDSWORK, * )
166* ..
167* .. Parameters ..
168 REAL ZERO, ONE
169 parameter( zero = 0.0e+0, one = 1.0e+0 )
170 COMPLEX CONE
171 parameter( cone = ( 1.0e+0, 0.0e+0 ) )
172* ..
173* .. Local Scalars ..
174 LOGICAL NOTRNA, NOTRNB, LQUERY
175 INTEGER AWRK, BWRK, I, I1, I2, IINFO, J, J1, J2, JJ,
176 $ k, k1, k2, l, l1, l2, ll, nba, nb, nbb
177 REAL ANRM, BIGNUM, BNRM, CNRM, SCAL, SCALOC,
178 $ scamin, sgn, xnrm, buf, smlnum
179 COMPLEX CSGN
180* ..
181* .. Local Arrays ..
182 REAL WNRM( MAX( M, N ) )
183* ..
184* .. External Functions ..
185 LOGICAL LSAME
186 INTEGER ILAENV
187 REAL CLANGE, SLAMCH, SLARMM
188 EXTERNAL clange, ilaenv, lsame, slamch, slarmm
189* ..
190* .. External Subroutines ..
191 EXTERNAL csscal, cgemm, clascl, ctrsyl, xerbla
192* ..
193* .. Intrinsic Functions ..
194 INTRINSIC abs, aimag, exponent, max, min, real
195* ..
196* .. Executable Statements ..
197*
198* Decode and Test input parameters
199*
200 notrna = lsame( trana, 'N' )
201 notrnb = lsame( tranb, 'N' )
202*
203* Use the same block size for all matrices.
204*
205 nb = max( 8, ilaenv( 1, 'CTRSYL', '', m, n, -1, -1) )
206*
207* Compute number of blocks in A and B
208*
209 nba = max( 1, (m + nb - 1) / nb )
210 nbb = max( 1, (n + nb - 1) / nb )
211*
212* Compute workspace
213*
214 info = 0
215 lquery = ( ldswork.EQ.-1 )
216 IF( lquery ) THEN
217 ldswork = 2
218 swork(1,1) = max( nba, nbb )
219 swork(2,1) = 2 * nbb + nba
220 END IF
221*
222* Test the input arguments
223*
224 IF( .NOT.notrna .AND. .NOT. lsame( trana, 'C' ) ) THEN
225 info = -1
226 ELSE IF( .NOT.notrnb .AND. .NOT. lsame( tranb, 'C' ) ) THEN
227 info = -2
228 ELSE IF( isgn.NE.1 .AND. isgn.NE.-1 ) THEN
229 info = -3
230 ELSE IF( m.LT.0 ) THEN
231 info = -4
232 ELSE IF( n.LT.0 ) THEN
233 info = -5
234 ELSE IF( lda.LT.max( 1, m ) ) THEN
235 info = -7
236 ELSE IF( ldb.LT.max( 1, n ) ) THEN
237 info = -9
238 ELSE IF( ldc.LT.max( 1, m ) ) THEN
239 info = -11
240 END IF
241 IF( info.NE.0 ) THEN
242 CALL xerbla( 'CTRSYL3', -info )
243 RETURN
244 ELSE IF( lquery ) THEN
245 RETURN
246 END IF
247*
248* Quick return if possible
249*
250 scale = one
251 IF( m.EQ.0 .OR. n.EQ.0 )
252 $ RETURN
253*
254* Use unblocked code for small problems or if insufficient
255* workspace is provided
256*
257 IF( min( nba, nbb ).EQ.1 .OR. ldswork.LT.max( nba, nbb ) ) THEN
258 CALL ctrsyl( trana, tranb, isgn, m, n, a, lda, b, ldb,
259 $ c, ldc, scale, info )
260 RETURN
261 END IF
262*
263* Set constants to control overflow
264*
265 smlnum = slamch( 'S' )
266 bignum = one / smlnum
267*
268* Set local scaling factors.
269*
270 DO l = 1, nbb
271 DO k = 1, nba
272 swork( k, l ) = one
273 END DO
274 END DO
275*
276* Fallback scaling factor to prevent flushing of SWORK( K, L ) to zero.
277* This scaling is to ensure compatibility with TRSYL and may get flushed.
278*
279 buf = one
280*
281* Compute upper bounds of blocks of A and B
282*
283 awrk = nbb
284 DO k = 1, nba
285 k1 = (k - 1) * nb + 1
286 k2 = min( k * nb, m ) + 1
287 DO l = k, nba
288 l1 = (l - 1) * nb + 1
289 l2 = min( l * nb, m ) + 1
290 IF( notrna ) THEN
291 swork( k, awrk + l ) = clange( 'I', k2-k1, l2-l1,
292 $ a( k1, l1 ), lda, wnrm )
293 ELSE
294 swork( l, awrk + k ) = clange( '1', k2-k1, l2-l1,
295 $ a( k1, l1 ), lda, wnrm )
296 END IF
297 END DO
298 END DO
299 bwrk = nbb + nba
300 DO k = 1, nbb
301 k1 = (k - 1) * nb + 1
302 k2 = min( k * nb, n ) + 1
303 DO l = k, nbb
304 l1 = (l - 1) * nb + 1
305 l2 = min( l * nb, n ) + 1
306 IF( notrnb ) THEN
307 swork( k, bwrk + l ) = clange( 'I', k2-k1, l2-l1,
308 $ b( k1, l1 ), ldb, wnrm )
309 ELSE
310 swork( l, bwrk + k ) = clange( '1', k2-k1, l2-l1,
311 $ b( k1, l1 ), ldb, wnrm )
312 END IF
313 END DO
314 END DO
315*
316 sgn = real( isgn )
317 csgn = cmplx( sgn, zero )
318*
319 IF( notrna .AND. notrnb ) THEN
320*
321* Solve A*X + ISGN*X*B = scale*C.
322*
323* The (K,L)th block of X is determined starting from
324* bottom-left corner column by column by
325*
326* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)
327*
328* Where
329* M L-1
330* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)].
331* I=K+1 J=1
332*
333* Start loop over block rows (index = K) and block columns (index = L)
334*
335 DO k = nba, 1, -1
336*
337* K1: row index of the first row in X( K, L )
338* K2: row index of the first row in X( K+1, L )
339* so the K2 - K1 is the column count of the block X( K, L )
340*
341 k1 = (k - 1) * nb + 1
342 k2 = min( k * nb, m ) + 1
343 DO l = 1, nbb
344*
345* L1: column index of the first column in X( K, L )
346* L2: column index of the first column in X( K, L + 1)
347* so that L2 - L1 is the row count of the block X( K, L )
348*
349 l1 = (l - 1) * nb + 1
350 l2 = min( l * nb, n ) + 1
351*
352 CALL ctrsyl( trana, tranb, isgn, k2-k1, l2-l1,
353 $ a( k1, k1 ), lda,
354 $ b( l1, l1 ), ldb,
355 $ c( k1, l1 ), ldc, scaloc, iinfo )
356 info = max( info, iinfo )
357*
358 IF( scaloc * swork( k, l ) .EQ. zero ) THEN
359 IF( scaloc .EQ. zero ) THEN
360* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1)
361* is larger than the product of BIGNUM**2 and cannot be
362* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1).
363* Mark the computation as pointless.
364 buf = zero
365 ELSE
366* Use second scaling factor to prevent flushing to zero.
367 buf = buf*2.e0**exponent( scaloc )
368 END IF
369 DO jj = 1, nbb
370 DO ll = 1, nba
371* Bound by BIGNUM to not introduce Inf. The value
372* is irrelevant; corresponding entries of the
373* solution will be flushed in consistency scaling.
374 swork( ll, jj ) = min( bignum,
375 $ swork( ll, jj ) / 2.e0**exponent( scaloc ) )
376 END DO
377 END DO
378 END IF
379 swork( k, l ) = scaloc * swork( k, l )
380 xnrm = clange( 'I', k2-k1, l2-l1, c( k1, l1 ), ldc,
381 $ wnrm )
382*
383 DO i = k - 1, 1, -1
384*
385* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L )
386*
387 i1 = (i - 1) * nb + 1
388 i2 = min( i * nb, m ) + 1
389*
390* Compute scaling factor to survive the linear update
391* simulating consistent scaling.
392*
393 cnrm = clange( 'I', i2-i1, l2-l1, c( i1, l1 ),
394 $ ldc, wnrm )
395 scamin = min( swork( i, l ), swork( k, l ) )
396 cnrm = cnrm * ( scamin / swork( i, l ) )
397 xnrm = xnrm * ( scamin / swork( k, l ) )
398 anrm = swork( i, awrk + k )
399 scaloc = slarmm( anrm, xnrm, cnrm )
400 IF( scaloc * scamin .EQ. zero ) THEN
401* Use second scaling factor to prevent flushing to zero.
402 buf = buf*2.e0**exponent( scaloc )
403 DO jj = 1, nbb
404 DO ll = 1, nba
405 swork( ll, jj ) = min( bignum,
406 $ swork( ll, jj ) / 2.e0**exponent( scaloc ) )
407 END DO
408 END DO
409 scamin = scamin / 2.e0**exponent( scaloc )
410 scaloc = scaloc / 2.e0**exponent( scaloc )
411 END IF
412 cnrm = cnrm * scaloc
413 xnrm = xnrm * scaloc
414*
415* Simultaneously apply the robust update factor and the
416* consistency scaling factor to C( I, L ) and C( K, L ).
417*
418 scal = ( scamin / swork( k, l ) ) * scaloc
419 IF( scal.NE.one ) THEN
420 DO jj = l1, l2-1
421 CALL csscal( k2-k1, scal, c( k1, jj ), 1)
422 END DO
423 ENDIF
424*
425 scal = ( scamin / swork( i, l ) ) * scaloc
426 IF( scal.NE.one ) THEN
427 DO ll = l1, l2-1
428 CALL csscal( i2-i1, scal, c( i1, ll ), 1)
429 END DO
430 ENDIF
431*
432* Record current scaling factor
433*
434 swork( k, l ) = scamin * scaloc
435 swork( i, l ) = scamin * scaloc
436*
437 CALL cgemm( 'N', 'N', i2-i1, l2-l1, k2-k1, -cone,
438 $ a( i1, k1 ), lda, c( k1, l1 ), ldc,
439 $ cone, c( i1, l1 ), ldc )
440*
441 END DO
442*
443 DO j = l + 1, nbb
444*
445* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J )
446*
447 j1 = (j - 1) * nb + 1
448 j2 = min( j * nb, n ) + 1
449*
450* Compute scaling factor to survive the linear update
451* simulating consistent scaling.
452*
453 cnrm = clange( 'I', k2-k1, j2-j1, c( k1, j1 ),
454 $ ldc, wnrm )
455 scamin = min( swork( k, j ), swork( k, l ) )
456 cnrm = cnrm * ( scamin / swork( k, j ) )
457 xnrm = xnrm * ( scamin / swork( k, l ) )
458 bnrm = swork(l, bwrk + j)
459 scaloc = slarmm( bnrm, xnrm, cnrm )
460 IF( scaloc * scamin .EQ. zero ) THEN
461* Use second scaling factor to prevent flushing to zero.
462 buf = buf*2.e0**exponent( scaloc )
463 DO jj = 1, nbb
464 DO ll = 1, nba
465 swork( ll, jj ) = min( bignum,
466 $ swork( ll, jj ) / 2.e0**exponent( scaloc ) )
467 END DO
468 END DO
469 scamin = scamin / 2.e0**exponent( scaloc )
470 scaloc = scaloc / 2.e0**exponent( scaloc )
471 END IF
472 cnrm = cnrm * scaloc
473 xnrm = xnrm * scaloc
474*
475* Simultaneously apply the robust update factor and the
476* consistency scaling factor to C( K, J ) and C( K, L).
477*
478 scal = ( scamin / swork( k, l ) ) * scaloc
479 IF( scal .NE. one ) THEN
480 DO ll = l1, l2-1
481 CALL csscal( k2-k1, scal, c( k1, ll ), 1 )
482 END DO
483 ENDIF
484*
485 scal = ( scamin / swork( k, j ) ) * scaloc
486 IF( scal .NE. one ) THEN
487 DO jj = j1, j2-1
488 CALL csscal( k2-k1, scal, c( k1, jj ), 1 )
489 END DO
490 ENDIF
491*
492* Record current scaling factor
493*
494 swork( k, l ) = scamin * scaloc
495 swork( k, j ) = scamin * scaloc
496*
497 CALL cgemm( 'N', 'N', k2-k1, j2-j1, l2-l1, -csgn,
498 $ c( k1, l1 ), ldc, b( l1, j1 ), ldb,
499 $ cone, c( k1, j1 ), ldc )
500 END DO
501 END DO
502 END DO
503 ELSE IF( .NOT.notrna .AND. notrnb ) THEN
504*
505* Solve A**H *X + ISGN*X*B = scale*C.
506*
507* The (K,L)th block of X is determined starting from
508* upper-left corner column by column by
509*
510* A(K,K)**H*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)
511*
512* Where
513* K-1 L-1
514* R(K,L) = SUM [A(I,K)**H*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)]
515* I=1 J=1
516*
517* Start loop over block rows (index = K) and block columns (index = L)
518*
519 DO k = 1, nba
520*
521* K1: row index of the first row in X( K, L )
522* K2: row index of the first row in X( K+1, L )
523* so the K2 - K1 is the column count of the block X( K, L )
524*
525 k1 = (k - 1) * nb + 1
526 k2 = min( k * nb, m ) + 1
527 DO l = 1, nbb
528*
529* L1: column index of the first column in X( K, L )
530* L2: column index of the first column in X( K, L + 1)
531* so that L2 - L1 is the row count of the block X( K, L )
532*
533 l1 = (l - 1) * nb + 1
534 l2 = min( l * nb, n ) + 1
535*
536 CALL ctrsyl( trana, tranb, isgn, k2-k1, l2-l1,
537 $ a( k1, k1 ), lda,
538 $ b( l1, l1 ), ldb,
539 $ c( k1, l1 ), ldc, scaloc, iinfo )
540 info = max( info, iinfo )
541*
542 IF( scaloc * swork( k, l ) .EQ. zero ) THEN
543 IF( scaloc .EQ. zero ) THEN
544* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1)
545* is larger than the product of BIGNUM**2 and cannot be
546* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1).
547* Mark the computation as pointless.
548 buf = zero
549 ELSE
550* Use second scaling factor to prevent flushing to zero.
551 buf = buf*2.e0**exponent( scaloc )
552 END IF
553 DO jj = 1, nbb
554 DO ll = 1, nba
555* Bound by BIGNUM to not introduce Inf. The value
556* is irrelevant; corresponding entries of the
557* solution will be flushed in consistency scaling.
558 swork( ll, jj ) = min( bignum,
559 $ swork( ll, jj ) / 2.e0**exponent( scaloc ) )
560 END DO
561 END DO
562 END IF
563 swork( k, l ) = scaloc * swork( k, l )
564 xnrm = clange( 'I', k2-k1, l2-l1, c( k1, l1 ), ldc,
565 $ wnrm )
566*
567 DO i = k + 1, nba
568*
569* C( I, L ) := C( I, L ) - A( K, I )**H * C( K, L )
570*
571 i1 = (i - 1) * nb + 1
572 i2 = min( i * nb, m ) + 1
573*
574* Compute scaling factor to survive the linear update
575* simulating consistent scaling.
576*
577 cnrm = clange( 'I', i2-i1, l2-l1, c( i1, l1 ),
578 $ ldc, wnrm )
579 scamin = min( swork( i, l ), swork( k, l ) )
580 cnrm = cnrm * ( scamin / swork( i, l ) )
581 xnrm = xnrm * ( scamin / swork( k, l ) )
582 anrm = swork( i, awrk + k )
583 scaloc = slarmm( anrm, xnrm, cnrm )
584 IF( scaloc * scamin .EQ. zero ) THEN
585* Use second scaling factor to prevent flushing to zero.
586 buf = buf*2.e0**exponent( scaloc )
587 DO jj = 1, nbb
588 DO ll = 1, nba
589 swork( ll, jj ) = min( bignum,
590 $ swork( ll, jj ) / 2.e0**exponent( scaloc ) )
591 END DO
592 END DO
593 scamin = scamin / 2.e0**exponent( scaloc )
594 scaloc = scaloc / 2.e0**exponent( scaloc )
595 END IF
596 cnrm = cnrm * scaloc
597 xnrm = xnrm * scaloc
598*
599* Simultaneously apply the robust update factor and the
600* consistency scaling factor to to C( I, L ) and C( K, L).
601*
602 scal = ( scamin / swork( k, l ) ) * scaloc
603 IF( scal .NE. one ) THEN
604 DO ll = l1, l2-1
605 CALL csscal( k2-k1, scal, c( k1, ll ), 1 )
606 END DO
607 ENDIF
608*
609 scal = ( scamin / swork( i, l ) ) * scaloc
610 IF( scal .NE. one ) THEN
611 DO ll = l1, l2-1
612 CALL csscal( i2-i1, scal, c( i1, ll ), 1 )
613 END DO
614 ENDIF
615*
616* Record current scaling factor
617*
618 swork( k, l ) = scamin * scaloc
619 swork( i, l ) = scamin * scaloc
620*
621 CALL cgemm( 'C', 'N', i2-i1, l2-l1, k2-k1, -cone,
622 $ a( k1, i1 ), lda, c( k1, l1 ), ldc,
623 $ cone, c( i1, l1 ), ldc )
624 END DO
625*
626 DO j = l + 1, nbb
627*
628* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J )
629*
630 j1 = (j - 1) * nb + 1
631 j2 = min( j * nb, n ) + 1
632*
633* Compute scaling factor to survive the linear update
634* simulating consistent scaling.
635*
636 cnrm = clange( 'I', k2-k1, j2-j1, c( k1, j1 ),
637 $ ldc, wnrm )
638 scamin = min( swork( k, j ), swork( k, l ) )
639 cnrm = cnrm * ( scamin / swork( k, j ) )
640 xnrm = xnrm * ( scamin / swork( k, l ) )
641 bnrm = swork( l, bwrk + j )
642 scaloc = slarmm( bnrm, xnrm, cnrm )
643 IF( scaloc * scamin .EQ. zero ) THEN
644* Use second scaling factor to prevent flushing to zero.
645 buf = buf*2.e0**exponent( scaloc )
646 DO jj = 1, nbb
647 DO ll = 1, nba
648 swork( ll, jj ) = min( bignum,
649 $ swork( ll, jj ) / 2.e0**exponent( scaloc ) )
650 END DO
651 END DO
652 scamin = scamin / 2.e0**exponent( scaloc )
653 scaloc = scaloc / 2.e0**exponent( scaloc )
654 END IF
655 cnrm = cnrm * scaloc
656 xnrm = xnrm * scaloc
657*
658* Simultaneously apply the robust update factor and the
659* consistency scaling factor to to C( K, J ) and C( K, L).
660*
661 scal = ( scamin / swork( k, l ) ) * scaloc
662 IF( scal .NE. one ) THEN
663 DO ll = l1, l2-1
664 CALL csscal( k2-k1, scal, c( k1, ll ), 1 )
665 END DO
666 ENDIF
667*
668 scal = ( scamin / swork( k, j ) ) * scaloc
669 IF( scal .NE. one ) THEN
670 DO jj = j1, j2-1
671 CALL csscal( k2-k1, scal, c( k1, jj ), 1 )
672 END DO
673 ENDIF
674*
675* Record current scaling factor
676*
677 swork( k, l ) = scamin * scaloc
678 swork( k, j ) = scamin * scaloc
679*
680 CALL cgemm( 'N', 'N', k2-k1, j2-j1, l2-l1, -csgn,
681 $ c( k1, l1 ), ldc, b( l1, j1 ), ldb,
682 $ cone, c( k1, j1 ), ldc )
683 END DO
684 END DO
685 END DO
686 ELSE IF( .NOT.notrna .AND. .NOT.notrnb ) THEN
687*
688* Solve A**H *X + ISGN*X*B**H = scale*C.
689*
690* The (K,L)th block of X is determined starting from
691* top-right corner column by column by
692*
693* A(K,K)**H*X(K,L) + ISGN*X(K,L)*B(L,L)**H = C(K,L) - R(K,L)
694*
695* Where
696* K-1 N
697* R(K,L) = SUM [A(I,K)**H*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**H].
698* I=1 J=L+1
699*
700* Start loop over block rows (index = K) and block columns (index = L)
701*
702 DO k = 1, nba
703*
704* K1: row index of the first row in X( K, L )
705* K2: row index of the first row in X( K+1, L )
706* so the K2 - K1 is the column count of the block X( K, L )
707*
708 k1 = (k - 1) * nb + 1
709 k2 = min( k * nb, m ) + 1
710 DO l = nbb, 1, -1
711*
712* L1: column index of the first column in X( K, L )
713* L2: column index of the first column in X( K, L + 1)
714* so that L2 - L1 is the row count of the block X( K, L )
715*
716 l1 = (l - 1) * nb + 1
717 l2 = min( l * nb, n ) + 1
718*
719 CALL ctrsyl( trana, tranb, isgn, k2-k1, l2-l1,
720 $ a( k1, k1 ), lda,
721 $ b( l1, l1 ), ldb,
722 $ c( k1, l1 ), ldc, scaloc, iinfo )
723 info = max( info, iinfo )
724*
725 IF( scaloc * swork( k, l ) .EQ. zero ) THEN
726 IF( scaloc .EQ. zero ) THEN
727* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1)
728* is larger than the product of BIGNUM**2 and cannot be
729* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1).
730* Mark the computation as pointless.
731 buf = zero
732 ELSE
733* Use second scaling factor to prevent flushing to zero.
734 buf = buf*2.e0**exponent( scaloc )
735 END IF
736 DO jj = 1, nbb
737 DO ll = 1, nba
738* Bound by BIGNUM to not introduce Inf. The value
739* is irrelevant; corresponding entries of the
740* solution will be flushed in consistency scaling.
741 swork( ll, jj ) = min( bignum,
742 $ swork( ll, jj ) / 2.e0**exponent( scaloc ) )
743 END DO
744 END DO
745 END IF
746 swork( k, l ) = scaloc * swork( k, l )
747 xnrm = clange( 'I', k2-k1, l2-l1, c( k1, l1 ), ldc,
748 $ wnrm )
749*
750 DO i = k + 1, nba
751*
752* C( I, L ) := C( I, L ) - A( K, I )**H * C( K, L )
753*
754 i1 = (i - 1) * nb + 1
755 i2 = min( i * nb, m ) + 1
756*
757* Compute scaling factor to survive the linear update
758* simulating consistent scaling.
759*
760 cnrm = clange( 'I', i2-i1, l2-l1, c( i1, l1 ),
761 $ ldc, wnrm )
762 scamin = min( swork( i, l ), swork( k, l ) )
763 cnrm = cnrm * ( scamin / swork( i, l ) )
764 xnrm = xnrm * ( scamin / swork( k, l ) )
765 anrm = swork( i, awrk + k )
766 scaloc = slarmm( anrm, xnrm, cnrm )
767 IF( scaloc * scamin .EQ. zero ) THEN
768* Use second scaling factor to prevent flushing to zero.
769 buf = buf*2.e0**exponent( scaloc )
770 DO jj = 1, nbb
771 DO ll = 1, nba
772 swork( ll, jj ) = min( bignum,
773 $ swork( ll, jj ) / 2.e0**exponent( scaloc ) )
774 END DO
775 END DO
776 scamin = scamin / 2.e0**exponent( scaloc )
777 scaloc = scaloc / 2.e0**exponent( scaloc )
778 END IF
779 cnrm = cnrm * scaloc
780 xnrm = xnrm * scaloc
781*
782* Simultaneously apply the robust update factor and the
783* consistency scaling factor to C( I, L ) and C( K, L).
784*
785 scal = ( scamin / swork( k, l ) ) * scaloc
786 IF( scal .NE. one ) THEN
787 DO ll = l1, l2-1
788 CALL csscal( k2-k1, scal, c( k1, ll ), 1 )
789 END DO
790 ENDIF
791*
792 scal = ( scamin / swork( i, l ) ) * scaloc
793 IF( scal .NE. one ) THEN
794 DO ll = l1, l2-1
795 CALL csscal( i2-i1, scal, c( i1, ll ), 1 )
796 END DO
797 ENDIF
798*
799* Record current scaling factor
800*
801 swork( k, l ) = scamin * scaloc
802 swork( i, l ) = scamin * scaloc
803*
804 CALL cgemm( 'C', 'N', i2-i1, l2-l1, k2-k1, -cone,
805 $ a( k1, i1 ), lda, c( k1, l1 ), ldc,
806 $ cone, c( i1, l1 ), ldc )
807 END DO
808*
809 DO j = 1, l - 1
810*
811* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**H
812*
813 j1 = (j - 1) * nb + 1
814 j2 = min( j * nb, n ) + 1
815*
816* Compute scaling factor to survive the linear update
817* simulating consistent scaling.
818*
819 cnrm = clange( 'I', k2-k1, j2-j1, c( k1, j1 ),
820 $ ldc, wnrm )
821 scamin = min( swork( k, j ), swork( k, l ) )
822 cnrm = cnrm * ( scamin / swork( k, j ) )
823 xnrm = xnrm * ( scamin / swork( k, l ) )
824 bnrm = swork( l, bwrk + j )
825 scaloc = slarmm( bnrm, xnrm, cnrm )
826 IF( scaloc * scamin .EQ. zero ) THEN
827* Use second scaling factor to prevent flushing to zero.
828 buf = buf*2.e0**exponent( scaloc )
829 DO jj = 1, nbb
830 DO ll = 1, nba
831 swork( ll, jj ) = min( bignum,
832 $ swork( ll, jj ) / 2.e0**exponent( scaloc ) )
833 END DO
834 END DO
835 scamin = scamin / 2.e0**exponent( scaloc )
836 scaloc = scaloc / 2.e0**exponent( scaloc )
837 END IF
838 cnrm = cnrm * scaloc
839 xnrm = xnrm * scaloc
840*
841* Simultaneously apply the robust update factor and the
842* consistency scaling factor to C( K, J ) and C( K, L).
843*
844 scal = ( scamin / swork( k, l ) ) * scaloc
845 IF( scal .NE. one ) THEN
846 DO ll = l1, l2-1
847 CALL csscal( k2-k1, scal, c( k1, ll ), 1)
848 END DO
849 ENDIF
850*
851 scal = ( scamin / swork( k, j ) ) * scaloc
852 IF( scal .NE. one ) THEN
853 DO jj = j1, j2-1
854 CALL csscal( k2-k1, scal, c( k1, jj ), 1 )
855 END DO
856 ENDIF
857*
858* Record current scaling factor
859*
860 swork( k, l ) = scamin * scaloc
861 swork( k, j ) = scamin * scaloc
862*
863 CALL cgemm( 'N', 'C', k2-k1, j2-j1, l2-l1, -csgn,
864 $ c( k1, l1 ), ldc, b( j1, l1 ), ldb,
865 $ cone, c( k1, j1 ), ldc )
866 END DO
867 END DO
868 END DO
869 ELSE IF( notrna .AND. .NOT.notrnb ) THEN
870*
871* Solve A*X + ISGN*X*B**H = scale*C.
872*
873* The (K,L)th block of X is determined starting from
874* bottom-right corner column by column by
875*
876* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)**H = C(K,L) - R(K,L)
877*
878* Where
879* M N
880* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**H].
881* I=K+1 J=L+1
882*
883* Start loop over block rows (index = K) and block columns (index = L)
884*
885 DO k = nba, 1, -1
886*
887* K1: row index of the first row in X( K, L )
888* K2: row index of the first row in X( K+1, L )
889* so the K2 - K1 is the column count of the block X( K, L )
890*
891 k1 = (k - 1) * nb + 1
892 k2 = min( k * nb, m ) + 1
893 DO l = nbb, 1, -1
894*
895* L1: column index of the first column in X( K, L )
896* L2: column index of the first column in X( K, L + 1)
897* so that L2 - L1 is the row count of the block X( K, L )
898*
899 l1 = (l - 1) * nb + 1
900 l2 = min( l * nb, n ) + 1
901*
902 CALL ctrsyl( trana, tranb, isgn, k2-k1, l2-l1,
903 $ a( k1, k1 ), lda,
904 $ b( l1, l1 ), ldb,
905 $ c( k1, l1 ), ldc, scaloc, iinfo )
906 info = max( info, iinfo )
907*
908 IF( scaloc * swork( k, l ) .EQ. zero ) THEN
909 IF( scaloc .EQ. zero ) THEN
910* The magnitude of the largest entry of X(K1:K2-1, L1:L2-1)
911* is larger than the product of BIGNUM**2 and cannot be
912* represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1).
913* Mark the computation as pointless.
914 buf = zero
915 ELSE
916* Use second scaling factor to prevent flushing to zero.
917 buf = buf*2.e0**exponent( scaloc )
918 END IF
919 DO jj = 1, nbb
920 DO ll = 1, nba
921* Bound by BIGNUM to not introduce Inf. The value
922* is irrelevant; corresponding entries of the
923* solution will be flushed in consistency scaling.
924 swork( ll, jj ) = min( bignum,
925 $ swork( ll, jj ) / 2.e0**exponent( scaloc ) )
926 END DO
927 END DO
928 END IF
929 swork( k, l ) = scaloc * swork( k, l )
930 xnrm = clange( 'I', k2-k1, l2-l1, c( k1, l1 ), ldc,
931 $ wnrm )
932*
933 DO i = 1, k - 1
934*
935* C( I, L ) := C( I, L ) - A( I, K ) * C( K, L )
936*
937 i1 = (i - 1) * nb + 1
938 i2 = min( i * nb, m ) + 1
939*
940* Compute scaling factor to survive the linear update
941* simulating consistent scaling.
942*
943 cnrm = clange( 'I', i2-i1, l2-l1, c( i1, l1 ),
944 $ ldc, wnrm )
945 scamin = min( swork( i, l ), swork( k, l ) )
946 cnrm = cnrm * ( scamin / swork( i, l ) )
947 xnrm = xnrm * ( scamin / swork( k, l ) )
948 anrm = swork( i, awrk + k )
949 scaloc = slarmm( anrm, xnrm, cnrm )
950 IF( scaloc * scamin .EQ. zero ) THEN
951* Use second scaling factor to prevent flushing to zero.
952 buf = buf*2.e0**exponent( scaloc )
953 DO jj = 1, nbb
954 DO ll = 1, nba
955 swork( ll, jj ) = min( bignum,
956 $ swork( ll, jj ) / 2.e0**exponent( scaloc ) )
957 END DO
958 END DO
959 scamin = scamin / 2.e0**exponent( scaloc )
960 scaloc = scaloc / 2.e0**exponent( scaloc )
961 END IF
962 cnrm = cnrm * scaloc
963 xnrm = xnrm * scaloc
964*
965* Simultaneously apply the robust update factor and the
966* consistency scaling factor to C( I, L ) and C( K, L).
967*
968 scal = ( scamin / swork( k, l ) ) * scaloc
969 IF( scal .NE. one ) THEN
970 DO ll = l1, l2-1
971 CALL csscal( k2-k1, scal, c( k1, ll ), 1 )
972 END DO
973 ENDIF
974*
975 scal = ( scamin / swork( i, l ) ) * scaloc
976 IF( scal .NE. one ) THEN
977 DO ll = l1, l2-1
978 CALL csscal( i2-i1, scal, c( i1, ll ), 1 )
979 END DO
980 ENDIF
981*
982* Record current scaling factor
983*
984 swork( k, l ) = scamin * scaloc
985 swork( i, l ) = scamin * scaloc
986*
987 CALL cgemm( 'N', 'N', i2-i1, l2-l1, k2-k1, -cone,
988 $ a( i1, k1 ), lda, c( k1, l1 ), ldc,
989 $ cone, c( i1, l1 ), ldc )
990*
991 END DO
992*
993 DO j = 1, l - 1
994*
995* C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**H
996*
997 j1 = (j - 1) * nb + 1
998 j2 = min( j * nb, n ) + 1
999*
1000* Compute scaling factor to survive the linear update
1001* simulating consistent scaling.
1002*
1003 cnrm = clange( 'I', k2-k1, j2-j1, c( k1, j1 ),
1004 $ ldc, wnrm )
1005 scamin = min( swork( k, j ), swork( k, l ) )
1006 cnrm = cnrm * ( scamin / swork( k, j ) )
1007 xnrm = xnrm * ( scamin / swork( k, l ) )
1008 bnrm = swork( l, bwrk + j )
1009 scaloc = slarmm( bnrm, xnrm, cnrm )
1010 IF( scaloc * scamin .EQ. zero ) THEN
1011* Use second scaling factor to prevent flushing to zero.
1012 buf = buf*2.e0**exponent( scaloc )
1013 DO jj = 1, nbb
1014 DO ll = 1, nba
1015 swork( ll, jj ) = min( bignum,
1016 $ swork( ll, jj ) / 2.e0**exponent( scaloc ) )
1017 END DO
1018 END DO
1019 scamin = scamin / 2.e0**exponent( scaloc )
1020 scaloc = scaloc / 2.e0**exponent( scaloc )
1021 END IF
1022 cnrm = cnrm * scaloc
1023 xnrm = xnrm * scaloc
1024*
1025* Simultaneously apply the robust update factor and the
1026* consistency scaling factor to C( K, J ) and C( K, L).
1027*
1028 scal = ( scamin / swork( k, l ) ) * scaloc
1029 IF( scal .NE. one ) THEN
1030 DO jj = l1, l2-1
1031 CALL csscal( k2-k1, scal, c( k1, jj ), 1 )
1032 END DO
1033 ENDIF
1034*
1035 scal = ( scamin / swork( k, j ) ) * scaloc
1036 IF( scal .NE. one ) THEN
1037 DO jj = j1, j2-1
1038 CALL csscal( k2-k1, scal, c( k1, jj ), 1 )
1039 END DO
1040 ENDIF
1041*
1042* Record current scaling factor
1043*
1044 swork( k, l ) = scamin * scaloc
1045 swork( k, j ) = scamin * scaloc
1046*
1047 CALL cgemm( 'N', 'C', k2-k1, j2-j1, l2-l1, -csgn,
1048 $ c( k1, l1 ), ldc, b( j1, l1 ), ldb,
1049 $ cone, c( k1, j1 ), ldc )
1050 END DO
1051 END DO
1052 END DO
1053*
1054 END IF
1055*
1056* Reduce local scaling factors
1057*
1058 scale = swork( 1, 1 )
1059 DO k = 1, nba
1060 DO l = 1, nbb
1061 scale = min( scale, swork( k, l ) )
1062 END DO
1063 END DO
1064 IF( scale .EQ. zero ) THEN
1065*
1066* The magnitude of the largest entry of the solution is larger
1067* than the product of BIGNUM**2 and cannot be represented in the
1068* form (1/SCALE)*X if SCALE is REAL. Set SCALE to
1069* zero and give up.
1070*
1071 swork(1,1) = max( nba, nbb )
1072 swork(2,1) = 2 * nbb + nba
1073 RETURN
1074 END IF
1075*
1076* Realize consistent scaling
1077*
1078 DO k = 1, nba
1079 k1 = (k - 1) * nb + 1
1080 k2 = min( k * nb, m ) + 1
1081 DO l = 1, nbb
1082 l1 = (l - 1) * nb + 1
1083 l2 = min( l * nb, n ) + 1
1084 scal = scale / swork( k, l )
1085 IF( scal .NE. one ) THEN
1086 DO ll = l1, l2-1
1087 CALL csscal( k2-k1, scal, c( k1, ll ), 1 )
1088 END DO
1089 ENDIF
1090 END DO
1091 END DO
1092*
1093 IF( buf .NE. one .AND. buf.GT.zero ) THEN
1094*
1095* Decrease SCALE as much as possible.
1096*
1097 scaloc = min( scale / smlnum, one / buf )
1098 buf = buf * scaloc
1099 scale = scale / scaloc
1100 END IF
1101*
1102 IF( buf.NE.one .AND. buf.GT.zero ) THEN
1103*
1104* In case of overly aggressive scaling during the computation,
1105* flushing of the global scale factor may be prevented by
1106* undoing some of the scaling. This step is to ensure that
1107* this routine flushes only scale factors that TRSYL also
1108* flushes and be usable as a drop-in replacement.
1109*
1110* How much can the normwise largest entry be upscaled?
1111*
1112 scal = max( abs( real( c( 1, 1 ) ) ),
1113 $ abs( aimag( c( 1, 1 ) ) ) )
1114 DO k = 1, m
1115 DO l = 1, n
1116 scal = max( scal, abs( real( c( k, l ) ) ),
1117 $ abs( aimag( c( k, l ) ) ) )
1118 END DO
1119 END DO
1120*
1121* Increase BUF as close to 1 as possible and apply scaling.
1122*
1123 scaloc = min( bignum / scal, one / buf )
1124 buf = buf * scaloc
1125 CALL clascl( 'G', -1, -1, one, scaloc, m, n, c, ldc, iinfo )
1126 END IF
1127*
1128* Combine with buffer scaling factor. SCALE will be flushed if
1129* BUF is less than one here.
1130*
1131 scale = scale * buf
1132*
1133* Restore workspace dimensions
1134*
1135 swork(1,1) = max( nba, nbb )
1136 swork(2,1) = 2 * nbb + nba
1137*
1138 RETURN
1139*
1140* End of CTRSYL3
1141*
1142 END
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine cgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
CGEMM
Definition cgemm.f:188
subroutine clascl(type, kl, ku, cfrom, cto, m, n, a, lda, info)
CLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition clascl.f:143
subroutine csscal(n, sa, cx, incx)
CSSCAL
Definition csscal.f:78
subroutine ctrsyl3(trana, tranb, isgn, m, n, a, lda, b, ldb, c, ldc, scale, swork, ldswork, info)
CTRSYL3
Definition ctrsyl3.f:156
subroutine ctrsyl(trana, tranb, isgn, m, n, a, lda, b, ldb, c, ldc, scale, info)
CTRSYL
Definition ctrsyl.f:157