SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
All Classes Files Functions Variables Typedefs Macros
pzqrdriver.f
Go to the documentation of this file.
1 PROGRAM pzqrdriver
2*
3* -- ScaLAPACK testing driver (version 1.7) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* May 28, 2001
7*
8* Purpose
9* =======
10*
11* PZQRDRIVER is the main test program for the COMPLEX*16
12* SCALAPACK QR factorization routines. This test driver performs a QR
13* QL, LQ, RQ, QP (QR factorization with column pivoting) or TZ
14* (complete unitary factorization) factorization and checks the
15* results.
16*
17* The program must be driven by a short data file. An annotated
18* example of a data file can be obtained by deleting the first 3
19* characters from the following 16 lines:
20* 'ScaLAPACK QR factorizations input file'
21* 'PVM machine'
22* 'QR.out' output file name (if any)
23* 6 device out
24* 6 number of factorizations
25* 'QR' 'QL' 'LQ' 'RQ' 'QP' 'TZ' factorization: QR, QL, LQ, RQ, QP, TZ
26* 4 number of problems sizes
27* 55 17 31 201 values of M
28* 5 71 31 201 values of N
29* 3 number of MB's and NB's
30* 4 3 5 values of MB
31* 4 7 3 values of NB
32* 7 number of process grids (ordered P & Q)
33* 1 2 1 4 2 3 8 values of P
34* 7 2 4 1 3 2 1 values of Q
35* 1.0 threshold
36*
37* Internal Parameters
38* ===================
39*
40* TOTMEM INTEGER, default = 2000000
41* TOTMEM is a machine-specific parameter indicating the
42* maximum amount of available memory in bytes.
43* The user should customize TOTMEM to his platform. Remember
44* to leave room in memory for the operating system, the BLACS
45* buffer, etc. For example, on a system with 8 MB of memory
46* per process (e.g., one processor on an Intel iPSC/860), the
47* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
48* code, BLACS buffer, etc). However, for PVM, we usually set
49* TOTMEM = 2000000. Some experimenting with the maximum value
50* of TOTMEM may be required.
51*
52* INTGSZ INTEGER, default = 4 bytes.
53* DBLESZ INTEGER, default = 8 bytes.
54* ZPLXSZ INTEGER, default = 16 bytes.
55* INTGSZ, DBLESZ and ZPLXSZ indicate the length in bytes on
56* the given platform for an integer, a double precision real
57* and a double precision complex.
58* MEM COMPLEX*16 array, dimension ( TOTMEM / ZPLXSZ )
59*
60* All arrays used by SCALAPACK routines are allocated from
61* this array and referenced by pointers. The integer IPA,
62* for example, is a pointer to the starting element of MEM for
63* the matrix A.
64*
65* =====================================================================
66*
67* .. Parameters ..
68 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
69 $ lld_, mb_, m_, nb_, n_, rsrc_
70 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
71 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
72 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
73 INTEGER dblesz, intgsz, memsiz, ntests, totmem, zplxsz
74 COMPLEX*16 padval
75 parameter( dblesz = 8, intgsz = 4, totmem = 2000000,
76 $ zplxsz = 16, memsiz = totmem / zplxsz,
77 $ ntests = 20,
78 $ padval = ( -9923.0d+0, -9923.0d+0 ) )
79* ..
80* .. Local Scalars ..
81 CHARACTER*2 fact
82 CHARACTER*6 passed
83 CHARACTER*7 rout
84 CHARACTER*8 routchk
85 CHARACTER*80 outfile
86 LOGICAL check
87 INTEGER i, iam, iaseed, ictxt, imidpad, info, ipa,
88 $ ipostpad, ippiv, iprepad, iptau, iprw, ipw, j,
89 $ k, kfail, kpass, kskip, ktests, l, lipiv,
90 $ lrwork, ltau, lwork, m, maxmn, mb, minmn, mnp,
91 $ mnq, mp, mycol, myrow, n, nb, nfact, ngrids,
92 $ nmat, nnb, nout, npcol, nprocs, nprow, nq,
93 $ workfct, workrfct, worksiz
94 REAL thresh
95 DOUBLE PRECISION anorm, fresid, nops, tmflops
96* ..
97* .. Arrays ..
98 CHARACTER*2 factor( ntests )
99 INTEGER desca( dlen_ ), ierr( 1 ), mbval( ntests ),
100 $ mval( ntests ), nbval( ntests ),
101 $ nval( ntests ), pval( ntests ), qval( ntests )
102 DOUBLE PRECISION ctime( 1 ), wtime( 1 )
103 COMPLEX*16 mem( memsiz )
104* ..
105* .. External Subroutines ..
106 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
107 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
108 $ blacs_pinfo, descinit, igsum2d, pzchekpad,
115* ..
116* .. External Functions ..
117 LOGICAL lsamen
118 INTEGER iceil, numroc
119 DOUBLE PRECISION pzlange
120 EXTERNAL iceil, lsamen, numroc, pzlange
121* ..
122* .. Intrinsic Functions ..
123 INTRINSIC dble, max, min
124* ..
125* .. Data Statements ..
126 DATA ktests, kpass, kfail, kskip /4*0/
127* ..
128* .. Executable Statements ..
129*
130* Get starting information
131*
132 CALL blacs_pinfo( iam, nprocs )
133 iaseed = 100
134 CALL pzqrinfo( outfile, nout, nfact, factor, ntests, nmat, mval,
135 $ ntests, nval, ntests, nnb, mbval, ntests, nbval,
136 $ ntests, ngrids, pval, ntests, qval, ntests,
137 $ thresh, mem, iam, nprocs )
138 check = ( thresh.GE.0.0e+0 )
139*
140* Loop over the different factorization types
141*
142 DO 40 i = 1, nfact
143*
144 fact = factor( i )
145*
146* Print headings
147*
148 IF( iam.EQ.0 ) THEN
149 WRITE( nout, fmt = * )
150 IF( lsamen( 2, fact, 'QR' ) ) THEN
151 rout = 'PZGEQRF'
152 routchk = 'PZGEQRRV'
153 WRITE( nout, fmt = 9986 )
154 $ 'QR factorization tests.'
155 ELSE IF( lsamen( 2, fact, 'QL' ) ) THEN
156 rout = 'PZGEQLF'
157 routchk = 'PZGEQLRV'
158 WRITE( nout, fmt = 9986 )
159 $ 'QL factorization tests.'
160 ELSE IF( lsamen( 2, fact, 'LQ' ) ) THEN
161 rout = 'PZGELQF'
162 routchk = 'PZGELQRV'
163 WRITE( nout, fmt = 9986 )
164 $ 'LQ factorization tests.'
165 ELSE IF( lsamen( 2, fact, 'RQ' ) ) THEN
166 rout = 'PZGERQF'
167 routchk = 'PZGERQRV'
168 WRITE( nout, fmt = 9986 )
169 $ 'RQ factorization tests.'
170 ELSE IF( lsamen( 2, fact, 'QP' ) ) THEN
171 rout = 'PZGEQPF'
172 routchk = 'PZGEQRRV'
173 WRITE( nout, fmt = 9986 )
174 $ 'QR factorization with column pivoting tests.'
175 ELSE IF( lsamen( 2, fact, 'TZ' ) ) THEN
176 rout = 'PZTZRZF'
177 routchk = 'PZTZRZRV'
178 WRITE( nout, fmt = 9986 )
179 $ 'Complete unitary factorization tests.'
180 END IF
181 WRITE( nout, fmt = * )
182 WRITE( nout, fmt = 9995 )
183 WRITE( nout, fmt = 9994 )
184 WRITE( nout, fmt = * )
185 END IF
186*
187* Loop over different process grids
188*
189 DO 30 j = 1, ngrids
190*
191 nprow = pval( j )
192 npcol = qval( j )
193*
194* Make sure grid information is correct
195*
196 ierr( 1 ) = 0
197 IF( nprow.LT.1 ) THEN
198 IF( iam.EQ.0 )
199 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
200 ierr( 1 ) = 1
201 ELSE IF( npcol.LT.1 ) THEN
202 IF( iam.EQ.0 )
203 $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
204 ierr( 1 ) = 1
205 ELSE IF( nprow*npcol.GT.nprocs ) THEN
206 IF( iam.EQ.0 )
207 $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
208 ierr( 1 ) = 1
209 END IF
210*
211 IF( ierr( 1 ).GT.0 ) THEN
212 IF( iam.EQ.0 )
213 $ WRITE( nout, fmt = 9997 ) 'grid'
214 kskip = kskip + 1
215 GO TO 30
216 END IF
217*
218* Define process grid
219*
220 CALL blacs_get( -1, 0, ictxt )
221 CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
222 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
223*
224* Go to bottom of loop if this case doesn't use my process
225*
226 IF( myrow.GE.nprow .OR. mycol.GE.npcol )
227 $ GO TO 30
228*
229 DO 20 k = 1, nmat
230*
231 m = mval( k )
232 n = nval( k )
233*
234* Make sure matrix information is correct
235*
236 ierr(1) = 0
237 IF( m.LT.1 ) THEN
238 IF( iam.EQ.0 )
239 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'M', m
240 ierr( 1 ) = 1
241 ELSE IF( n.LT.1 ) THEN
242 IF( iam.EQ.0 )
243 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
244 ierr( 1 ) = 1
245 END IF
246*
247* Make sure no one had error
248*
249 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
250*
251 IF( ierr( 1 ).GT.0 ) THEN
252 IF( iam.EQ.0 )
253 $ WRITE( nout, fmt = 9997 ) 'matrix'
254 kskip = kskip + 1
255 GO TO 20
256 END IF
257*
258* Loop over different blocking sizes
259*
260 DO 10 l = 1, nnb
261*
262 mb = mbval( l )
263 nb = nbval( l )
264*
265* Make sure mb is legal
266*
267 ierr( 1 ) = 0
268 IF( mb.LT.1 ) THEN
269 ierr( 1 ) = 1
270 IF( iam.EQ.0 )
271 $ WRITE( nout, fmt = 9999 ) 'MB', 'MB', mb
272 END IF
273*
274* Check all processes for an error
275*
276 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
277 $ 0 )
278*
279 IF( ierr( 1 ).GT.0 ) THEN
280 IF( iam.EQ.0 )
281 $ WRITE( nout, fmt = 9997 ) 'MB'
282 kskip = kskip + 1
283 GO TO 10
284 END IF
285*
286* Make sure nb is legal
287*
288 ierr( 1 ) = 0
289 IF( nb.LT.1 ) THEN
290 ierr( 1 ) = 1
291 IF( iam.EQ.0 )
292 $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
293 END IF
294*
295* Check all processes for an error
296*
297 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
298 $ 0 )
299*
300 IF( ierr( 1 ).GT.0 ) THEN
301 IF( iam.EQ.0 )
302 $ WRITE( nout, fmt = 9997 ) 'NB'
303 kskip = kskip + 1
304 GO TO 10
305 END IF
306*
307* Padding constants
308*
309 mp = numroc( m, mb, myrow, 0, nprow )
310 nq = numroc( n, nb, mycol, 0, npcol )
311 mnp = numroc( min( m, n ), mb, myrow, 0, nprow )
312 mnq = numroc( min( m, n ), nb, mycol, 0, npcol )
313 IF( check ) THEN
314 iprepad = max( mb, mp )
315 imidpad = nb
316 ipostpad = max( nb, nq )
317 ELSE
318 iprepad = 0
319 imidpad = 0
320 ipostpad = 0
321 END IF
322*
323* Initialize the array descriptor for the matrix A
324*
325 CALL descinit( desca, m, n, mb, nb, 0, 0, ictxt,
326 $ max( 1, mp ) + imidpad, ierr( 1 ) )
327*
328* Check all processes for an error
329*
330 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
331 $ 0 )
332*
333 IF( ierr( 1 ).LT.0 ) THEN
334 IF( iam.EQ.0 )
335 $ WRITE( nout, fmt = 9997 ) 'descriptor'
336 kskip = kskip + 1
337 GO TO 10
338 END IF
339*
340* Assign pointers into MEM for ScaLAPACK arrays, A is
341* allocated starting at position MEM( IPREPAD+1 )
342*
343 ipa = iprepad+1
344 iptau = ipa + desca( lld_ ) * nq + ipostpad + iprepad
345*
346 IF( lsamen( 2, fact, 'QR' ) ) THEN
347*
348 ltau = mnq
349 ipw = iptau + ltau + ipostpad + iprepad
350*
351* Figure the amount of workspace required by the QR
352* factorization
353*
354 lwork = desca( nb_ ) * ( mp + nq + desca( nb_ ) )
355 workfct = lwork + ipostpad
356 worksiz = workfct
357*
358 IF( check ) THEN
359*
360* Figure the amount of workspace required by the
361* checking routines PZLAFCHK, PZGEQRRV and
362* PZLANGE
363*
364 worksiz = lwork + mp*desca( nb_ ) + ipostpad
365*
366 END IF
367*
368 ELSE IF( lsamen( 2, fact, 'QL' ) ) THEN
369*
370 ltau = nq
371 ipw = iptau + ltau + ipostpad + iprepad
372*
373* Figure the amount of workspace required by the QL
374* factorization
375*
376 lwork = desca( nb_ ) * ( mp + nq + desca( nb_ ) )
377 workfct = lwork + ipostpad
378 worksiz = workfct
379*
380 IF( check ) THEN
381*
382* Figure the amount of workspace required by the
383* checking routines PZLAFCHK, PZGEQLRV and
384* PZLANGE
385*
386 worksiz = lwork + mp*desca( nb_ ) + ipostpad
387*
388 END IF
389*
390 ELSE IF( lsamen( 2, fact, 'LQ' ) ) THEN
391*
392 ltau = mnp
393 ipw = iptau + ltau + ipostpad + iprepad
394*
395* Figure the amount of workspace required by the LQ
396* factorization
397*
398 lwork = desca( mb_ ) * ( mp + nq + desca( mb_ ) )
399 workfct = lwork + ipostpad
400 worksiz = workfct
401*
402 IF( check ) THEN
403*
404* Figure the amount of workspace required by the
405* checking routines PZLAFCHK, PZGELQRV and
406* PZLANGE
407*
408 worksiz = lwork +
409 $ max( mp*desca( nb_ ), nq*desca( mb_ )
410 $ ) + ipostpad
411*
412 END IF
413*
414 ELSE IF( lsamen( 2, fact, 'RQ' ) ) THEN
415*
416 ltau = mp
417 ipw = iptau + ltau + ipostpad + iprepad
418*
419* Figure the amount of workspace required by the QR
420* factorization
421*
422 lwork = desca( mb_ ) * ( mp + nq + desca( mb_ ) )
423 workfct = lwork + ipostpad
424 worksiz = workfct
425*
426 IF( check ) THEN
427*
428* Figure the amount of workspace required by the
429* checking routines PZLAFCHK, PZGERQRV and
430* PZLANGE
431*
432 worksiz = lwork +
433 $ max( mp*desca( nb_ ), nq*desca( mb_ )
434 $ ) + ipostpad
435*
436 END IF
437*
438 ELSE IF( lsamen( 2, fact, 'QP' ) ) THEN
439*
440 ltau = mnq
441 ippiv = iptau + ltau + ipostpad + iprepad
442 lipiv = iceil( intgsz*nq, zplxsz )
443 ipw = ippiv + lipiv + ipostpad + iprepad
444*
445* Figure the amount of workspace required by the
446* factorization i.e from IPW on.
447*
448 lwork = max( 3, mp + max( 1, nq ) )
449 workfct = lwork + ipostpad
450 lrwork = max( 1, 2 * nq )
451 workrfct = iceil( lrwork*dblesz, zplxsz ) +
452 $ ipostpad
453 iprw = ipw + workfct + iprepad
454 worksiz = workfct + iprepad + workrfct
455*
456 IF( check ) THEN
457*
458* Figure the amount of workspace required by the
459* checking routines PZLAFCHK, PZGEQRRV,
460* PZLANGE.
461*
462 worksiz = max( worksiz - ipostpad,
463 $ desca( nb_ )*( 2*mp + nq + desca( nb_ ) ) ) +
464 $ ipostpad
465 END IF
466*
467 ELSE IF( lsamen( 2, fact, 'TZ' ) ) THEN
468*
469 ltau = mp
470 ipw = iptau + ltau + ipostpad + iprepad
471*
472* Figure the amount of workspace required by the TZ
473* factorization
474*
475 lwork = desca( mb_ ) * ( mp + nq + desca( mb_ ) )
476 workfct = lwork + ipostpad
477 worksiz = workfct
478*
479 IF( check ) THEN
480*
481* Figure the amount of workspace required by the
482* checking routines PZLAFCHK, PZTZRZRV and
483* PZLANGE
484*
485 worksiz = lwork +
486 $ max( mp*desca( nb_ ), nq*desca( mb_ )
487 $ ) + ipostpad
488*
489 END IF
490*
491 END IF
492*
493* Check for adequate memory for problem size
494*
495 ierr( 1 ) = 0
496 IF( ipw+worksiz.GT.memsiz ) THEN
497 IF( iam.EQ.0 )
498 $ WRITE( nout, fmt = 9996 )
499 $ fact // ' factorization',
500 $ ( ipw+worksiz )*zplxsz
501 ierr( 1 ) = 1
502 END IF
503*
504* Check all processes for an error
505*
506 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
507 $ 0 )
508*
509 IF( ierr( 1 ).GT.0 ) THEN
510 IF( iam.EQ.0 )
511 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
512 kskip = kskip + 1
513 GO TO 10
514 END IF
515*
516* Generate the matrix A
517*
518 CALL pzmatgen( ictxt, 'N', 'N', desca( m_ ),
519 $ desca( n_ ), desca( mb_ ),
520 $ desca( nb_ ), mem( ipa ),
521 $ desca( lld_ ), desca( rsrc_ ),
522 $ desca( csrc_ ), iaseed, 0, mp, 0, nq,
523 $ myrow, mycol, nprow, npcol )
524*
525* Need the Infinity of A for checking
526*
527 IF( check ) THEN
528 CALL pzfillpad( ictxt, mp, nq, mem( ipa-iprepad ),
529 $ desca( lld_ ), iprepad, ipostpad,
530 $ padval )
531 IF( lsamen( 2, fact, 'QP' ) ) THEN
532 CALL pzfillpad( ictxt, lipiv, 1,
533 $ mem( ippiv-iprepad ), lipiv,
534 $ iprepad, ipostpad, padval )
535 END IF
536 CALL pzfillpad( ictxt, ltau, 1,
537 $ mem( iptau-iprepad ), ltau,
538 $ iprepad, ipostpad, padval )
539 CALL pzfillpad( ictxt, worksiz-ipostpad, 1,
540 $ mem( ipw-iprepad ),
541 $ worksiz-ipostpad,
542 $ iprepad, ipostpad, padval )
543 anorm = pzlange( 'I', m, n, mem( ipa ), 1, 1,
544 $ desca, mem( ipw ) )
545 CALL pzchekpad( ictxt, 'PZLANGE', mp, nq,
546 $ mem( ipa-iprepad ), desca( lld_ ),
547 $ iprepad, ipostpad, padval )
548 CALL pzchekpad( ictxt, 'PZLANGE',
549 $ worksiz-ipostpad, 1,
550 $ mem( ipw-iprepad ),
551 $ worksiz-ipostpad, iprepad,
552 $ ipostpad, padval )
553 IF( lsamen( 2, fact, 'QP' ) ) THEN
554 CALL pzfillpad( ictxt, workrfct-ipostpad, 1,
555 $ mem( iprw-iprepad ),
556 $ workrfct-ipostpad,
557 $ iprepad, ipostpad, padval )
558 END IF
559 CALL pzfillpad( ictxt, workfct-ipostpad, 1,
560 $ mem( ipw-iprepad ),
561 $ workfct-ipostpad,
562 $ iprepad, ipostpad, padval )
563 END IF
564*
565 CALL slboot()
566 CALL blacs_barrier( ictxt, 'All' )
567*
568* Perform QR factorizations
569*
570 IF( lsamen( 2, fact, 'QR' ) ) THEN
571 CALL sltimer( 1 )
572 CALL pzgeqrf( m, n, mem( ipa ), 1, 1, desca,
573 $ mem( iptau ), mem( ipw ), lwork,
574 $ info )
575 CALL sltimer( 1 )
576 ELSE IF( lsamen( 2, fact, 'QL' ) ) THEN
577 CALL sltimer( 1 )
578 CALL pzgeqlf( m, n, mem( ipa ), 1, 1, desca,
579 $ mem( iptau ), mem( ipw ), lwork,
580 $ info )
581 CALL sltimer( 1 )
582 ELSE IF( lsamen( 2, fact, 'LQ' ) ) THEN
583 CALL sltimer( 1 )
584 CALL pzgelqf( m, n, mem( ipa ), 1, 1, desca,
585 $ mem( iptau ), mem( ipw ), lwork,
586 $ info )
587 CALL sltimer( 1 )
588 ELSE IF( lsamen( 2, fact, 'RQ' ) ) THEN
589 CALL sltimer( 1 )
590 CALL pzgerqf( m, n, mem( ipa ), 1, 1, desca,
591 $ mem( iptau ), mem( ipw ), lwork,
592 $ info )
593 CALL sltimer( 1 )
594 ELSE IF( lsamen( 2, fact, 'QP' ) ) THEN
595 CALL sltimer( 1 )
596 CALL pzgeqpf( m, n, mem( ipa ), 1, 1, desca,
597 $ mem( ippiv ), mem( iptau ),
598 $ mem( ipw ), lwork, mem( iprw ),
599 $ lrwork, info )
600 CALL sltimer( 1 )
601 ELSE IF( lsamen( 2, fact, 'TZ' ) ) THEN
602 CALL sltimer( 1 )
603 IF( n.GE.m )
604 $ CALL pztzrzf( m, n, mem( ipa ), 1, 1, desca,
605 $ mem( iptau ), mem( ipw ), lwork,
606 $ info )
607 CALL sltimer( 1 )
608 END IF
609*
610 IF( check ) THEN
611*
612* Check for memory overwrite in factorization
613*
614 CALL pzchekpad( ictxt, rout, mp, nq,
615 $ mem( ipa-iprepad ), desca( lld_ ),
616 $ iprepad, ipostpad, padval )
617 CALL pzchekpad( ictxt, rout, ltau, 1,
618 $ mem( iptau-iprepad ), ltau,
619 $ iprepad, ipostpad, padval )
620 IF( lsamen( 2, fact, 'QP' ) ) THEN
621 CALL pzchekpad( ictxt, rout, lipiv, 1,
622 $ mem( ippiv-iprepad ), lipiv,
623 $ iprepad, ipostpad, padval )
624 CALL pzchekpad( ictxt, rout, workrfct-ipostpad,
625 $ 1, mem( iprw-iprepad ),
626 $ workrfct-ipostpad,
627 $ iprepad, ipostpad, padval )
628 END IF
629 CALL pzchekpad( ictxt, rout, workfct-ipostpad, 1,
630 $ mem( ipw-iprepad ),
631 $ workfct-ipostpad, iprepad,
632 $ ipostpad, padval )
633 CALL pzfillpad( ictxt, worksiz-ipostpad, 1,
634 $ mem( ipw-iprepad ),
635 $ worksiz-ipostpad,
636 $ iprepad, ipostpad, padval )
637*
638 IF( lsamen( 2, fact, 'QR' ) ) THEN
639*
640* Compute residual = ||A-Q*R|| / (||A||*N*eps)
641*
642 CALL pzgeqrrv( m, n, mem( ipa ), 1, 1, desca,
643 $ mem( iptau ), mem( ipw ) )
644 CALL pzlafchk( 'No', 'No', m, n, mem( ipa ), 1,
645 $ 1, desca, iaseed, anorm, fresid,
646 $ mem( ipw ) )
647 ELSE IF( lsamen( 2, fact, 'QL' ) ) THEN
648*
649* Compute residual = ||A-Q*L|| / (||A||*N*eps)
650*
651 CALL pzgeqlrv( m, n, mem( ipa ), 1, 1, desca,
652 $ mem( iptau ), mem( ipw ) )
653 CALL pzlafchk( 'No', 'No', m, n, mem( ipa ), 1,
654 $ 1, desca, iaseed, anorm, fresid,
655 $ mem( ipw ) )
656 ELSE IF( lsamen( 2, fact, 'LQ' ) ) THEN
657*
658* Compute residual = ||A-L*Q|| / (||A||*N*eps)
659*
660 CALL pzgelqrv( m, n, mem( ipa ), 1, 1, desca,
661 $ mem( iptau ), mem( ipw ) )
662 CALL pzlafchk( 'No', 'No', m, n, mem( ipa ), 1,
663 $ 1, desca, iaseed, anorm, fresid,
664 $ mem( ipw ) )
665 ELSE IF( lsamen( 2, fact, 'RQ' ) ) THEN
666*
667* Compute residual = ||A-R*Q|| / (||A||*N*eps)
668*
669 CALL pzgerqrv( m, n, mem( ipa ), 1, 1, desca,
670 $ mem( iptau ), mem( ipw ) )
671 CALL pzlafchk( 'No', 'No', m, n, mem( ipa ), 1,
672 $ 1, desca, iaseed, anorm, fresid,
673 $ mem( ipw ) )
674 ELSE IF( lsamen( 2, fact, 'QP' ) ) THEN
675*
676* Compute residual = ||AP-Q*R|| / (||A||*N*eps)
677*
678 CALL pzgeqrrv( m, n, mem( ipa ), 1, 1, desca,
679 $ mem( iptau ), mem( ipw ) )
680 ELSE IF( lsamen( 2, fact, 'TZ' ) ) THEN
681*
682* Compute residual = ||A-T*Z|| / (||A||*N*eps)
683*
684 IF( n.GE.m ) THEN
685 CALL pztzrzrv( m, n, mem( ipa ), 1, 1, desca,
686 $ mem( iptau ), mem( ipw ) )
687 END IF
688 CALL pzlafchk( 'No', 'No', m, n, mem( ipa ), 1,
689 $ 1, desca, iaseed, anorm, fresid,
690 $ mem( ipw ) )
691 END IF
692*
693* Check for memory overwrite
694*
695 CALL pzchekpad( ictxt, routchk, mp, nq,
696 $ mem( ipa-iprepad ), desca( lld_ ),
697 $ iprepad, ipostpad, padval )
698 CALL pzchekpad( ictxt, routchk, ltau, 1,
699 $ mem( iptau-iprepad ), ltau,
700 $ iprepad, ipostpad, padval )
701 CALL pzchekpad( ictxt, routchk, worksiz-ipostpad,
702 $ 1, mem( ipw-iprepad ),
703 $ worksiz-ipostpad, iprepad,
704 $ ipostpad, padval )
705*
706 IF( lsamen( 2, fact, 'QP' ) ) THEN
707*
708 CALL pzqppiv( m, n, mem( ipa ), 1, 1, desca,
709 $ mem( ippiv ) )
710*
711* Check for memory overwrite
712*
713 CALL pzchekpad( ictxt, 'PZQPPIV', mp, nq,
714 $ mem( ipa-iprepad ),
715 $ desca( lld_ ),
716 $ iprepad, ipostpad, padval )
717 CALL pzchekpad( ictxt, 'PZQPPIV', lipiv, 1,
718 $ mem( ippiv-iprepad ), lipiv,
719 $ iprepad, ipostpad, padval )
720*
721 CALL pzlafchk( 'No', 'No', m, n, mem( ipa ), 1,
722 $ 1, desca, iaseed, anorm, fresid,
723 $ mem( ipw ) )
724*
725* Check for memory overwrite
726*
727 CALL pzchekpad( ictxt, 'PZLAFCHK', mp, nq,
728 $ mem( ipa-iprepad ),
729 $ desca( lld_ ),
730 $ iprepad, ipostpad, padval )
731 CALL pzchekpad( ictxt, 'PZLAFCHK',
732 $ worksiz-ipostpad, 1,
733 $ mem( ipw-iprepad ),
734 $ worksiz-ipostpad, iprepad,
735 $ ipostpad, padval )
736 END IF
737*
738* Test residual and detect NaN result
739*
740 IF( lsamen( 2, fact, 'TZ' ) .AND. n.LT.m ) THEN
741 kskip = kskip + 1
742 passed = 'BYPASS'
743 ELSE
744 IF( fresid.LE.thresh .AND.
745 $ (fresid-fresid).EQ.0.0d+0 ) THEN
746 kpass = kpass + 1
747 passed = 'PASSED'
748 ELSE
749 kfail = kfail + 1
750 passed = 'FAILED'
751 END IF
752 END IF
753*
754 ELSE
755*
756* Don't perform the checking, only timing
757*
758 kpass = kpass + 1
759 fresid = fresid - fresid
760 passed = 'BYPASS'
761*
762 END IF
763*
764* Gather maximum of all CPU and WALL clock timings
765*
766 CALL slcombine( ictxt, 'All', '>', 'W', 1, 1, wtime )
767 CALL slcombine( ictxt, 'All', '>', 'C', 1, 1, ctime )
768*
769* Print results
770*
771 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
772*
773 minmn = min( m, n )
774 maxmn = max( m, n )
775*
776 IF( lsamen( 2, fact, 'TZ' ) ) THEN
777 IF( m.GE.n ) THEN
778 nops = 0.0d+0
779 ELSE
780*
781* 9 ( M^2 N - M^3 ) + 13 M N - M^2 for
782* complete unitary factorization (M <= N).
783*
784 nops = 9.0d+0 * (
785 $ dble( n )*( dble( m )**2 ) -
786 $ dble( m )**3 ) +
787 $ 13.0d+0*dble( n )*dble( m ) -
788 $ dble( m )**2
789 END IF
790*
791 ELSE
792*
793* 8 M N^2 - 8/3 N^2 + 6 M N + 8 N^2 for QR type
794* factorization when M >= N.
795*
796 nops = 8.0d+0 * ( dble( minmn )**2 ) *
797 $ ( dble( maxmn )-dble( minmn ) / 3.0d+0 ) +
798 $ ( 6.0d+0 * dble( maxmn ) +
799 $ 8.0d+0 * dble( minmn ) ) *
800 $ dble( minmn )
801 END IF
802*
803* Print WALL time
804*
805 IF( wtime( 1 ).GT.0.0d+0 ) THEN
806 tmflops = nops / ( wtime( 1 ) * 1.0d+6 )
807 ELSE
808 tmflops = 0.0d+0
809 END IF
810 IF( wtime( 1 ).GE.0.0d+0 )
811 $ WRITE( nout, fmt = 9993 ) 'WALL', m, n, mb, nb,
812 $ nprow, npcol, wtime( 1 ), tmflops,
813 $ passed, fresid
814*
815* Print CPU time
816*
817 IF( ctime( 1 ).GT.0.0d+0 ) THEN
818 tmflops = nops / ( ctime( 1 ) * 1.0d+6 )
819 ELSE
820 tmflops = 0.0d+0
821 END IF
822 IF( ctime( 1 ).GE.0.0d+0 )
823 $ WRITE( nout, fmt = 9993 ) 'CPU ', m, n, mb, nb,
824 $ nprow, npcol, ctime( 1 ), tmflops,
825 $ passed, fresid
826*
827 END IF
828*
829 10 CONTINUE
830*
831 20 CONTINUE
832*
833 CALL blacs_gridexit( ictxt )
834*
835 30 CONTINUE
836*
837 40 CONTINUE
838*
839* Print out ending messages and close output file
840*
841 IF( iam.EQ.0 ) THEN
842 ktests = kpass + kfail + kskip
843 WRITE( nout, fmt = * )
844 WRITE( nout, fmt = 9992 ) ktests
845 IF( check ) THEN
846 WRITE( nout, fmt = 9991 ) kpass
847 WRITE( nout, fmt = 9989 ) kfail
848 ELSE
849 WRITE( nout, fmt = 9990 ) kpass
850 END IF
851 WRITE( nout, fmt = 9988 ) kskip
852 WRITE( nout, fmt = * )
853 WRITE( nout, fmt = * )
854 WRITE( nout, fmt = 9987 )
855 IF( nout.NE.6 .AND. nout.NE.0 )
856 $ CLOSE ( nout )
857 END IF
858*
859 CALL blacs_exit( 0 )
860*
861 9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
862 $ '; It should be at least 1' )
863 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
864 $ i4 )
865 9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
866 9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
867 $ i11 )
868 9995 FORMAT( 'TIME M N MB NB P Q Fact Time ',
869 $ ' MFLOPS CHECK Residual' )
870 9994 FORMAT( '---- ------ ------ --- --- ----- ----- --------- ',
871 $ '----------- ------ --------' )
872 9993 FORMAT( a4, 1x, i6, 1x, i6, 1x, i3, 1x, i3, 1x, i5, 1x, i5, 1x,
873 $ f9.2, 1x, f11.2, 1x, a6, 2x, g8.1 )
874 9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
875 9991 FORMAT( i5, ' tests completed and passed residual checks.' )
876 9990 FORMAT( i5, ' tests completed without checking.' )
877 9989 FORMAT( i5, ' tests completed and failed residual checks.' )
878 9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
879 9987 FORMAT( 'END OF TESTS.' )
880 9986 FORMAT( a )
881*
882 stop
883*
884* End of PZQRDRIVER
885*
886 END
887*
888 SUBROUTINE pzqppiv( M, N, A, IA, JA, DESCA, IPIV )
889*
890* -- ScaLAPACK routine (version 1.7) --
891* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
892* and University of California, Berkeley.
893* May 1, 1997
894*
895* .. Scalar Arguments ..
896 INTEGER IA, JA, M, N
897* ..
898* .. Array Arguments ..
899 INTEGER DESCA( * ), IPIV( * )
900 COMPLEX*16 A( * )
901* ..
902*
903* Purpose
904* =======
905*
906* PZQPPIV applies to sub( A ) = A(IA:IA+M-1,JA:JA+N-1) the pivots
907* returned by PZGEQPF in reverse order for checking purposes.
908*
909* Notes
910* =====
911*
912* Each global data object is described by an associated description
913* vector. This vector stores the information required to establish
914* the mapping between an object element and its corresponding process
915* and memory location.
916*
917* Let A be a generic term for any 2D block cyclicly distributed array.
918* Such a global array has an associated description vector DESCA.
919* In the following comments, the character _ should be read as
920* "of the global array".
921*
922* NOTATION STORED IN EXPLANATION
923* --------------- -------------- --------------------------------------
924* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
925* DTYPE_A = 1.
926* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
927* the BLACS process grid A is distribu-
928* ted over. The context itself is glo-
929* bal, but the handle (the integer
930* value) may vary.
931* M_A (global) DESCA( M_ ) The number of rows in the global
932* array A.
933* N_A (global) DESCA( N_ ) The number of columns in the global
934* array A.
935* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
936* the rows of the array.
937* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
938* the columns of the array.
939* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
940* row of the array A is distributed.
941* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
942* first column of the array A is
943* distributed.
944* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
945* array. LLD_A >= MAX(1,LOCr(M_A)).
946*
947* Let K be the number of rows or columns of a distributed matrix,
948* and assume that its process grid has dimension p x q.
949* LOCr( K ) denotes the number of elements of K that a process
950* would receive if K were distributed over the p processes of its
951* process column.
952* Similarly, LOCc( K ) denotes the number of elements of K that a
953* process would receive if K were distributed over the q processes of
954* its process row.
955* The values of LOCr() and LOCc() may be determined via a call to the
956* ScaLAPACK tool function, NUMROC:
957* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
958* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
959* An upper bound for these quantities may be computed by:
960* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
961* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
962*
963* Arguments
964* =========
965*
966* M (global input) INTEGER
967* The number of rows to be operated on, i.e. the number of rows
968* of the distributed submatrix sub( A ). M >= 0.
969*
970* N (global input) INTEGER
971* The number of columns to be operated on, i.e. the number of
972* columns of the distributed submatrix sub( A ). N >= 0.
973*
974* A (local input/local output) COMPLEX*16 pointer into the
975* local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
976* On entry, the local pieces of the M-by-N distributed matrix
977* sub( A ) which is to be permuted. On exit, the local pieces
978* of the distributed permuted submatrix sub( A ) * Inv( P ).
979*
980* IA (global input) INTEGER
981* The row index in the global array A indicating the first
982* row of sub( A ).
983*
984* JA (global input) INTEGER
985* The column index in the global array A indicating the
986* first column of sub( A ).
987*
988* DESCA (global and local input) INTEGER array of dimension DLEN_.
989* The array descriptor for the distributed matrix A.
990*
991* IPIV (local input) INTEGER array, dimension LOCc(JA+N-1).
992* On exit, if IPIV(I) = K, the local i-th column of sub( A )*P
993* was the global K-th column of sub( A ). IPIV is tied to the
994* distributed matrix A.
995*
996* =====================================================================
997*
998* .. Parameters ..
999 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
1000 $ LLD_, MB_, M_, NB_, N_, RSRC_
1001 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
1002 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
1003 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
1004* ..
1005* .. Local Scalars ..
1006 INTEGER IACOL, ICOFFA, ICTXT, IITMP, IPVT, IPCOL,
1007 $ IPROW, ITMP, J, JJ, JJA, KK, MYCOL, MYROW,
1008 $ NPCOL, NPROW, NQ
1009* ..
1010* .. External Subroutines ..
1011 EXTERNAL blacs_gridinfo, igebr2d, igebs2d, igerv2d,
1012 $ igesd2d, igamn2d, infog1l, pzswap
1013* ..
1014* .. External Functions ..
1015 INTEGER INDXL2G, NUMROC
1016 EXTERNAL indxl2g, numroc
1017* ..
1018* .. Intrinsic Functions ..
1019 INTRINSIC min, mod
1020* ..
1021* .. Executable Statements ..
1022*
1023* Get grid parameters
1024*
1025 ictxt = desca( ctxt_ )
1026 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
1027 CALL infog1l( ja, desca( nb_ ), npcol, mycol, desca( csrc_ ), jja,
1028 $ iacol )
1029 icoffa = mod( ja-1, desca( nb_ ) )
1030 nq = numroc( n+icoffa, desca( nb_ ), mycol, iacol, npcol )
1031 IF( mycol.EQ.iacol )
1032 $ nq = nq - icoffa
1033*
1034 DO 20 j = ja, ja+n-2
1035*
1036 ipvt = ja+n-1
1037 itmp = ja+n
1038*
1039* Find first the local minimum candidate for pivoting
1040*
1041 CALL infog1l( j, desca( nb_ ), npcol, mycol, desca( csrc_ ),
1042 $ jj, iacol )
1043 DO 10 kk = jj, jja+nq-1
1044 IF( ipiv( kk ).LT.ipvt )THEN
1045 iitmp = kk
1046 ipvt = ipiv( kk )
1047 END IF
1048 10 CONTINUE
1049*
1050* Find the global minimum pivot
1051*
1052 CALL igamn2d( ictxt, 'Rowwise', ' ', 1, 1, ipvt, 1, iprow,
1053 $ ipcol, 1, -1, mycol )
1054*
1055* Broadcast the corresponding index to the other process columns
1056*
1057 IF( mycol.EQ.ipcol ) THEN
1058 itmp = indxl2g( iitmp, desca( nb_ ), mycol, desca( csrc_ ),
1059 $ npcol )
1060 CALL igebs2d( ictxt, 'Rowwise', ' ', 1, 1, itmp, 1 )
1061 IF( ipcol.NE.iacol ) THEN
1062 CALL igerv2d( ictxt, 1, 1, ipiv( iitmp ), 1, myrow,
1063 $ iacol )
1064 ELSE
1065 IF( mycol.EQ.iacol )
1066 $ ipiv( iitmp ) = ipiv( jj )
1067 END IF
1068 ELSE
1069 CALL igebr2d( ictxt, 'Rowwise', ' ', 1, 1, itmp, 1, myrow,
1070 $ ipcol )
1071 IF( mycol.EQ.iacol .AND. ipcol.NE.iacol )
1072 $ CALL igesd2d( ictxt, 1, 1, ipiv( jj ), 1, myrow, ipcol )
1073 END IF
1074*
1075* Swap the columns of A
1076*
1077 CALL pzswap( m, a, ia, itmp, desca, 1, a, ia, j, desca, 1 )
1078*
1079 20 CONTINUE
1080*
1081* End of PZQPPIV
1082*
1083 END
subroutine pzlafchk(aform, diag, m, n, a, ia, ja, desca, iaseed, anorm, fresid, work)
Definition pzlafchk.f:3
subroutine pzmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition pzmatgen.f:4
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3
integer function iceil(inum, idenom)
Definition iceil.f:2
subroutine infog1l(gindx, nb, nprocs, myroc, isrcproc, lindx, rocsrc)
Definition infog1l.f:3
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
logical function lsamen(n, ca, cb)
Definition pblastst.f:1457
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pzchekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pzchekpad.f:3
subroutine pzfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition pzfillpad.f:2
subroutine pzgelqf(m, n, a, ia, ja, desca, tau, work, lwork, info)
Definition pzgelqf.f:3
subroutine pzgelqrv(m, n, a, ia, ja, desca, tau, work)
Definition pzgelqrv.f:2
subroutine pzgeqlf(m, n, a, ia, ja, desca, tau, work, lwork, info)
Definition pzgeqlf.f:3
subroutine pzgeqlrv(m, n, a, ia, ja, desca, tau, work)
Definition pzgeqlrv.f:2
subroutine pzgeqpf(m, n, a, ia, ja, desca, ipiv, tau, work, lwork, rwork, lrwork, info)
Definition pzgeqpf.f:3
subroutine pzgeqrf(m, n, a, ia, ja, desca, tau, work, lwork, info)
Definition pzgeqrf.f:3
subroutine pzgeqrrv(m, n, a, ia, ja, desca, tau, work)
Definition pzgeqrrv.f:2
subroutine pzgerqf(m, n, a, ia, ja, desca, tau, work, lwork, info)
Definition pzgerqf.f:3
subroutine pzgerqrv(m, n, a, ia, ja, desca, tau, work)
Definition pzgerqrv.f:2
double precision function pzlange(norm, m, n, a, ia, ja, desca, work)
Definition pzlange.f:3
subroutine pzqppiv(m, n, a, ia, ja, desca, ipiv)
Definition pzqrdriver.f:889
program pzqrdriver
Definition pzqrdriver.f:1
subroutine pzqrinfo(summry, nout, nfact, factor, ldfact, nmat, mval, ldmval, nval, ldnval, nnb, mbval, ldmbval, nbval, ldnbval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
Definition pzqrinfo.f:6
subroutine pztzrzf(m, n, a, ia, ja, desca, tau, work, lwork, info)
Definition pztzrzf.f:3
subroutine pztzrzrv(m, n, a, ia, ja, desca, tau, work)
Definition pztzrzrv.f:2
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267