SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
psqrdriver.f
Go to the documentation of this file.
1 PROGRAM psqrdriver
2*
3* -- ScaLAPACK testing driver (version 1.7) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* May 28, 2001
7*
8* Purpose
9* =======
10*
11* PSQRDRIVER is the main test program for the REAL
12* SCALAPACK QR factorization routines. This test driver performs a QR
13* QL, LQ, RQ, QP (QR factorization with column pivoting) or TZ
14* (complete orthogonal factorization) factorization and checks the
15* results.
16*
17* The program must be driven by a short data file. An annotated
18* example of a data file can be obtained by deleting the first 3
19* characters from the following 16 lines:
20* 'ScaLAPACK QR factorizations input file'
21* 'PVM machine'
22* 'QR.out' output file name (if any)
23* 6 device out
24* 6 number of factorizations
25* 'QR' 'QL' 'LQ' 'RQ' 'QP' 'TZ' factorization: QR, QL, LQ, RQ, QP, TZ
26* 4 number of problems sizes
27* 55 17 31 201 values of M
28* 5 71 31 201 values of N
29* 3 number of MB's and NB's
30* 4 3 5 values of MB
31* 4 7 3 values of NB
32* 7 number of process grids (ordered P & Q)
33* 1 2 1 4 2 3 8 values of P
34* 7 2 4 1 3 2 1 values of Q
35* 1.0 threshold
36*
37* Internal Parameters
38* ===================
39*
40* TOTMEM INTEGER, default = 2000000
41* TOTMEM is a machine-specific parameter indicating the
42* maximum amount of available memory in bytes.
43* The user should customize TOTMEM to his platform. Remember
44* to leave room in memory for the operating system, the BLACS
45* buffer, etc. For example, on a system with 8 MB of memory
46* per process (e.g., one processor on an Intel iPSC/860), the
47* parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
48* code, BLACS buffer, etc). However, for PVM, we usually set
49* TOTMEM = 2000000. Some experimenting with the maximum value
50* of TOTMEM may be required.
51*
52* INTGSZ INTEGER, default = 4 bytes.
53* REALSZ INTEGER, default = 4 bytes.
54* INTGSZ and REALSZ indicate the length in bytes on the
55* given platform for an integer and a single precision real.
56* MEM REAL array, dimension ( TOTMEM / REALSZ )
57*
58* All arrays used by SCALAPACK routines are allocated from
59* this array and referenced by pointers. The integer IPA,
60* for example, is a pointer to the starting element of MEM for
61* the matrix A.
62*
63* =====================================================================
64*
65* .. Parameters ..
66 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
67 $ lld_, mb_, m_, nb_, n_, rsrc_
68 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
69 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
70 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
71 INTEGER intgsz, memsiz, ntests, realsz, totmem
72 REAL padval
73 parameter( intgsz = 4, realsz = 4, totmem = 2000000,
74 $ memsiz = totmem / realsz, ntests = 20,
75 $ padval = -9923.0e+0 )
76* ..
77* .. Local Scalars ..
78 CHARACTER*2 fact
79 CHARACTER*6 passed
80 CHARACTER*7 rout
81 CHARACTER*8 routchk
82 CHARACTER*80 outfile
83 LOGICAL check
84 INTEGER i, iam, iaseed, ictxt, imidpad, info, ipa,
85 $ ipostpad, ippiv, iprepad, iptau, ipw, j, k,
86 $ kfail, kpass, kskip, ktests, l, lipiv, ltau,
87 $ lwork, m, maxmn, mb, minmn, mnp, mnq, mp,
88 $ mycol, myrow, n, nb, nfact, ngrids, nmat, nnb,
89 $ nout, npcol, nprocs, nprow, nq, workfct,
90 $ worksiz
91 REAL anorm, fresid, thresh
92 DOUBLE PRECISION nops, tmflops
93* ..
94* .. Arrays ..
95 CHARACTER*2 factor( ntests )
96 INTEGER desca( dlen_ ), ierr( 1 ), mbval( ntests ),
97 $ mval( ntests ), nbval( ntests ),
98 $ nval( ntests ), pval( ntests ), qval( ntests )
99 REAL mem( memsiz )
100 DOUBLE PRECISION ctime( 1 ), wtime( 1 )
101* ..
102* .. External Subroutines ..
103 EXTERNAL blacs_barrier, blacs_exit, blacs_get,
104 $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
105 $ blacs_pinfo, descinit, igsum2d, pschekpad,
112* ..
113* .. External Functions ..
114 LOGICAL lsamen
115 INTEGER iceil, numroc
116 REAL pslange
117 EXTERNAL iceil, lsamen, numroc, pslange
118* ..
119* .. Intrinsic Functions ..
120 INTRINSIC dble, max, min
121* ..
122* .. Data Statements ..
123 DATA ktests, kpass, kfail, kskip /4*0/
124* ..
125* .. Executable Statements ..
126*
127* Get starting information
128*
129 CALL blacs_pinfo( iam, nprocs )
130 iaseed = 100
131 CALL psqrinfo( outfile, nout, nfact, factor, ntests, nmat, mval,
132 $ ntests, nval, ntests, nnb, mbval, ntests, nbval,
133 $ ntests, ngrids, pval, ntests, qval, ntests,
134 $ thresh, mem, iam, nprocs )
135 check = ( thresh.GE.0.0e+0 )
136*
137* Loop over the different factorization types
138*
139 DO 40 i = 1, nfact
140*
141 fact = factor( i )
142*
143* Print headings
144*
145 IF( iam.EQ.0 ) THEN
146 WRITE( nout, fmt = * )
147 IF( lsamen( 2, fact, 'QR' ) ) THEN
148 rout = 'PSGEQRF'
149 routchk = 'PSGEQRRV'
150 WRITE( nout, fmt = 9986 )
151 $ 'QR factorization tests.'
152 ELSE IF( lsamen( 2, fact, 'QL' ) ) THEN
153 rout = 'PSGEQLF'
154 routchk = 'PSGEQLRV'
155 WRITE( nout, fmt = 9986 )
156 $ 'QL factorization tests.'
157 ELSE IF( lsamen( 2, fact, 'LQ' ) ) THEN
158 rout = 'PSGELQF'
159 routchk = 'PSGELQRV'
160 WRITE( nout, fmt = 9986 )
161 $ 'LQ factorization tests.'
162 ELSE IF( lsamen( 2, fact, 'RQ' ) ) THEN
163 rout = 'PSGERQF'
164 routchk = 'PSGERQRV'
165 WRITE( nout, fmt = 9986 )
166 $ 'RQ factorization tests.'
167 ELSE IF( lsamen( 2, fact, 'QP' ) ) THEN
168 rout = 'PSGEQPF'
169 routchk = 'PSGEQRRV'
170 WRITE( nout, fmt = 9986 )
171 $ 'QR factorization with column pivoting tests.'
172 ELSE IF( lsamen( 2, fact, 'TZ' ) ) THEN
173 rout = 'PSTZRZF'
174 routchk = 'PSTZRZRV'
175 WRITE( nout, fmt = 9986 )
176 $ 'Complete orthogonal factorization tests.'
177 END IF
178 WRITE( nout, fmt = * )
179 WRITE( nout, fmt = 9995 )
180 WRITE( nout, fmt = 9994 )
181 WRITE( nout, fmt = * )
182 END IF
183*
184* Loop over different process grids
185*
186 DO 30 j = 1, ngrids
187*
188 nprow = pval( j )
189 npcol = qval( j )
190*
191* Make sure grid information is correct
192*
193 ierr( 1 ) = 0
194 IF( nprow.LT.1 ) THEN
195 IF( iam.EQ.0 )
196 $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
197 ierr( 1 ) = 1
198 ELSE IF( npcol.LT.1 ) THEN
199 IF( iam.EQ.0 )
200 $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
201 ierr( 1 ) = 1
202 ELSE IF( nprow*npcol.GT.nprocs ) THEN
203 IF( iam.EQ.0 )
204 $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
205 ierr( 1 ) = 1
206 END IF
207*
208 IF( ierr( 1 ).GT.0 ) THEN
209 IF( iam.EQ.0 )
210 $ WRITE( nout, fmt = 9997 ) 'grid'
211 kskip = kskip + 1
212 GO TO 30
213 END IF
214*
215* Define process grid
216*
217 CALL blacs_get( -1, 0, ictxt )
218 CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
219 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
220*
221* Go to bottom of loop if this case doesn't use my process
222*
223 IF( myrow.GE.nprow .OR. mycol.GE.npcol )
224 $ GO TO 30
225*
226 DO 20 k = 1, nmat
227*
228 m = mval( k )
229 n = nval( k )
230*
231* Make sure matrix information is correct
232*
233 ierr(1) = 0
234 IF( m.LT.1 ) THEN
235 IF( iam.EQ.0 )
236 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'M', m
237 ierr( 1 ) = 1
238 ELSE IF( n.LT.1 ) THEN
239 IF( iam.EQ.0 )
240 $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
241 ierr( 1 ) = 1
242 END IF
243*
244* Make sure no one had error
245*
246 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
247*
248 IF( ierr( 1 ).GT.0 ) THEN
249 IF( iam.EQ.0 )
250 $ WRITE( nout, fmt = 9997 ) 'matrix'
251 kskip = kskip + 1
252 GO TO 20
253 END IF
254*
255* Loop over different blocking sizes
256*
257 DO 10 l = 1, nnb
258*
259 mb = mbval( l )
260 nb = nbval( l )
261*
262* Make sure mb is legal
263*
264 ierr( 1 ) = 0
265 IF( mb.LT.1 ) THEN
266 ierr( 1 ) = 1
267 IF( iam.EQ.0 )
268 $ WRITE( nout, fmt = 9999 ) 'MB', 'MB', mb
269 END IF
270*
271* Check all processes for an error
272*
273 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
274 $ 0 )
275*
276 IF( ierr( 1 ).GT.0 ) THEN
277 IF( iam.EQ.0 )
278 $ WRITE( nout, fmt = 9997 ) 'MB'
279 kskip = kskip + 1
280 GO TO 10
281 END IF
282*
283* Make sure nb is legal
284*
285 ierr( 1 ) = 0
286 IF( nb.LT.1 ) THEN
287 ierr( 1 ) = 1
288 IF( iam.EQ.0 )
289 $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
290 END IF
291*
292* Check all processes for an error
293*
294 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
295 $ 0 )
296*
297 IF( ierr( 1 ).GT.0 ) THEN
298 IF( iam.EQ.0 )
299 $ WRITE( nout, fmt = 9997 ) 'NB'
300 kskip = kskip + 1
301 GO TO 10
302 END IF
303*
304* Padding constants
305*
306 mp = numroc( m, mb, myrow, 0, nprow )
307 nq = numroc( n, nb, mycol, 0, npcol )
308 mnp = numroc( min( m, n ), mb, myrow, 0, nprow )
309 mnq = numroc( min( m, n ), nb, mycol, 0, npcol )
310 IF( check ) THEN
311 iprepad = max( mb, mp )
312 imidpad = nb
313 ipostpad = max( nb, nq )
314 ELSE
315 iprepad = 0
316 imidpad = 0
317 ipostpad = 0
318 END IF
319*
320* Initialize the array descriptor for the matrix A
321*
322 CALL descinit( desca, m, n, mb, nb, 0, 0, ictxt,
323 $ max( 1, mp ) + imidpad, ierr( 1 ) )
324*
325* Check all processes for an error
326*
327 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
328 $ 0 )
329*
330 IF( ierr( 1 ).LT.0 ) THEN
331 IF( iam.EQ.0 )
332 $ WRITE( nout, fmt = 9997 ) 'descriptor'
333 kskip = kskip + 1
334 GO TO 10
335 END IF
336*
337* Assign pointers into MEM for ScaLAPACK arrays, A is
338* allocated starting at position MEM( IPREPAD+1 )
339*
340 ipa = iprepad+1
341 iptau = ipa + desca( lld_ ) * nq + ipostpad + iprepad
342*
343 IF( lsamen( 2, fact, 'QR' ) ) THEN
344*
345 ltau = mnq
346 ipw = iptau + ltau + ipostpad + iprepad
347*
348* Figure the amount of workspace required by the QR
349* factorization
350*
351 lwork = desca( nb_ ) * ( mp + nq + desca( nb_ ) )
352 workfct = lwork + ipostpad
353 worksiz = workfct
354*
355 IF( check ) THEN
356*
357* Figure the amount of workspace required by the
358* checking routines PSLAFCHK, PSGEQRRV and
359* PSLANGE
360*
361 worksiz = lwork + mp*desca( nb_ ) + ipostpad
362*
363 END IF
364*
365 ELSE IF( lsamen( 2, fact, 'QL' ) ) THEN
366*
367 ltau = nq
368 ipw = iptau + ltau + ipostpad + iprepad
369*
370* Figure the amount of workspace required by the QL
371* factorization
372*
373 lwork = desca( nb_ ) * ( mp + nq + desca( nb_ ) )
374 workfct = lwork + ipostpad
375 worksiz = workfct
376*
377 IF( check ) THEN
378*
379* Figure the amount of workspace required by the
380* checking routines PSLAFCHK, PSGEQLRV and
381* PSLANGE
382*
383 worksiz = lwork + mp*desca( nb_ ) + ipostpad
384*
385 END IF
386*
387 ELSE IF( lsamen( 2, fact, 'LQ' ) ) THEN
388*
389 ltau = mnp
390 ipw = iptau + ltau + ipostpad + iprepad
391*
392* Figure the amount of workspace required by the LQ
393* factorization
394*
395 lwork = desca( mb_ ) * ( mp + nq + desca( mb_ ) )
396 workfct = lwork + ipostpad
397 worksiz = workfct
398*
399 IF( check ) THEN
400*
401* Figure the amount of workspace required by the
402* checking routines PSLAFCHK, PSGELQRV and
403* PSLANGE
404*
405 worksiz = lwork +
406 $ max( mp*desca( nb_ ), nq*desca( mb_ )
407 $ ) + ipostpad
408*
409 END IF
410*
411 ELSE IF( lsamen( 2, fact, 'RQ' ) ) THEN
412*
413 ltau = mp
414 ipw = iptau + ltau + ipostpad + iprepad
415*
416* Figure the amount of workspace required by the QR
417* factorization
418*
419 lwork = desca( mb_ ) * ( mp + nq + desca( mb_ ) )
420 workfct = lwork + ipostpad
421 worksiz = workfct
422*
423 IF( check ) THEN
424*
425* Figure the amount of workspace required by the
426* checking routines PSLAFCHK, PSGERQRV and
427* PSLANGE
428*
429 worksiz = lwork +
430 $ max( mp*desca( nb_ ), nq*desca( mb_ )
431 $ ) + ipostpad
432*
433 END IF
434*
435 ELSE IF( lsamen( 2, fact, 'QP' ) ) THEN
436*
437 ltau = mnq
438 ippiv = iptau + ltau + ipostpad + iprepad
439 lipiv = iceil( intgsz*nq, realsz )
440 ipw = ippiv + lipiv + ipostpad + iprepad
441*
442* Figure the amount of workspace required by the
443* factorization i.e from IPW on.
444*
445 lwork = max( 3, mp + max( 1, nq ) ) + 2 * nq
446 workfct = lwork + ipostpad
447 worksiz = workfct
448*
449 IF( check ) THEN
450*
451* Figure the amount of workspace required by the
452* checking routines PSLAFCHK, PSGEQRRV,
453* PSLANGE.
454*
455 worksiz = max( worksiz - ipostpad,
456 $ desca( nb_ )*( 2*mp + nq + desca( nb_ ) ) ) +
457 $ ipostpad
458 END IF
459*
460 ELSE IF( lsamen( 2, fact, 'TZ' ) ) THEN
461*
462 ltau = mp
463 ipw = iptau + ltau + ipostpad + iprepad
464*
465* Figure the amount of workspace required by the TZ
466* factorization
467*
468 lwork = desca( mb_ ) * ( mp + nq + desca( mb_ ) )
469 workfct = lwork + ipostpad
470 worksiz = workfct
471*
472 IF( check ) THEN
473*
474* Figure the amount of workspace required by the
475* checking routines PSLAFCHK, PSTZRZRV and
476* PSLANGE
477*
478 worksiz = lwork +
479 $ max( mp*desca( nb_ ), nq*desca( mb_ )
480 $ ) + ipostpad
481*
482 END IF
483*
484 END IF
485*
486* Check for adequate memory for problem size
487*
488 ierr( 1 ) = 0
489 IF( ipw+worksiz.GT.memsiz ) THEN
490 IF( iam.EQ.0 )
491 $ WRITE( nout, fmt = 9996 )
492 $ fact // ' factorization',
493 $ ( ipw+worksiz )*realsz
494 ierr( 1 ) = 1
495 END IF
496*
497* Check all processes for an error
498*
499 CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
500 $ 0 )
501*
502 IF( ierr( 1 ).GT.0 ) THEN
503 IF( iam.EQ.0 )
504 $ WRITE( nout, fmt = 9997 ) 'MEMORY'
505 kskip = kskip + 1
506 GO TO 10
507 END IF
508*
509* Generate the matrix A
510*
511 CALL psmatgen( ictxt, 'N', 'N', desca( m_ ),
512 $ desca( n_ ), desca( mb_ ),
513 $ desca( nb_ ), mem( ipa ),
514 $ desca( lld_ ), desca( rsrc_ ),
515 $ desca( csrc_ ), iaseed, 0, mp, 0, nq,
516 $ myrow, mycol, nprow, npcol )
517*
518* Need the Infinity of A for checking
519*
520 IF( check ) THEN
521 CALL psfillpad( ictxt, mp, nq, mem( ipa-iprepad ),
522 $ desca( lld_ ), iprepad, ipostpad,
523 $ padval )
524 IF( lsamen( 2, fact, 'QP' ) ) THEN
525 CALL psfillpad( ictxt, lipiv, 1,
526 $ mem( ippiv-iprepad ), lipiv,
527 $ iprepad, ipostpad, padval )
528 END IF
529 CALL psfillpad( ictxt, ltau, 1,
530 $ mem( iptau-iprepad ), ltau,
531 $ iprepad, ipostpad, padval )
532 CALL psfillpad( ictxt, worksiz-ipostpad, 1,
533 $ mem( ipw-iprepad ),
534 $ worksiz-ipostpad,
535 $ iprepad, ipostpad, padval )
536 anorm = pslange( 'I', m, n, mem( ipa ), 1, 1,
537 $ desca, mem( ipw ) )
538 CALL pschekpad( ictxt, 'PSLANGE', mp, nq,
539 $ mem( ipa-iprepad ), desca( lld_ ),
540 $ iprepad, ipostpad, padval )
541 CALL pschekpad( ictxt, 'PSLANGE',
542 $ worksiz-ipostpad, 1,
543 $ mem( ipw-iprepad ),
544 $ worksiz-ipostpad, iprepad,
545 $ ipostpad, padval )
546 CALL psfillpad( ictxt, workfct-ipostpad, 1,
547 $ mem( ipw-iprepad ),
548 $ workfct-ipostpad,
549 $ iprepad, ipostpad, padval )
550 END IF
551*
552 CALL slboot()
553 CALL blacs_barrier( ictxt, 'All' )
554*
555* Perform QR factorizations
556*
557 IF( lsamen( 2, fact, 'QR' ) ) THEN
558 CALL sltimer( 1 )
559 CALL psgeqrf( m, n, mem( ipa ), 1, 1, desca,
560 $ mem( iptau ), mem( ipw ), lwork,
561 $ info )
562 CALL sltimer( 1 )
563 ELSE IF( lsamen( 2, fact, 'QL' ) ) THEN
564 CALL sltimer( 1 )
565 CALL psgeqlf( m, n, mem( ipa ), 1, 1, desca,
566 $ mem( iptau ), mem( ipw ), lwork,
567 $ info )
568 CALL sltimer( 1 )
569 ELSE IF( lsamen( 2, fact, 'LQ' ) ) THEN
570 CALL sltimer( 1 )
571 CALL psgelqf( m, n, mem( ipa ), 1, 1, desca,
572 $ mem( iptau ), mem( ipw ), lwork,
573 $ info )
574 CALL sltimer( 1 )
575 ELSE IF( lsamen( 2, fact, 'RQ' ) ) THEN
576 CALL sltimer( 1 )
577 CALL psgerqf( m, n, mem( ipa ), 1, 1, desca,
578 $ mem( iptau ), mem( ipw ), lwork,
579 $ info )
580 CALL sltimer( 1 )
581 ELSE IF( lsamen( 2, fact, 'QP' ) ) THEN
582 CALL sltimer( 1 )
583 CALL psgeqpf( m, n, mem( ipa ), 1, 1, desca,
584 $ mem( ippiv ), mem( iptau ),
585 $ mem( ipw ), lwork, info )
586 CALL sltimer( 1 )
587 ELSE IF( lsamen( 2, fact, 'TZ' ) ) THEN
588 CALL sltimer( 1 )
589 IF( n.GE.m )
590 $ CALL pstzrzf( m, n, mem( ipa ), 1, 1, desca,
591 $ mem( iptau ), mem( ipw ), lwork,
592 $ info )
593 CALL sltimer( 1 )
594 END IF
595*
596 IF( check ) THEN
597*
598* Check for memory overwrite in factorization
599*
600 CALL pschekpad( ictxt, rout, mp, nq,
601 $ mem( ipa-iprepad ), desca( lld_ ),
602 $ iprepad, ipostpad, padval )
603 CALL pschekpad( ictxt, rout, ltau, 1,
604 $ mem( iptau-iprepad ), ltau,
605 $ iprepad, ipostpad, padval )
606 IF( lsamen( 2, fact, 'QP' ) ) THEN
607 CALL pschekpad( ictxt, rout, lipiv, 1,
608 $ mem( ippiv-iprepad ), lipiv,
609 $ iprepad, ipostpad, padval )
610 END IF
611 CALL pschekpad( ictxt, rout, workfct-ipostpad, 1,
612 $ mem( ipw-iprepad ),
613 $ workfct-ipostpad, iprepad,
614 $ ipostpad, padval )
615 CALL psfillpad( ictxt, worksiz-ipostpad, 1,
616 $ mem( ipw-iprepad ),
617 $ worksiz-ipostpad,
618 $ iprepad, ipostpad, padval )
619*
620 IF( lsamen( 2, fact, 'QR' ) ) THEN
621*
622* Compute residual = ||A-Q*R|| / (||A||*N*eps)
623*
624 CALL psgeqrrv( m, n, mem( ipa ), 1, 1, desca,
625 $ mem( iptau ), mem( ipw ) )
626 CALL pslafchk( 'No', 'No', m, n, mem( ipa ), 1,
627 $ 1, desca, iaseed, anorm, fresid,
628 $ mem( ipw ) )
629 ELSE IF( lsamen( 2, fact, 'QL' ) ) THEN
630*
631* Compute residual = ||A-Q*L|| / (||A||*N*eps)
632*
633 CALL psgeqlrv( m, n, mem( ipa ), 1, 1, desca,
634 $ mem( iptau ), mem( ipw ) )
635 CALL pslafchk( 'No', 'No', m, n, mem( ipa ), 1,
636 $ 1, desca, iaseed, anorm, fresid,
637 $ mem( ipw ) )
638 ELSE IF( lsamen( 2, fact, 'LQ' ) ) THEN
639*
640* Compute residual = ||A-L*Q|| / (||A||*N*eps)
641*
642 CALL psgelqrv( m, n, mem( ipa ), 1, 1, desca,
643 $ mem( iptau ), mem( ipw ) )
644 CALL pslafchk( 'No', 'No', m, n, mem( ipa ), 1,
645 $ 1, desca, iaseed, anorm, fresid,
646 $ mem( ipw ) )
647 ELSE IF( lsamen( 2, fact, 'RQ' ) ) THEN
648*
649* Compute residual = ||A-R*Q|| / (||A||*N*eps)
650*
651 CALL psgerqrv( m, n, mem( ipa ), 1, 1, desca,
652 $ mem( iptau ), mem( ipw ) )
653 CALL pslafchk( 'No', 'No', m, n, mem( ipa ), 1,
654 $ 1, desca, iaseed, anorm, fresid,
655 $ mem( ipw ) )
656 ELSE IF( lsamen( 2, fact, 'QP' ) ) THEN
657*
658* Compute residual = ||AP-Q*R|| / (||A||*N*eps)
659*
660 CALL psgeqrrv( m, n, mem( ipa ), 1, 1, desca,
661 $ mem( iptau ), mem( ipw ) )
662 ELSE IF( lsamen( 2, fact, 'TZ' ) ) THEN
663*
664* Compute residual = ||A-T*Z|| / (||A||*N*eps)
665*
666 IF( n.GE.m ) THEN
667 CALL pstzrzrv( m, n, mem( ipa ), 1, 1, desca,
668 $ mem( iptau ), mem( ipw ) )
669 END IF
670 CALL pslafchk( 'No', 'No', m, n, mem( ipa ), 1,
671 $ 1, desca, iaseed, anorm, fresid,
672 $ mem( ipw ) )
673 END IF
674*
675* Check for memory overwrite
676*
677 CALL pschekpad( ictxt, routchk, mp, nq,
678 $ mem( ipa-iprepad ), desca( lld_ ),
679 $ iprepad, ipostpad, padval )
680 CALL pschekpad( ictxt, routchk, ltau, 1,
681 $ mem( iptau-iprepad ), ltau,
682 $ iprepad, ipostpad, padval )
683 CALL pschekpad( ictxt, routchk, worksiz-ipostpad,
684 $ 1, mem( ipw-iprepad ),
685 $ worksiz-ipostpad, iprepad,
686 $ ipostpad, padval )
687*
688 IF( lsamen( 2, fact, 'QP' ) ) THEN
689*
690 CALL psqppiv( m, n, mem( ipa ), 1, 1, desca,
691 $ mem( ippiv ) )
692*
693* Check for memory overwrite
694*
695 CALL pschekpad( ictxt, 'PSQPPIV', mp, nq,
696 $ mem( ipa-iprepad ),
697 $ desca( lld_ ),
698 $ iprepad, ipostpad, padval )
699 CALL pschekpad( ictxt, 'PSQPPIV', lipiv, 1,
700 $ mem( ippiv-iprepad ), lipiv,
701 $ iprepad, ipostpad, padval )
702*
703 CALL pslafchk( 'No', 'No', m, n, mem( ipa ), 1,
704 $ 1, desca, iaseed, anorm, fresid,
705 $ mem( ipw ) )
706*
707* Check for memory overwrite
708*
709 CALL pschekpad( ictxt, 'PSLAFCHK', mp, nq,
710 $ mem( ipa-iprepad ),
711 $ desca( lld_ ),
712 $ iprepad, ipostpad, padval )
713 CALL pschekpad( ictxt, 'PSLAFCHK',
714 $ worksiz-ipostpad, 1,
715 $ mem( ipw-iprepad ),
716 $ worksiz-ipostpad, iprepad,
717 $ ipostpad, padval )
718 END IF
719*
720* Test residual and detect NaN result
721*
722 IF( lsamen( 2, fact, 'TZ' ) .AND. n.LT.m ) THEN
723 kskip = kskip + 1
724 passed = 'BYPASS'
725 ELSE
726 IF( fresid.LE.thresh .AND.
727 $ (fresid-fresid).EQ.0.0e+0 ) THEN
728 kpass = kpass + 1
729 passed = 'PASSED'
730 ELSE
731 kfail = kfail + 1
732 passed = 'FAILED'
733 END IF
734 END IF
735*
736 ELSE
737*
738* Don't perform the checking, only timing
739*
740 kpass = kpass + 1
741 fresid = fresid - fresid
742 passed = 'BYPASS'
743*
744 END IF
745*
746* Gather maximum of all CPU and WALL clock timings
747*
748 CALL slcombine( ictxt, 'All', '>', 'W', 1, 1, wtime )
749 CALL slcombine( ictxt, 'All', '>', 'C', 1, 1, ctime )
750*
751* Print results
752*
753 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
754*
755 minmn = min( m, n )
756 maxmn = max( m, n )
757*
758 IF( lsamen( 2, fact, 'TZ' ) ) THEN
759 IF( m.GE.n ) THEN
760 nops = 0.0d+0
761 ELSE
762*
763* 5/2 ( M^2 N - M^3 ) + 5/2 N M + 1/2 M^2 for
764* complete orthogonal factorization (M <= N).
765*
766 nops = ( 5.0d+0 * (
767 $ dble( n )*( dble( m )**2 ) -
768 $ dble( m )**3 +
769 $ dble( n )*dble( m ) ) +
770 $ dble( m )**2 ) / 2.0d+0
771 END IF
772*
773 ELSE
774*
775* 2 M N^2 - 2/3 N^2 + M N + N^2 for QR type
776* factorization when M >= N.
777*
778 nops = 2.0d+0 * ( dble( minmn )**2 ) *
779 $ ( dble( maxmn )-dble( minmn ) / 3.0d+0 ) +
780 $ ( dble( maxmn )+dble( minmn ) )*dble( minmn )
781 END IF
782*
783* Print WALL time
784*
785 IF( wtime( 1 ).GT.0.0d+0 ) THEN
786 tmflops = nops / ( wtime( 1 ) * 1.0d+6 )
787 ELSE
788 tmflops = 0.0d+0
789 END IF
790 IF( wtime( 1 ).GE.0.0d+0 )
791 $ WRITE( nout, fmt = 9993 ) 'WALL', m, n, mb, nb,
792 $ nprow, npcol, wtime( 1 ), tmflops,
793 $ passed, fresid
794*
795* Print CPU time
796*
797 IF( ctime( 1 ).GT.0.0d+0 ) THEN
798 tmflops = nops / ( ctime( 1 ) * 1.0d+6 )
799 ELSE
800 tmflops = 0.0d+0
801 END IF
802 IF( ctime( 1 ).GE.0.0d+0 )
803 $ WRITE( nout, fmt = 9993 ) 'CPU ', m, n, mb, nb,
804 $ nprow, npcol, ctime( 1 ), tmflops,
805 $ passed, fresid
806*
807 END IF
808*
809 10 CONTINUE
810*
811 20 CONTINUE
812*
813 CALL blacs_gridexit( ictxt )
814*
815 30 CONTINUE
816*
817 40 CONTINUE
818*
819* Print out ending messages and close output file
820*
821 IF( iam.EQ.0 ) THEN
822 ktests = kpass + kfail + kskip
823 WRITE( nout, fmt = * )
824 WRITE( nout, fmt = 9992 ) ktests
825 IF( check ) THEN
826 WRITE( nout, fmt = 9991 ) kpass
827 WRITE( nout, fmt = 9989 ) kfail
828 ELSE
829 WRITE( nout, fmt = 9990 ) kpass
830 END IF
831 WRITE( nout, fmt = 9988 ) kskip
832 WRITE( nout, fmt = * )
833 WRITE( nout, fmt = * )
834 WRITE( nout, fmt = 9987 )
835 IF( nout.NE.6 .AND. nout.NE.0 )
836 $ CLOSE ( nout )
837 END IF
838*
839 CALL blacs_exit( 0 )
840*
841 9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
842 $ '; It should be at least 1' )
843 9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
844 $ i4 )
845 9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
846 9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
847 $ i11 )
848 9995 FORMAT( 'TIME M N MB NB P Q Fact Time ',
849 $ ' MFLOPS CHECK Residual' )
850 9994 FORMAT( '---- ------ ------ --- --- ----- ----- --------- ',
851 $ '----------- ------ --------' )
852 9993 FORMAT( a4, 1x, i6, 1x, i6, 1x, i3, 1x, i3, 1x, i5, 1x, i5, 1x,
853 $ f9.2, 1x, f11.2, 1x, a6, 2x, g8.1 )
854 9992 FORMAT( 'Finished ', i6, ' tests, with the following results:' )
855 9991 FORMAT( i5, ' tests completed and passed residual checks.' )
856 9990 FORMAT( i5, ' tests completed without checking.' )
857 9989 FORMAT( i5, ' tests completed and failed residual checks.' )
858 9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
859 9987 FORMAT( 'END OF TESTS.' )
860 9986 FORMAT( a )
861*
862 stop
863*
864* End of PSQRDRIVER
865*
866 END
867*
868 SUBROUTINE psqppiv( M, N, A, IA, JA, DESCA, IPIV )
869*
870* -- ScaLAPACK routine (version 1.7) --
871* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
872* and University of California, Berkeley.
873* May 1, 1997
874*
875* .. Scalar Arguments ..
876 INTEGER IA, JA, M, N
877* ..
878* .. Array Arguments ..
879 INTEGER DESCA( * ), IPIV( * )
880 REAL A( * )
881* ..
882*
883* Purpose
884* =======
885*
886* PSQPPIV applies to sub( A ) = A(IA:IA+M-1,JA:JA+N-1) the pivots
887* returned by PSGEQPF in reverse order for checking purposes.
888*
889* Notes
890* =====
891*
892* Each global data object is described by an associated description
893* vector. This vector stores the information required to establish
894* the mapping between an object element and its corresponding process
895* and memory location.
896*
897* Let A be a generic term for any 2D block cyclicly distributed array.
898* Such a global array has an associated description vector DESCA.
899* In the following comments, the character _ should be read as
900* "of the global array".
901*
902* NOTATION STORED IN EXPLANATION
903* --------------- -------------- --------------------------------------
904* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
905* DTYPE_A = 1.
906* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
907* the BLACS process grid A is distribu-
908* ted over. The context itself is glo-
909* bal, but the handle (the integer
910* value) may vary.
911* M_A (global) DESCA( M_ ) The number of rows in the global
912* array A.
913* N_A (global) DESCA( N_ ) The number of columns in the global
914* array A.
915* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
916* the rows of the array.
917* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
918* the columns of the array.
919* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
920* row of the array A is distributed.
921* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
922* first column of the array A is
923* distributed.
924* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
925* array. LLD_A >= MAX(1,LOCr(M_A)).
926*
927* Let K be the number of rows or columns of a distributed matrix,
928* and assume that its process grid has dimension p x q.
929* LOCr( K ) denotes the number of elements of K that a process
930* would receive if K were distributed over the p processes of its
931* process column.
932* Similarly, LOCc( K ) denotes the number of elements of K that a
933* process would receive if K were distributed over the q processes of
934* its process row.
935* The values of LOCr() and LOCc() may be determined via a call to the
936* ScaLAPACK tool function, NUMROC:
937* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
938* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
939* An upper bound for these quantities may be computed by:
940* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
941* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
942*
943* Arguments
944* =========
945*
946* M (global input) INTEGER
947* The number of rows to be operated on, i.e. the number of rows
948* of the distributed submatrix sub( A ). M >= 0.
949*
950* N (global input) INTEGER
951* The number of columns to be operated on, i.e. the number of
952* columns of the distributed submatrix sub( A ). N >= 0.
953*
954* A (local input/local output) REAL pointer into the
955* local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
956* On entry, the local pieces of the M-by-N distributed matrix
957* sub( A ) which is to be permuted. On exit, the local pieces
958* of the distributed permuted submatrix sub( A ) * Inv( P ).
959*
960* IA (global input) INTEGER
961* The row index in the global array A indicating the first
962* row of sub( A ).
963*
964* JA (global input) INTEGER
965* The column index in the global array A indicating the
966* first column of sub( A ).
967*
968* DESCA (global and local input) INTEGER array of dimension DLEN_.
969* The array descriptor for the distributed matrix A.
970*
971* IPIV (local input) INTEGER array, dimension LOCc(JA+N-1).
972* On exit, if IPIV(I) = K, the local i-th column of sub( A )*P
973* was the global K-th column of sub( A ). IPIV is tied to the
974* distributed matrix A.
975*
976* =====================================================================
977*
978* .. Parameters ..
979 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
980 $ LLD_, MB_, M_, NB_, N_, RSRC_
981 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
982 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
983 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
984* ..
985* .. Local Scalars ..
986 INTEGER IACOL, ICOFFA, ICTXT, IITMP, IPVT, IPCOL,
987 $ IPROW, ITMP, J, JJ, JJA, KK, MYCOL, MYROW,
988 $ NPCOL, NPROW, NQ
989* ..
990* .. External Subroutines ..
991 EXTERNAL blacs_gridinfo, igebr2d, igebs2d, igerv2d,
992 $ igesd2d, igamn2d, infog1l, psswap
993* ..
994* .. External Functions ..
995 INTEGER INDXL2G, NUMROC
996 EXTERNAL indxl2g, numroc
997* ..
998* .. Intrinsic Functions ..
999 INTRINSIC min, mod
1000* ..
1001* .. Executable Statements ..
1002*
1003* Get grid parameters
1004*
1005 ictxt = desca( ctxt_ )
1006 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
1007 CALL infog1l( ja, desca( nb_ ), npcol, mycol, desca( csrc_ ), jja,
1008 $ iacol )
1009 icoffa = mod( ja-1, desca( nb_ ) )
1010 nq = numroc( n+icoffa, desca( nb_ ), mycol, iacol, npcol )
1011 IF( mycol.EQ.iacol )
1012 $ nq = nq - icoffa
1013*
1014 DO 20 j = ja, ja+n-2
1015*
1016 ipvt = ja+n-1
1017 itmp = ja+n
1018*
1019* Find first the local minimum candidate for pivoting
1020*
1021 CALL infog1l( j, desca( nb_ ), npcol, mycol, desca( csrc_ ),
1022 $ jj, iacol )
1023 DO 10 kk = jj, jja+nq-1
1024 IF( ipiv( kk ).LT.ipvt )THEN
1025 iitmp = kk
1026 ipvt = ipiv( kk )
1027 END IF
1028 10 CONTINUE
1029*
1030* Find the global minimum pivot
1031*
1032 CALL igamn2d( ictxt, 'Rowwise', ' ', 1, 1, ipvt, 1, iprow,
1033 $ ipcol, 1, -1, mycol )
1034*
1035* Broadcast the corresponding index to the other process columns
1036*
1037 IF( mycol.EQ.ipcol ) THEN
1038 itmp = indxl2g( iitmp, desca( nb_ ), mycol, desca( csrc_ ),
1039 $ npcol )
1040 CALL igebs2d( ictxt, 'Rowwise', ' ', 1, 1, itmp, 1 )
1041 IF( ipcol.NE.iacol ) THEN
1042 CALL igerv2d( ictxt, 1, 1, ipiv( iitmp ), 1, myrow,
1043 $ iacol )
1044 ELSE
1045 IF( mycol.EQ.iacol )
1046 $ ipiv( iitmp ) = ipiv( jj )
1047 END IF
1048 ELSE
1049 CALL igebr2d( ictxt, 'Rowwise', ' ', 1, 1, itmp, 1, myrow,
1050 $ ipcol )
1051 IF( mycol.EQ.iacol .AND. ipcol.NE.iacol )
1052 $ CALL igesd2d( ictxt, 1, 1, ipiv( jj ), 1, myrow, ipcol )
1053 END IF
1054*
1055* Swap the columns of A
1056*
1057 CALL psswap( m, a, ia, itmp, desca, 1, a, ia, j, desca, 1 )
1058*
1059 20 CONTINUE
1060*
1061* End of PSQPPIV
1062*
1063 END
subroutine pslafchk(aform, diag, m, n, a, ia, ja, desca, iaseed, anorm, fresid, work)
Definition pslafchk.f:3
subroutine psmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition psmatgen.f:4
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3
integer function iceil(inum, idenom)
Definition iceil.f:2
subroutine infog1l(gindx, nb, nprocs, myroc, isrcproc, lindx, rocsrc)
Definition infog1l.f:3
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
logical function lsamen(n, ca, cb)
Definition pblastst.f:1457
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pschekpad(ictxt, mess, m, n, a, lda, ipre, ipost, chkval)
Definition pschekpad.f:3
subroutine psfillpad(ictxt, m, n, a, lda, ipre, ipost, chkval)
Definition psfillpad.f:2
subroutine psgelqf(m, n, a, ia, ja, desca, tau, work, lwork, info)
Definition psgelqf.f:3
subroutine psgelqrv(m, n, a, ia, ja, desca, tau, work)
Definition psgelqrv.f:2
subroutine psgeqlf(m, n, a, ia, ja, desca, tau, work, lwork, info)
Definition psgeqlf.f:3
subroutine psgeqlrv(m, n, a, ia, ja, desca, tau, work)
Definition psgeqlrv.f:2
subroutine psgeqpf(m, n, a, ia, ja, desca, ipiv, tau, work, lwork, info)
Definition psgeqpf.f:3
subroutine psgeqrf(m, n, a, ia, ja, desca, tau, work, lwork, info)
Definition psgeqrf.f:3
subroutine psgeqrrv(m, n, a, ia, ja, desca, tau, work)
Definition psgeqrrv.f:2
subroutine psgerqf(m, n, a, ia, ja, desca, tau, work, lwork, info)
Definition psgerqf.f:3
subroutine psgerqrv(m, n, a, ia, ja, desca, tau, work)
Definition psgerqrv.f:2
real function pslange(norm, m, n, a, ia, ja, desca, work)
Definition pslange.f:3
program psqrdriver
Definition psqrdriver.f:1
subroutine psqppiv(m, n, a, ia, ja, desca, ipiv)
Definition psqrdriver.f:869
subroutine psqrinfo(summry, nout, nfact, factor, ldfact, nmat, mval, ldmval, nval, ldnval, nnb, mbval, ldmbval, nbval, ldnbval, ngrids, pval, ldpval, qval, ldqval, thresh, work, iam, nprocs)
Definition psqrinfo.f:6
subroutine pstzrzf(m, n, a, ia, ja, desca, tau, work, lwork, info)
Definition pstzrzf.f:3
subroutine pstzrzrv(m, n, a, ia, ja, desca, tau, work)
Definition pstzrzrv.f:2
subroutine slboot()
Definition sltimer.f:2
subroutine sltimer(i)
Definition sltimer.f:47
subroutine slcombine(ictxt, scope, op, timetype, n, ibeg, times)
Definition sltimer.f:267