ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pslsdriver.f
Go to the documentation of this file.
1  PROGRAM pslsdriver
2 *
3 * -- ScaLAPACK routine (version 1.7) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * May 28, 2001
7 *
8 * Purpose
9 * =======
10 *
11 * PSLSDRIVER is the main test program for the REAL
12 * SCALAPACK (full rank) Least Squares routines. This test driver solves
13 * full-rank least square problems.
14 *
15 * The program must be driven by a short data file. An annotated
16 * example of a data file can be obtained by deleting the first 3
17 * characters from the following 17 lines:
18 * 'ScaLapack LS solve input file'
19 * 'Intel iPSC/860 hypercube, gamma model.'
20 * 'LS.out' output file name (if any)
21 * 6 device out
22 * 4 number of problems sizes
23 * 55 17 31 201 values of M
24 * 5 71 31 201 values of N
25 * 3 number of NB's
26 * 2 3 5 values of NB
27 * 3 number of NRHS's
28 * 2 3 5 values of NRHS
29 * 2 number of NBRHS's
30 * 1 2 values of NBRHS
31 * 7 number of process grids (ordered P & Q)
32 * 1 2 1 4 2 3 8 values of P
33 * 7 2 4 1 3 2 1 values of Q
34 * 3.0 threshold
35 *
36 * Internal Parameters
37 * ===================
38 *
39 * TOTMEM INTEGER, default = 6200000.
40 * TOTMEM is a machine-specific parameter indicating the
41 * maximum amount of available memory in bytes.
42 * The user should customize TOTMEM to his platform. Remember
43 * to leave room in memory for the operating system, the BLACS
44 * buffer, etc. For example, on a system with 8 MB of memory
45 * per process (e.g., one processor on an Intel iPSC/860), the
46 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
47 * code, BLACS buffer, etc). However, for PVM, we usually set
48 * TOTMEM = 2000000. Some experimenting with the maximum value
49 * of TOTMEM may be required.
50 * INTGSZ INTEGER, default = 4 bytes.
51 * REALSZ INTEGER, default = 4 bytes.
52 * INTGSZ and REALSZ indicate the length in bytes on the
53 * given platform for an integer and a single precision real.
54 * MEM REAL array, dimension ( TOTMEM / REALSZ )
55 * All arrays used by SCALAPACK routines are allocated from
56 * this array and referenced by pointers. The integer IPA,
57 * for example, is a pointer to the starting element of MEM for
58 * the matrix A.
59 *
60 * =====================================================================
61 *
62 * .. Parameters ..
63  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
64  $ lld_, mb_, m_, nb_, n_, rsrc_
65  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
66  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
67  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
68  INTEGER memsiz, ntests, realsz, totmem
69  REAL padval
70  REAL one, zero
71  parameter( realsz = 4, totmem = 2000000,
72  $ memsiz = totmem / realsz, ntests = 20,
73  $ padval = -9923.0e+0 )
74  parameter( one = 1.0e+0, zero = 0.0e+0 )
75 * ..
76 * .. Local Scalars ..
77  LOGICAL check, tpsd
78  CHARACTER trans
79  CHARACTER*6 passed
80  CHARACTER*80 outfile
81  INTEGER hh, i, iam, iaseed, ibseed, ictxt, ii, imidpad,
82  $ info, ipa, ipb, ipostpad, iprepad, ipw, ipw2,
83  $ ipx, iscale, itran, itype, j, jj, k, kfail, kk,
84  $ kpass, kskip, ktests, lcm, lcmp, ltau, lwf,
85  $ lwork, lws, m, mnp, mnrhsp, mp, mq, mycol,
86  $ myrow, n, nb, nbrhs, ncols, ngrids, nmat, nnb,
87  $ nnbr, nnr, nnrhsq, nout, np, npcol, nprocs,
88  $ nprow, nrows, nq, nrhs, nrhsp, nrhsq, worksiz
89  REAL anorm, bnorm, sresid, thresh
90  DOUBLE PRECISION addfac, adds, mulfac, mults, nops, tmflops
91 * ..
92 * .. Local Arrays ..
93  INTEGER desca( dlen_ ), descb( dlen_ ), descw( lld_ ),
94  $ descx( dlen_ ), ierr( 2 ), mval( ntests ),
95  $ nbrval( ntests ), nbval( ntests ),
96  $ nrval( ntests ), nval( ntests ),
97  $ pval( ntests ), qval( ntests )
98  REAL mem( memsiz ), result( 2 )
99  DOUBLE PRECISION ctime( 1 ), wtime( 1 )
100 * ..
101 * .. External Subroutines ..
102  EXTERNAL blacs_barrier, blacs_exit, blacs_get,
103  $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
104  $ blacs_pinfo, descinit, igsum2d, pschekpad,
105  $ psfillpad, psgels, psgemm, pslacpy,
106  $ pslsinfo, psmatgen, psnrm2, psscal,
108 * ..
109 * .. External Functions ..
110  LOGICAL lsame
111  INTEGER iceil, ilcm, numroc
112  REAL pslange, psqrt14, psqrt17
113  EXTERNAL iceil, ilcm, lsame, numroc, pslange,
114  $ psqrt14, psqrt17
115 * ..
116 * .. Intrinsic Functions ..
117  INTRINSIC max, min
118 * ..
119 * .. Data Statements ..
120  DATA ktests, kpass, kfail, kskip / 4*0 /
121 * ..
122 * .. Executable Statements ..
123 *
124 * Get starting information
125 *
126  CALL blacs_pinfo( iam, nprocs )
127 *
128  iaseed = 100
129  ibseed = 200
130  CALL pslsinfo( outfile, nout, nmat, mval, ntests, nval,
131  $ ntests, nnb, nbval, ntests, nnr, nrval, ntests,
132  $ nnbr, nbrval, ntests, ngrids, pval, ntests, qval,
133  $ ntests, thresh, mem, iam, nprocs )
134  check = ( thresh.GE.0.0e+0 )
135 *
136 * Print headings
137 *
138  IF( iam.EQ.0 ) THEN
139  WRITE( nout, fmt = * )
140  WRITE( nout, fmt = 9995 )
141  WRITE( nout, fmt = 9994 )
142  WRITE( nout, fmt = * )
143  END IF
144 *
145 * Loop over different process grids
146 *
147  DO 90 i = 1, ngrids
148 *
149  nprow = pval( i )
150  npcol = qval( i )
151 *
152 * Make sure grid information is correct
153 *
154  ierr( 1 ) = 0
155  IF( nprow.LT.1 ) THEN
156  IF( iam.EQ.0 )
157  $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
158  ierr( 1 ) = 1
159  ELSE IF( npcol.LT.1 ) THEN
160  IF( iam.EQ.0 )
161  $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
162  ierr( 1 ) = 1
163  ELSE IF( nprow*npcol.GT.nprocs ) THEN
164  IF( iam.EQ.0 )
165  $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
166  ierr( 1 ) = 1
167  END IF
168 *
169  IF( ierr( 1 ).GT.0 ) THEN
170  IF( iam.EQ.0 )
171  $ WRITE( nout, fmt = 9997 ) 'grid'
172  kskip = kskip + 1
173  GO TO 90
174  END IF
175 *
176 * Define process grid
177 *
178  CALL blacs_get( -1, 0, ictxt )
179  CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
180  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
181 *
182 * Go to bottom of loop if this case doesn't use my process
183 *
184  IF( ( myrow.GE.nprow ).OR.( mycol.GE.npcol ) )
185  $ GO TO 90
186 *
187  DO 80 j = 1, nmat
188 *
189  m = mval( j )
190  n = nval( j )
191 *
192 * Make sure matrix information is correct
193 *
194  ierr( 1 ) = 0
195  IF( m.LT.1 ) THEN
196  IF( iam.EQ.0 )
197  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'M', m
198  ierr( 1 ) = 1
199  ELSE IF( n.LT.1 ) THEN
200  IF( iam.EQ.0 )
201  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
202  ierr( 1 ) = 1
203  END IF
204 *
205 * Make sure no one had error
206 *
207  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
208 *
209  IF( ierr( 1 ).GT.0 ) THEN
210  IF( iam.EQ.0 )
211  $ WRITE( nout, fmt = 9997 ) 'matrix'
212  kskip = kskip + 1
213  GO TO 80
214  END IF
215 *
216 * Loop over different blocking sizes
217 *
218  DO 70 k = 1, nnb
219 *
220  nb = nbval( k )
221 *
222 * Make sure nb is legal
223 *
224  ierr( 1 ) = 0
225  IF( nb.LT.1 ) THEN
226  ierr( 1 ) = 1
227  IF( iam.EQ.0 )
228  $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
229  END IF
230 *
231 * Check all processes for an error
232 *
233  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
234 *
235  IF( ierr( 1 ).GT.0 ) THEN
236  IF( iam.EQ.0 )
237  $ WRITE( nout, fmt = 9997 ) 'NB'
238  kskip = kskip + 1
239  GO TO 70
240  END IF
241 *
242 * Padding constants
243 *
244  mp = numroc( m, nb, myrow, 0, nprow )
245  mq = numroc( m, nb, mycol, 0, npcol )
246  np = numroc( n, nb, myrow, 0, nprow )
247  mnp = max( mp, np )
248  nq = numroc( n, nb, mycol, 0, npcol )
249 *
250  IF( check ) THEN
251  iprepad = max( nb, mp )
252  imidpad = nb
253  ipostpad = max( nb, nq )
254  ELSE
255  iprepad = 0
256  imidpad = 0
257  ipostpad = 0
258  END IF
259 *
260 * Initialize the array descriptor for the matrix A
261 *
262  CALL descinit( desca, m, n, nb, nb, 0, 0, ictxt,
263  $ max( 1, mp ) + imidpad, ierr( 1 ) )
264 *
265 * Check all processes for an error
266 *
267  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
268 *
269  IF( ierr( 1 ).LT.0 ) THEN
270  IF( iam.EQ.0 )
271  $ WRITE( nout, fmt = 9997 ) 'descriptor'
272  kskip = kskip + 1
273  GO TO 70
274  END IF
275 *
276  DO 60 iscale = 1, 3
277 *
278  itype = iscale
279 *
280 * Assign pointers into MEM for SCALAPACK arrays, A is
281 * allocated starting at position MEM( IPREPAD+1 )
282 *
283  ipa = iprepad + 1
284  ipx = ipa + desca( lld_ )*nq + ipostpad + iprepad
285  ipw = ipx
286 *
287  worksiz = nq + ipostpad
288 *
289 * Check for adequate memory for problem size
290 *
291  ierr( 1 ) = 0
292  IF( ( ipw+worksiz ).GT.memsiz ) THEN
293  IF( iam.EQ.0 )
294  $ WRITE( nout, fmt = 9996 ) 'MEMORY',
295  $ ( ipx+worksiz )*realsz
296  ierr( 1 ) = 1
297  END IF
298 *
299 * Check all processes for an error
300 *
301  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
302  $ 0 )
303 *
304  IF( ierr( 1 ).GT.0 ) THEN
305  IF( iam.EQ.0 )
306  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
307  kskip = kskip + 1
308  GO TO 70
309  END IF
310 *
311  IF( check ) THEN
312  CALL psfillpad( ictxt, mp, nq, mem( ipa-iprepad ),
313  $ desca( lld_ ), iprepad, ipostpad,
314  $ padval )
315  CALL psfillpad( ictxt, worksiz-ipostpad, 1,
316  $ mem( ipw-iprepad ),
317  $ worksiz-ipostpad, iprepad,
318  $ ipostpad, padval )
319  END IF
320 *
321 * Generate the matrix A and calculate its 1-norm
322 *
323  CALL psqrt13( iscale, m, n, mem( ipa ), 1, 1,
324  $ desca, anorm, iaseed, mem( ipw ) )
325 *
326  IF( check ) THEN
327  CALL pschekpad( ictxt, 'PSQRT13', mp, nq,
328  $ mem( ipa-iprepad ), desca( lld_ ),
329  $ iprepad, ipostpad, padval )
330  CALL pschekpad( ictxt, 'PSQRT13',
331  $ worksiz-ipostpad, 1,
332  $ mem( ipw-iprepad ),
333  $ worksiz-ipostpad, iprepad,
334  $ ipostpad, padval )
335  END IF
336 *
337  DO 50 itran = 1, 2
338 *
339  IF( itran.EQ.1 ) THEN
340  nrows = m
341  ncols = n
342  trans = 'N'
343  tpsd = .false.
344  ELSE
345  nrows = n
346  ncols = m
347  trans = 'T'
348  tpsd = .true.
349  END IF
350 *
351 * Loop over the different values for NRHS
352 *
353  DO 40 hh = 1, nnr
354 *
355  nrhs = nrval( hh )
356 *
357  DO 30 kk = 1, nnbr
358 *
359  nbrhs = nbrval( kk )
360 *
361  nrhsp = numroc( nrhs, nbrhs, myrow, 0,
362  $ nprow )
363  nrhsq = numroc( nrhs, nbrhs, mycol, 0,
364  $ npcol )
365 *
366 * Define Array descriptor for rhs MAX(M,N)xNRHS
367 *
368  CALL descinit( descx, max( m, n ), nrhs, nb,
369  $ nbrhs, 0, 0, ictxt,
370  $ max( 1, mnp ) + imidpad,
371  $ ierr( 1 ) )
372  IF( tpsd ) THEN
373  CALL descinit( descw, m, nrhs, nb, nbrhs,
374  $ 0, 0, ictxt, max( 1, mp ) +
375  $ imidpad, ierr( 2 ) )
376  ELSE
377  CALL descinit( descw, n, nrhs, nb, nbrhs,
378  $ 0, 0, ictxt, max( 1, np ) +
379  $ imidpad, ierr( 2 ) )
380  END IF
381 *
382 * Check all processes for an error
383 *
384  CALL igsum2d( ictxt, 'All', ' ', 2, 1, ierr,
385  $ 2, -1, 0 )
386 *
387  IF( ierr( 1 ).LT.0 .OR. ierr( 2 ).LT.0 ) THEN
388  IF( iam.EQ.0 )
389  $ WRITE( nout, fmt = 9997 ) 'descriptor'
390  kskip = kskip + 1
391  GO TO 30
392  END IF
393 *
394 * Check for enough memory
395 *
396  ipx = ipa + desca( lld_ )*nq + ipostpad +
397  $ iprepad
398  ipw = ipx + descx( lld_ )*nrhsq + ipostpad +
399  $ iprepad
400  worksiz = descw( lld_ )*nrhsq + ipostpad
401 *
402  ierr( 1 ) = 0
403  IF( ipw+worksiz.GT.memsiz ) THEN
404  IF( iam.EQ.0 )
405  $ WRITE( nout, fmt = 9996 ) 'Generation',
406  $ ( ipw+worksiz )*realsz
407  ierr( 1 ) = 1
408  END IF
409 *
410 * Check all processes for an error
411 *
412  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
413  $ 1, -1, 0 )
414 *
415  IF( ierr( 1 ).GT.0 ) THEN
416  IF( iam.EQ.0 )
417  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
418  kskip = kskip + 1
419  GO TO 30
420  END IF
421 *
422 * Generate RHS
423 *
424  IF( tpsd ) THEN
425  CALL psmatgen( ictxt, 'No', 'No',
426  $ descw( m_ ), descw( n_ ),
427  $ descw( mb_ ), descw( nb_ ),
428  $ mem( ipw ), descw( lld_ ),
429  $ descw( rsrc_ ),
430  $ descw( csrc_ ), ibseed, 0,
431  $ mp, 0, nrhsq, myrow, mycol,
432  $ nprow, npcol )
433  ELSE
434  CALL psmatgen( ictxt, 'No', 'No',
435  $ descw( m_ ), descw( n_ ),
436  $ descw( mb_ ), descw( nb_ ),
437  $ mem( ipw ), descw( lld_ ),
438  $ descw( rsrc_ ),
439  $ descw( csrc_ ), ibseed, 0,
440  $ np, 0, nrhsq, myrow, mycol,
441  $ nprow, npcol )
442  END IF
443 *
444  IF( check ) THEN
445  CALL psfillpad( ictxt, mnp, nrhsq,
446  $ mem( ipx-iprepad ),
447  $ descx( lld_ ), iprepad,
448  $ ipostpad, padval )
449  IF( tpsd ) THEN
450  CALL psfillpad( ictxt, mp, nrhsq,
451  $ mem( ipw-iprepad ),
452  $ descw( lld_ ), iprepad,
453  $ ipostpad, padval )
454  ELSE
455  CALL psfillpad( ictxt, np, nrhsq,
456  $ mem( ipw-iprepad ),
457  $ descw( lld_ ), iprepad,
458  $ ipostpad, padval )
459  END IF
460  END IF
461 *
462  DO 10 jj = 1, nrhs
463  CALL psnrm2( ncols, bnorm, mem( ipw ), 1,
464  $ jj, descw, 1 )
465  IF( bnorm.GT.zero )
466  $ CALL psscal( ncols, one / bnorm,
467  $ mem( ipw ), 1, jj, descw,
468  $ 1 )
469  10 CONTINUE
470 *
471  CALL psgemm( trans, 'N', nrows, nrhs, ncols,
472  $ one, mem( ipa ), 1, 1, desca,
473  $ mem( ipw ), 1, 1, descw, zero,
474  $ mem( ipx ), 1, 1, descx )
475 *
476  IF( check ) THEN
477 *
478 * check for memory overwrite
479 *
480  CALL pschekpad( ictxt, 'Generation', mp,
481  $ nq, mem( ipa-iprepad ),
482  $ desca( lld_ ), iprepad,
483  $ ipostpad, padval )
484  CALL pschekpad( ictxt, 'Generation', mnp,
485  $ nrhsq, mem( ipx-iprepad ),
486  $ descx( lld_ ), iprepad,
487  $ ipostpad, padval )
488  IF( tpsd ) THEN
489  CALL pschekpad( ictxt, 'Generation',
490  $ mp, nrhsq,
491  $ mem( ipw-iprepad ),
492  $ descw( lld_ ), iprepad,
493  $ ipostpad, padval )
494  ELSE
495  CALL pschekpad( ictxt, 'Generation',
496  $ np, nrhsq,
497  $ mem( ipw-iprepad ),
498  $ descw( lld_ ), iprepad,
499  $ ipostpad, padval )
500  END IF
501 *
502 * Allocate space for copy of rhs
503 *
504  ipb = ipw
505 *
506  IF( tpsd ) THEN
507  CALL descinit( descb, n, nrhs, nb,
508  $ nbrhs, 0, 0, ictxt,
509  $ max( 1, np ) + imidpad,
510  $ ierr( 1 ) )
511  ELSE
512  CALL descinit( descb, m, nrhs, nb,
513  $ nbrhs, 0, 0, ictxt,
514  $ max( 1, mp ) + imidpad,
515  $ ierr( 1 ) )
516  END IF
517 *
518 * Check all processes for an error
519 *
520  CALL igsum2d( ictxt, 'All', ' ', 1, 1,
521  $ ierr, 1, -1, 0 )
522 *
523  IF( ierr( 1 ).LT.0 ) THEN
524  IF( iam.EQ.0 )
525  $ WRITE( nout, fmt = 9997 )
526  $ 'descriptor'
527  kskip = kskip + 1
528  GO TO 30
529  END IF
530 *
531  ipw = ipb + descb( lld_ )*nrhsq +
532  $ ipostpad + iprepad
533 *
534  END IF
535 *
536 * Calculate the amount of workspace for PSGELS
537 *
538  IF( m.GE.n ) THEN
539  ltau = numroc( min(m,n), nb, mycol, 0,
540  $ npcol )
541  lwf = nb * ( mp + nq + nb )
542  lws = max( ( nb*( nb - 1 ) ) / 2,
543  $ ( mp + nrhsq ) * nb ) + nb*nb
544  ELSE
545  lcm = ilcm( nprow, npcol )
546  lcmp = lcm / nprow
547  ltau = numroc( min(m,n), nb, myrow, 0,
548  $ nprow )
549  lwf = nb * ( mp + nq + nb )
550  lws = max( ( nb*( nb - 1 ) ) / 2, ( np +
551  $ max( nq + numroc( numroc( n, nb, 0,
552  $ 0, nprow ), nb, 0, 0, lcmp ),
553  $ nrhsq ) ) * nb ) + nb*nb
554  END IF
555 *
556  lwork = ltau + max( lwf, lws )
557  worksiz = lwork + ipostpad
558 *
559 * Check for adequate memory for problem size
560 *
561  ierr( 1 ) = 0
562  IF( ipw+worksiz.GT.memsiz ) THEN
563  IF( iam.EQ.0 )
564  $ WRITE( nout, fmt = 9996 ) 'solve',
565  $ ( ipw+worksiz )*realsz
566  ierr( 1 ) = 1
567  END IF
568 *
569 * Check all processes for an error
570 *
571  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
572  $ 1, -1, 0 )
573 *
574  IF( ierr( 1 ).GT.0 ) THEN
575  IF( iam.EQ.0 )
576  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
577  kskip = kskip + 1
578  GO TO 30
579  END IF
580 *
581  IF( check ) THEN
582 *
583 * Make the copy of the right hand side
584 *
585  CALL pslacpy( 'All', nrows, nrhs,
586  $ mem( ipx ), 1, 1, descx,
587  $ mem( ipb ), 1, 1, descb )
588 *
589  IF( tpsd ) THEN
590  CALL psfillpad( ictxt, np, nrhsq,
591  $ mem( ipb-iprepad ),
592  $ descb( lld_ ), iprepad,
593  $ ipostpad, padval )
594  ELSE
595  CALL psfillpad( ictxt, mp, nrhsq,
596  $ mem( ipb-iprepad ),
597  $ descb( lld_ ), iprepad,
598  $ ipostpad, padval )
599  END IF
600  CALL psfillpad( ictxt, lwork, 1,
601  $ mem( ipw-iprepad ),
602  $ lwork, iprepad,
603  $ ipostpad, padval )
604  END IF
605 *
606  CALL slboot( )
607  CALL blacs_barrier( ictxt, 'All' )
608  CALL sltimer( 1 )
609 *
610 * Solve the LS or overdetermined system
611 *
612  CALL psgels( trans, m, n, nrhs, mem( ipa ),
613  $ 1, 1, desca, mem( ipx ), 1, 1,
614  $ descx, mem( ipw ), lwork, info )
615 *
616  CALL sltimer( 1 )
617 *
618  IF( check ) THEN
619 *
620 * check for memory overwrite
621 *
622  CALL pschekpad( ictxt, 'PSGELS', mp,
623  $ nq, mem( ipa-iprepad ),
624  $ desca( lld_ ), iprepad,
625  $ ipostpad, padval )
626  CALL pschekpad( ictxt, 'PSGELS', mnp,
627  $ nrhsq, mem( ipx-iprepad ),
628  $ descx( lld_ ), iprepad,
629  $ ipostpad, padval )
630  CALL pschekpad( ictxt, 'PSGELS', lwork,
631  $ 1, mem( ipw-iprepad ),
632  $ lwork, iprepad,
633  $ ipostpad, padval )
634  END IF
635 *
636 * Regenerate A in place for testing and next
637 * iteration
638 *
639  CALL psqrt13( iscale, m, n, mem( ipa ), 1, 1,
640  $ desca, anorm, iaseed,
641  $ mem( ipw ) )
642 *
643 * check the solution to rhs
644 *
645  IF( check ) THEN
646 *
647 * Am I going to call PSQRT17 ?
648 *
649  IF( ( m.GE.n .AND. ( .NOT.tpsd ) ) .OR.
650  $ ( m.LT.n .AND. tpsd ) ) THEN
651 *
652 * Call PSQRT17 first, A, X, and B remain
653 * unchanged. Solving LS system
654 *
655 * Check amount of memory for PSQRT17
656 *
657  IF( tpsd ) THEN
658  worksiz = np*nrhsq + nrhsp*mq
659  ipw2 = ipw + worksiz
660  worksiz = worksiz +
661  $ max( nq, max( mq, nrhsq ) ) +
662  $ ipostpad
663  ELSE
664  worksiz = mp*nrhsq + nrhsp*nq
665  ipw2 = ipw + worksiz
666  worksiz = worksiz +
667  $ max( nq, nrhsq ) +
668  $ ipostpad
669  END IF
670 *
671 * Check for adequate memory for problem
672 * size
673 *
674  ierr( 1 ) = 0
675  IF( ( ipw+worksiz ).GT.memsiz ) THEN
676  IF( iam.EQ.0 )
677  $ WRITE( nout, fmt = 9996 )
678  $ 'MEMORY', ( ipw+worksiz )*realsz
679  ierr( 1 ) = 1
680  END IF
681 *
682 * Check all processes for an error
683 *
684  CALL igsum2d( ictxt, 'All', ' ', 1, 1,
685  $ ierr, 1, -1, 0 )
686 *
687  IF( ierr( 1 ).GT.0 ) THEN
688  IF( iam.EQ.0 )
689  $ WRITE( nout, fmt = 9997 )
690  $ 'MEMORY'
691  kskip = kskip + 1
692  GO TO 30
693  END IF
694 *
695  CALL psfillpad( ictxt,
696  $ worksiz-ipostpad, 1,
697  $ mem( ipw-iprepad ),
698  $ worksiz-ipostpad,
699  $ iprepad, ipostpad,
700  $ padval )
701 *
702  result( 2 ) = psqrt17( trans, 1, m, n,
703  $ nrhs,
704  $ mem( ipa ),
705  $ 1, 1, desca,
706  $ mem( ipx ), 1,
707  $ 1, descx,
708  $ mem( ipb ),
709  $ 1, 1, descb,
710  $ mem( ipw ),
711  $ mem( ipw2 ) )
712  sresid = result( 2 )
713 *
714  CALL pschekpad( ictxt, 'PSQRT17',
715  $ mp, nq,
716  $ mem( ipa-iprepad ),
717  $ desca( lld_ ),
718  $ iprepad, ipostpad,
719  $ padval )
720  CALL pschekpad( ictxt, 'PSQRT17',
721  $ mnp, nrhsq,
722  $ mem( ipx-iprepad ),
723  $ descx( lld_ ), iprepad,
724  $ ipostpad, padval )
725  IF( tpsd ) THEN
726  CALL pschekpad( ictxt, 'PSQRT17',
727  $ np, nrhsq,
728  $ mem( ipb-iprepad ),
729  $ descb( lld_ ),
730  $ iprepad, ipostpad,
731  $ padval )
732  ELSE
733  CALL pschekpad( ictxt, 'PSQRT17',
734  $ mp, nrhsq,
735  $ mem( ipb-iprepad ),
736  $ descb( lld_ ),
737  $ iprepad, ipostpad,
738  $ padval )
739  END IF
740  CALL pschekpad( ictxt, 'PSQRT17',
741  $ worksiz-ipostpad, 1,
742  $ mem( ipw-iprepad ),
743  $ worksiz-ipostpad,
744  $ iprepad, ipostpad,
745  $ padval )
746  END IF
747 *
748 * Call PSQRT16, B will be destroyed.
749 *
750  IF( tpsd ) THEN
751  worksiz = mp + ipostpad
752  ELSE
753  worksiz = nq + ipostpad
754  END IF
755 *
756 * Check for adequate memory for problem size
757 *
758  ierr( 1 ) = 0
759  IF( ( ipw+worksiz ).GT.memsiz ) THEN
760  IF( iam.EQ.0 )
761  $ WRITE( nout, fmt = 9996 ) 'MEMORY',
762  $ ( ipw+worksiz )*realsz
763  ierr( 1 ) = 1
764  END IF
765 *
766 * Check all processes for an error
767 *
768  CALL igsum2d( ictxt, 'All', ' ', 1, 1,
769  $ ierr, 1, -1, 0 )
770 *
771  IF( ierr( 1 ).GT.0 ) THEN
772  IF( iam.EQ.0 )
773  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
774  kskip = kskip + 1
775  GO TO 30
776  END IF
777 *
778  CALL psfillpad( ictxt,
779  $ worksiz-ipostpad, 1,
780  $ mem( ipw-iprepad ),
781  $ worksiz-ipostpad,
782  $ iprepad, ipostpad,
783  $ padval )
784 *
785  CALL psqrt16( trans, m, n, nrhs,
786  $ mem( ipa ), 1, 1, desca,
787  $ mem( ipx ), 1, 1, descx,
788  $ mem( ipb ), 1, 1, descb,
789  $ mem( ipw ), result( 1 ) )
790 *
791  CALL pschekpad( ictxt, 'PSQRT16',
792  $ mp, nq,
793  $ mem( ipa-iprepad ),
794  $ desca( lld_ ),
795  $ iprepad, ipostpad,
796  $ padval )
797  CALL pschekpad( ictxt, 'PSQRT16',
798  $ mnp, nrhsq,
799  $ mem( ipx-iprepad ),
800  $ descx( lld_ ), iprepad,
801  $ ipostpad, padval )
802  IF( tpsd ) THEN
803  CALL pschekpad( ictxt, 'PSQRT16',
804  $ np, nrhsq,
805  $ mem( ipb-iprepad ),
806  $ descb( lld_ ),
807  $ iprepad, ipostpad,
808  $ padval )
809  ELSE
810  CALL pschekpad( ictxt, 'PSQRT16',
811  $ mp, nrhsq,
812  $ mem( ipb-iprepad ),
813  $ descb( lld_ ),
814  $ iprepad, ipostpad,
815  $ padval )
816  END IF
817  CALL pschekpad( ictxt, 'PSQRT16',
818  $ worksiz-ipostpad, 1,
819  $ mem( ipw-iprepad ),
820  $ worksiz-ipostpad,
821  $ iprepad, ipostpad,
822  $ padval )
823 *
824 * Call PSQRT14
825 *
826  IF( ( m.GE.n .AND. tpsd ) .OR.
827  $ ( m.LT.n .AND. ( .NOT.tpsd ) ) ) THEN
828 *
829  ipw = ipb
830 *
831  IF( tpsd ) THEN
832 *
833  nnrhsq = numroc( n+nrhs, nb, mycol,
834  $ 0, npcol )
835  ltau = numroc( min( m, n+nrhs ), nb,
836  $ mycol, 0, npcol )
837  lwf = nb * ( nb + mp + nnrhsq )
838  worksiz = mp * nnrhsq + ltau + lwf +
839  $ ipostpad
840 *
841  ELSE
842 *
843  mnrhsp = numroc( m+nrhs, nb, myrow,
844  $ 0, nprow )
845  ltau = numroc( min( m+nrhs, n ), nb,
846  $ myrow, 0, nprow )
847  lwf = nb * ( nb + mnrhsp + nq )
848  worksiz = mnrhsp * nq + ltau + lwf +
849  $ ipostpad
850 *
851  END IF
852 *
853 * Check for adequate memory for problem
854 * size
855 *
856  ierr( 1 ) = 0
857  IF( ( ipw+worksiz ).GT.memsiz ) THEN
858  IF( iam.EQ.0 )
859  $ WRITE( nout, fmt = 9996 )
860  $ 'MEMORY', ( ipw+worksiz )*realsz
861  ierr( 1 ) = 1
862  END IF
863 *
864 * Check all processes for an error
865 *
866  CALL igsum2d( ictxt, 'All', ' ', 1, 1,
867  $ ierr, 1, -1, 0 )
868 *
869  IF( ierr( 1 ).GT.0 ) THEN
870  IF( iam.EQ.0 )
871  $ WRITE( nout, fmt = 9997 )
872  $ 'MEMORY'
873  kskip = kskip + 1
874  GO TO 30
875  END IF
876 *
877  CALL psfillpad( ictxt,
878  $ worksiz-ipostpad, 1,
879  $ mem( ipw-iprepad ),
880  $ worksiz-ipostpad,
881  $ iprepad, ipostpad,
882  $ padval )
883 *
884 * Solve underdetermined system
885 *
886  result( 2 ) = psqrt14( trans, m, n,
887  $ nrhs,
888  $ mem( ipa ), 1,
889  $ 1, desca,
890  $ mem( ipx ),
891  $ 1, 1, descx,
892  $ mem( ipw ) )
893  sresid = result( 2 )
894 *
895  CALL pschekpad( ictxt, 'PSQRT14',
896  $ mp, nq,
897  $ mem( ipa-iprepad ),
898  $ desca( lld_ ),
899  $ iprepad, ipostpad,
900  $ padval )
901  CALL pschekpad( ictxt, 'PSQRT14',
902  $ mnp, nrhsq,
903  $ mem( ipx-iprepad ),
904  $ descx( lld_ ), iprepad,
905  $ ipostpad, padval )
906  CALL pschekpad( ictxt, 'PSQRT14',
907  $ worksiz-ipostpad, 1,
908  $ mem( ipw-iprepad ),
909  $ worksiz-ipostpad,
910  $ iprepad, ipostpad,
911  $ padval )
912  END IF
913 *
914 * Print information about the tests that
915 * did not pass the threshold.
916 *
917  passed = 'PASSED'
918  DO 20 ii = 1, 2
919  IF( ( result( ii ).GE.thresh ) .AND.
920  $ ( result( ii )-result( ii ).EQ.0.0e+0
921  $ ) ) THEN
922  IF( iam.EQ.0 )
923  $ WRITE( nout, fmt = 9986 )trans,
924  $ m, n, nrhs, nb, itype, ii,
925  $ result( ii )
926  kfail = kfail + 1
927  passed = 'FAILED'
928  ELSE
929  kpass = kpass + 1
930  END IF
931  20 CONTINUE
932 *
933  ELSE
934 *
935 * By-pass the solve check
936 *
937  kpass = kpass + 1
938  sresid = sresid - sresid
939  passed = 'BYPASS'
940 *
941  END IF
942 *
943 * Gather maximum of all CPU and WALL clock
944 * timings
945 *
946  CALL slcombine( ictxt, 'All', '>', 'W', 1, 1,
947  $ wtime )
948  CALL slcombine( ictxt, 'All', '>', 'C', 1, 1,
949  $ ctime )
950 *
951 * Print results
952 *
953  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
954  addfac = 1
955  mulfac = 1
956  IF( m.GE.n ) THEN
957 *
958 * NOPS = SOPLA( 'SGEQRF', M, N, 0, 0,
959 * NB ) + SOPLA( 'SORMQR', M, NRHS, N,
960 * 0, NB )
961 *
962  mults = n*( ( ( 23.d0 / 6.d0 )+m+n /
963  $ 2.d0 )+ n*( m-n / 3.d0 ) ) +
964  $ n*nrhs*( 2.d0*m+2.d0-n )
965  adds = n*( ( 5.d0 / 6.d0 )+n*
966  $ ( 1.d0 / 2.d0+( m-n / 3.d0 ) ) )
967  $ + n*nrhs*( 2.d0*m+1.d0-n )
968  ELSE
969 *
970 * NOPS = SOPLA( 'SGELQF', M, N, 0, 0,
971 * NB ) + SOPLA( 'SORMLQ', M,
972 * NRHS, N, 0, NB )
973 *
974  mults = m*( ( ( 29.d0 / 6.d0 )+2.d0*n-m
975  $ / 2.d0 )+m*( n-m / 3.d0 ) )
976  $ + n*nrhs*( 2.d0*m+2.d0-n )
977  adds = m*( ( 5.d0 / 6.d0 )+m / 2.d0+m*
978  $ ( n-m / 3.d0 ) )
979  $ + n*nrhs*( 2.d0*m+1.d0-n )
980  END IF
981  nops = addfac*adds + mulfac*mults
982 *
983 * Calculate total megaflops, for WALL and
984 * CPU time, and print output
985 *
986 * Print WALL time if machine supports it
987 *
988  IF( wtime( 1 ).GT.0.0d+0 ) THEN
989  tmflops = nops / ( wtime( 1 )*1.0d+6 )
990  ELSE
991  tmflops = 0.0d+0
992  END IF
993 *
994  IF( wtime( 1 ).GE.0.0d+0 )
995  $ WRITE( nout, fmt = 9993 )
996  $ 'WALL', trans, m, n, nb, nrhs,
997  $ nbrhs, nprow, npcol, wtime( 1 ),
998  $ tmflops, passed
999 *
1000 * Print CPU time if machine supports it
1001 *
1002  IF( ctime( 1 ).GT.0.0d+0 ) THEN
1003  tmflops = nops / ( ctime( 1 )*1.0d+6 )
1004  ELSE
1005  tmflops = 0.0d+0
1006  END IF
1007 *
1008  IF( ctime( 1 ).GE.0.0d+0 )
1009  $ WRITE( nout, fmt = 9993 )
1010  $ 'CPU ', trans, m, n, nb, nrhs,
1011  $ nbrhs, nprow, npcol, ctime( 1 ),
1012  $ tmflops, passed
1013  END IF
1014  30 CONTINUE
1015  40 CONTINUE
1016  50 CONTINUE
1017  60 CONTINUE
1018  70 CONTINUE
1019  80 CONTINUE
1020  CALL blacs_gridexit( ictxt )
1021  90 CONTINUE
1022 *
1023 * Print out ending messages and close output file
1024 *
1025  IF( iam.EQ.0 ) THEN
1026  ktests = kpass + kfail + kskip
1027  WRITE( nout, fmt = * )
1028  WRITE( nout, fmt = 9992 ) ktests
1029  IF( check ) THEN
1030  WRITE( nout, fmt = 9991 ) kpass
1031  WRITE( nout, fmt = 9989 ) kfail
1032  ELSE
1033  WRITE( nout, fmt = 9990 ) kpass
1034  END IF
1035  WRITE( nout, fmt = 9988 ) kskip
1036  WRITE( nout, fmt = * )
1037  WRITE( nout, fmt = * )
1038  WRITE( nout, fmt = 9987 )
1039  IF( nout.NE.6 .AND. nout.NE.0 )
1040  $ CLOSE ( nout )
1041  END IF
1042 *
1043  CALL blacs_exit( 0 )
1044 *
1045  9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
1046  $ '; It should be at least 1' )
1047  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
1048  $ i4 )
1049  9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
1050  9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
1051  $ i11 )
1052  9995 FORMAT( 'Time TRANS M N NB NRHS NBRHS P Q ',
1053  $ 'LS Time MFLOPS CHECK' )
1054  9994 FORMAT( '---- ----- ------ ------ --- ----- ----- ----- ----- ',
1055  $ '--------- -------- ------' )
1056  9993 FORMAT( a4, 3x, a1, 3x, i6, 1x, i6, 1x, i3, 1x, i5, 1x, i5, 1x,
1057  $ i5, 1x, i5, 1x, f9.2, 1x, f8.2, 1x, a6 )
1058  9992 FORMAT( 'Finished', i6, ' tests, with the following results:' )
1059  9991 FORMAT( i5, ' tests completed and passed residual checks.' )
1060  9990 FORMAT( i5, ' tests completed without checking.' )
1061  9989 FORMAT( i5, ' tests completed and failed residual checks.' )
1062  9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
1063  9987 FORMAT( 'END OF TESTS.' )
1064  9986 FORMAT( ' TRANS=''', a1, ''', M=', i5, ', N=', i5, ', NRHS=', i4,
1065  $ ', NB=', i4, ', type', i2, ', test(', i2, ')=', g12.5 )
1066 *
1067  stop
1068 *
1069 * End of PSLSDRIVER
1070 *
1071  END
psqrt13
subroutine psqrt13(SCALE, M, N, A, IA, JA, DESCA, NORMA, ISEED, WORK)
Definition: psqrt13.f:3
max
#define max(A, B)
Definition: pcgemr.c:180
psqrt16
subroutine psqrt16(TRANS, M, N, NRHS, A, IA, JA, DESCA, X, IX, JX, DESCX, B, IB, JB, DESCB, RWORK, RESID)
Definition: psqrt16.f:3
ilcm
integer function ilcm(M, N)
Definition: ilcm.f:2
pslsinfo
subroutine pslsinfo(SUMMRY, NOUT, NMAT, MVAL, LDMVAL, NVAL, LDNVAL, NNB, NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, NPROCS)
Definition: pslsinfo.f:6
pslange
real function pslange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pslange.f:3
sltimer
subroutine sltimer(I)
Definition: sltimer.f:47
pslsdriver
program pslsdriver
Definition: pslsdriver.f:1
lsame
logical function lsame(CA, CB)
Definition: tools.f:1724
pschekpad
subroutine pschekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pschekpad.f:3
psqrt14
real function psqrt14(TRANS, M, N, NRHS, A, IA, JA, DESCA, X, IX, JX, DESCX, WORK)
Definition: psqrt14.f:3
pslacpy
subroutine pslacpy(UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, DESCB)
Definition: pslacpy.f:3
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
slboot
subroutine slboot()
Definition: sltimer.f:2
psmatgen
subroutine psmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: psmatgen.f:4
psqrt17
real function psqrt17(TRANS, IRESID, M, N, NRHS, A, IA, JA, DESCA, X, IX, JX, DESCX, B, IB, JB, DESCB, WORK, RWORK)
Definition: psqrt17.f:5
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
psgels
subroutine psgels(TRANS, M, N, NRHS, A, IA, JA, DESCA, B, IB, JB, DESCB, WORK, LWORK, INFO)
Definition: psgels.f:3
psfillpad
subroutine psfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: psfillpad.f:2
slcombine
subroutine slcombine(ICTXT, SCOPE, OP, TIMETYPE, N, IBEG, TIMES)
Definition: sltimer.f:267
min
#define min(A, B)
Definition: pcgemr.c:181
iceil
integer function iceil(INUM, IDENOM)
Definition: iceil.f:2