ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdlsdriver.f
Go to the documentation of this file.
1  PROGRAM pdlsdriver
2 *
3 * -- ScaLAPACK routine (version 1.7) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * May 28, 2001
7 *
8 * Purpose
9 * =======
10 *
11 * PDLSDRIVER is the main test program for the DOUBLE PRECISION
12 * SCALAPACK (full rank) Least Squares routines. This test driver solves
13 * full-rank least square problems.
14 *
15 * The program must be driven by a short data file. An annotated
16 * example of a data file can be obtained by deleting the first 3
17 * characters from the following 17 lines:
18 * 'ScaLapack LS solve input file'
19 * 'Intel iPSC/860 hypercube, gamma model.'
20 * 'LS.out' output file name (if any)
21 * 6 device out
22 * 4 number of problems sizes
23 * 55 17 31 201 values of M
24 * 5 71 31 201 values of N
25 * 3 number of NB's
26 * 2 3 5 values of NB
27 * 3 number of NRHS's
28 * 2 3 5 values of NRHS
29 * 2 number of NBRHS's
30 * 1 2 values of NBRHS
31 * 7 number of process grids (ordered P & Q)
32 * 1 2 1 4 2 3 8 values of P
33 * 7 2 4 1 3 2 1 values of Q
34 * 3.0 threshold
35 *
36 * Internal Parameters
37 * ===================
38 *
39 * TOTMEM INTEGER, default = 6200000.
40 * TOTMEM is a machine-specific parameter indicating the
41 * maximum amount of available memory in bytes.
42 * The user should customize TOTMEM to his platform. Remember
43 * to leave room in memory for the operating system, the BLACS
44 * buffer, etc. For example, on a system with 8 MB of memory
45 * per process (e.g., one processor on an Intel iPSC/860), the
46 * parameters we use are TOTMEM=6200000 (leaving 1.8 MB for OS,
47 * code, BLACS buffer, etc). However, for PVM, we usually set
48 * TOTMEM = 2000000. Some experimenting with the maximum value
49 * of TOTMEM may be required.
50 * INTGSZ INTEGER, default = 4 bytes.
51 * DBLESZ INTEGER, default = 8 bytes.
52 * INTGSZ and DBLESZ indicate the length in bytes on the
53 * given platform for an integer and a double precision real.
54 * MEM DOUBLE PRECISION array, dimension ( TOTMEM / DBLESZ )
55 * All arrays used by SCALAPACK routines are allocated from
56 * this array and referenced by pointers. The integer IPA,
57 * for example, is a pointer to the starting element of MEM for
58 * the matrix A.
59 *
60 * =====================================================================
61 *
62 * .. Parameters ..
63  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
64  $ lld_, mb_, m_, nb_, n_, rsrc_
65  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
66  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
67  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
68  INTEGER dblesz, memsiz, ntests, totmem
69  DOUBLE PRECISION padval
70  DOUBLE PRECISION one, zero
71  parameter( dblesz = 8, totmem = 2000000,
72  $ memsiz = totmem / dblesz, ntests = 20,
73  $ padval = -9923.0d+0 )
74  parameter( one = 1.0d+0, zero = 0.0d+0 )
75 * ..
76 * .. Local Scalars ..
77  LOGICAL check, tpsd
78  CHARACTER trans
79  CHARACTER*6 passed
80  CHARACTER*80 outfile
81  INTEGER hh, i, iam, iaseed, ibseed, ictxt, ii, imidpad,
82  $ info, ipa, ipb, ipostpad, iprepad, ipw, ipw2,
83  $ ipx, iscale, itran, itype, j, jj, k, kfail, kk,
84  $ kpass, kskip, ktests, lcm, lcmp, ltau, lwf,
85  $ lwork, lws, m, mnp, mnrhsp, mp, mq, mycol,
86  $ myrow, n, nb, nbrhs, ncols, ngrids, nmat, nnb,
87  $ nnbr, nnr, nnrhsq, nout, np, npcol, nprocs,
88  $ nprow, nrows, nq, nrhs, nrhsp, nrhsq, worksiz
89  REAL thresh
90  DOUBLE PRECISION addfac, adds, anorm, bnorm, mulfac, mults,
91  $ nops, sresid, tmflops
92 * ..
93 * .. Local Arrays ..
94  INTEGER desca( dlen_ ), descb( dlen_ ), descw( lld_ ),
95  $ descx( dlen_ ), ierr( 2 ), mval( ntests ),
96  $ nbrval( ntests ), nbval( ntests ),
97  $ nrval( ntests ), nval( ntests ),
98  $ pval( ntests ), qval( ntests )
99  DOUBLE PRECISION ctime( 1 ), mem( memsiz ), result( 2 ),
100  $ wtime( 1 )
101 * ..
102 * .. External Subroutines ..
103  EXTERNAL blacs_barrier, blacs_exit, blacs_get,
104  $ blacs_gridexit, blacs_gridinfo, blacs_gridinit,
105  $ blacs_pinfo, descinit, igsum2d, pdchekpad,
106  $ pdfillpad, pdgels, pdgemm, pdlacpy,
107  $ pdlsinfo, pdmatgen, pdnrm2, pdscal,
109 * ..
110 * .. External Functions ..
111  LOGICAL lsame
112  INTEGER iceil, ilcm, numroc
113  DOUBLE PRECISION pdlange, pdqrt14, pdqrt17
114  EXTERNAL iceil, ilcm, lsame, numroc, pdlange,
115  $ pdqrt14, pdqrt17
116 * ..
117 * .. Intrinsic Functions ..
118  INTRINSIC max, min
119 * ..
120 * .. Data Statements ..
121  DATA ktests, kpass, kfail, kskip / 4*0 /
122 * ..
123 * .. Executable Statements ..
124 *
125 * Get starting information
126 *
127  CALL blacs_pinfo( iam, nprocs )
128 *
129  iaseed = 100
130  ibseed = 200
131  CALL pdlsinfo( outfile, nout, nmat, mval, ntests, nval,
132  $ ntests, nnb, nbval, ntests, nnr, nrval, ntests,
133  $ nnbr, nbrval, ntests, ngrids, pval, ntests, qval,
134  $ ntests, thresh, mem, iam, nprocs )
135  check = ( thresh.GE.0.0e+0 )
136 *
137 * Print headings
138 *
139  IF( iam.EQ.0 ) THEN
140  WRITE( nout, fmt = * )
141  WRITE( nout, fmt = 9995 )
142  WRITE( nout, fmt = 9994 )
143  WRITE( nout, fmt = * )
144  END IF
145 *
146 * Loop over different process grids
147 *
148  DO 90 i = 1, ngrids
149 *
150  nprow = pval( i )
151  npcol = qval( i )
152 *
153 * Make sure grid information is correct
154 *
155  ierr( 1 ) = 0
156  IF( nprow.LT.1 ) THEN
157  IF( iam.EQ.0 )
158  $ WRITE( nout, fmt = 9999 ) 'GRID', 'nprow', nprow
159  ierr( 1 ) = 1
160  ELSE IF( npcol.LT.1 ) THEN
161  IF( iam.EQ.0 )
162  $ WRITE( nout, fmt = 9999 ) 'GRID', 'npcol', npcol
163  ierr( 1 ) = 1
164  ELSE IF( nprow*npcol.GT.nprocs ) THEN
165  IF( iam.EQ.0 )
166  $ WRITE( nout, fmt = 9998 ) nprow*npcol, nprocs
167  ierr( 1 ) = 1
168  END IF
169 *
170  IF( ierr( 1 ).GT.0 ) THEN
171  IF( iam.EQ.0 )
172  $ WRITE( nout, fmt = 9997 ) 'grid'
173  kskip = kskip + 1
174  GO TO 90
175  END IF
176 *
177 * Define process grid
178 *
179  CALL blacs_get( -1, 0, ictxt )
180  CALL blacs_gridinit( ictxt, 'Row-major', nprow, npcol )
181  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
182 *
183 * Go to bottom of loop if this case doesn't use my process
184 *
185  IF( ( myrow.GE.nprow ).OR.( mycol.GE.npcol ) )
186  $ GO TO 90
187 *
188  DO 80 j = 1, nmat
189 *
190  m = mval( j )
191  n = nval( j )
192 *
193 * Make sure matrix information is correct
194 *
195  ierr( 1 ) = 0
196  IF( m.LT.1 ) THEN
197  IF( iam.EQ.0 )
198  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'M', m
199  ierr( 1 ) = 1
200  ELSE IF( n.LT.1 ) THEN
201  IF( iam.EQ.0 )
202  $ WRITE( nout, fmt = 9999 ) 'MATRIX', 'N', n
203  ierr( 1 ) = 1
204  END IF
205 *
206 * Make sure no one had error
207 *
208  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
209 *
210  IF( ierr( 1 ).GT.0 ) THEN
211  IF( iam.EQ.0 )
212  $ WRITE( nout, fmt = 9997 ) 'matrix'
213  kskip = kskip + 1
214  GO TO 80
215  END IF
216 *
217 * Loop over different blocking sizes
218 *
219  DO 70 k = 1, nnb
220 *
221  nb = nbval( k )
222 *
223 * Make sure nb is legal
224 *
225  ierr( 1 ) = 0
226  IF( nb.LT.1 ) THEN
227  ierr( 1 ) = 1
228  IF( iam.EQ.0 )
229  $ WRITE( nout, fmt = 9999 ) 'NB', 'NB', nb
230  END IF
231 *
232 * Check all processes for an error
233 *
234  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
235 *
236  IF( ierr( 1 ).GT.0 ) THEN
237  IF( iam.EQ.0 )
238  $ WRITE( nout, fmt = 9997 ) 'NB'
239  kskip = kskip + 1
240  GO TO 70
241  END IF
242 *
243 * Padding constants
244 *
245  mp = numroc( m, nb, myrow, 0, nprow )
246  mq = numroc( m, nb, mycol, 0, npcol )
247  np = numroc( n, nb, myrow, 0, nprow )
248  mnp = max( mp, np )
249  nq = numroc( n, nb, mycol, 0, npcol )
250 *
251  IF( check ) THEN
252  iprepad = max( nb, mp )
253  imidpad = nb
254  ipostpad = max( nb, nq )
255  ELSE
256  iprepad = 0
257  imidpad = 0
258  ipostpad = 0
259  END IF
260 *
261 * Initialize the array descriptor for the matrix A
262 *
263  CALL descinit( desca, m, n, nb, nb, 0, 0, ictxt,
264  $ max( 1, mp ) + imidpad, ierr( 1 ) )
265 *
266 * Check all processes for an error
267 *
268  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1, 0 )
269 *
270  IF( ierr( 1 ).LT.0 ) THEN
271  IF( iam.EQ.0 )
272  $ WRITE( nout, fmt = 9997 ) 'descriptor'
273  kskip = kskip + 1
274  GO TO 70
275  END IF
276 *
277  DO 60 iscale = 1, 3
278 *
279  itype = iscale
280 *
281 * Assign pointers into MEM for SCALAPACK arrays, A is
282 * allocated starting at position MEM( IPREPAD+1 )
283 *
284  ipa = iprepad + 1
285  ipx = ipa + desca( lld_ )*nq + ipostpad + iprepad
286  ipw = ipx
287 *
288  worksiz = nq + ipostpad
289 *
290 * Check for adequate memory for problem size
291 *
292  ierr( 1 ) = 0
293  IF( ( ipw+worksiz ).GT.memsiz ) THEN
294  IF( iam.EQ.0 )
295  $ WRITE( nout, fmt = 9996 ) 'MEMORY',
296  $ ( ipx+worksiz )*dblesz
297  ierr( 1 ) = 1
298  END IF
299 *
300 * Check all processes for an error
301 *
302  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr, 1, -1,
303  $ 0 )
304 *
305  IF( ierr( 1 ).GT.0 ) THEN
306  IF( iam.EQ.0 )
307  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
308  kskip = kskip + 1
309  GO TO 70
310  END IF
311 *
312  IF( check ) THEN
313  CALL pdfillpad( ictxt, mp, nq, mem( ipa-iprepad ),
314  $ desca( lld_ ), iprepad, ipostpad,
315  $ padval )
316  CALL pdfillpad( ictxt, worksiz-ipostpad, 1,
317  $ mem( ipw-iprepad ),
318  $ worksiz-ipostpad, iprepad,
319  $ ipostpad, padval )
320  END IF
321 *
322 * Generate the matrix A and calculate its 1-norm
323 *
324  CALL pdqrt13( iscale, m, n, mem( ipa ), 1, 1,
325  $ desca, anorm, iaseed, mem( ipw ) )
326 *
327  IF( check ) THEN
328  CALL pdchekpad( ictxt, 'PDQRT13', mp, nq,
329  $ mem( ipa-iprepad ), desca( lld_ ),
330  $ iprepad, ipostpad, padval )
331  CALL pdchekpad( ictxt, 'PDQRT13',
332  $ worksiz-ipostpad, 1,
333  $ mem( ipw-iprepad ),
334  $ worksiz-ipostpad, iprepad,
335  $ ipostpad, padval )
336  END IF
337 *
338  DO 50 itran = 1, 2
339 *
340  IF( itran.EQ.1 ) THEN
341  nrows = m
342  ncols = n
343  trans = 'N'
344  tpsd = .false.
345  ELSE
346  nrows = n
347  ncols = m
348  trans = 'T'
349  tpsd = .true.
350  END IF
351 *
352 * Loop over the different values for NRHS
353 *
354  DO 40 hh = 1, nnr
355 *
356  nrhs = nrval( hh )
357 *
358  DO 30 kk = 1, nnbr
359 *
360  nbrhs = nbrval( kk )
361 *
362  nrhsp = numroc( nrhs, nbrhs, myrow, 0,
363  $ nprow )
364  nrhsq = numroc( nrhs, nbrhs, mycol, 0,
365  $ npcol )
366 *
367 * Define Array descriptor for rhs MAX(M,N)xNRHS
368 *
369  CALL descinit( descx, max( m, n ), nrhs, nb,
370  $ nbrhs, 0, 0, ictxt,
371  $ max( 1, mnp ) + imidpad,
372  $ ierr( 1 ) )
373  IF( tpsd ) THEN
374  CALL descinit( descw, m, nrhs, nb, nbrhs,
375  $ 0, 0, ictxt, max( 1, mp ) +
376  $ imidpad, ierr( 2 ) )
377  ELSE
378  CALL descinit( descw, n, nrhs, nb, nbrhs,
379  $ 0, 0, ictxt, max( 1, np ) +
380  $ imidpad, ierr( 2 ) )
381  END IF
382 *
383 * Check all processes for an error
384 *
385  CALL igsum2d( ictxt, 'All', ' ', 2, 1, ierr,
386  $ 2, -1, 0 )
387 *
388  IF( ierr( 1 ).LT.0 .OR. ierr( 2 ).LT.0 ) THEN
389  IF( iam.EQ.0 )
390  $ WRITE( nout, fmt = 9997 ) 'descriptor'
391  kskip = kskip + 1
392  GO TO 30
393  END IF
394 *
395 * Check for enough memory
396 *
397  ipx = ipa + desca( lld_ )*nq + ipostpad +
398  $ iprepad
399  ipw = ipx + descx( lld_ )*nrhsq + ipostpad +
400  $ iprepad
401  worksiz = descw( lld_ )*nrhsq + ipostpad
402 *
403  ierr( 1 ) = 0
404  IF( ipw+worksiz.GT.memsiz ) THEN
405  IF( iam.EQ.0 )
406  $ WRITE( nout, fmt = 9996 ) 'Generation',
407  $ ( ipw+worksiz )*dblesz
408  ierr( 1 ) = 1
409  END IF
410 *
411 * Check all processes for an error
412 *
413  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
414  $ 1, -1, 0 )
415 *
416  IF( ierr( 1 ).GT.0 ) THEN
417  IF( iam.EQ.0 )
418  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
419  kskip = kskip + 1
420  GO TO 30
421  END IF
422 *
423 * Generate RHS
424 *
425  IF( tpsd ) THEN
426  CALL pdmatgen( ictxt, 'No', 'No',
427  $ descw( m_ ), descw( n_ ),
428  $ descw( mb_ ), descw( nb_ ),
429  $ mem( ipw ), descw( lld_ ),
430  $ descw( rsrc_ ),
431  $ descw( csrc_ ), ibseed, 0,
432  $ mp, 0, nrhsq, myrow, mycol,
433  $ nprow, npcol )
434  ELSE
435  CALL pdmatgen( ictxt, 'No', 'No',
436  $ descw( m_ ), descw( n_ ),
437  $ descw( mb_ ), descw( nb_ ),
438  $ mem( ipw ), descw( lld_ ),
439  $ descw( rsrc_ ),
440  $ descw( csrc_ ), ibseed, 0,
441  $ np, 0, nrhsq, myrow, mycol,
442  $ nprow, npcol )
443  END IF
444 *
445  IF( check ) THEN
446  CALL pdfillpad( ictxt, mnp, nrhsq,
447  $ mem( ipx-iprepad ),
448  $ descx( lld_ ), iprepad,
449  $ ipostpad, padval )
450  IF( tpsd ) THEN
451  CALL pdfillpad( ictxt, mp, nrhsq,
452  $ mem( ipw-iprepad ),
453  $ descw( lld_ ), iprepad,
454  $ ipostpad, padval )
455  ELSE
456  CALL pdfillpad( ictxt, np, nrhsq,
457  $ mem( ipw-iprepad ),
458  $ descw( lld_ ), iprepad,
459  $ ipostpad, padval )
460  END IF
461  END IF
462 *
463  DO 10 jj = 1, nrhs
464  CALL pdnrm2( ncols, bnorm, mem( ipw ), 1,
465  $ jj, descw, 1 )
466  IF( bnorm.GT.zero )
467  $ CALL pdscal( ncols, one / bnorm,
468  $ mem( ipw ), 1, jj, descw,
469  $ 1 )
470  10 CONTINUE
471 *
472  CALL pdgemm( trans, 'N', nrows, nrhs, ncols,
473  $ one, mem( ipa ), 1, 1, desca,
474  $ mem( ipw ), 1, 1, descw, zero,
475  $ mem( ipx ), 1, 1, descx )
476 *
477  IF( check ) THEN
478 *
479 * check for memory overwrite
480 *
481  CALL pdchekpad( ictxt, 'Generation', mp,
482  $ nq, mem( ipa-iprepad ),
483  $ desca( lld_ ), iprepad,
484  $ ipostpad, padval )
485  CALL pdchekpad( ictxt, 'Generation', mnp,
486  $ nrhsq, mem( ipx-iprepad ),
487  $ descx( lld_ ), iprepad,
488  $ ipostpad, padval )
489  IF( tpsd ) THEN
490  CALL pdchekpad( ictxt, 'Generation',
491  $ mp, nrhsq,
492  $ mem( ipw-iprepad ),
493  $ descw( lld_ ), iprepad,
494  $ ipostpad, padval )
495  ELSE
496  CALL pdchekpad( ictxt, 'Generation',
497  $ np, nrhsq,
498  $ mem( ipw-iprepad ),
499  $ descw( lld_ ), iprepad,
500  $ ipostpad, padval )
501  END IF
502 *
503 * Allocate space for copy of rhs
504 *
505  ipb = ipw
506 *
507  IF( tpsd ) THEN
508  CALL descinit( descb, n, nrhs, nb,
509  $ nbrhs, 0, 0, ictxt,
510  $ max( 1, np ) + imidpad,
511  $ ierr( 1 ) )
512  ELSE
513  CALL descinit( descb, m, nrhs, nb,
514  $ nbrhs, 0, 0, ictxt,
515  $ max( 1, mp ) + imidpad,
516  $ ierr( 1 ) )
517  END IF
518 *
519 * Check all processes for an error
520 *
521  CALL igsum2d( ictxt, 'All', ' ', 1, 1,
522  $ ierr, 1, -1, 0 )
523 *
524  IF( ierr( 1 ).LT.0 ) THEN
525  IF( iam.EQ.0 )
526  $ WRITE( nout, fmt = 9997 )
527  $ 'descriptor'
528  kskip = kskip + 1
529  GO TO 30
530  END IF
531 *
532  ipw = ipb + descb( lld_ )*nrhsq +
533  $ ipostpad + iprepad
534 *
535  END IF
536 *
537 * Calculate the amount of workspace for PDGELS
538 *
539  IF( m.GE.n ) THEN
540  ltau = numroc( min(m,n), nb, mycol, 0,
541  $ npcol )
542  lwf = nb * ( mp + nq + nb )
543  lws = max( ( nb*( nb - 1 ) ) / 2,
544  $ ( mp + nrhsq ) * nb ) + nb*nb
545  ELSE
546  lcm = ilcm( nprow, npcol )
547  lcmp = lcm / nprow
548  ltau = numroc( min(m,n), nb, myrow, 0,
549  $ nprow )
550  lwf = nb * ( mp + nq + nb )
551  lws = max( ( nb*( nb - 1 ) ) / 2, ( np +
552  $ max( nq + numroc( numroc( n, nb, 0,
553  $ 0, nprow ), nb, 0, 0, lcmp ),
554  $ nrhsq ) ) * nb ) + nb*nb
555  END IF
556 *
557  lwork = ltau + max( lwf, lws )
558  worksiz = lwork + ipostpad
559 *
560 * Check for adequate memory for problem size
561 *
562  ierr( 1 ) = 0
563  IF( ipw+worksiz.GT.memsiz ) THEN
564  IF( iam.EQ.0 )
565  $ WRITE( nout, fmt = 9996 ) 'solve',
566  $ ( ipw+worksiz )*dblesz
567  ierr( 1 ) = 1
568  END IF
569 *
570 * Check all processes for an error
571 *
572  CALL igsum2d( ictxt, 'All', ' ', 1, 1, ierr,
573  $ 1, -1, 0 )
574 *
575  IF( ierr( 1 ).GT.0 ) THEN
576  IF( iam.EQ.0 )
577  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
578  kskip = kskip + 1
579  GO TO 30
580  END IF
581 *
582  IF( check ) THEN
583 *
584 * Make the copy of the right hand side
585 *
586  CALL pdlacpy( 'All', nrows, nrhs,
587  $ mem( ipx ), 1, 1, descx,
588  $ mem( ipb ), 1, 1, descb )
589 *
590  IF( tpsd ) THEN
591  CALL pdfillpad( ictxt, np, nrhsq,
592  $ mem( ipb-iprepad ),
593  $ descb( lld_ ), iprepad,
594  $ ipostpad, padval )
595  ELSE
596  CALL pdfillpad( ictxt, mp, nrhsq,
597  $ mem( ipb-iprepad ),
598  $ descb( lld_ ), iprepad,
599  $ ipostpad, padval )
600  END IF
601  CALL pdfillpad( ictxt, lwork, 1,
602  $ mem( ipw-iprepad ),
603  $ lwork, iprepad,
604  $ ipostpad, padval )
605  END IF
606 *
607  CALL slboot( )
608  CALL blacs_barrier( ictxt, 'All' )
609  CALL sltimer( 1 )
610 *
611 * Solve the LS or overdetermined system
612 *
613  CALL pdgels( trans, m, n, nrhs, mem( ipa ),
614  $ 1, 1, desca, mem( ipx ), 1, 1,
615  $ descx, mem( ipw ), lwork, info )
616 *
617  CALL sltimer( 1 )
618 *
619  IF( check ) THEN
620 *
621 * check for memory overwrite
622 *
623  CALL pdchekpad( ictxt, 'PDGELS', mp,
624  $ nq, mem( ipa-iprepad ),
625  $ desca( lld_ ), iprepad,
626  $ ipostpad, padval )
627  CALL pdchekpad( ictxt, 'PDGELS', mnp,
628  $ nrhsq, mem( ipx-iprepad ),
629  $ descx( lld_ ), iprepad,
630  $ ipostpad, padval )
631  CALL pdchekpad( ictxt, 'PDGELS', lwork,
632  $ 1, mem( ipw-iprepad ),
633  $ lwork, iprepad,
634  $ ipostpad, padval )
635  END IF
636 *
637 * Regenerate A in place for testing and next
638 * iteration
639 *
640  CALL pdqrt13( iscale, m, n, mem( ipa ), 1, 1,
641  $ desca, anorm, iaseed,
642  $ mem( ipw ) )
643 *
644 * check the solution to rhs
645 *
646  IF( check ) THEN
647 *
648 * Am I going to call PDQRT17 ?
649 *
650  IF( ( m.GE.n .AND. ( .NOT.tpsd ) ) .OR.
651  $ ( m.LT.n .AND. tpsd ) ) THEN
652 *
653 * Call PDQRT17 first, A, X, and B remain
654 * unchanged. Solving LS system
655 *
656 * Check amount of memory for PDQRT17
657 *
658  IF( tpsd ) THEN
659  worksiz = np*nrhsq + nrhsp*mq
660  ipw2 = ipw + worksiz
661  worksiz = worksiz +
662  $ max( nq, max( mq, nrhsq ) ) +
663  $ ipostpad
664  ELSE
665  worksiz = mp*nrhsq + nrhsp*nq
666  ipw2 = ipw + worksiz
667  worksiz = worksiz +
668  $ max( nq, nrhsq ) +
669  $ ipostpad
670  END IF
671 *
672 * Check for adequate memory for problem
673 * size
674 *
675  ierr( 1 ) = 0
676  IF( ( ipw+worksiz ).GT.memsiz ) THEN
677  IF( iam.EQ.0 )
678  $ WRITE( nout, fmt = 9996 )
679  $ 'MEMORY', ( ipw+worksiz )*dblesz
680  ierr( 1 ) = 1
681  END IF
682 *
683 * Check all processes for an error
684 *
685  CALL igsum2d( ictxt, 'All', ' ', 1, 1,
686  $ ierr, 1, -1, 0 )
687 *
688  IF( ierr( 1 ).GT.0 ) THEN
689  IF( iam.EQ.0 )
690  $ WRITE( nout, fmt = 9997 )
691  $ 'MEMORY'
692  kskip = kskip + 1
693  GO TO 30
694  END IF
695 *
696  CALL pdfillpad( ictxt,
697  $ worksiz-ipostpad, 1,
698  $ mem( ipw-iprepad ),
699  $ worksiz-ipostpad,
700  $ iprepad, ipostpad,
701  $ padval )
702 *
703  result( 2 ) = pdqrt17( trans, 1, m, n,
704  $ nrhs,
705  $ mem( ipa ),
706  $ 1, 1, desca,
707  $ mem( ipx ), 1,
708  $ 1, descx,
709  $ mem( ipb ),
710  $ 1, 1, descb,
711  $ mem( ipw ),
712  $ mem( ipw2 ) )
713  sresid = result( 2 )
714 *
715  CALL pdchekpad( ictxt, 'PDQRT17',
716  $ mp, nq,
717  $ mem( ipa-iprepad ),
718  $ desca( lld_ ),
719  $ iprepad, ipostpad,
720  $ padval )
721  CALL pdchekpad( ictxt, 'PDQRT17',
722  $ mnp, nrhsq,
723  $ mem( ipx-iprepad ),
724  $ descx( lld_ ), iprepad,
725  $ ipostpad, padval )
726  IF( tpsd ) THEN
727  CALL pdchekpad( ictxt, 'PDQRT17',
728  $ np, nrhsq,
729  $ mem( ipb-iprepad ),
730  $ descb( lld_ ),
731  $ iprepad, ipostpad,
732  $ padval )
733  ELSE
734  CALL pdchekpad( ictxt, 'PDQRT17',
735  $ mp, nrhsq,
736  $ mem( ipb-iprepad ),
737  $ descb( lld_ ),
738  $ iprepad, ipostpad,
739  $ padval )
740  END IF
741  CALL pdchekpad( ictxt, 'PDQRT17',
742  $ worksiz-ipostpad, 1,
743  $ mem( ipw-iprepad ),
744  $ worksiz-ipostpad,
745  $ iprepad, ipostpad,
746  $ padval )
747  END IF
748 *
749 * Call PDQRT16, B will be destroyed.
750 *
751  IF( tpsd ) THEN
752  worksiz = mp + ipostpad
753  ELSE
754  worksiz = nq + ipostpad
755  END IF
756 *
757 * Check for adequate memory for problem size
758 *
759  ierr( 1 ) = 0
760  IF( ( ipw+worksiz ).GT.memsiz ) THEN
761  IF( iam.EQ.0 )
762  $ WRITE( nout, fmt = 9996 ) 'MEMORY',
763  $ ( ipw+worksiz )*dblesz
764  ierr( 1 ) = 1
765  END IF
766 *
767 * Check all processes for an error
768 *
769  CALL igsum2d( ictxt, 'All', ' ', 1, 1,
770  $ ierr, 1, -1, 0 )
771 *
772  IF( ierr( 1 ).GT.0 ) THEN
773  IF( iam.EQ.0 )
774  $ WRITE( nout, fmt = 9997 ) 'MEMORY'
775  kskip = kskip + 1
776  GO TO 30
777  END IF
778 *
779  CALL pdfillpad( ictxt,
780  $ worksiz-ipostpad, 1,
781  $ mem( ipw-iprepad ),
782  $ worksiz-ipostpad,
783  $ iprepad, ipostpad,
784  $ padval )
785 *
786  CALL pdqrt16( trans, m, n, nrhs,
787  $ mem( ipa ), 1, 1, desca,
788  $ mem( ipx ), 1, 1, descx,
789  $ mem( ipb ), 1, 1, descb,
790  $ mem( ipw ), result( 1 ) )
791 *
792  CALL pdchekpad( ictxt, 'PDQRT16',
793  $ mp, nq,
794  $ mem( ipa-iprepad ),
795  $ desca( lld_ ),
796  $ iprepad, ipostpad,
797  $ padval )
798  CALL pdchekpad( ictxt, 'PDQRT16',
799  $ mnp, nrhsq,
800  $ mem( ipx-iprepad ),
801  $ descx( lld_ ), iprepad,
802  $ ipostpad, padval )
803  IF( tpsd ) THEN
804  CALL pdchekpad( ictxt, 'PDQRT16',
805  $ np, nrhsq,
806  $ mem( ipb-iprepad ),
807  $ descb( lld_ ),
808  $ iprepad, ipostpad,
809  $ padval )
810  ELSE
811  CALL pdchekpad( ictxt, 'PDQRT16',
812  $ mp, nrhsq,
813  $ mem( ipb-iprepad ),
814  $ descb( lld_ ),
815  $ iprepad, ipostpad,
816  $ padval )
817  END IF
818  CALL pdchekpad( ictxt, 'PDQRT16',
819  $ worksiz-ipostpad, 1,
820  $ mem( ipw-iprepad ),
821  $ worksiz-ipostpad,
822  $ iprepad, ipostpad,
823  $ padval )
824 *
825 * Call PDQRT14
826 *
827  IF( ( m.GE.n .AND. tpsd ) .OR.
828  $ ( m.LT.n .AND. ( .NOT.tpsd ) ) ) THEN
829 *
830  ipw = ipb
831 *
832  IF( tpsd ) THEN
833 *
834  nnrhsq = numroc( n+nrhs, nb, mycol,
835  $ 0, npcol )
836  ltau = numroc( min( m, n+nrhs ), nb,
837  $ mycol, 0, npcol )
838  lwf = nb * ( nb + mp + nnrhsq )
839  worksiz = mp * nnrhsq + ltau + lwf +
840  $ ipostpad
841 *
842  ELSE
843 *
844  mnrhsp = numroc( m+nrhs, nb, myrow,
845  $ 0, nprow )
846  ltau = numroc( min( m+nrhs, n ), nb,
847  $ myrow, 0, nprow )
848  lwf = nb * ( nb + mnrhsp + nq )
849  worksiz = mnrhsp * nq + ltau + lwf +
850  $ ipostpad
851 *
852  END IF
853 *
854 * Check for adequate memory for problem
855 * size
856 *
857  ierr( 1 ) = 0
858  IF( ( ipw+worksiz ).GT.memsiz ) THEN
859  IF( iam.EQ.0 )
860  $ WRITE( nout, fmt = 9996 )
861  $ 'MEMORY', ( ipw+worksiz )*dblesz
862  ierr( 1 ) = 1
863  END IF
864 *
865 * Check all processes for an error
866 *
867  CALL igsum2d( ictxt, 'All', ' ', 1, 1,
868  $ ierr, 1, -1, 0 )
869 *
870  IF( ierr( 1 ).GT.0 ) THEN
871  IF( iam.EQ.0 )
872  $ WRITE( nout, fmt = 9997 )
873  $ 'MEMORY'
874  kskip = kskip + 1
875  GO TO 30
876  END IF
877 *
878  CALL pdfillpad( ictxt,
879  $ worksiz-ipostpad, 1,
880  $ mem( ipw-iprepad ),
881  $ worksiz-ipostpad,
882  $ iprepad, ipostpad,
883  $ padval )
884 *
885 * Solve underdetermined system
886 *
887  result( 2 ) = pdqrt14( trans, m, n,
888  $ nrhs,
889  $ mem( ipa ), 1,
890  $ 1, desca,
891  $ mem( ipx ),
892  $ 1, 1, descx,
893  $ mem( ipw ) )
894  sresid = result( 2 )
895 *
896  CALL pdchekpad( ictxt, 'PDQRT14',
897  $ mp, nq,
898  $ mem( ipa-iprepad ),
899  $ desca( lld_ ),
900  $ iprepad, ipostpad,
901  $ padval )
902  CALL pdchekpad( ictxt, 'PDQRT14',
903  $ mnp, nrhsq,
904  $ mem( ipx-iprepad ),
905  $ descx( lld_ ), iprepad,
906  $ ipostpad, padval )
907  CALL pdchekpad( ictxt, 'PDQRT14',
908  $ worksiz-ipostpad, 1,
909  $ mem( ipw-iprepad ),
910  $ worksiz-ipostpad,
911  $ iprepad, ipostpad,
912  $ padval )
913  END IF
914 *
915 * Print information about the tests that
916 * did not pass the threshold.
917 *
918  passed = 'PASSED'
919  DO 20 ii = 1, 2
920  IF( ( result( ii ).GE.thresh ) .AND.
921  $ ( result( ii )-result( ii ).EQ.0.0e+0
922  $ ) ) THEN
923  IF( iam.EQ.0 )
924  $ WRITE( nout, fmt = 9986 )trans,
925  $ m, n, nrhs, nb, itype, ii,
926  $ result( ii )
927  kfail = kfail + 1
928  passed = 'FAILED'
929  ELSE
930  kpass = kpass + 1
931  END IF
932  20 CONTINUE
933 *
934  ELSE
935 *
936 * By-pass the solve check
937 *
938  kpass = kpass + 1
939  sresid = sresid - sresid
940  passed = 'BYPASS'
941 *
942  END IF
943 *
944 * Gather maximum of all CPU and WALL clock
945 * timings
946 *
947  CALL slcombine( ictxt, 'All', '>', 'W', 1, 1,
948  $ wtime )
949  CALL slcombine( ictxt, 'All', '>', 'C', 1, 1,
950  $ ctime )
951 *
952 * Print results
953 *
954  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
955  addfac = 1
956  mulfac = 1
957  IF( m.GE.n ) THEN
958 *
959 * NOPS = DOPLA( 'DGEQRF', M, N, 0, 0,
960 * NB ) + DOPLA( 'DORMQR', M, NRHS, N,
961 * 0, NB )
962 *
963  mults = n*( ( ( 23.d0 / 6.d0 )+m+n /
964  $ 2.d0 )+ n*( m-n / 3.d0 ) ) +
965  $ n*nrhs*( 2.d0*m+2.d0-n )
966  adds = n*( ( 5.d0 / 6.d0 )+n*
967  $ ( 1.d0 / 2.d0+( m-n / 3.d0 ) ) )
968  $ + n*nrhs*( 2.d0*m+1.d0-n )
969  ELSE
970 *
971 * NOPS = DOPLA( 'DGELQF', M, N, 0, 0,
972 * NB ) + DOPLA( 'DORMLQ', M,
973 * NRHS, N, 0, NB )
974 *
975  mults = m*( ( ( 29.d0 / 6.d0 )+2.d0*n-m
976  $ / 2.d0 )+m*( n-m / 3.d0 ) )
977  $ + n*nrhs*( 2.d0*m+2.d0-n )
978  adds = m*( ( 5.d0 / 6.d0 )+m / 2.d0+m*
979  $ ( n-m / 3.d0 ) )
980  $ + n*nrhs*( 2.d0*m+1.d0-n )
981  END IF
982  nops = addfac*adds + mulfac*mults
983 *
984 * Calculate total megaflops, for WALL and
985 * CPU time, and print output
986 *
987 * Print WALL time if machine supports it
988 *
989  IF( wtime( 1 ).GT.0.0d+0 ) THEN
990  tmflops = nops / ( wtime( 1 )*1.0d+6 )
991  ELSE
992  tmflops = 0.0d+0
993  END IF
994 *
995  IF( wtime( 1 ).GE.0.0d+0 )
996  $ WRITE( nout, fmt = 9993 )
997  $ 'WALL', trans, m, n, nb, nrhs,
998  $ nbrhs, nprow, npcol, wtime( 1 ),
999  $ tmflops, passed
1000 *
1001 * Print CPU time if machine supports it
1002 *
1003  IF( ctime( 1 ).GT.0.0d+0 ) THEN
1004  tmflops = nops / ( ctime( 1 )*1.0d+6 )
1005  ELSE
1006  tmflops = 0.0d+0
1007  END IF
1008 *
1009  IF( ctime( 1 ).GE.0.0d+0 )
1010  $ WRITE( nout, fmt = 9993 )
1011  $ 'CPU ', trans, m, n, nb, nrhs,
1012  $ nbrhs, nprow, npcol, ctime( 1 ),
1013  $ tmflops, passed
1014  END IF
1015  30 CONTINUE
1016  40 CONTINUE
1017  50 CONTINUE
1018  60 CONTINUE
1019  70 CONTINUE
1020  80 CONTINUE
1021  CALL blacs_gridexit( ictxt )
1022  90 CONTINUE
1023 *
1024 * Print out ending messages and close output file
1025 *
1026  IF( iam.EQ.0 ) THEN
1027  ktests = kpass + kfail + kskip
1028  WRITE( nout, fmt = * )
1029  WRITE( nout, fmt = 9992 ) ktests
1030  IF( check ) THEN
1031  WRITE( nout, fmt = 9991 ) kpass
1032  WRITE( nout, fmt = 9989 ) kfail
1033  ELSE
1034  WRITE( nout, fmt = 9990 ) kpass
1035  END IF
1036  WRITE( nout, fmt = 9988 ) kskip
1037  WRITE( nout, fmt = * )
1038  WRITE( nout, fmt = * )
1039  WRITE( nout, fmt = 9987 )
1040  IF( nout.NE.6 .AND. nout.NE.0 )
1041  $ CLOSE ( nout )
1042  END IF
1043 *
1044  CALL blacs_exit( 0 )
1045 *
1046  9999 FORMAT( 'ILLEGAL ', a6, ': ', a5, ' = ', i3,
1047  $ '; It should be at least 1' )
1048  9998 FORMAT( 'ILLEGAL GRID: nprow*npcol = ', i4, '. It can be at most',
1049  $ i4 )
1050  9997 FORMAT( 'Bad ', a6, ' parameters: going on to next test case.' )
1051  9996 FORMAT( 'Unable to perform ', a, ': need TOTMEM of at least',
1052  $ i11 )
1053  9995 FORMAT( 'Time TRANS M N NB NRHS NBRHS P Q ',
1054  $ 'LS Time MFLOPS CHECK' )
1055  9994 FORMAT( '---- ----- ------ ------ --- ----- ----- ----- ----- ',
1056  $ '--------- -------- ------' )
1057  9993 FORMAT( a4, 3x, a1, 3x, i6, 1x, i6, 1x, i3, 1x, i5, 1x, i5, 1x,
1058  $ i5, 1x, i5, 1x, f9.2, 1x, f8.2, 1x, a6 )
1059  9992 FORMAT( 'Finished', i6, ' tests, with the following results:' )
1060  9991 FORMAT( i5, ' tests completed and passed residual checks.' )
1061  9990 FORMAT( i5, ' tests completed without checking.' )
1062  9989 FORMAT( i5, ' tests completed and failed residual checks.' )
1063  9988 FORMAT( i5, ' tests skipped because of illegal input values.' )
1064  9987 FORMAT( 'END OF TESTS.' )
1065  9986 FORMAT( ' TRANS=''', a1, ''', M=', i5, ', N=', i5, ', NRHS=', i4,
1066  $ ', NB=', i4, ', type', i2, ', test(', i2, ')=', g12.5 )
1067 *
1068  stop
1069 *
1070 * End of PDLSDRIVER
1071 *
1072  END
max
#define max(A, B)
Definition: pcgemr.c:180
pdchekpad
subroutine pdchekpad(ICTXT, MESS, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pdchekpad.f:3
ilcm
integer function ilcm(M, N)
Definition: ilcm.f:2
sltimer
subroutine sltimer(I)
Definition: sltimer.f:47
lsame
logical function lsame(CA, CB)
Definition: tools.f:1724
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
slboot
subroutine slboot()
Definition: sltimer.f:2
pdmatgen
subroutine pdmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: pdmatgen.f:4
pdlange
double precision function pdlange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pdlange.f:3
pdqrt17
double precision function pdqrt17(TRANS, IRESID, M, N, NRHS, A, IA, JA, DESCA, X, IX, JX, DESCX, B, IB, JB, DESCB, WORK, RWORK)
Definition: pdqrt17.f:5
pdlsdriver
program pdlsdriver
Definition: pdlsdriver.f:1
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
pdqrt16
subroutine pdqrt16(TRANS, M, N, NRHS, A, IA, JA, DESCA, X, IX, JX, DESCX, B, IB, JB, DESCB, RWORK, RESID)
Definition: pdqrt16.f:3
pdqrt13
subroutine pdqrt13(SCALE, M, N, A, IA, JA, DESCA, NORMA, ISEED, WORK)
Definition: pdqrt13.f:3
pdfillpad
subroutine pdfillpad(ICTXT, M, N, A, LDA, IPRE, IPOST, CHKVAL)
Definition: pdfillpad.f:2
pdgels
subroutine pdgels(TRANS, M, N, NRHS, A, IA, JA, DESCA, B, IB, JB, DESCB, WORK, LWORK, INFO)
Definition: pdgels.f:3
pdqrt14
double precision function pdqrt14(TRANS, M, N, NRHS, A, IA, JA, DESCA, X, IX, JX, DESCX, WORK)
Definition: pdqrt14.f:3
pdlacpy
subroutine pdlacpy(UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, DESCB)
Definition: pdlacpy.f:3
slcombine
subroutine slcombine(ICTXT, SCOPE, OP, TIMETYPE, N, IBEG, TIMES)
Definition: sltimer.f:267
min
#define min(A, B)
Definition: pcgemr.c:181
iceil
integer function iceil(INUM, IDENOM)
Definition: iceil.f:2
pdlsinfo
subroutine pdlsinfo(SUMMRY, NOUT, NMAT, MVAL, LDMVAL, NVAL, LDNVAL, NNB, NBVAL, LDNBVAL, NNR, NRVAL, LDNRVAL, NNBR, NBRVAL, LDNBRVAL, NGRIDS, PVAL, LDPVAL, QVAL, LDQVAL, THRESH, WORK, IAM, NPROCS)
Definition: pdlsinfo.f:6