SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pitrmr.c
Go to the documentation of this file.
1#include "redist.h"
158#define static2 static
159#if defined(Add_) || defined(f77IsF2C)
160#define fortran_mr2d pitrmr2do_
161#define fortran_mr2dnew pitrmr2d_
162#elif defined(UpCase)
163#define fortran_mr2dnew PITRMR2D
164#define fortran_mr2d PITRMR2DO
165#define icopy_ ICOPY
166#define ilacpy_ ILACPY
167#else
168#define fortran_mr2d pitrmr2do
169#define fortran_mr2dnew pitrmr2d
170#define icopy_ icopy
171#define ilacpy_ ilacpy
172#endif
173#define Clacpy Citrlacpy
174void Clacpy();
175typedef struct {
176 Int desctype;
177 Int ctxt;
178 Int m;
179 Int n;
180 Int nbrow;
181 Int nbcol;
182 Int sprow;
183 Int spcol;
184 Int lda;
185} MDESC;
186#define BLOCK_CYCLIC_2D 1
187typedef struct {
188 Int gstart;
189 Int len;
190} IDESC;
191#define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow)))
192#define max(A,B) ((A)>(B)?(A):(B))
193#define min(A,B) ((A)>(B)?(B):(A))
194#define DIVUP(a,b) ( ((a)-1) /(b)+1)
195#define ROUNDUP(a,b) (DIVUP(a,b)*(b))
196#ifdef MALLOCDEBUG
197#define malloc mymalloc
198#define free myfree
199#define realloc myrealloc
200#endif
201/* Cblacs */
202extern void Cblacs_pcoord();
204extern void Csetpvmtids();
205extern void Cblacs_get();
206extern void Cblacs_pinfo();
207extern void Cblacs_gridinfo();
208extern void Cblacs_gridinit();
209extern void Cblacs_exit();
210extern void Cblacs_gridexit();
211extern void Cblacs_setup();
212extern void Cigebs2d();
213extern void Cigebr2d();
214extern void Cigesd2d();
215extern void Cigerv2d();
216extern void Cigsum2d();
217extern void Cigamn2d();
218extern void Cigamx2d();
219extern void Cigesd2d();
220extern void Cigerv2d();
221/* lapack */
222void ilacpy_();
223/* aux fonctions */
225extern void *mr2d_malloc();
226extern Int ppcm();
227extern Int localsize();
230extern void paramcheck();
231/* tools and others function */
232#define scanD0 itrscanD0
233#define dispmat itrdispmat
234#define setmemory itrsetmemory
235#define freememory itrfreememory
236#define scan_intervals itrscan_intervals
237extern void scanD0();
238extern void dispmat();
239extern void setmemory();
240extern void freememory();
241extern Int scan_intervals();
242extern void Cpitrmr2do();
243extern void Cpitrmr2d();
244/* some defines for Cpitrmr2do */
245#define SENDBUFF 0
246#define RECVBUFF 1
247#define SIZEBUFF 2
248#if 0
249#define DEBUG
250#endif
251#ifndef DEBUG
252#define NDEBUG
253#endif
254#include <stdio.h>
255#include <stdlib.h>
256#include <assert.h>
257#define DESCLEN 9
258void
259fortran_mr2d(char *uplo, char *diag, Int *m, Int *n, Int *A, Int *ia, Int *ja, Int desc_A[DESCLEN],
260 Int *B, Int *ib, Int *jb, Int desc_B[DESCLEN])
261{
262 Cpitrmr2do(uplo, diag, *m, *n, A, *ia, *ja, (MDESC *) desc_A,
263 B, *ib, *jb, (MDESC *) desc_B);
264 return;
265}
266void
267fortran_mr2dnew(char *uplo, char *diag, Int *m, Int *n, Int *A, Int *ia, Int *ja, Int desc_A[DESCLEN],
268 Int *B, Int *ib, Int *jb, Int desc_B[DESCLEN], Int *gcontext)
269{
270 Cpitrmr2d(uplo, diag, *m, *n, A, *ia, *ja, (MDESC *) desc_A,
271 B, *ib, *jb, (MDESC *) desc_B, *gcontext);
272 return;
273}
279void
280Cpitrmr2do(uplo, diag, m, n,
281 ptrmyblock, ia, ja, ma,
282 ptrmynewblock, ib, jb, mb)
283 char *uplo, *diag;
284 Int *ptrmyblock, *ptrmynewblock;
285/* pointers to the memory location of the matrix and the redistributed matrix */
286 MDESC *ma;
287 MDESC *mb;
288 Int ia, ja, ib, jb, m, n;
289{
290 Int dummy, nprocs;
291 Int gcontext;
292 /* first we initialize a global grid which serve as a reference to
293 * communicate from grid a to grid b */
294 Cblacs_pinfo(&dummy, &nprocs);
295 Cblacs_get((Int)0, (Int)0, &gcontext);
296 Cblacs_gridinit(&gcontext, "R", (Int)1, nprocs);
297 Cpitrmr2d(uplo, diag, m, n, ptrmyblock, ia, ja, ma,
298 ptrmynewblock, ib, jb, mb, gcontext);
299 Cblacs_gridexit(gcontext);
300}
301#define NBPARAM 20 /* p0,q0,p1,q1, puis ma,na,mba,nba,rowa,cola puis
302 * idem B puis ia,ja puis ib,jb */
303#define MAGIC_MAX 100000000
304void
305Cpitrmr2d(uplo, diag, m, n,
306 ptrmyblock, ia, ja, ma,
307 ptrmynewblock, ib, jb, mb, globcontext)
308 char *uplo, *diag;
309 Int *ptrmyblock, *ptrmynewblock;
310/* pointers to the memory location of the matrix and the redistributed matrix */
311 MDESC *ma;
312 MDESC *mb;
313 Int ia, ja, ib, jb, m, n, globcontext;
314{
315 Int *ptrsendbuff, *ptrrecvbuff, *ptrNULL = 0;
316 Int *recvptr;
317 MDESC newa, newb;
318 Int *proc0, *proc1, *param;
319 Int mypnum, myprow0, mypcol0, myprow1, mypcol1, nprocs;
320 Int i, j;
321 Int nprow, npcol, gcontext;
322 Int recvsize, sendsize;
323 IDESC *h_inter; /* to store the horizontal intersections */
324 IDESC *v_inter; /* to store the vertical intersections */
325 Int hinter_nb, vinter_nb; /* number of intrsections in both directions */
326 Int dummy;
327 Int p0, q0, p1, q1;
328 Int *ra, *ca;
329 /* end of variables */
330 /* To simplify further calcul we change the matrix indexation from
331 * 1..m,1..n (fortran) to 0..m-1,0..n-1 */
332 if (m == 0 || n == 0)
333 return;
334 ia -= 1;
335 ja -= 1;
336 ib -= 1;
337 jb -= 1;
338 Cblacs_gridinfo(globcontext, &nprow, &npcol, &dummy, &mypnum);
339 gcontext = globcontext;
340 nprocs = nprow * npcol;
341 /* if the global context that is given to us has not the shape of a line
342 * (nprow != 1), create a new context. TODO: to be optimal, we should
343 * avoid this because it is an uncessary synchronisation */
344 if (nprow != 1) {
345 gridreshape(&gcontext);
346 Cblacs_gridinfo(gcontext, &dummy, &dummy, &dummy, &mypnum);
347 }
348 Cblacs_gridinfo(ma->ctxt, &p0, &q0, &myprow0, &mypcol0);
349 /* compatibility T3D, must check myprow and mypcol are within bounds */
350 if (myprow0 >= p0 || mypcol0 >= q0)
351 myprow0 = mypcol0 = -1;
352 assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1));
353 Cblacs_gridinfo(mb->ctxt, &p1, &q1, &myprow1, &mypcol1);
354 if (myprow1 >= p1 || mypcol1 >= q1)
355 myprow1 = mypcol1 = -1;
356 assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1));
357 /* exchange the missing parameters among the processors: shape of grids and
358 * location of the processors */
359 param = (Int *) mr2d_malloc(3 * (nprocs * 2 + NBPARAM) * sizeof(Int));
360 ra = param + nprocs * 2 + NBPARAM;
361 ca = param + (nprocs * 2 + NBPARAM) * 2;
362 for (i = 0; i < nprocs * 2 + NBPARAM; i++)
363 param[i] = MAGIC_MAX;
364 proc0 = param + NBPARAM;
365 proc1 = param + NBPARAM + nprocs;
366 /* we calulate proc0 and proc1 that will give the number of a proc in
367 * respectively a or b in the global context */
368 if (myprow0 >= 0) {
369 proc0[myprow0 * q0 + mypcol0] = mypnum;
370 param[0] = p0;
371 param[1] = q0;
372 param[4] = ma->m;
373 param[5] = ma->n;
374 param[6] = ma->nbrow;
375 param[7] = ma->nbcol;
376 param[8] = ma->sprow;
377 param[9] = ma->spcol;
378 param[10] = ia;
379 param[11] = ja;
380 }
381 if (myprow1 >= 0) {
382 proc1[myprow1 * q1 + mypcol1] = mypnum;
383 param[2] = p1;
384 param[3] = q1;
385 param[12] = mb->m;
386 param[13] = mb->n;
387 param[14] = mb->nbrow;
388 param[15] = mb->nbcol;
389 param[16] = mb->sprow;
390 param[17] = mb->spcol;
391 param[18] = ib;
392 param[19] = jb;
393 }
394 Cigamn2d(gcontext, "All", "H", 2 * nprocs + NBPARAM, (Int)1, param, 2 * nprocs + NBPARAM,
395 ra, ca, 2 * nprocs + NBPARAM, (Int)-1, (Int)-1);
396 newa = *ma;
397 newb = *mb;
398 ma = &newa;
399 mb = &newb;
400 if (myprow0 == -1) {
401 p0 = param[0];
402 q0 = param[1];
403 ma->m = param[4];
404 ma->n = param[5];
405 ma->nbrow = param[6];
406 ma->nbcol = param[7];
407 ma->sprow = param[8];
408 ma->spcol = param[9];
409 ia = param[10];
410 ja = param[11];
411 }
412 if (myprow1 == -1) {
413 p1 = param[2];
414 q1 = param[3];
415 mb->m = param[12];
416 mb->n = param[13];
417 mb->nbrow = param[14];
418 mb->nbcol = param[15];
419 mb->sprow = param[16];
420 mb->spcol = param[17];
421 ib = param[18];
422 jb = param[19];
423 }
424 for (i = 0; i < NBPARAM; i++) {
425 if (param[i] == MAGIC_MAX) {
426 fprintf(stderr, "xxGEMR2D:something wrong in the parameters\n");
427 exit(1);
428 }
429 }
430#ifndef NDEBUG
431 for (i = 0; i < p0 * q0; i++)
432 assert(proc0[i] >= 0 && proc0[i] < nprocs);
433 for (i = 0; i < p1 * q1; i++)
434 assert(proc1[i] >= 0 && proc1[i] < nprocs);
435#endif
436 /* check the validity of the parameters */
437 paramcheck(ma, ia, ja, m, n, p0, q0, gcontext);
438 paramcheck(mb, ib, jb, m, n, p1, q1, gcontext);
439 /* we change the problem so that ia < a->nbrow ... andia + m = a->m ... */
440 {
441 Int decal;
442 ia = changeorigin(myprow0, ma->sprow, p0,
443 ma->nbrow, ia, &decal, &ma->sprow);
444 ptrmyblock += decal;
445 ja = changeorigin(mypcol0, ma->spcol, q0,
446 ma->nbcol, ja, &decal, &ma->spcol);
447 ptrmyblock += decal * ma->lda;
448 ma->m = ia + m;
449 ma->n = ja + n;
450 ib = changeorigin(myprow1, mb->sprow, p1,
451 mb->nbrow, ib, &decal, &mb->sprow);
452 ptrmynewblock += decal;
453 jb = changeorigin(mypcol1, mb->spcol, q1,
454 mb->nbcol, jb, &decal, &mb->spcol);
455 ptrmynewblock += decal * mb->lda;
456 mb->m = ib + m;
457 mb->n = jb + n;
458 if (p0 == 1)
459 ma->nbrow = ma->m;
460 if (q0 == 1)
461 ma->nbcol = ma->n;
462 if (p1 == 1)
463 mb->nbrow = mb->m;
464 if (q1 == 1)
465 mb->nbcol = mb->n;
466#ifndef NDEBUG
467 paramcheck(ma, ia, ja, m, n, p0, q0, gcontext);
468 paramcheck(mb, ib, jb, m, n, p1, q1, gcontext);
469#endif
470 }
471 /* We compute the size of the memory buffer ( we choose the worst case,
472 * when the buffer sizes == the memory block sizes). */
473 if (myprow0 >= 0 && mypcol0 >= 0) {
474 /* Initialize pointer variables */
475 setmemory(&ptrsendbuff, memoryblocksize(ma));
476 }; /* if (mypnum < p0 * q0) */
477 if (myprow1 >= 0 && mypcol1 >= 0) {
478 /* Initialize pointer variables */
479 setmemory(&ptrrecvbuff, memoryblocksize(mb));
480 }; /* if (mypnum < p1 * q1) */
481 /* allocing room for the tabs, alloc for the worst case,local_n or local_m
482 * intervals, in fact the worst case should be less, perhaps half that,I
483 * should think of that one day. */
484 h_inter = (IDESC *) mr2d_malloc(DIVUP(ma->n, q0 * ma->nbcol) *
485 ma->nbcol * sizeof(IDESC));
486 v_inter = (IDESC *) mr2d_malloc(DIVUP(ma->m, p0 * ma->nbrow)
487 * ma->nbrow * sizeof(IDESC));
488 /* We go for the scanning of indices. For each processor including mypnum,
489 * we fill the sendbuff buffer (scanD0(SENDBUFF)) and when it is done send
490 * it. Then for each processor, we compute the size of message to be
491 * receive scanD0(SIZEBUFF)), post a receive and then allocate the elements
492 * of recvbuff the right place (scanD)(RECVBUFF)) */
493 recvptr = ptrrecvbuff;
494 {
495 Int tot, myrang, step, sens;
496 Int *sender, *recver;
497 Int mesending, merecving;
498 tot = max(p0 * q0, p1 * q1);
499 init_chenille(mypnum, nprocs, p0 * q0, proc0, p1 * q1, proc1,
500 &sender, &recver, &myrang);
501 if (myrang == -1)
502 goto after_comm;
503 mesending = myprow0 >= 0;
504 assert(sender[myrang] >= 0 || !mesending);
505 assert(!mesending || proc0[sender[myrang]] == mypnum);
506 merecving = myprow1 >= 0;
507 assert(recver[myrang] >= 0 || !merecving);
508 assert(!merecving || proc1[recver[myrang]] == mypnum);
509 step = tot - 1 - myrang;
510 do {
511 for (sens = 0; sens < 2; sens++) {
512 /* be careful here, when we communicating with ourselves, we must
513 * send first (myrang > step == 0) */
514 if (mesending && recver[step] >= 0 &&
515 (sens == 0)) {
516 i = recver[step] / q1;
517 j = recver[step] % q1;
518 vinter_nb = scan_intervals('r', ia, ib, m, ma, mb, p0, p1, myprow0, i,
519 v_inter);
520 hinter_nb = scan_intervals('c', ja, jb, n, ma, mb, q0, q1, mypcol0, j,
521 h_inter);
522 scanD0(uplo, diag, SENDBUFF, ptrsendbuff, &sendsize,
523 m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1,
524 v_inter, vinter_nb, h_inter, hinter_nb,
525 ptrmyblock);
526 } /* if (mesending...) { */
527 if (mesending && recver[step] >= 0 &&
528 (sens == myrang > step)) {
529 i = recver[step] / q1;
530 j = recver[step] % q1;
531 if (sendsize > 0
532 && (step != myrang || !merecving)
533 ) {
534 Cigesd2d(gcontext, sendsize, (Int)1, ptrsendbuff, sendsize,
535 (Int)0, proc1[i * q1 + j]);
536 } /* sendsize > 0 */
537 } /* if (mesending ... */
538 if (merecving && sender[step] >= 0 &&
539 (sens == myrang <= step)) {
540 i = sender[step] / q0;
541 j = sender[step] % q0;
542 vinter_nb = scan_intervals('r', ib, ia, m, mb, ma, p1, p0, myprow1, i,
543 v_inter);
544 hinter_nb = scan_intervals('c', jb, ja, n, mb, ma, q1, q0, mypcol1, j,
545 h_inter);
546 scanD0(uplo, diag, SIZEBUFF, ptrNULL, &recvsize,
547 m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1,
548 v_inter, vinter_nb, h_inter, hinter_nb, ptrNULL);
549 if (recvsize > 0) {
550 if (step == myrang && mesending) {
551 Clacpy(recvsize, (Int)1,
552 ptrsendbuff, recvsize,
553 ptrrecvbuff, recvsize);
554 } else {
555 Cigerv2d(gcontext, recvsize, (Int)1, ptrrecvbuff, recvsize,
556 (Int)0, proc0[i * q0 + j]);
557 }
558 } /* recvsize > 0 */
559 } /* if (merecving ...) */
560 if (merecving && sender[step] >= 0 && sens == 1) {
561 scanD0(uplo, diag, RECVBUFF, ptrrecvbuff, &recvsize,
562 m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1,
563 v_inter, vinter_nb, h_inter, hinter_nb, ptrmynewblock);
564 } /* if (merecving...) */
565 } /* for (sens = 0) */
566 step -= 1;
567 if (step < 0)
568 step = tot - 1;
569 } while (step != tot - 1 - myrang);
570after_comm:
571 free(sender);
572 } /* { int tot,nr,ns ...} */
573 /* don't forget to clean up things! */
574 if (myprow1 >= 0 && mypcol1 >= 0) {
575 freememory((char *) ptrrecvbuff);
576 };
577 if (myprow0 >= 0 && mypcol0 >= 0) {
578 freememory((char *) ptrsendbuff);
579 };
580 if (nprow != 1)
581 Cblacs_gridexit(gcontext);
582 free(v_inter);
583 free(h_inter);
584 free(param);
585}/* distrib */
587init_chenille(Int mypnum, Int nprocs, Int n0, Int *proc0, Int n1, Int *proc1, Int **psend, Int **precv, Int *myrang)
588{
589 Int ns, nr, i, tot;
590 Int *sender, *recver, *g0, *g1;
591 tot = max(n0, n1);
592 sender = (Int *) mr2d_malloc((nprocs + tot) * sizeof(Int) * 2);
593 recver = sender + tot;
594 *psend = sender;
595 *precv = recver;
596 g0 = recver + tot;
597 g1 = g0 + nprocs;
598 for (i = 0; i < nprocs; i++) {
599 g0[i] = -1;
600 g1[i] = -1;
601 }
602 for (i = 0; i < tot; i++) {
603 sender[i] = -1;
604 recver[i] = -1;
605 }
606 for (i = 0; i < n0; i++)
607 g0[proc0[i]] = i;
608 for (i = 0; i < n1; i++)
609 g1[proc1[i]] = i;
610 ns = 0;
611 nr = 0;
612 *myrang = -1;
613 for (i = 0; i < nprocs; i++)
614 if (g0[i] >= 0 && g1[i] >= 0) {
615 if (i == mypnum)
616 *myrang = nr;
617 sender[ns] = g0[i];
618 ns += 1;
619 recver[nr] = g1[i];
620 nr += 1;
621 assert(ns <= n0 && nr <= n1 && nr == ns);
622 }
623 for (i = 0; i < nprocs; i++)
624 if (g0[i] >= 0 && g1[i] < 0) {
625 if (i == mypnum)
626 *myrang = ns;
627 sender[ns] = g0[i];
628 ns += 1;
629 assert(ns <= n0);
630 }
631 for (i = 0; i < nprocs; i++)
632 if (g1[i] >= 0 && g0[i] < 0) {
633 if (i == mypnum)
634 *myrang = nr;
635 recver[nr] = g1[i];
636 nr += 1;
637 assert(nr <= n1);
638 }
639}
640void
641Clacpy(Int m, Int n, Int *a, Int lda, Int *b, Int ldb)
642{
643 Int i, j;
644 lda -= m;
645 ldb -= m;
646 assert(lda >= 0 && ldb >= 0);
647 for (j = 0; j < n; j++) {
648 for (i = 0; i < m; i++)
649 *b++ = *a++;
650 b += ldb;
651 a += lda;
652 }
653}
655gridreshape(Int *ctxtp)
656{
657 Int ori, final; /* original context, and new context created, with
658 * line form */
659 Int nprow, npcol, myrow, mycol;
660 Int *usermap;
661 Int i, j;
662 ori = *ctxtp;
663 Cblacs_gridinfo(ori, &nprow, &npcol, &myrow, &mycol);
664 usermap = mr2d_malloc(sizeof(Int) * nprow * npcol);
665 for (i = 0; i < nprow; i++)
666 for (j = 0; j < npcol; j++) {
667 usermap[i + j * nprow] = Cblacs_pnum(ori, i, j);
668 }
669 /* Cblacs_get(0, 0, &final); */
670 Cblacs_get(ori, (Int)10, &final);
671 Cblacs_gridmap(&final, usermap, (Int)1, (Int)1, nprow * npcol);
672 *ctxtp = final;
673 free(usermap);
674}
#define Int
Definition Bconfig.h:22
void Cblacs_gridmap()
static2 Int inter_len()
Int memoryblocksize()
Int changeorigin()
#define freememory
Definition pitrmr.c:235
#define scan_intervals
Definition pitrmr.c:236
void Cblacs_gridexit()
#define max(A, B)
Definition pitrmr.c:192
void Cigsum2d()
#define ilacpy_
Definition pitrmr.c:171
#define static2
Definition pitrmr.c:158
#define scanD0
Definition pitrmr.c:232
Int Cblacs_pnum()
static2 void gridreshape()
void Cpitrmr2do()
#define DIVUP(a, b)
Definition pitrmr.c:194
#define SIZEBUFF
Definition pitrmr.c:247
#define fortran_mr2dnew
Definition pitrmr.c:169
#define Clacpy
Definition pitrmr.c:173
void Cigamx2d()
void Cblacs_pinfo()
#define NBPARAM
Definition pitrmr.c:301
void Cblacs_pcoord()
void Cigamn2d()
Int localsize()
#define setmemory
Definition pitrmr.c:234
void Cblacs_get()
void Cigerv2d()
void Cigebs2d()
#define dispmat
Definition pitrmr.c:233
static2 void buff2block()
void Cpitrmr2d()
#define SENDBUFF
Definition pitrmr.c:245
void paramcheck()
void Cblacs_setup()
#define RECVBUFF
Definition pitrmr.c:246
static2 Int block2buff()
void Cblacs_gridinit()
void Cblacs_gridinfo()
#define MAGIC_MAX
Definition pitrmr.c:302
void Csetpvmtids()
Int localindice()
void Cigesd2d()
#define DESCLEN
Definition pitrmr.c:257
Int ppcm()
void Cigebr2d()
void Cblacs_exit()
#define fortran_mr2d
Definition pitrmr.c:168
void * mr2d_malloc()
static2 void init_chenille()
Int m
Definition pcgemr.c:166
Int spcol
Definition pcgemr.c:171
Int nbcol
Definition pcgemr.c:169
Int sprow
Definition pcgemr.c:170
Int nbrow
Definition pcgemr.c:168
Int ctxt
Definition pcgemr.c:165
Int n
Definition pcgemr.c:167
Int lda
Definition pcgemr.c:172