SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ Cpsgemr2d() [2/2]

void Cpsgemr2d ( Int  m,
Int  n,
float *  ptrmyblock,
Int  ia,
Int  ja,
MDESC ma,
float *  ptrmynewblock,
Int  ib,
Int  jb,
MDESC mb,
Int  globcontext 
)

Definition at line 288 of file psgemr.c.

297{
298 float *ptrsendbuff, *ptrrecvbuff, *ptrNULL = 0;
299 float *recvptr;
300 MDESC newa, newb;
301 Int *proc0, *proc1, *param;
302 Int mypnum, myprow0, mypcol0, myprow1, mypcol1, nprocs;
303 Int i, j;
304 Int nprow, npcol, gcontext;
305 Int recvsize, sendsize;
306 IDESC *h_inter; /* to store the horizontal intersections */
307 IDESC *v_inter; /* to store the vertical intersections */
308 Int hinter_nb, vinter_nb; /* number of intrsections in both directions */
309 Int dummy;
310 Int p0, q0, p1, q1;
311 Int *ra, *ca;
312 /* end of variables */
313 /* To simplify further calcul we change the matrix indexation from
314 * 1..m,1..n (fortran) to 0..m-1,0..n-1 */
315 if (m == 0 || n == 0)
316 return;
317 ia -= 1;
318 ja -= 1;
319 ib -= 1;
320 jb -= 1;
321 Cblacs_gridinfo(globcontext, &nprow, &npcol, &dummy, &mypnum);
322 gcontext = globcontext;
323 nprocs = nprow * npcol;
324 /* if the global context that is given to us has not the shape of a line
325 * (nprow != 1), create a new context. TODO: to be optimal, we should
326 * avoid this because it is an uncessary synchronisation */
327 if (nprow != 1) {
328 gridreshape(&gcontext);
329 Cblacs_gridinfo(gcontext, &dummy, &dummy, &dummy, &mypnum);
330 }
331 Cblacs_gridinfo(ma->ctxt, &p0, &q0, &myprow0, &mypcol0);
332 /* compatibility T3D, must check myprow and mypcol are within bounds */
333 if (myprow0 >= p0 || mypcol0 >= q0)
334 myprow0 = mypcol0 = -1;
335 assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1));
336 Cblacs_gridinfo(mb->ctxt, &p1, &q1, &myprow1, &mypcol1);
337 if (myprow1 >= p1 || mypcol1 >= q1)
338 myprow1 = mypcol1 = -1;
339 assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1));
340 /* exchange the missing parameters among the processors: shape of grids and
341 * location of the processors */
342 param = (Int *) mr2d_malloc(3 * (nprocs * 2 + NBPARAM) * sizeof(Int));
343 ra = param + nprocs * 2 + NBPARAM;
344 ca = param + (nprocs * 2 + NBPARAM) * 2;
345 for (i = 0; i < nprocs * 2 + NBPARAM; i++)
346 param[i] = MAGIC_MAX;
347 proc0 = param + NBPARAM;
348 proc1 = param + NBPARAM + nprocs;
349 /* we calulate proc0 and proc1 that will give the number of a proc in
350 * respectively a or b in the global context */
351 if (myprow0 >= 0) {
352 proc0[myprow0 * q0 + mypcol0] = mypnum;
353 param[0] = p0;
354 param[1] = q0;
355 param[4] = ma->m;
356 param[5] = ma->n;
357 param[6] = ma->nbrow;
358 param[7] = ma->nbcol;
359 param[8] = ma->sprow;
360 param[9] = ma->spcol;
361 param[10] = ia;
362 param[11] = ja;
363 }
364 if (myprow1 >= 0) {
365 proc1[myprow1 * q1 + mypcol1] = mypnum;
366 param[2] = p1;
367 param[3] = q1;
368 param[12] = mb->m;
369 param[13] = mb->n;
370 param[14] = mb->nbrow;
371 param[15] = mb->nbcol;
372 param[16] = mb->sprow;
373 param[17] = mb->spcol;
374 param[18] = ib;
375 param[19] = jb;
376 }
377 Cigamn2d(gcontext, "All", "H", 2 * nprocs + NBPARAM, (Int)1, param, 2 * nprocs + NBPARAM,
378 ra, ca, 2 * nprocs + NBPARAM, (Int)-1, (Int)-1);
379 newa = *ma;
380 newb = *mb;
381 ma = &newa;
382 mb = &newb;
383 if (myprow0 == -1) {
384 p0 = param[0];
385 q0 = param[1];
386 ma->m = param[4];
387 ma->n = param[5];
388 ma->nbrow = param[6];
389 ma->nbcol = param[7];
390 ma->sprow = param[8];
391 ma->spcol = param[9];
392 ia = param[10];
393 ja = param[11];
394 }
395 if (myprow1 == -1) {
396 p1 = param[2];
397 q1 = param[3];
398 mb->m = param[12];
399 mb->n = param[13];
400 mb->nbrow = param[14];
401 mb->nbcol = param[15];
402 mb->sprow = param[16];
403 mb->spcol = param[17];
404 ib = param[18];
405 jb = param[19];
406 }
407 for (i = 0; i < NBPARAM; i++) {
408 if (param[i] == MAGIC_MAX) {
409 fprintf(stderr, "xxGEMR2D:something wrong in the parameters\n");
410 exit(1);
411 }
412 }
413#ifndef NDEBUG
414 for (i = 0; i < p0 * q0; i++)
415 assert(proc0[i] >= 0 && proc0[i] < nprocs);
416 for (i = 0; i < p1 * q1; i++)
417 assert(proc1[i] >= 0 && proc1[i] < nprocs);
418#endif
419 /* check the validity of the parameters */
420 paramcheck(ma, ia, ja, m, n, p0, q0, gcontext);
421 paramcheck(mb, ib, jb, m, n, p1, q1, gcontext);
422 /* we change the problem so that ia < a->nbrow ... andia + m = a->m ... */
423 {
424 Int decal;
425 ia = changeorigin(myprow0, ma->sprow, p0,
426 ma->nbrow, ia, &decal, &ma->sprow);
427 ptrmyblock += decal;
428 ja = changeorigin(mypcol0, ma->spcol, q0,
429 ma->nbcol, ja, &decal, &ma->spcol);
430 ptrmyblock += decal * ma->lda;
431 ma->m = ia + m;
432 ma->n = ja + n;
433 ib = changeorigin(myprow1, mb->sprow, p1,
434 mb->nbrow, ib, &decal, &mb->sprow);
435 ptrmynewblock += decal;
436 jb = changeorigin(mypcol1, mb->spcol, q1,
437 mb->nbcol, jb, &decal, &mb->spcol);
438 ptrmynewblock += decal * mb->lda;
439 mb->m = ib + m;
440 mb->n = jb + n;
441 if (p0 == 1)
442 ma->nbrow = ma->m;
443 if (q0 == 1)
444 ma->nbcol = ma->n;
445 if (p1 == 1)
446 mb->nbrow = mb->m;
447 if (q1 == 1)
448 mb->nbcol = mb->n;
449#ifndef NDEBUG
450 paramcheck(ma, ia, ja, m, n, p0, q0, gcontext);
451 paramcheck(mb, ib, jb, m, n, p1, q1, gcontext);
452#endif
453 }
454 /* We compute the size of the memory buffer ( we choose the worst case,
455 * when the buffer sizes == the memory block sizes). */
456 if (myprow0 >= 0 && mypcol0 >= 0) {
457 /* Initialize pointer variables */
458 setmemory(&ptrsendbuff, memoryblocksize(ma));
459 }; /* if (mypnum < p0 * q0) */
460 if (myprow1 >= 0 && mypcol1 >= 0) {
461 /* Initialize pointer variables */
462 setmemory(&ptrrecvbuff, memoryblocksize(mb));
463 }; /* if (mypnum < p1 * q1) */
464 /* allocing room for the tabs, alloc for the worst case,local_n or local_m
465 * intervals, in fact the worst case should be less, perhaps half that,I
466 * should think of that one day. */
467 h_inter = (IDESC *) mr2d_malloc(DIVUP(ma->n, q0 * ma->nbcol) *
468 ma->nbcol * sizeof(IDESC));
469 v_inter = (IDESC *) mr2d_malloc(DIVUP(ma->m, p0 * ma->nbrow)
470 * ma->nbrow * sizeof(IDESC));
471 /* We go for the scanning of indices. For each processor including mypnum,
472 * we fill the sendbuff buffer (scanD0(SENDBUFF)) and when it is done send
473 * it. Then for each processor, we compute the size of message to be
474 * receive scanD0(SIZEBUFF)), post a receive and then allocate the elements
475 * of recvbuff the right place (scanD)(RECVBUFF)) */
476 recvptr = ptrrecvbuff;
477 {
478 Int tot, myrang, step, sens;
479 Int *sender, *recver;
480 Int mesending, merecving;
481 tot = max(p0 * q0, p1 * q1);
482 init_chenille(mypnum, nprocs, p0 * q0, proc0, p1 * q1, proc1,
483 &sender, &recver, &myrang);
484 if (myrang == -1)
485 goto after_comm;
486 mesending = myprow0 >= 0;
487 assert(sender[myrang] >= 0 || !mesending);
488 assert(!mesending || proc0[sender[myrang]] == mypnum);
489 merecving = myprow1 >= 0;
490 assert(recver[myrang] >= 0 || !merecving);
491 assert(!merecving || proc1[recver[myrang]] == mypnum);
492 step = tot - 1 - myrang;
493 do {
494 for (sens = 0; sens < 2; sens++) {
495 /* be careful here, when we communicating with ourselves, we must
496 * send first (myrang > step == 0) */
497 if (mesending && recver[step] >= 0 &&
498 (sens == 0)) {
499 i = recver[step] / q1;
500 j = recver[step] % q1;
501 vinter_nb = scan_intervals('r', ia, ib, m, ma, mb, p0, p1, myprow0, i,
502 v_inter);
503 hinter_nb = scan_intervals('c', ja, jb, n, ma, mb, q0, q1, mypcol0, j,
504 h_inter);
505 sendsize = block2buff(v_inter, vinter_nb, h_inter, hinter_nb,
506 ptrmyblock, ma, ptrsendbuff);
507 } /* if (mesending...) { */
508 if (mesending && recver[step] >= 0 &&
509 (sens == myrang > step)) {
510 i = recver[step] / q1;
511 j = recver[step] % q1;
512 if (sendsize > 0
513 && (step != myrang || !merecving)
514 ) {
515 Csgesd2d(gcontext, sendsize, (Int)1, ptrsendbuff, sendsize,
516 (Int)0, proc1[i * q1 + j]);
517 } /* sendsize > 0 */
518 } /* if (mesending ... */
519 if (merecving && sender[step] >= 0 &&
520 (sens == myrang <= step)) {
521 i = sender[step] / q0;
522 j = sender[step] % q0;
523 vinter_nb = scan_intervals('r', ib, ia, m, mb, ma, p1, p0, myprow1, i,
524 v_inter);
525 hinter_nb = scan_intervals('c', jb, ja, n, mb, ma, q1, q0, mypcol1, j,
526 h_inter);
527 recvsize = inter_len(hinter_nb, h_inter, vinter_nb, v_inter);
528 if (recvsize > 0) {
529 if (step == myrang && mesending) {
530 Clacpy(recvsize, 1,
531 ptrsendbuff, recvsize,
532 ptrrecvbuff, recvsize);
533 } else {
534 Csgerv2d(gcontext, recvsize, (Int)1, ptrrecvbuff, recvsize,
535 0, proc0[i * q0 + j]);
536 }
537 } /* recvsize > 0 */
538 } /* if (merecving ...) */
539 if (merecving && sender[step] >= 0 && sens == 1) {
540 buff2block(v_inter, vinter_nb, h_inter, hinter_nb,
541 recvptr, ptrmynewblock, mb);
542 } /* if (merecving...) */
543 } /* for (sens = 0) */
544 step -= 1;
545 if (step < 0)
546 step = tot - 1;
547 } while (step != tot - 1 - myrang);
548after_comm:
549 free(sender);
550 } /* { int tot,nr,ns ...} */
551 /* don't forget to clean up things! */
552 if (myprow1 >= 0 && mypcol1 >= 0) {
553 freememory((char *) ptrrecvbuff);
554 };
555 if (myprow0 >= 0 && mypcol0 >= 0) {
556 freememory((char *) ptrsendbuff);
557 };
558 if (nprow != 1)
559 Cblacs_gridexit(gcontext);
560 free(v_inter);
561 free(h_inter);
562 free(param);
#define Int
Definition Bconfig.h:22
static2 Int inter_len()
Int memoryblocksize()
Int changeorigin()
#define freememory
Definition psgemr.c:220
#define scan_intervals
Definition psgemr.c:221
void Csgerv2d()
void Cblacs_gridexit()
#define max(A, B)
Definition psgemr.c:177
static2 void gridreshape()
#define DIVUP(a, b)
Definition psgemr.c:179
#define Clacpy
Definition psgemr.c:158
#define NBPARAM
Definition psgemr.c:285
void Cigamn2d()
#define setmemory
Definition psgemr.c:219
static2 void buff2block()
void paramcheck()
static2 Int block2buff()
void Cblacs_gridinfo()
#define MAGIC_MAX
Definition psgemr.c:286
void * mr2d_malloc()
void Csgesd2d()
static2 void init_chenille()
Int m
Definition pcgemr.c:166
Int spcol
Definition pcgemr.c:171
Int nbcol
Definition pcgemr.c:169
Int sprow
Definition pcgemr.c:170
Int nbrow
Definition pcgemr.c:168
Int ctxt
Definition pcgemr.c:165
Int n
Definition pcgemr.c:167
Int lda
Definition pcgemr.c:172
Here is the call graph for this function: