308{
309 float *ptrsendbuff, *ptrrecvbuff, *ptrNULL = 0;
310 float *recvptr;
312 Int *proc0, *proc1, *param;
313 Int mypnum, myprow0, mypcol0, myprow1, mypcol1, nprocs;
315 Int nprow, npcol, gcontext;
316 Int recvsize, sendsize;
319 Int hinter_nb, vinter_nb;
323
324
325
326 if (m == 0 || n == 0)
327 return;
328 ia -= 1;
329 ja -= 1;
330 ib -= 1;
331 jb -= 1;
333 gcontext = globcontext;
334 nprocs = nprow * npcol;
335
336
337
338 if (nprow != 1) {
341 }
343
344 if (myprow0 >= p0 || mypcol0 >= q0)
345 myprow0 = mypcol0 = -1;
346 assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1));
348 if (myprow1 >= p1 || mypcol1 >= q1)
349 myprow1 = mypcol1 = -1;
350 assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1));
351
352
354 ra = param + nprocs * 2 +
NBPARAM;
355 ca = param + (nprocs * 2 +
NBPARAM) * 2;
356 for (i = 0; i < nprocs * 2 +
NBPARAM; i++)
359 proc1 = param +
NBPARAM + nprocs;
360
361
362 if (myprow0 >= 0) {
363 proc0[myprow0 * q0 + mypcol0] = mypnum;
364 param[0] = p0;
365 param[1] = q0;
368 param[6] = ma->
nbrow;
369 param[7] = ma->
nbcol;
370 param[8] = ma->
sprow;
371 param[9] = ma->
spcol;
372 param[10] = ia;
373 param[11] = ja;
374 }
375 if (myprow1 >= 0) {
376 proc1[myprow1 * q1 + mypcol1] = mypnum;
377 param[2] = p1;
378 param[3] = q1;
381 param[14] = mb->
nbrow;
382 param[15] = mb->
nbcol;
383 param[16] = mb->
sprow;
384 param[17] = mb->
spcol;
385 param[18] = ib;
386 param[19] = jb;
387 }
390 newa = *ma;
391 newb = *mb;
392 ma = &newa;
393 mb = &newb;
394 if (myprow0 == -1) {
395 p0 = param[0];
396 q0 = param[1];
399 ma->
nbrow = param[6];
400 ma->
nbcol = param[7];
401 ma->
sprow = param[8];
402 ma->
spcol = param[9];
403 ia = param[10];
404 ja = param[11];
405 }
406 if (myprow1 == -1) {
407 p1 = param[2];
408 q1 = param[3];
411 mb->
nbrow = param[14];
412 mb->
nbcol = param[15];
413 mb->
sprow = param[16];
414 mb->
spcol = param[17];
415 ib = param[18];
416 jb = param[19];
417 }
418 for (i = 0; i <
NBPARAM; i++) {
420 fprintf(stderr, "xxGEMR2D:something wrong in the parameters\n");
421 exit(1);
422 }
423 }
424#ifndef NDEBUG
425 for (i = 0; i < p0 * q0; i++)
426 assert(proc0[i] >= 0 && proc0[i] < nprocs);
427 for (i = 0; i < p1 * q1; i++)
428 assert(proc1[i] >= 0 && proc1[i] < nprocs);
429#endif
430
431 paramcheck(ma, ia, ja, m, n, p0, q0, gcontext);
432 paramcheck(mb, ib, jb, m, n, p1, q1, gcontext);
433
434 {
438 ptrmyblock += decal;
441 ptrmyblock += decal * ma->
lda;
446 ptrmynewblock += decal;
449 ptrmynewblock += decal * mb->
lda;
452 if (p0 == 1)
454 if (q0 == 1)
456 if (p1 == 1)
458 if (q1 == 1)
460#ifndef NDEBUG
461 paramcheck(ma, ia, ja, m, n, p0, q0, gcontext);
462 paramcheck(mb, ib, jb, m, n, p1, q1, gcontext);
463#endif
464 }
465
466
467 if (myprow0 >= 0 && mypcol0 >= 0) {
468
470 };
471 if (myprow1 >= 0 && mypcol1 >= 0) {
472
474 };
475
476
477
482
483
484
485
486
487 recvptr = ptrrecvbuff;
488 {
489 Int tot, myrang, step, sens;
490 Int *sender, *recver;
491 Int mesending, merecving;
492 tot =
max(p0 * q0, p1 * q1);
493 init_chenille(mypnum, nprocs, p0 * q0, proc0, p1 * q1, proc1,
494 &sender, &recver, &myrang);
495 if (myrang == -1)
496 goto after_comm;
497 mesending = myprow0 >= 0;
498 assert(sender[myrang] >= 0 || !mesending);
499 assert(!mesending || proc0[sender[myrang]] == mypnum);
500 merecving = myprow1 >= 0;
501 assert(recver[myrang] >= 0 || !merecving);
502 assert(!merecving || proc1[recver[myrang]] == mypnum);
503 step = tot - 1 - myrang;
504 do {
505 for (sens = 0; sens < 2; sens++) {
506
507
508 if (mesending && recver[step] >= 0 &&
509 (sens == 0)) {
510 i = recver[step] / q1;
511 j = recver[step] % q1;
512 vinter_nb =
scan_intervals(
'r', ia, ib, m, ma, mb, p0, p1, myprow0, i,
513 v_inter);
514 hinter_nb =
scan_intervals(
'c', ja, jb, n, ma, mb, q0, q1, mypcol0, j,
515 h_inter);
517 m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1,
518 v_inter, vinter_nb, h_inter, hinter_nb,
519 ptrmyblock);
520 }
521 if (mesending && recver[step] >= 0 &&
522 (sens == myrang > step)) {
523 i = recver[step] / q1;
524 j = recver[step] % q1;
525 if (sendsize > 0
526 && (step != myrang || !merecving)
527 ) {
528 Csgesd2d(gcontext, sendsize, (
Int)1, ptrsendbuff, sendsize,
529 (
Int)0, proc1[i * q1 + j]);
530 }
531 }
532 if (merecving && sender[step] >= 0 &&
533 (sens == myrang <= step)) {
534 i = sender[step] / q0;
535 j = sender[step] % q0;
536 vinter_nb =
scan_intervals(
'r', ib, ia, m, mb, ma, p1, p0, myprow1, i,
537 v_inter);
538 hinter_nb =
scan_intervals(
'c', jb, ja, n, mb, ma, q1, q0, mypcol1, j,
539 h_inter);
541 m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1,
542 v_inter, vinter_nb, h_inter, hinter_nb, ptrNULL);
543 if (recvsize > 0) {
544 if (step == myrang && mesending) {
546 ptrsendbuff, recvsize,
547 ptrrecvbuff, recvsize);
548 } else {
549 Csgerv2d(gcontext, recvsize, (
Int)1, ptrrecvbuff, recvsize,
550 (
Int)0, proc0[i * q0 + j]);
551 }
552 }
553 }
554 if (merecving && sender[step] >= 0 && sens == 1) {
556 m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1,
557 v_inter, vinter_nb, h_inter, hinter_nb, ptrmynewblock);
558 }
559 }
560 step -= 1;
561 if (step < 0)
562 step = tot - 1;
563 } while (step != tot - 1 - myrang);
564after_comm:
565 free(sender);
566 }
567
568 if (myprow1 >= 0 && mypcol1 >= 0) {
570 };
571 if (myprow0 >= 0 && mypcol0 >= 0) {
573 };
574 if (nprow != 1)
576 free(v_inter);
577 free(h_inter);
578 free(param);
static2 void gridreshape()
static2 void init_chenille()