PaCO++
0.05
|
#include <stdio.h>
#include <Padico/MPCircuit.h>
#include "Schedule.h"
#include "Internal.h"
#include "DistributionBloc.h"
Go to the source code of this file.
Defines | |
#define | DEBUG_COMM |
#define | DEBUG_INTERNAL |
Functions | |
void | computeReceiveBlock1D (const GlobalData_t &gd, const LocalData_t &dd, const Topology_t &stopo, const Topology_t &dtopo, vector< LocalData_t > &vOut) |
void | computeSendBlock1D (const GlobalData_t &gd, const LocalData_t &sd, const Topology_t &stopo, const Topology_t &dtopo, vector< LocalData_t > &vOut) |
void | doSchedule (const GlobalData_t &gd, const LocalData_t &ld, const Topology_t &ctopo, vector< LocalData_t > &sched_send, vector< LocalData_t > &sched_recv, void *comm) |
#define DEBUG_COMM |
Definition at line 17 of file PC/Schedule.cc.
#define DEBUG_INTERNAL |
Definition at line 16 of file PC/Schedule.cc.
void computeReceiveBlock1D (const GlobalData_t &gd, const LocalData_t &dd, const Topology_t &stopo, const Topology_t &dtopo, vector< LocalData_t > &vOut)
Definition at line 105 of file PC/Schedule.cc.
References blockSize(), getProcRangeInf(), and getProcRangeSup().
{
  // Receive-side schedule for a 1D block distribution.
  //
  // Appends to vOut the pieces of the local destination block `dd` that are
  // to be received. gd supplies the global length and the element size,
  // stopo/dtopo the number of source/destination nodes (`total`).
  // Existing content of vOut is kept.

#ifdef DEBUG_INTERNAL
  cerr << "\nIn compute Receive Schedule--------------------\n";
  fprintf(stderr, "stopo: %ld\tdtopo: %ld\n",stopo.total, dtopo.total);
  fprintf(stderr, "gd.len %ld\tdd.start %d\tdd.len %d\n", gd.len, dd.start, dd.len);
#endif

  // Same node count on both sides: the local block is scheduled unchanged.
  if (stopo.total == dtopo.total) {
    vOut.push_back(dd);
#ifdef DEBUG_INTERNAL
    fprintf(stderr, " rank:%d start:%d len:%d base:%p\n", dd.rank, dd.start, dd.len, dd.base);
#endif
    return;
  }

  // Append mode. In bloc mode, at most one msg from each src node.
  vOut.reserve(vOut.size() + stopo.total);

  unsigned srcBlockSize = blockSize(gd.len, stopo.total);
  unsigned long localBegin = dd.start;
  unsigned long localEnd = localBegin + dd.len;

  // Range of source ranks to visit, as reported by the range helpers.
  unsigned firstSrc = getProcRangeInf(localBegin, srcBlockSize);
  unsigned lastSrc = getProcRangeSup(localEnd, srcBlockSize);

#ifdef DEBUG_INTERNAL
  fprintf(stderr, " loop from %d to %d width stotal: %ld\n", firstSrc, lastSrc, stopo.total);
#endif

  // One schedule entry per source bloc.
  for (unsigned src = firstSrc; src <= lastSrc; ++src) {
    vOut.resize(vOut.size() + 1);
    LocalData_t& piece = vOut.back();
    piece.rank = src;

    // start = max(localBegin, src * srcBlockSize)
    unsigned bound = src * srcBlockSize;
    if (localBegin >= bound) {
      piece.start = localBegin;
    } else {
      piece.start = bound;
    }

    // stop = min(localEnd, (src + 1) * srcBlockSize)
    bound = (src + 1) * srcBlockSize;
    unsigned stop;
    if (localEnd <= bound) {
      stop = localEnd;
    } else {
      stop = bound;
    }

    piece.len = stop - piece.start;
    // Offset into the local buffer, scaled by the element size.
    piece.base = dd.base + ((piece.start - dd.start) * gd.unit_size);

#ifdef DEBUG_INTERNAL
    fprintf(stderr, " r: from:%d start:%d len:%d base:%p\n", piece.rank, piece.start, piece.len, piece.base);
#endif
  }
}
void computeSendBlock1D (const GlobalData_t &gd, const LocalData_t &sd, const Topology_t &stopo, const Topology_t &dtopo, vector< LocalData_t > &vOut)
Definition at line 39 of file PC/Schedule.cc.
References blockSize(), getProcRangeInf(), and getProcRangeSup().
{
  // Send-side schedule for a 1D block distribution.
  //
  // Appends to vOut the pieces of the local source block `sd` that are to be
  // sent. gd supplies the global length and the element size, stopo/dtopo the
  // number of source/destination nodes (`total`). Existing content of vOut
  // is kept.

#ifdef DEBUG_INTERNAL
  cerr << "\nIn compute Send Schedule--------------------\n";
  fprintf(stderr, "stopo: %ld\tdtopo: %ld\n",stopo.total, dtopo.total);
  fprintf(stderr, "gd.len %ld\tsd.start %d\tsd.len %d\n",gd.len, sd.start, sd.len);
#endif

  if (stopo.total != dtopo.total) {
    // Append mode. In bloc mode, at most one msg to each dest node.
    vOut.reserve(vOut.size() + dtopo.total);

    unsigned dstBlockSize = blockSize(gd.len, dtopo.total);
    unsigned long localBegin = sd.start;
    unsigned long localEnd = localBegin + sd.len;

    // Range of destination ranks to visit, as reported by the range helpers.
    unsigned firstDst = getProcRangeInf(localBegin, dstBlockSize);
    unsigned lastDst = getProcRangeSup(localEnd, dstBlockSize);

#ifdef DEBUG_INTERNAL
    fprintf(stderr, " loop from %d to %d width dtotal: %ld\n", firstDst, lastDst, dtopo.total);
#endif

    // One schedule entry per destination bloc.
    for (unsigned dst = firstDst; dst <= lastDst; ++dst) {
      vOut.resize(vOut.size() + 1);
      LocalData_t& piece = vOut.back();
      piece.rank = dst;

      // start = max(localBegin, dst * dstBlockSize)
      unsigned bound = dst * dstBlockSize;
      if (localBegin >= bound) {
        piece.start = localBegin;
      } else {
        piece.start = bound;
      }

      // stop = min(localEnd, (dst + 1) * dstBlockSize)
      bound = (dst + 1) * dstBlockSize;
      unsigned stop;
      if (localEnd <= bound) {
        stop = localEnd;
      } else {
        stop = bound;
      }

      piece.len = stop - piece.start;
      // Offset into the local buffer, scaled by the element size.
      piece.base = sd.base + ((piece.start - sd.start) * gd.unit_size);

#ifdef DEBUG_INTERNAL
      fprintf(stderr, " s: to:%d start:%d len:%d base:%p\n", piece.rank, piece.start, piece.len, piece.base);
#endif
    }
  } else {
    // Same node count on both sides: the local block is scheduled unchanged.
    vOut.push_back(sd);
#ifdef DEBUG_INTERNAL
    fprintf(stderr, " rank:%d start:%d len:%d base:%p\n", sd.rank, sd.start, sd.len, sd.base);
#endif
  }

#ifdef DEBUG_INTERNAL
  cerr << "\nIn compute Send Schedule-------------------- done\n";
#endif
}
void doSchedule (const GlobalData_t &gd, const LocalData_t &ld, const Topology_t &ctopo, vector< LocalData_t > &sched_send, vector< LocalData_t > &sched_recv, void *comm)
Definition at line 170 of file PC/Schedule.cc.
References cmp_rank(), and getProcId().
{ cerr << "\nIn doSchedule--------------------\n"; padico_mpcircuit_t schd_mpc = (padico_mpcircuit_t) comm; if (sched_send.size() || sched_recv.size()) { #ifndef NO_COM void* rreq[sched_recv.size()]; unsigned ri; ri=0; #endif vector<LocalData_t*> local_recv; vector<LocalData_t*> local_send; local_recv.clear(); local_send.clear(); // Sending data // Post Asynchronous MPCircuit receive #ifdef DEBUG_COM cerr << " #sched_recv: " << sched_recv.size() << endl; #endif for(unsigned i=0; i < sched_recv.size(); i++) { unsigned from = getProcId(sched_recv[i].rank, ctopo); if (from == ld.rank) { #ifdef DEBUG_COMM fprintf(stderr, " recv: schedr no=%d start=%d len=%d from=%d LOCAL\n", i, sched_recv[i].start, sched_recv[i].len, from); #endif local_recv.push_back(&sched_recv[i]); } else { #ifdef DEBUG_COMM fprintf(stderr, " recv: schedr no=%d start=%d len=%d from=%d base=%p\n", i, sched_recv[i].start, sched_recv[i].len, from, sched_recv[i].base); #endif #ifndef NO_COM rreq[ri++] = padico_mpcircuit_Irecv(sched_recv[i].base, sched_recv[i].len*gd.unit_size, from, 51, schd_mpc ); #endif } } // Send data via MPCircuit #ifdef DEBUG_COMM cerr << " #sched_send: " << sched_send.size() << endl; #endif for(unsigned i=0; i < sched_send.size(); i++) { unsigned to = getProcId(sched_send[i].rank, ctopo); if (to == ld.rank) { #ifdef DEBUG_COMM fprintf(stderr, " send: scheds no=%d start=%d len=%d to=%d LOCAL\n", i, sched_send[i].start, sched_send[i].len, to); #endif local_send.push_back(&sched_send[i]); } else { #ifdef DEBUG_COMM fprintf(stderr, " send: scheds no=%d start=%d len=%d to=%d base=%p\n", i, sched_send[i].start, sched_send[i].len, to, sched_send[i].base); #endif #ifndef NO_COM padico_mpcircuit_send(sched_send[i].base, sched_send[i].len*gd.unit_size, to, 51, schd_mpc); #endif } } // Do local communication vie memcpy if (local_recv.size() != local_send.size()) { cerr << "Error: local recv & send have different size: " << local_recv.size() << " " << local_send.size() << endl; } 
for(unsigned i=0; i < local_recv.size(); i++) { if (local_recv[i]->len != local_send[i]->len) { cerr << "Error: local recv & send have different len for i= "<<i<< " :" << local_recv[i]->len << " " << local_send[i]->len << endl; } #ifdef DEBUG_COMM fprintf(stderr, " local: scheds no=%d start=%d len=%d\n", i, sched_send[i].start, sched_send[i].len); #endif #ifndef NO_COM memcpy(local_recv[i]->base, local_send[i]->base, local_send[i]->len*gd.unit_size); #endif } // Wait all receive & send #ifndef NO_COM #ifdef DEBUG_INTERNAL cerr << "WAITING local communications to end...\n"; #endif padico_mpcircuit_waitAll(rreq, ri); #ifdef DEBUG_INTERNAL cerr << "WAITING local communications to end...ok \n"; #endif #endif } }