PaCO++  0.05
Schedule.cc File Reference
#include <stdio.h>
#include <Padico/MPCircuit.h>
#include "Schedule.h"
#include "Internal.h"
#include "DistributionBloc.h"
Include dependency graph for PC/Schedule.cc:

Go to the source code of this file.

Defines

#define DEBUG_COMM
#define DEBUG_INTERNAL

Functions

void computeReceiveBlock1D (const GlobalData_t &gd, const LocalData_t &dd, const Topology_t &stopo, const Topology_t &dtopo, vector< LocalData_t > &vOut)
void computeSendBlock1D (const GlobalData_t &gd, const LocalData_t &sd, const Topology_t &stopo, const Topology_t &dtopo, vector< LocalData_t > &vOut)
void doSchedule (const GlobalData_t &gd, const LocalData_t &ld, const Topology_t &ctopo, vector< LocalData_t > &sched_send, vector< LocalData_t > &sched_recv, void *comm)

Define Documentation

#define DEBUG_COMM

Definition at line 17 of file PC/Schedule.cc.

#define DEBUG_INTERNAL

Definition at line 16 of file PC/Schedule.cc.


Function Documentation

void computeReceiveBlock1D ( const GlobalData_t &  gd,
const LocalData_t &  dd,
const Topology_t &  stopo,
const Topology_t &  dtopo,
vector< LocalData_t > &  vOut 
)

Definition at line 105 of file PC/Schedule.cc.

References blockSize(), getProcRangeInf(), and getProcRangeSup().

                                       {
  // Appends to vOut the pieces that make up the local destination block `dd`.
  // When source and destination topologies have the same total, `dd` is
  // appended unchanged; otherwise `dd` is cut along the source block
  // boundaries and one piece is appended per source rank it overlaps.
  // Entries already present in vOut are left untouched (append mode).

#ifdef DEBUG_INTERNAL
  cerr << "\nIn compute Receive Schedule--------------------\n";

  fprintf(stderr, "stopo: %ld\tdtopo: %ld\n",stopo.total, dtopo.total);
  fprintf(stderr, "gd.len %ld\tdd.start %d\tdd.len %d\n", gd.len, dd.start, dd.len);

#endif

  // Identical totals on both sides: nothing to split.
  if (stopo.total == dtopo.total) {
    vOut.push_back(dd);
#ifdef DEBUG_INTERNAL
    fprintf(stderr, " rank:%d start:%d len:%d base:%p\n", dd.rank, dd.start, dd.len, dd.base);
#endif
    return;
  }

  // Append mode: in block mode there is at most one message from each source node.
  vOut.reserve(vOut.size() + stopo.total);

  // Length of one source block, as computed by blockSize().
  unsigned srcBlockLen = blockSize(gd.len, stopo.total);

  // Half-open global range [rangeBegin, rangeEnd) covered by the local block.
  unsigned long rangeBegin = dd.start;
  unsigned long rangeEnd   = rangeBegin + dd.len;

  // First and last source ranks to visit (both included in the loop below).
  unsigned firstSrc = getProcRangeInf(rangeBegin, srcBlockLen);
  unsigned lastSrc  = getProcRangeSup(rangeEnd,   srcBlockLen);

#ifdef DEBUG_INTERNAL
  fprintf(stderr, "  loop from %d to %d width stotal: %ld\n", firstSrc, lastSrc, stopo.total);
#endif

  // One piece per source block.
  for (unsigned srcRank = firstSrc; srcRank <= lastSrc; ++srcRank) {

    vOut.push_back(LocalData_t());
    LocalData_t& piece = vOut.back();

    piece.rank = srcRank;

    // Piece start = max(local range start, start of this source block).
    const unsigned blockBegin = srcRank * srcBlockLen;
    piece.start = (blockBegin > rangeBegin) ? blockBegin : rangeBegin;

    // Piece end = min(local range end, end of this source block).
    const unsigned blockEnd = (srcRank + 1) * srcBlockLen;
    unsigned pieceEnd = (blockEnd < rangeEnd) ? blockEnd : rangeEnd;

    piece.len = pieceEnd - piece.start;

    // Offset into the local buffer, scaled by gd.unit_size.
    piece.base = dd.base + ((piece.start - dd.start) * gd.unit_size);

#ifdef DEBUG_INTERNAL
    fprintf(stderr, "    r: from:%d start:%d len:%d base:%p\n", piece.rank, piece.start, piece.len, piece.base);
#endif
  }
}

Here is the call graph for this function:

void computeSendBlock1D ( const GlobalData_t &  gd,
const LocalData_t &  sd,
const Topology_t &  stopo,
const Topology_t &  dtopo,
vector< LocalData_t > &  vOut 
)

Definition at line 39 of file PC/Schedule.cc.

References blockSize(), getProcRangeInf(), and getProcRangeSup().

                                    {
  // Appends to vOut the pieces the local source block `sd` must be cut into.
  // When source and destination topologies have the same total, `sd` is
  // appended unchanged; otherwise `sd` is cut along the destination block
  // boundaries and one piece is appended per destination rank it overlaps.
  // Entries already present in vOut are left untouched (append mode).

#ifdef DEBUG_INTERNAL
  cerr << "\nIn compute Send Schedule--------------------\n";

  fprintf(stderr, "stopo: %ld\tdtopo: %ld\n",stopo.total, dtopo.total);
  fprintf(stderr, "gd.len %ld\tsd.start %d\tsd.len %d\n",gd.len, sd.start, sd.len);
#endif

  if (stopo.total != dtopo.total) {
    // Append mode: in block mode there is at most one message to each destination node.
    vOut.reserve(vOut.size() + dtopo.total);

    // Length of one destination block, as computed by blockSize().
    unsigned dstBlockLen = blockSize(gd.len, dtopo.total);

    // Half-open global range [rangeBegin, rangeEnd) covered by the local block.
    unsigned long rangeBegin = sd.start;
    unsigned long rangeEnd   = rangeBegin + sd.len;

    // First and last destination ranks to visit (both included in the loop below).
    unsigned firstDst = getProcRangeInf(rangeBegin, dstBlockLen);
    unsigned lastDst  = getProcRangeSup(rangeEnd,   dstBlockLen);

#ifdef DEBUG_INTERNAL
    fprintf(stderr, "  loop from %d to %d width dtotal: %ld\n", firstDst, lastDst, dtopo.total);
#endif

    // One piece per destination block.
    for (unsigned dstRank = firstDst; dstRank <= lastDst; ++dstRank) {

      vOut.push_back(LocalData_t());
      LocalData_t& piece = vOut.back();

      piece.rank = dstRank;

      // Piece start = max(local range start, start of this destination block).
      const unsigned blockBegin = dstRank * dstBlockLen;
      piece.start = (blockBegin > rangeBegin) ? blockBegin : rangeBegin;

      // Piece end = min(local range end, end of this destination block).
      const unsigned blockEnd = (dstRank + 1) * dstBlockLen;
      unsigned pieceEnd = (blockEnd < rangeEnd) ? blockEnd : rangeEnd;

      piece.len = pieceEnd - piece.start;

      // Offset into the local buffer, scaled by gd.unit_size.
      piece.base = sd.base + ((piece.start - sd.start) * gd.unit_size);

#ifdef DEBUG_INTERNAL
      fprintf(stderr, "    s: to:%d start:%d len:%d base:%p\n", piece.rank, piece.start, piece.len, piece.base);
#endif
    }
  } else {
    // Identical totals on both sides: nothing to split.
    vOut.push_back(sd);
#ifdef DEBUG_INTERNAL
    fprintf(stderr, "  rank:%d start:%d len:%d base:%p\n", sd.rank, sd.start, sd.len, sd.base);
#endif
  }
#ifdef DEBUG_INTERNAL
  cerr << "\nIn compute Send Schedule-------------------- done\n";
#endif
}

Here is the call graph for this function:

void doSchedule ( const GlobalData_t &  gd,
const LocalData_t &  ld,
const Topology_t &  ctopo,
vector< LocalData_t > &  sched_send,
vector< LocalData_t > &  sched_recv,
void *  comm 
)

Definition at line 170 of file PC/Schedule.cc.

References cmp_rank(), and getProcId().

                                                                                    {
  // Executes a redistribution schedule:
  //   1. posts one asynchronous MPCircuit receive per sched_recv entry whose
  //      peer is not this process,
  //   2. sends every sched_send entry whose peer is not this process,
  //   3. copies with memcpy the entries whose peer is this process
  //      (peer id == ld.rank), pairing local sends and receives by position,
  //   4. waits for the receives posted in step 1.
  //
  //   gd         - only gd.unit_size is read, to turn element counts into byte counts
  //   ld         - only ld.rank is read, to recognise entries whose peer is local
  //   ctopo      - forwarded to getProcId() to map a schedule rank to a peer id
  //   sched_send - pieces to send (rank, start, len, base are read)
  //   sched_recv - pieces to receive (rank, start, len, base are read)
  //   comm       - opaque handle, cast to padico_mpcircuit_t
  //
  // Does nothing beyond the entry trace when both schedules are empty.
  // NOTE(review): the entry trace below is emitted unconditionally, unlike the
  // other traces which are guarded by DEBUG_COMM / DEBUG_INTERNAL - confirm intent.

  cerr << "\nIn doSchedule--------------------\n";

  padico_mpcircuit_t schd_mpc = (padico_mpcircuit_t) comm;

  if (sched_send.size() || sched_recv.size()) {

#ifndef NO_COM
    // One request slot per scheduled receive; only the first `ri` are used.
    // NOTE(review): variable-length array (compiler extension in C++), kept
    // as is because the exact padico_mpcircuit_waitAll() contract is not
    // visible here - consider a vector once confirmed.
    void* rreq[sched_recv.size()];
    unsigned ri;
    ri=0;
#endif

    // Entries whose peer is this process; handled by memcpy further down.
    vector<LocalData_t*> local_recv;
    vector<LocalData_t*> local_send;

    // Post asynchronous MPCircuit receives
#ifdef DEBUG_COMM
    cerr << "    #sched_recv: " << sched_recv.size() << endl;
#endif
    for(unsigned i=0; i < sched_recv.size(); i++) {
      unsigned from = getProcId(sched_recv[i].rank, ctopo);
      if (from == ld.rank) {
#ifdef DEBUG_COMM
        fprintf(stderr, "    recv: schedr no=%d start=%d len=%d from=%d LOCAL\n", i,
                sched_recv[i].start, sched_recv[i].len, from);
#endif
        local_recv.push_back(&sched_recv[i]);
      } else {
#ifdef DEBUG_COMM
        fprintf(stderr, "   recv: schedr no=%d start=%d len=%d from=%d base=%p\n", i,
                sched_recv[i].start, sched_recv[i].len, from, sched_recv[i].base);
#endif

#ifndef NO_COM
        // 51 is the same literal passed to padico_mpcircuit_send() below
        // (presumably the message tag - confirm against the MPCircuit API).
        rreq[ri++] = padico_mpcircuit_Irecv(sched_recv[i].base, sched_recv[i].len*gd.unit_size,
                                            from, 51, schd_mpc );
#endif
      }
    }

    // Send data via MPCircuit
#ifdef DEBUG_COMM
    cerr << "    #sched_send: " << sched_send.size() << endl;
#endif
    for(unsigned i=0; i < sched_send.size(); i++) {
      unsigned to = getProcId(sched_send[i].rank, ctopo);
      if (to == ld.rank) {
#ifdef DEBUG_COMM
        fprintf(stderr, "    send: scheds no=%d start=%d len=%d to=%d LOCAL\n", i,
                sched_send[i].start, sched_send[i].len, to);
#endif
        local_send.push_back(&sched_send[i]);
      } else {
#ifdef DEBUG_COMM
        fprintf(stderr, "    send: scheds no=%d start=%d len=%d to=%d base=%p\n", i,
                sched_send[i].start, sched_send[i].len, to, sched_send[i].base);
#endif

#ifndef NO_COM
        padico_mpcircuit_send(sched_send[i].base, sched_send[i].len*gd.unit_size,
                              to, 51, schd_mpc);
#endif
      }
    }

    // Do local communication via memcpy
    if (local_recv.size() != local_send.size()) {
      cerr << "Error: local recv & send have different size: " << local_recv.size() << " " << local_send.size() << endl;
    }

    // Only walk the pairs that exist on both sides, so a count mismatch
    // (reported above) cannot index past the end of the shorter list.
    vector<LocalData_t*>::size_type nlocal = local_recv.size();
    if (local_send.size() < nlocal) {
      nlocal = local_send.size();
    }

    for(unsigned i=0; i < nlocal; i++) {
      if (local_recv[i]->len != local_send[i]->len) {
        cerr << "Error: local recv & send have different len for i= "<<i<< " :" << local_recv[i]->len << " " << local_send[i]->len << endl;
      }
#ifdef DEBUG_COMM
      // i indexes the local lists, not sched_send.
      fprintf(stderr, "    local: scheds no=%d start=%d len=%d\n", i,
              local_send[i]->start, local_send[i]->len);
#endif
#ifndef NO_COM
      memcpy(local_recv[i]->base, local_send[i]->base, local_send[i]->len*gd.unit_size);
#endif
    }


    // Wait for the receives posted above
#ifndef NO_COM
#ifdef DEBUG_INTERNAL
    cerr << "WAITING local communications to end...\n";
#endif

    padico_mpcircuit_waitAll(rreq, ri);
#ifdef DEBUG_INTERNAL
    cerr << "WAITING local communications to end...ok \n";
#endif
#endif
  }
}

Here is the call graph for this function: