/* *******************************************************************************
 * Copyright (c) 2007-2014, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * * Neither the name of Intel Corporation nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ********************************************************************************/

// ===============================================================================
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
//
// INCLUDE THIS FILE ONLY TO MAKE YOUR PROGRAM READY FOR DISTRIBUTED CnC
//
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// ===============================================================================

#ifndef __DIST_CNC__H_
#define __DIST_CNC__H_

/**
\page distcnc Running CnC applications on distributed memory

In principle, every clean CnC program should be immediately
applicable to distributed memory systems. With only a few trivial
changes most CnC programs can be made distribution-ready; you will
get a binary that runs on shared and distributed memory. Most of
the mechanics of data distribution etc. are handled inside the
runtime, and the programmer does not need to bother about the gory
details. Of course, a few minor changes are needed to make a
program distribution-ready, but once that's done, it will run on
distributed CnC as well as on "normal" CnC (decided at runtime).

\section dc_comm Inter-process communication
Conceptually, CnC allows data and computation distribution
across any kind of network; currently CnC supports SOCKETS and MPI.

\section dc_link Linking for distCnC
Support for distributed memory is part of the "normal" CnC
distribution, i.e. it comes with the necessary communication
libraries (cnc_socket, cnc_mpi). The communication library is
loaded on demand at runtime, hence you do not need to link against
extra libraries to create distribution-ready applications. Just
link your binaries like a "traditional" CnC application (explained
in the CnC User Guide, which can be found in the doc directory).
\note A distribution-ready CnC application binary has no dependencies
      on an MPI library; it can be run on shared memory or over SOCKETS
      even if no MPI is available on the system.

Even though it is not a separate package or module in the CnC kit,
in the following we will refer to features that are specific to
distributed memory as "distCnC".

\section dc_prog Making your program distCnC-ready
As a distributed version of a CnC program needs to do things which
are not required in a shared memory version, the extra code for
distCnC is hidden from "normal" CnC headers. To include the
features required for a distributed version you need to
\code #include <cnc/dist_cnc.h> \endcode
instead of \code #include <cnc/cnc.h> \endcode .
If you want to be able to create optimized binaries for shared
memory and distributed memory from the same source, you might
consider protecting distCnC specifics like this:
 @code
 #ifdef _DIST_
 # include <cnc/dist_cnc.h>
 #else
 # include <cnc/cnc.h>
 #endif
 @endcode

90 In "main", initialize an object CnC::dist_cnc_init< list-of-contexts >
91 before anything else; parameters should be all context-types that
92 you would like to be distributed. Context-types not listed in here
93 will stay local. You may mix local and distributed contexts, but
94 in most cases only one context is needed/used anyway.
95  @code
96  #ifdef _DIST_
97  CnC::dist_cnc_init< my_context_type_1 //, my_context_type_2, ...
98  > _dinit;
99  #endif
100  @endcode
101 
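Putting things together, a complete "main" might look like the
following sketch (my_context_type_1, its collection member m_tags and
the tag value 42 are placeholder names, not part of the CnC API):
 @code
 int main( int argc, char * argv[] )
 {
 #ifdef _DIST_
     // must exist before any context object is created
     CnC::dist_cnc_init< my_context_type_1 > _dinit;
 #endif
     my_context_type_1 ctxt;   // distributed because listed above
     ctxt.m_tags.put( 42 );    // hypothetical tag-collection member
     ctxt.wait();              // block until all steps have finished
     return 0;
 }
 @endcode
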
Even though the communication between processes is entirely handled by
the CnC runtime, C++ doesn't allow automatic marshaling/serialization
of arbitrary data-types. Hence, if and only if your items and/or tags
are non-standard data types, the compiler will notify you about the
need for serialization/marshaling capability. If you are using
standard data types only, then marshaling will be handled by CnC
automatically.

Marshaling doesn't involve sending messages or the like; it only
specifies how an object/variable is packed/unpacked into/from a
buffer. Marshaling of structs/classes without pointers or virtual
functions can easily be enabled using
\code CNC_BITWISE_SERIALIZABLE( type ); \endcode
others need a "serialize" method or function. The CnC kit comes
with a convenient interface for this which is similar to BOOST
serialization. It is very simple to use and requires only one
function/method for packing and unpacking. See \ref serialization for
more details.

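As a minimal sketch (the types and members are made-up examples, not
part of the CnC API), enabling marshaling could look like this:
 @code
 struct pos { int x; int y; };
 CNC_BITWISE_SERIALIZABLE( pos );   // plain struct: bitwise copy is fine

 struct my_item
 {
     int    id;
     double value;
     // one method serves both packing and unpacking
     void serialize( CnC::serializer & ser )
     {
         ser & id & value;
     }
 };
 @endcode
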
<b>This is it! Your CnC program will now run on distributed memory!</b>

\attention Global variables are evil and must not be used within
           the execution scope of steps. Read \ref dist_global
           about how CnC supports global read-only data.
           In effect, pointers are nothing but global
           variables and hence need special treatment in distCnC
           (see \ref serialization).
\note Even if your program runs on distributed memory, that does not
      necessarily imply that the trivial extension above will make it
      run fast. Please consult \ref dist_tuning for the tuning
      options for distributed memory.

The above describes the default "single-program" approach for
distribution. Please refer to CnC::dist_cnc_init for more advanced
modes which allow SPMD-style interaction as well as distributing
parts of the CnC program over groups of processes.


\section dc_run Running distCnC
The communication infrastructure used by distCnC is chosen at
runtime. By default, the CnC runtime will run your application in
shared memory mode. When starting up, the runtime will evaluate
the environment variable "DIST_CNC". Currently it accepts the
following values:
- SHMEM : shared memory (default)
- SOCKETS : communication through TCP sockets
- MPI : using Intel(R) MPI

Please see \ref itac on how to profile distributed programs.

\subsection dc_sockets Using SOCKETS
On application start-up, when DIST_CNC=SOCKETS, CnC checks the
environment variable "CNC_SOCKET_HOST". If it is set to a number,
it will print a contact string and wait for the given number of
clients to connect. Usually this means that clients need to be
started "manually" as follows: set DIST_CNC=SOCKETS and
"CNC_SOCKET_CLIENT" to the given contact string, and launch the
same executable on the desired machine.

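For example, with one host waiting for two clients (my_cnc_program
is a placeholder):
\code
# on the host: prints a contact string and waits for 2 clients
env DIST_CNC=SOCKETS CNC_SOCKET_HOST=2 ./my_cnc_program

# on each client machine, using the printed contact string:
env DIST_CNC=SOCKETS CNC_SOCKET_CLIENT=<contact-string> ./my_cnc_program
\endcode
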
You can also manually provide the hostname and port number by
setting the environment variables CNC_SOCKET_HOST and
CNC_SOCKET_HOSTNAME.

165 If "CNC_SOCKET_HOST" is not a number it is interpreted as a
166 name of a script. CnC executes the script twice: First with "-n"
167 it expects the script to return the number of clients it will
168 start. The second invocation is expected to launch the client
169 processes. If the returned number of clients if prepended with
170 '+' CnC will assume that the script starts all client processes
171 in one one go when called the second time (without -n). If it's
172 a plain positive integer CnC will run the script once for each
173 client process individually.
174 
CnC also sets 2 environment variables that the script can read:
- CNC_HOST_EXECUTABLE : the name of the executable that's run
  by the host process
- CNC_HOST_ARGS : the command-line arguments passed to the host
  process

Three example scripts are provided:
- misc/distributed/socket/start.sh : starts each client individually
- misc/distributed/socket/start_batch.sh : starts all clients in one go
- misc/distributed/socket/start_mpirun.sh : uses mpirun to start all clients together

All scripts require password-less ssh login (or whatever MPI is
configured to use if you use the mpirun script). Set the env var
CNC_NUM_CLIENTS to the number of clients you want to start. To
facilitate the use of different machines they also read the env var
CNC_HOST_FILE. If found, they will read the hostnames to use from the
given file (one hostname per line is expected). If CNC_HOST_FILE is not
specified, all clients get started on localhost.

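A host using one of these scripts might be launched like this (a
sketch; my_cnc_program and hosts.txt are placeholders):
\code
env DIST_CNC=SOCKETS CNC_SOCKET_HOST=./misc/distributed/socket/start.sh \
    CNC_NUM_CLIENTS=4 CNC_HOST_FILE=hosts.txt ./my_cnc_program
\endcode
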
For Windows, the script "start.bat" does the same as start.sh,
except that it will start the clients on the same machine without
ssh or the like. Adjust the script to use your preferred remote login
mechanism.

\subsection dc_mpi MPI
CnC comes with a communication layer based on MPI. You need the
Intel(R) MPI runtime to use it. You can download a free version of
the MPI runtime from
http://software.intel.com/en-us/articles/intel-mpi-library/ (under
"Resources"). A distCnC application is launched like any other
MPI application with mpirun or mpiexec, but DIST_CNC must be set
to MPI:
\code
env DIST_CNC=MPI mpiexec -n 4 my_cnc_program
\endcode
Alternatively, just run the app as usual (with DIST_CNC=MPI) and
control the number (n) of additionally spawned processes with
CNC_MPI_SPAWN=n. If host and client applications need to be
different, set CNC_MPI_EXECUTABLE to the client-program
name. Here's an example:
\code
env DIST_CNC=MPI CNC_MPI_SPAWN=3 CNC_MPI_EXECUTABLE=cnc_client cnc_host
\endcode
It starts your host executable "cnc_host" and then spawns 3 additional
processes which all execute the client executable "cnc_client".

\subsection dc_mic Intel Xeon Phi(TM) (MIC)
For CnC, a MIC process is just another process where work can be
computed. So all you need to do is:
- Build your application for MIC (see
  http://software.intel.com/en-us/articles/intel-concurrent-collections-getting-started)
- Start a process with the MIC executable on each MIC card, just
  like on a CPU. Communication and startup are equivalent to how they
  work on intel64 (\ref dc_mpi and \ref dc_sockets).

\note Of course the normal mechanics for MIC need to be considered
      (like getting applications and dependent libraries to the MIC
      first). You'll find documentation about this on IDZ, like
      <A HREF="http://software.intel.com/en-us/articles/how-to-run-intel-mpi-on-xeon-phi">here</A>
      and/or <A HREF="http://software.intel.com/en-us/articles/using-the-intel-mpi-library-on-intel-xeon-phi-coprocessor-systems">here</A>.
\note We recommend starting only 2 threads per MIC-core, e.g. if your
      card has 60 cores, set CNC_NUM_THREADS=120.
\note To start different binaries with one mpirun/mpiexec command you
      can use a syntax like this:<br>
      mpirun -genv DIST_CNC=MPI -n 2 -host xeon xeonbinary : -n 1 -host mic0 -env CNC_NUM_THREADS=120 micbinary


\section def_dist Default Distribution
Step instances are distributed across clients and the host. By
default, they are distributed in a round-robin fashion. Note that
every process can put tags (and so prescribe new step instances).
The round-robin distribution decision is made locally on each
process (not globally).

If the same tag is put multiple times, the default scheduling
might execute the multiply prescribed steps on different processes,
and the preserveTags attribute of tag_collections will then not
have the desired effect.

The default scheduling is intended primarily as a development aid:
your CnC application will be distribution-ready with only little effort.
In some cases it might lead to good performance; in other cases
a sensible distribution is needed to achieve good performance.
See \ref dist_tuning.

Next: \ref dist_tuning


\page dist_tuning Tuning for distributed memory
The CnC tuning interface provides convenient ways to control the
distribution of work and data across the address spaces. The
tuning interface is separate from the actual step-code, and its
declarative nature allows flexible and productive experiments with
different distribution strategies.

\section dist_work Distributing the work
Let's first look at the distribution of work/steps. You can specify
the distribution of work (e.g. steps) across the network by providing
a tuner to a step-collection (the second template argument to
CnC::step_collection, see \ref tuning). Similar to other tuning
features, the tuner defines the distribution plan based on the
control-tags and item-tags. For a given instance (identified by the
control-tag) the tuner defines the placement of the instance in the
communication network. This mechanism allows a declarative definition
of the distribution and keeps it separate from the actual program code
- you can change the distribution without changing the actual program.

The method for distributing steps is called "compute_on". It takes the
tag of the step and the context as arguments and has to return the
process number to run the step on. The numbering of processes is
similar to ranks in MPI. Running on "N" processes, the host process is
"0" and the last client "N-1".

 @code
 struct my_tuner : public CnC::step_tuner<>
 {
     int compute_on( const tag_type & tag, context_type & ) const { return tag % numProcs(); }
 };
 @endcode

The shown tuner is derived from CnC::step_tuner. To allow a flexible
and generic definition of the distribution, CnC::step_tuner provides
information specific to distributed memory:
CnC::tuner_base::numProcs() and CnC::tuner_base::myPid(). Both return
the values of the current run of your application. Using those allows
defining a distribution plan which adapts to the current runtime
configuration.

If you wonder how the necessary data gets distributed - this will be
covered soon. Let's first look at the computation side a bit more
closely; but if you can't wait, see \ref dist_data.

The tuner given above simply distributes the tags in a
round-robin fashion by applying the modulo operator on the tag. Here's
an example of how a given set of tags would be mapped to 4 processes
(e.g. numProcs()==4):
\verbatim
1  -> 1
3  -> 3
4  -> 0
5  -> 1
10 -> 2
20 -> 0
31 -> 3
34 -> 2
\endverbatim

An example of such a simple tuner is \ref bs_tuner.

Now let's do something a little more interesting. Let's assume our tag
is a pair of x and y coordinates. To distribute the work per row, we
could simply do something like this:

 @code
 struct my_tuner : public CnC::step_tuner<>
 {
     int compute_on( const tag_type & tag, context_type & ) const { return tag.y % numProcs(); }
 };
 @endcode

As you see, the tuner entirely ignores the x-part of the tag. This
means that all entries on a given row (identified by tag.y) get
executed on the same process. Similarly, if you want to distribute
the work per column instead, you simply change it to

 @code
 struct my_tuner : public CnC::step_tuner<>
 {
     int compute_on( const tag_type & tag, context_type & ) const { return tag.x % numProcs(); }
 };
 @endcode

As we'll also see later, you can certainly also conditionally switch
between row- and column-wise (or any other) distribution within
compute_on.

To avoid the aforementioned problem of becoming globally
inconsistent, you should make sure that the return value is
independent of the process it is executed on.

CnC provides special values to make working with compute_on more
convenient, more generic and more effective:
CnC::COMPUTE_ON_LOCAL, CnC::COMPUTE_ON_ROUND_ROBIN,
CnC::COMPUTE_ON_ALL, CnC::COMPUTE_ON_ALL_OTHERS.

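For instance, a tuner that runs every step on the process where its
prescribing tag was put needs no rank arithmetic at all (a sketch; it
assumes tags are put on the process where the step should execute):
 @code
 struct local_tuner : public CnC::step_tuner<>
 {
     // keep each step on the process that put its tag
     int compute_on( const tag_type &, context_type & ) const
     {
         return CnC::COMPUTE_ON_LOCAL;
     }
 };
 @endcode
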
\section dist_data Distributing the data
By default, the CnC runtime will deliver data items automatically
to where they are needed. In its current form, the C++ API does
not express the dependencies between instances of steps and/or
items. Hence, without additional information, the runtime does not
know which step-instances produce and consume which
item-instances. Even when the step-distribution is known,
automatic distribution of data requires global communication.
Clearly, this constitutes a considerable bottleneck. The CnC tuner
interface provides two ways to reduce this overhead.

The ideal, most flexible and most efficient approach is to map
items to their consumers. It converts the default pull-model
into a push-model: whenever an item is produced, it will be
sent only to those processes which actually need it, without any
other communication/synchronization. If you can determine which
steps are going to consume a given item, you can use the above
compute_on to map the consumer step to the actual address
spaces. This allows changing the distribution at a single place
(compute_on), and the data distribution will be automatically
optimized to the minimum needed data transfer.

The runtime evaluates the tuner provided to the item-collection
when an item is put. If its method consumed_on (from
CnC::item_tuner) returns anything other than CnC::CONSUMER_UNKNOWN,
it will send the item to the returned process id and avoid all the
overhead of requesting the item when it is consumed.
 @code
 struct my_tuner : public CnC::item_tuner< tag_type, item_type >
 {
     int consumed_on( const tag_type & tag )
     {
         return my_step_tuner::consumed_on( consumer_step );
     }
 };
 @endcode

As more than one process might consume the item, you
can also return a vector of ids (instead of a single id) and the
runtime will send the item to all given processes.
 @code
 struct my_tuner : public CnC::item_tuner< tag_type, item_type >
 {
     std::vector< int > consumed_on( const tag_type & tag )
     {
         std::vector< int > consumers;
         // pseudo-code: iterate over all steps that consume the item with this tag
         foreach( consumer_step of tag ) {
             int _tmp = my_step_tuner::consumed_on( consumer_step );
             consumers.push_back( _tmp );
         }
         return consumers;
     }
 };
 @endcode

Like for compute_on, CnC provides special values to facilitate and
generalize the use of consumed_on: CnC::CONSUMER_UNKNOWN,
CnC::CONSUMER_LOCAL, CnC::CONSUMER_ALL and
CnC::CONSUMER_ALL_OTHERS.

Note that consumed_on can return CnC::CONSUMER_UNKNOWN for some
item-instances, and process rank(s) for others.

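A mixed strategy might look like the following sketch (is_broadcast
and consumer_of are hypothetical helpers, not part of the CnC API):
 @code
 struct my_tuner : public CnC::item_tuner< tag_type, item_type >
 {
     int consumed_on( const tag_type & tag )
     {
         if( is_broadcast( tag ) ) return CnC::CONSUMER_ALL;  // everyone needs it
         int c = consumer_of( tag );
         if( c < 0 ) return CnC::CONSUMER_UNKNOWN;            // fall back to pull-model
         return c % numProcs();
     }
 };
 @endcode
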
Sometimes the program semantics make it easier to think about the
producer of an item. CnC provides a mechanism to keep the
pull-model but allows declaring the owner/producer of the item. If
the producer of an item is specified, the CnC-runtime can
significantly reduce the communication overhead because it no
longer requires global communication to find the owner of the
item. For this, simply define the depends-method in your
step-tuner (derived from CnC::step_tuner) and provide the
owning/producing process as an additional argument.

 @code
 struct my_tuner : public CnC::step_tuner<>
 {
     int produced_on( const tag_type & tag ) const
     {
         return producer_known ? my_step_tuner::consumed_on( tag ) : tag % numProcs();
     }
 };
 @endcode

Like for consumed_on, CnC provides the special values
CnC::PRODUCER_UNKNOWN and CnC::PRODUCER_LOCAL to facilitate and
generalize the use of produced_on.

The push-model (consumed_on) smoothly cooperates with the
pull-model as long as they don't conflict.

\section dist_sync Keeping data and work distribution in sync
For a more productive development, you might consider implementing
consumed_on by thinking about which other steps (not processes)
consume the item. With that knowledge you can easily use the
appropriate compute_on function to determine the consuming process.
The great benefit here is that you can then change the compute
distribution (e.g. change compute_on) and the data will automatically
follow in an optimal way; data and work distribution will always be in
sync. This allows experimenting with different distribution plans with
much less trouble and lets you define different strategies at a single
place. Here is a simple example code which lets you select different
strategies at runtime; adding a new strategy only requires extending
the compute_on function:
\ref bs_tuner
A more complex example is this one: \ref cholesky_tuner

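A condensed sketch of such a strategy switch could look like this
(the enum values, the dist_type context member and rows_per_proc are
made-up names for illustration):
 @code
 struct my_tuner : public CnC::step_tuner<>
 {
     int compute_on( const tag_type & tag, context_type & ctxt ) const
     {
         switch( ctxt.dist_type ) {      // hypothetical context member
             case ROW_CYCLIC    : return tag.y % numProcs();
             case COLUMN_CYCLIC : return tag.x % numProcs();
             case BLOCKED_ROWS  : return ( tag.y / ctxt.rows_per_proc ) % numProcs();
             default            : return CnC::COMPUTE_ON_ROUND_ROBIN;
         }
     }
 };
 @endcode
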
\section dist_global Using global read-only data with distCnC
Many algorithms require global data that is initialized once and
stays read-only during the computation (dynamic single assignment,
DSA). In principle this is aligned with the CnC methodology, as
long as the initialization is done from the environment. The CnC
API allows global DSA data through the context, e.g. you can store
global data in the context, initialize it there and then use it in
a read-only fashion within your step codes.

The internal mechanism works as follows: on remote processes the
user context is default-constructed and then
de-serialized/un-marshaled. On the host, construction and
serialization/marshaling is done in a lazy manner, e.g. not
before something actually needs to be transferred. This allows
creating contexts on the host with non-default constructors, but
it requires overloading the serialize method of the context. The
actual time of transfer is not statically known; the earliest
possible time is the first item- or tag-put. All changes to the
context up to that point will be duplicated remotely, later
changes will not.
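
A minimal sketch of such a context (member and parameter names are
made up; only the global data is transferred in serialize):
 @code
 struct my_context : public CnC::context< my_context >
 {
     int                                 m_param;  // global read-only (DSA) data
     CnC::tag_collection< int >          m_tags;
     CnC::item_collection< int, double > m_items;

     my_context( int param = 0 )
         : m_param( param ), m_tags( *this ), m_items( *this ) {}

     void serialize( CnC::serializer & ser )
     {
         ser & m_param;   // duplicate the global data on remote processes
     }
 };
 @endcode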

Here is a simple example code which uses this feature:
\ref bs_tuner

Next: \ref non_cnc
**/

#ifdef _CnC_H_ALREADY_INCLUDED_
#warning dist_cnc.h included after cnc.h. Distribution capabilities will not be activated.
#endif

#ifndef _DIST_CNC_
# define _DIST_CNC_
#endif

#include <cnc/internal/dist/dist_init.h>

namespace CnC {
    namespace Internal {
        class void_context;
    }

    /// To enable remote CnC you must create one such object. The
    /// lifetime of the object defines the "scope" of
    /// distribution. Contexts created in the "scope" of the
    /// dist_cnc_init object (e.g. while it exists) will get
    /// distributed to participating processes (see \ref dc_run).
    ///
    /// Usually, a single dist_cnc_init object is created for the
    /// entire lifetime of a program, e.g. the dist_cnc_init object
    /// is created right when entering main and (auto-)destructed when
    /// main terminates. In this default mode all processes other than
    /// the root/host process exit the program when the dist_cnc_init
    /// object gets destructed.
    ///
    /// Actually, the current implementation allows only a single
    /// dist_cnc_init object at a time for every process. Hence, all
    /// contexts on a given process are distributed in the same way.
    /// However, an optional parameter/flag allows defining the
    /// processes that actually "share" the dist_cnc_init object (and
    /// so their contexts). The optional flag/parameter is interpreted
    /// as an MPI_Comm to be used by the dist_cnc_init scope. This
    /// allows different groups of processes (defined by the
    /// MPI_Comm's) to work on different CnC contexts/graphs
    /// concurrently. If no MPI_Comm was specified (e.g. the default),
    /// client processes exit the program when the host dist_cnc_init
    /// object is destructed. If an MPI_Comm is provided, they also wait
    /// until the host process destructs its dist_cnc_init object, but
    /// then simply return from the constructor rather than exiting the
    /// program. Naturally, all this only works when using the MPI
    /// communication infrastructure.
    ///
    /// Additionally, two modes of operation are supported:
    /// 1. By default, constructing a dist_cnc_init object blocks all
    ///    processes except the root process in the constructor.
    ///    Hence, code after the object instantiation will be executed
    ///    only on the host process.
    /// 2. If dist_env is set to true, the constructor returns on all
    ///    processes and execution continues in a SPMD style, e.g. all
    ///    processes continue program execution. The SPMD style mode
    ///    allows alternating between MPI phases and CnC phases. This
    ///    mode is currently supported only using MPI communication.
    ///    You have to ensure that all processes fully completed their
    ///    local context creation before putting any data into a
    ///    context's collection. Similarly, you have to synchronize
    ///    context-destruction. It is recommended to put an MPI_Barrier
    ///    right after instantiating a context and just before it gets
    ///    destructed (e.g. at the end of its scope).
    ///
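    /// A minimal SPMD-style sketch (MPI only; my_context_type is a
    /// placeholder, and initializing MPI explicitly is an assumption
    /// of this example, not a CnC requirement):
    /// @code
    /// int main( int argc, char * argv[] )
    /// {
    ///     MPI_Init( &argc, &argv );
    ///     {
    ///         CnC::dist_cnc_init< my_context_type > _dinit( true ); // dist_env mode
    ///         my_context_type ctxt;
    ///         MPI_Barrier( MPI_COMM_WORLD ); // all processes created the context
    ///         // ... alternate MPI phases and CnC puts/gets ...
    ///         ctxt.wait();
    ///         MPI_Barrier( MPI_COMM_WORLD ); // synchronize context destruction
    ///     }
    ///     MPI_Finalize();
    ///     return 0;
    /// }
    /// @endcode
    ///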
    /// \note It is possible to combine SPMD mode and providing an
    ///       MPI_Comm. You can even change the grouping in phases by
    ///       using different MPI_Comm's at different times of the
    ///       execution. E.g. the lifetime of a dist_cnc_init object might
    ///       be a (collective) function call. Make sure each process
    ///       has only a single dist_cnc_init object alive at each point in
    ///       time.
    ///
    /// \note All context classes ever used in the program must be
    ///       referenced as template arguments if they should be
    ///       distributed.
    /// \note All distributed contexts must have all
    ///       collections they use as members and must be
    ///       default-constructible.
    /// \note Pointers as tags are not supported by distCnC.
    ///
    /// Execution and other internal details are described in
    /// CnC::Internal::dist_init.
    template< class C1, class C2 = Internal::void_context, class C3 = Internal::void_context,
              class C4 = Internal::void_context, class C5 = Internal::void_context >
    struct /*CNC_API*/ dist_cnc_init : public Internal::dist_init< C1, C2, C3, C4, C5 >
    {
        dist_cnc_init() : Internal::dist_init< C1, C2, C3, C4, C5 >() {}
        /// \param dist_env enable SPMD-style access to contexts
        /// \param flag     MPI_Comm to be used (MPI only)
        dist_cnc_init( bool dist_env, long flag = 0 ) : Internal::dist_init< C1, C2, C3, C4, C5 >( flag, dist_env ) {}
        /// \param dist_env enable SPMD-style access to contexts
        /// \param flag     MPI_Comm to be used (MPI only)
        dist_cnc_init( long flag, bool dist_env = false ) : Internal::dist_init< C1, C2, C3, C4, C5 >( flag, dist_env ) {}
    };

} // namespace CnC

#include <cnc/cnc.h>

#endif // __DIST_CNC__H_