parallel_reduce.h

00001 /*
00002     Copyright 2005-2012 Intel Corporation.  All Rights Reserved.
00003 
00004     The source code contained or described herein and all documents related
00005     to the source code ("Material") are owned by Intel Corporation or its
00006     suppliers or licensors.  Title to the Material remains with Intel
00007     Corporation or its suppliers and licensors.  The Material is protected
00008     by worldwide copyright laws and treaty provisions.  No part of the
00009     Material may be used, copied, reproduced, modified, published, uploaded,
00010     posted, transmitted, distributed, or disclosed in any way without
00011     Intel's prior express written permission.
00012 
00013     No license under any patent, copyright, trade secret or other
00014     intellectual property right is granted to or conferred upon you by
00015     disclosure or delivery of the Materials, either expressly, by
00016     implication, inducement, estoppel or otherwise.  Any license under such
00017     intellectual property rights must be express and approved by Intel in
00018     writing.
00019 */
00020 
00021 #ifndef __TBB_parallel_reduce_H
00022 #define __TBB_parallel_reduce_H
00023 
00024 #include <new>
00025 #include "task.h"
00026 #include "aligned_space.h"
00027 #include "partitioner.h"
00028 #include "tbb_profiling.h"
00029 
00030 namespace tbb {
00031 
00032 namespace interface6 {
00034 namespace internal {
00035 
00036     using namespace tbb::internal;
00037 
00039     enum {
00040         root_task, left_child, right_child
00041     };
00042 
00044     typedef char reduction_context;
00045 
00047 
00048     template<typename Body>
00049     class finish_reduce: public flag_task {
00051         bool has_right_zombie;
00052         const reduction_context my_context;
00053         Body* my_body;
00054         aligned_space<Body,1> zombie_space;
00055         finish_reduce( reduction_context context_ ) :
00056             has_right_zombie(false), // TODO: substitute by flag_task::child_stolen?
00057             my_context(context_),
00058             my_body(NULL)
00059         {
00060         }
00061         task* execute() {
00062             if( has_right_zombie ) {
00063                 // Right child was stolen.
00064                 Body* s = zombie_space.begin();
00065                 my_body->join( *s );
00066                 s->~Body();
00067             }
00068             if( my_context==left_child )
00069                 itt_store_word_with_release( static_cast<finish_reduce*>(parent())->my_body, my_body );
00070             return NULL;
00071         }
00072         template<typename Range,typename Body_, typename Partitioner>
00073         friend class start_reduce;
00074     };
00075 
00077 
00078     template<typename Range, typename Body, typename Partitioner>
00079     class start_reduce: public task {
00080         typedef finish_reduce<Body> finish_type;
00081         Body* my_body;
00082         Range my_range;
00083         typename Partitioner::task_partition_type my_partition;
00084         reduction_context my_context; // TODO: factor out into start_reduce_base
00085         /*override*/ task* execute();
00086         template<typename Body_>
00087         friend class finish_reduce;
00088 
00089 public:
00091         start_reduce( const Range& range, Body* body, Partitioner& partitioner ) :
00092             my_body(body),
00093             my_range(range),
00094             my_partition(partitioner),
00095             my_context(root_task)
00096         {
00097         }
00099 
00100         start_reduce( start_reduce& parent_, split ) :
00101             my_body(parent_.my_body),
00102             my_range(parent_.my_range,split()),
00103             my_partition(parent_.my_partition,split()),
00104             my_context(right_child)
00105         {
00106             my_partition.set_affinity(*this);
00107             parent_.my_context = left_child;
00108         }
00110 
00111         start_reduce( start_reduce& parent_, const Range& r, depth_t d ) :
00112             my_body(parent_.my_body),
00113             my_range(r),
00114             my_partition(parent_.my_partition,split()),
00115             my_context(right_child)
00116         {
00117             my_partition.set_affinity(*this);
00118             my_partition.align_depth( d );
00119             parent_.my_context = left_child;
00120         }
00122         /*override*/ void note_affinity( affinity_id id ) {
00123             my_partition.note_affinity( id );
00124         }
00125         static void run( const Range& range, Body& body, Partitioner& partitioner ) {
00126             if( !range.empty() ) {
00127 #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
00128                 task::spawn_root_and_wait( *new(task::allocate_root()) start_reduce(range,&body,partitioner) );
00129 #else
00130                 // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
00131                 // and allows users to handle exceptions safely by wrapping parallel_for in the try-block.
00132                 task_group_context context;
00133                 task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
00134 #endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
00135             }
00136         }
00137 #if __TBB_TASK_GROUP_CONTEXT
00138         static void run( const Range& range, Body& body, Partitioner& partitioner, task_group_context& context ) {
00139             if( !range.empty() )
00140                 task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
00141         }
00142 #endif /* __TBB_TASK_GROUP_CONTEXT */
00144         finish_type *create_continuation() {
00145             return new( allocate_continuation() ) finish_type(my_context);
00146         }
00148         void run_body( Range &r ) { (*my_body)( r ); }
00149     };
00150     template<typename Range, typename Body, typename Partitioner>
00151     task* start_reduce<Range,Body,Partitioner>::execute() {
00152         my_partition.check_being_stolen( *this );
00153         if( my_context==right_child ) {
00154             finish_type* parent_ptr = static_cast<finish_type*>(parent());
00155             if( !itt_load_word_with_acquire(parent_ptr->my_body) ) { // TODO: replace by is_stolen_task() or by parent_ptr->ref_count() == 2???
00156                 my_body = new( parent_ptr->zombie_space.begin() ) Body(*my_body,split());
00157                 parent_ptr->has_right_zombie = true;
00158             }
00159         } else __TBB_ASSERT(my_context==root_task,NULL);// because left leaf spawns right leafs without recycling
00160         my_partition.execute(*this, my_range);
00161         if( my_context==left_child ) {
00162             finish_type* parent_ptr = static_cast<finish_type*>(parent());
00163             __TBB_ASSERT(my_body!=parent_ptr->zombie_space.begin(),NULL);
00164             itt_store_word_with_release(parent_ptr->my_body, my_body );
00165         }
00166         return NULL;
00167     }
00168 
00169 #if TBB_PREVIEW_DETERMINISTIC_REDUCE
00171 
00172     template<typename Body>
00173     class finish_deterministic_reduce: public task {
00174         Body &my_left_body;
00175         Body my_right_body;
00176 
00177         finish_deterministic_reduce( Body &body ) :
00178             my_left_body( body ),
00179             my_right_body( body, split() )
00180         {
00181         }
00182         task* execute() {
00183             my_left_body.join( my_right_body );
00184             return NULL;
00185         }
00186         template<typename Range,typename Body_>
00187         friend class start_deterministic_reduce;
00188     };
00189 
00191 
00192     template<typename Range, typename Body>
00193     class start_deterministic_reduce: public task {
00194         typedef finish_deterministic_reduce<Body> finish_type;
00195         Body &my_body;
00196         Range my_range;
00197         /*override*/ task* execute();
00198 
00200         start_deterministic_reduce( const Range& range, Body& body ) :
00201             my_body( body ),
00202             my_range( range )
00203         {
00204         }
00206 
00207         start_deterministic_reduce( start_deterministic_reduce& parent_, finish_type& c ) :
00208             my_body( c.my_right_body ),
00209             my_range( parent_.my_range, split() )
00210         {
00211         }
00212 
00213 public:
00214         static void run( const Range& range, Body& body ) {
00215             if( !range.empty() ) {
00216 #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
00217                 task::spawn_root_and_wait( *new(task::allocate_root()) start_deterministic_reduce(range,&body) );
00218 #else
00219                 // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
00220                 // and allows users to handle exceptions safely by wrapping parallel_for in the try-block.
00221                 task_group_context context;
00222                 task::spawn_root_and_wait( *new(task::allocate_root(context)) start_deterministic_reduce(range,body) );
00223 #endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
00224             }
00225         }
00226 #if __TBB_TASK_GROUP_CONTEXT
00227         static void run( const Range& range, Body& body, task_group_context& context ) {
00228             if( !range.empty() )
00229                 task::spawn_root_and_wait( *new(task::allocate_root(context)) start_deterministic_reduce(range,body) );
00230         }
00231 #endif /* __TBB_TASK_GROUP_CONTEXT */
00232     };
00233 
00234     template<typename Range, typename Body>
00235     task* start_deterministic_reduce<Range,Body>::execute() {
00236         if( !my_range.is_divisible() ) {
00237             my_body( my_range );
00238             return NULL;
00239         } else {
00240             finish_type& c = *new( allocate_continuation() ) finish_type( my_body );
00241             recycle_as_child_of(c);
00242             c.set_ref_count(2);
00243             start_deterministic_reduce& b = *new( c.allocate_child() ) start_deterministic_reduce( *this, c );
00244             task::spawn(b);
00245             return this;
00246         }
00247     }
00248 #endif /* TBB_PREVIEW_DETERMINISTIC_REDUCE */
00249 } // namespace internal
00251 } //namespace interfaceX
00252 
00254 namespace internal {
00255     using interface6::internal::start_reduce;
00256 #if TBB_PREVIEW_DETERMINISTIC_REDUCE
00257     using interface6::internal::start_deterministic_reduce;
00258 #endif
00260 
00264     template<typename Range, typename Value, typename RealBody, typename Reduction>
00265     class lambda_reduce_body {
00266 
00267 //FIXME: decide if my_real_body, my_reduction, and identity_element should be copied or referenced
00268 //       (might require some performance measurements)
00269 
00270         const Value&     identity_element;
00271         const RealBody&  my_real_body;
00272         const Reduction& my_reduction;
00273         Value            my_value;
00274         lambda_reduce_body& operator= ( const lambda_reduce_body& other );
00275     public:
00276         lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction )
00277             : identity_element(identity)
00278             , my_real_body(body)
00279             , my_reduction(reduction)
00280             , my_value(identity)
00281         { }
00282         lambda_reduce_body( const lambda_reduce_body& other )
00283             : identity_element(other.identity_element)
00284             , my_real_body(other.my_real_body)
00285             , my_reduction(other.my_reduction)
00286             , my_value(other.my_value)
00287         { }
00288         lambda_reduce_body( lambda_reduce_body& other, tbb::split )
00289             : identity_element(other.identity_element)
00290             , my_real_body(other.my_real_body)
00291             , my_reduction(other.my_reduction)
00292             , my_value(other.identity_element)
00293         { }
00294         void operator()(Range& range) {
00295             my_value = my_real_body(range, const_cast<const Value&>(my_value));
00296         }
00297         void join( lambda_reduce_body& rhs ) {
00298             my_value = my_reduction(const_cast<const Value&>(my_value), const_cast<const Value&>(rhs.my_value));
00299         }
00300         Value result() const {
00301             return my_value;
00302         }
00303     };
00304 
00305 } // namespace internal
00307 
00308 // Requirements on Range concept are documented in blocked_range.h
00309 
00328 
00330 
00331 template<typename Range, typename Body>
00332 void parallel_reduce( const Range& range, Body& body ) {
00333     internal::start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() );
00334 }
00335 
00337 
00338 template<typename Range, typename Body>
00339 void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
00340     internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner );
00341 }
00342 
00344 
00345 template<typename Range, typename Body>
00346 void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) {
00347     internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner );
00348 }
00349 
00351 
00352 template<typename Range, typename Body>
00353 void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) {
00354     internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner );
00355 }
00356 
00357 #if __TBB_TASK_GROUP_CONTEXT
00359 
00360 template<typename Range, typename Body>
00361 void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
00362     internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context );
00363 }
00364 
00366 
00367 template<typename Range, typename Body>
00368 void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
00369     internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context );
00370 }
00371 
00373 
00374 template<typename Range, typename Body>
00375 void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
00376     internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context );
00377 }
00378 #endif /* __TBB_TASK_GROUP_CONTEXT */
00379 
00383 
00384 
00385 template<typename Range, typename Value, typename RealBody, typename Reduction>
00386 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
00387     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00388     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
00389                           ::run(range, body, __TBB_DEFAULT_PARTITIONER() );
00390     return body.result();
00391 }
00392 
00394 
00395 template<typename Range, typename Value, typename RealBody, typename Reduction>
00396 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00397                        const simple_partitioner& partitioner ) {
00398     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00399     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
00400                           ::run(range, body, partitioner );
00401     return body.result();
00402 }
00403 
00405 
00406 template<typename Range, typename Value, typename RealBody, typename Reduction>
00407 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00408                        const auto_partitioner& partitioner ) {
00409     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00410     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
00411                           ::run( range, body, partitioner );
00412     return body.result();
00413 }
00414 
00416 
00417 template<typename Range, typename Value, typename RealBody, typename Reduction>
00418 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00419                        affinity_partitioner& partitioner ) {
00420     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00421     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
00422                                         ::run( range, body, partitioner );
00423     return body.result();
00424 }
00425 
00426 #if __TBB_TASK_GROUP_CONTEXT
00428 
00429 template<typename Range, typename Value, typename RealBody, typename Reduction>
00430 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00431                        const simple_partitioner& partitioner, task_group_context& context ) {
00432     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00433     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
00434                           ::run( range, body, partitioner, context );
00435     return body.result();
00436 }
00437 
00439 
00440 template<typename Range, typename Value, typename RealBody, typename Reduction>
00441 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00442                        const auto_partitioner& partitioner, task_group_context& context ) {
00443     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00444     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
00445                           ::run( range, body, partitioner, context );
00446     return body.result();
00447 }
00448 
00450 
00451 template<typename Range, typename Value, typename RealBody, typename Reduction>
00452 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00453                        affinity_partitioner& partitioner, task_group_context& context ) {
00454     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00455     internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
00456                                         ::run( range, body, partitioner, context );
00457     return body.result();
00458 }
00459 #endif /* __TBB_TASK_GROUP_CONTEXT */
00460 
00461 #if TBB_PREVIEW_DETERMINISTIC_REDUCE
00463 
00464 template<typename Range, typename Body>
00465 void parallel_deterministic_reduce( const Range& range, Body& body ) {
00466     internal::start_deterministic_reduce<Range,Body>::run( range, body );
00467 }
00468 
00469 #if __TBB_TASK_GROUP_CONTEXT
00471 
00472 template<typename Range, typename Body>
00473 void parallel_deterministic_reduce( const Range& range, Body& body, task_group_context& context ) {
00474     internal::start_deterministic_reduce<Range,Body>::run( range, body, context );
00475 }
00476 #endif /* __TBB_TASK_GROUP_CONTEXT */
00477 
00481 
00482 
00483 template<typename Range, typename Value, typename RealBody, typename Reduction>
00484 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
00485     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00486     internal::start_deterministic_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction> >
00487                           ::run(range, body);
00488     return body.result();
00489 }
00490 
00491 #if __TBB_TASK_GROUP_CONTEXT
00493 
00494 template<typename Range, typename Value, typename RealBody, typename Reduction>
00495 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00496                        task_group_context& context ) {
00497     internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
00498     internal::start_deterministic_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction> >
00499                           ::run( range, body, context );
00500     return body.result();
00501 }
00502 #endif /* __TBB_TASK_GROUP_CONTEXT */
00503 #endif /* TBB_PREVIEW_DETERMINISTIC_REDUCE */
00504 
00505 
00506 } // namespace tbb
00507 
00508 #endif /* __TBB_parallel_reduce_H */
00509 

Copyright © 2005-2012 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.