parallel_scan.h

00001 /*
00002     Copyright 2005-2010 Intel Corporation.  All Rights Reserved.
00003 
00004     The source code contained or described herein and all documents related
00005     to the source code ("Material") are owned by Intel Corporation or its
00006     suppliers or licensors.  Title to the Material remains with Intel
00007     Corporation or its suppliers and licensors.  The Material is protected
00008     by worldwide copyright laws and treaty provisions.  No part of the
00009     Material may be used, copied, reproduced, modified, published, uploaded,
00010     posted, transmitted, distributed, or disclosed in any way without
00011     Intel's prior express written permission.
00012 
00013     No license under any patent, copyright, trade secret or other
00014     intellectual property right is granted to or conferred upon you by
00015     disclosure or delivery of the Materials, either expressly, by
00016     implication, inducement, estoppel or otherwise.  Any license under such
00017     intellectual property rights must be express and approved by Intel in
00018     writing.
00019 */
00020 
00021 #ifndef __TBB_parallel_scan_H
00022 #define __TBB_parallel_scan_H
00023 
00024 #include "task.h"
00025 #include "aligned_space.h"
00026 #include <new>
00027 #include "partitioner.h"
00028 
00029 namespace tbb {
00030 
00032 
00033 struct pre_scan_tag {
00034     static bool is_final_scan() {return false;}
00035 };
00036 
00038 
00039 struct final_scan_tag {
00040     static bool is_final_scan() {return true;}
00041 };
00042 
00044 namespace internal {
00045 
00047 
00048     template<typename Range, typename Body>
00049     class final_sum: public task {
00050     public:
00051         Body body;
00052     private:
00053         aligned_space<Range,1> range;
00055         Body* stuff_last;
00056     public:
00057         final_sum( Body& body_ ) :
00058             body(body_,split())
00059         {
00060             poison_pointer(stuff_last);
00061         }
00062         ~final_sum() {
00063             range.begin()->~Range();
00064         }     
00065         void finish_construction( const Range& range_, Body* stuff_last_ ) {
00066             new( range.begin() ) Range(range_);
00067             stuff_last = stuff_last_;
00068         }
00069     private:
00070         /*override*/ task* execute() {
00071             body( *range.begin(), final_scan_tag() );
00072             if( stuff_last )
00073                 stuff_last->assign(body);
00074             return NULL;
00075         }
00076     };       
00077 
00079 
00080     template<typename Range, typename Body>
00081     class sum_node: public task {
00082         typedef final_sum<Range,Body> final_sum_type;
00083     public:
00084         final_sum_type *incoming; 
00085         final_sum_type *body;
00086         Body *stuff_last;
00087     private:
00088         final_sum_type *left_sum;
00089         sum_node *left;
00090         sum_node *right;     
00091         bool left_is_final;
00092         Range range;
00093         sum_node( const Range range_, bool left_is_final_ ) : 
00094             left_sum(NULL), 
00095             left(NULL), 
00096             right(NULL), 
00097             left_is_final(left_is_final_), 
00098             range(range_)
00099         {
00100             // Poison fields that will be set by second pass.
00101             poison_pointer(body);
00102             poison_pointer(incoming);
00103         }
00104         task* create_child( const Range& range_, final_sum_type& f, sum_node* n, final_sum_type* incoming_, Body* stuff_last_ ) {
00105             if( !n ) {
00106                 f.recycle_as_child_of( *this );
00107                 f.finish_construction( range_, stuff_last_ );
00108                 return &f;
00109             } else {
00110                 n->body = &f;
00111                 n->incoming = incoming_;
00112                 n->stuff_last = stuff_last_;
00113                 return n;
00114             }
00115         }
00116         /*override*/ task* execute() {
00117             if( body ) {
00118                 if( incoming )
00119                     left_sum->body.reverse_join( incoming->body );
00120                 recycle_as_continuation();
00121                 sum_node& c = *this;
00122                 task* b = c.create_child(Range(range,split()),*left_sum,right,left_sum,stuff_last);
00123                 task* a = left_is_final ? NULL : c.create_child(range,*body,left,incoming,NULL);
00124                 set_ref_count( (a!=NULL)+(b!=NULL) );
00125                 body = NULL; 
00126                 if( a ) spawn(*b);
00127                 else a = b;
00128                 return a;
00129             } else {
00130                 return NULL;
00131             }
00132         }
00133         template<typename Range_,typename Body_,typename Partitioner_>
00134         friend class start_scan;
00135 
00136         template<typename Range_,typename Body_>
00137         friend class finish_scan;
00138     };
00139 
00141 
00142     template<typename Range, typename Body>
00143     class finish_scan: public task {
00144         typedef sum_node<Range,Body> sum_node_type;
00145         typedef final_sum<Range,Body> final_sum_type;
00146         final_sum_type** const sum;
00147         sum_node_type*& return_slot;
00148     public:
00149         final_sum_type* right_zombie;
00150         sum_node_type& result;
00151 
00152         /*override*/ task* execute() {
00153             __TBB_ASSERT( result.ref_count()==(result.left!=NULL)+(result.right!=NULL), NULL );
00154             if( result.left )
00155                 result.left_is_final = false;
00156             if( right_zombie && sum ) 
00157                 ((*sum)->body).reverse_join(result.left_sum->body);
00158             __TBB_ASSERT( !return_slot, NULL );
00159             if( right_zombie || result.right ) {
00160                 return_slot = &result;
00161             } else {
00162                 destroy( result );
00163             }
00164             if( right_zombie && !sum && !result.right ) {
00165                 destroy(*right_zombie);
00166                 right_zombie = NULL;
00167             }
00168             return NULL;
00169         }
00170 
00171         finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) : 
00172             sum(sum_),
00173             return_slot(return_slot_), 
00174             right_zombie(NULL),
00175             result(result_)
00176         {
00177             __TBB_ASSERT( !return_slot, NULL );
00178         }
00179         ~finish_scan(){
00180 #if __TBB_TASK_GROUP_CONTEXT
00181             if (is_cancelled()) {
00182                 if (result.ref_count() == 0) destroy(result);
00183                 if (right_zombie) destroy(*right_zombie);
00184             }
00185 #endif
00186         }
00187     };
00188 
00190 
00191     template<typename Range, typename Body, typename Partitioner=simple_partitioner>
00192     class start_scan: public task {
00193         typedef sum_node<Range,Body> sum_node_type;
00194         typedef final_sum<Range,Body> final_sum_type;
00195         final_sum_type* body;
00197         final_sum_type** sum; 
00198         sum_node_type** return_slot;
00200         sum_node_type* parent_sum;
00201         bool is_final;
00202         bool is_right_child;
00203         Range range;
00204         typename Partitioner::partition_type partition;
00205         /*override*/ task* execute();
00206 #if __TBB_TASK_GROUP_CONTEXT
00207         tbb::task_group_context &my_context;
00208 #endif
00209 
00210         // The class is intended to destroy allocated tasks if exception occurs
00211         class task_cleaner: internal::no_copy {
00212             typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type;
00213 
00214             internal::sum_node<Range,Body>* my_root;
00215             final_sum_type* my_temp_body;
00216             const Range& my_range;
00217             Body& my_body;
00218             start_pass1_type* my_pass1;
00219         public:
00220             bool do_clean; // Set to true if cleanup is required.
00221             task_cleaner(internal::sum_node<Range,Body>* root, final_sum_type* temp_body, const Range& range, Body& body, start_pass1_type* pass1)
00222                 : my_root(root), my_temp_body(temp_body), my_range(range), my_body(body), my_pass1(pass1), do_clean(true) {}
00223             ~task_cleaner(){
00224                 if (do_clean) {
00225                     my_body.assign(my_temp_body->body);
00226                     my_temp_body->finish_construction( my_range, NULL );
00227                     my_temp_body->destroy(*my_temp_body);
00228                 }
00229             }
00230         };
00231 
00232     public:
00233         start_scan( sum_node_type*& return_slot_, start_scan& parent_, sum_node_type* parent_sum_ 
00234 #if __TBB_TASK_GROUP_CONTEXT
00235             , tbb::task_group_context &context_
00236 #endif
00237             ) :
00238             body(parent_.body),
00239             sum(parent_.sum),
00240             return_slot(&return_slot_),
00241             parent_sum(parent_sum_),
00242             is_final(parent_.is_final),
00243             is_right_child(false),
00244             range(parent_.range,split()),
00245             partition(parent_.partition,split())
00246 #if __TBB_TASK_GROUP_CONTEXT
00247           , my_context(context_)
00248 #endif
00249         {
00250             __TBB_ASSERT( !*return_slot, NULL );
00251         }
00252 
00253         start_scan( sum_node_type*& return_slot_, const Range& range_, final_sum_type& body_, const Partitioner& partitioner_
00254 #if __TBB_TASK_GROUP_CONTEXT
00255         , tbb::task_group_context &_context
00256 #endif
00257             ) :
00258             body(&body_),
00259             sum(NULL),
00260             return_slot(&return_slot_),
00261             parent_sum(NULL),
00262             is_final(true),
00263             is_right_child(false),
00264             range(range_),
00265             partition(partitioner_)
00266 #if __TBB_TASK_GROUP_CONTEXT
00267             , my_context (_context)
00268 #endif
00269         {
00270             __TBB_ASSERT( !*return_slot, NULL );
00271         }
00272 
00273         static void run(  const Range& range, Body& body, const Partitioner& partitioner 
00274 #if __TBB_TASK_GROUP_CONTEXT
00275         , task_group_context& context
00276 #endif
00277             ) {
00278             if( !range.empty() ) {
00279                 typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type;
00280                 internal::sum_node<Range,Body>* root = NULL;
00281                 typedef internal::final_sum<Range,Body> final_sum_type;
00282 #if __TBB_TASK_GROUP_CONTEXT
00283                 final_sum_type* temp_body = new(task::allocate_root(context)) final_sum_type( body );
00284                 start_pass1_type& pass1 = *new(task::allocate_root(context)) start_pass1_type(
00285                     /*return_slot=*/root,
00286                     range,
00287                     *temp_body,
00288                     partitioner,
00289                     context
00290                     );
00291 #else
00292                 final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body );
00293                 start_pass1_type& pass1 = *new(task::allocate_root()) start_pass1_type(
00294                     /*return_slot=*/root,
00295                     range,
00296                     *temp_body,
00297                     partitioner );
00298 #endif
00299                 task_cleaner my_cleaner(root, temp_body, range, body, &pass1);
00300 
00301                 task::spawn_root_and_wait( pass1 );
00302                 if( root ) {
00303                     my_cleaner.do_clean = false;
00304                     root->body = temp_body;
00305                     root->incoming = NULL;
00306                     root->stuff_last = &body;
00307                     task::spawn_root_and_wait( *root );
00308                 }
00309             }
00310         }
00311     };
00312 
00313     template<typename Range, typename Body, typename Partitioner>
00314     task* start_scan<Range,Body,Partitioner>::execute() {
00315         typedef internal::finish_scan<Range,Body> finish_pass1_type;
00316         finish_pass1_type* p = parent_sum ? static_cast<finish_pass1_type*>( parent() ) : NULL;
00317         // Inspecting p->result.left_sum would ordinarily be a race condition.
00318         // But we inspect it only if we are not a stolen task, in which case we
00319         // know that task assigning to p->result.left_sum has completed.
00320         bool treat_as_stolen = is_right_child && (is_stolen_task() || body!=p->result.left_sum);
00321         if( treat_as_stolen ) {
00322             // Invocation is for right child that has been really stolen or needs to be virtually stolen
00323 #if __TBB_TASK_GROUP_CONTEXT
00324             p->right_zombie = body = new( allocate_root(my_context) ) final_sum_type(body->body);
00325 #else
00326             p->right_zombie = body = new( allocate_root() ) final_sum_type(body->body);
00327 #endif
00328             is_final = false;
00329         }
00330         task* next_task = NULL;
00331         if( (is_right_child && !treat_as_stolen) || !range.is_divisible() || partition.should_execute_range(*this) ) {
00332             if( is_final )
00333                 (body->body)( range, final_scan_tag() );
00334             else if( sum )
00335                 (body->body)( range, pre_scan_tag() );
00336             if( sum ) 
00337                 *sum = body;
00338             __TBB_ASSERT( !*return_slot, NULL );
00339         } else {
00340             sum_node_type* result;
00341             if( parent_sum ) 
00342                 result = new(allocate_additional_child_of(*parent_sum)) sum_node_type(range,/*left_is_final=*/is_final);
00343             else
00344 #if __TBB_TASK_GROUP_CONTEXT
00345                 result = new(task::allocate_root(my_context)) sum_node_type(range,/*left_is_final=*/is_final);
00346 #else
00347                 result = new(task::allocate_root()) sum_node_type(range,/*left_is_final=*/is_final);
00348 #endif
00349             finish_pass1_type& c = *new( allocate_continuation()) finish_pass1_type(*return_slot,sum,*result);
00350             // Split off right child
00351 #if __TBB_TASK_GROUP_CONTEXT
00352             start_scan& b = *new( c.allocate_child() ) start_scan( /*return_slot=*/result->right, *this, result, my_context );
00353 #else
00354             start_scan& b = *new( c.allocate_child() ) start_scan( /*return_slot=*/result->right, *this, result );
00355 #endif
00356             b.is_right_child = true;    
00357             // Left child is recycling of *this.  Must recycle this before spawning b, 
00358             // otherwise b might complete and decrement c.ref_count() to zero, which
00359             // would cause c.execute() to run prematurely.
00360             recycle_as_child_of(c);
00361             c.set_ref_count(2);
00362             c.spawn(b);
00363             sum = &result->left_sum;
00364             return_slot = &result->left;
00365             is_right_child = false;
00366             next_task = this;
00367             parent_sum = result; 
00368             __TBB_ASSERT( !*return_slot, NULL );
00369         }
00370         return next_task;
00371     } 
00372 } // namespace internal
00374 
00375 // Requirements on Range concept are documented in blocked_range.h
00376 
00394 
00396 
00397 template<typename Range, typename Body>
00398 void parallel_scan( const Range& range, Body& body ) {
00399 #if __TBB_TASK_GROUP_CONTEXT
00400     task_group_context context;
00401 #endif // __TBB_TASK_GROUP_CONTEXT
00402     internal::start_scan<Range,Body,__TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER()
00403 #if __TBB_TASK_GROUP_CONTEXT
00404         , context
00405 #endif
00406         );
00407 }
00408 
00410 
00411 template<typename Range, typename Body>
00412 void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) {
00413 #if __TBB_TASK_GROUP_CONTEXT
00414     task_group_context context;
00415 #endif // __TBB_TASK_GROUP_CONTEXT
00416     internal::start_scan<Range,Body,simple_partitioner>::run(range,body,partitioner
00417 #if __TBB_TASK_GROUP_CONTEXT
00418         , context
00419 #endif
00420         );
00421 }
00422 
00424 
00425 template<typename Range, typename Body>
00426 void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) {
00427 #if __TBB_TASK_GROUP_CONTEXT
00428     task_group_context context;
00429 #endif // __TBB_TASK_GROUP_CONTEXT
00430     internal::start_scan<Range,Body,auto_partitioner>::run(range,body,partitioner
00431 #if __TBB_TASK_GROUP_CONTEXT
00432         , context
00433 #endif
00434         );
00435 }
00436 #if __TBB_TASK_GROUP_CONTEXT
00438 
00439 template<typename Range, typename Body>
00440 void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner, tbb::task_group_context & context ) {
00441     internal::start_scan<Range,Body,simple_partitioner>::run(range,body,partitioner,context);
00442 }
00443 
00445 
00446 template<typename Range, typename Body>
00447 void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner, tbb::task_group_context & context ) {
00448     internal::start_scan<Range,Body,auto_partitioner>::run(range,body,partitioner,context);
00449 }
00450 
00452 
00453 template<typename Range, typename Body>
00454 void parallel_scan( const Range& range, Body& body, tbb::task_group_context & context ) {
00455     internal::start_scan<Range,Body,__TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER(),context);
00456 }
00457 #endif
00458 
00460 
00461 } // namespace tbb
00462 
00463 #endif /* __TBB_parallel_scan_H */
00464 

Copyright © 2005-2010 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.