00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifndef __TBB_parallel_scan_H
00022 #define __TBB_parallel_scan_H
00023
00024 #include "task.h"
00025 #include "aligned_space.h"
00026 #include <new>
00027 #include "partitioner.h"
00028
00029 namespace tbb {
00030
00032
00033 struct pre_scan_tag {
00034 static bool is_final_scan() {return false;}
00035 };
00036
00038
00039 struct final_scan_tag {
00040 static bool is_final_scan() {return true;}
00041 };
00042
00044 namespace internal {
00045
00047
00048 template<typename Range, typename Body>
00049 class final_sum: public task {
00050 public:
00051 Body body;
00052 private:
00053 aligned_space<Range,1> range;
00055 Body* stuff_last;
00056 public:
00057 final_sum( Body& body_ ) :
00058 body(body_,split())
00059 {
00060 poison_pointer(stuff_last);
00061 }
00062 ~final_sum() {
00063 range.begin()->~Range();
00064 }
00065 void finish_construction( const Range& range_, Body* stuff_last_ ) {
00066 new( range.begin() ) Range(range_);
00067 stuff_last = stuff_last_;
00068 }
00069 private:
00070 task* execute() {
00071 body( *range.begin(), final_scan_tag() );
00072 if( stuff_last )
00073 stuff_last->assign(body);
00074 return NULL;
00075 }
00076 };
00077
00079
00080 template<typename Range, typename Body>
00081 class sum_node: public task {
00082 typedef final_sum<Range,Body> final_sum_type;
00083 public:
00084 final_sum_type *incoming;
00085 final_sum_type *body;
00086 Body *stuff_last;
00087 private:
00088 final_sum_type *left_sum;
00089 sum_node *left;
00090 sum_node *right;
00091 bool left_is_final;
00092 Range range;
00093 sum_node( const Range range_, bool left_is_final_ ) :
00094 left_sum(NULL),
00095 left(NULL),
00096 right(NULL),
00097 left_is_final(left_is_final_),
00098 range(range_)
00099 {
00100
00101 poison_pointer(body);
00102 poison_pointer(incoming);
00103 }
00104 task* create_child( const Range& range_, final_sum_type& f, sum_node* n, final_sum_type* incoming_, Body* stuff_last_ ) {
00105 if( !n ) {
00106 f.recycle_as_child_of( *this );
00107 f.finish_construction( range_, stuff_last_ );
00108 return &f;
00109 } else {
00110 n->body = &f;
00111 n->incoming = incoming_;
00112 n->stuff_last = stuff_last_;
00113 return n;
00114 }
00115 }
00116 task* execute() {
00117 if( body ) {
00118 if( incoming )
00119 left_sum->body.reverse_join( incoming->body );
00120 recycle_as_continuation();
00121 sum_node& c = *this;
00122 task* b = c.create_child(Range(range,split()),*left_sum,right,left_sum,stuff_last);
00123 task* a = left_is_final ? NULL : c.create_child(range,*body,left,incoming,NULL);
00124 set_ref_count( (a!=NULL)+(b!=NULL) );
00125 body = NULL;
00126 if( a ) spawn(*b);
00127 else a = b;
00128 return a;
00129 } else {
00130 return NULL;
00131 }
00132 }
00133 template<typename Range_,typename Body_,typename Partitioner_>
00134 friend class start_scan;
00135
00136 template<typename Range_,typename Body_>
00137 friend class finish_scan;
00138 };
00139
00141
00142 template<typename Range, typename Body>
00143 class finish_scan: public task {
00144 typedef sum_node<Range,Body> sum_node_type;
00145 typedef final_sum<Range,Body> final_sum_type;
00146 final_sum_type** const sum;
00147 sum_node_type*& return_slot;
00148 public:
00149 final_sum_type* right_zombie;
00150 sum_node_type& result;
00151
00152 task* execute() {
00153 __TBB_ASSERT( result.ref_count()==(result.left!=NULL)+(result.right!=NULL), NULL );
00154 if( result.left )
00155 result.left_is_final = false;
00156 if( right_zombie && sum )
00157 ((*sum)->body).reverse_join(result.left_sum->body);
00158 __TBB_ASSERT( !return_slot, NULL );
00159 if( right_zombie || result.right ) {
00160 return_slot = &result;
00161 } else {
00162 destroy( result );
00163 }
00164 if( right_zombie && !sum && !result.right ) {
00165 destroy(*right_zombie);
00166 right_zombie = NULL;
00167 }
00168 return NULL;
00169 }
00170
00171 finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) :
00172 sum(sum_),
00173 return_slot(return_slot_),
00174 right_zombie(NULL),
00175 result(result_)
00176 {
00177 __TBB_ASSERT( !return_slot, NULL );
00178 }
00179 ~finish_scan(){
00180 #if __TBB_TASK_GROUP_CONTEXT
00181 if (is_cancelled()) {
00182 if (result.ref_count() == 0) destroy(result);
00183 if (right_zombie) destroy(*right_zombie);
00184 }
00185 #endif
00186 }
00187 };
00188
00190
00191 template<typename Range, typename Body, typename Partitioner=simple_partitioner>
00192 class start_scan: public task {
00193 typedef sum_node<Range,Body> sum_node_type;
00194 typedef final_sum<Range,Body> final_sum_type;
00195 final_sum_type* body;
00197 final_sum_type** sum;
00198 sum_node_type** return_slot;
00200 sum_node_type* parent_sum;
00201 bool is_final;
00202 bool is_right_child;
00203 Range range;
00204 typename Partitioner::partition_type partition;
00205 task* execute();
00206 #if __TBB_TASK_GROUP_CONTEXT
00207 tbb::task_group_context &my_context;
00208 #endif
00209
00210
00211 class task_cleaner: internal::no_copy {
00212 typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type;
00213
00214 internal::sum_node<Range,Body>* my_root;
00215 final_sum_type* my_temp_body;
00216 const Range& my_range;
00217 Body& my_body;
00218 start_pass1_type* my_pass1;
00219 public:
00220 bool do_clean;
00221 task_cleaner(internal::sum_node<Range,Body>* root, final_sum_type* temp_body, const Range& range, Body& body, start_pass1_type* pass1)
00222 : my_root(root), my_temp_body(temp_body), my_range(range), my_body(body), my_pass1(pass1), do_clean(true) {}
00223 ~task_cleaner(){
00224 if (do_clean) {
00225 my_body.assign(my_temp_body->body);
00226 my_temp_body->finish_construction( my_range, NULL );
00227 my_temp_body->destroy(*my_temp_body);
00228 }
00229 }
00230 };
00231
00232 public:
00233 start_scan( sum_node_type*& return_slot_, start_scan& parent_, sum_node_type* parent_sum_
00234 #if __TBB_TASK_GROUP_CONTEXT
00235 , tbb::task_group_context &context_
00236 #endif
00237 ) :
00238 body(parent_.body),
00239 sum(parent_.sum),
00240 return_slot(&return_slot_),
00241 parent_sum(parent_sum_),
00242 is_final(parent_.is_final),
00243 is_right_child(false),
00244 range(parent_.range,split()),
00245 partition(parent_.partition,split())
00246 #if __TBB_TASK_GROUP_CONTEXT
00247 , my_context(context_)
00248 #endif
00249 {
00250 __TBB_ASSERT( !*return_slot, NULL );
00251 }
00252
00253 start_scan( sum_node_type*& return_slot_, const Range& range_, final_sum_type& body_, const Partitioner& partitioner_
00254 #if __TBB_TASK_GROUP_CONTEXT
00255 , tbb::task_group_context &_context
00256 #endif
00257 ) :
00258 body(&body_),
00259 sum(NULL),
00260 return_slot(&return_slot_),
00261 parent_sum(NULL),
00262 is_final(true),
00263 is_right_child(false),
00264 range(range_),
00265 partition(partitioner_)
00266 #if __TBB_TASK_GROUP_CONTEXT
00267 , my_context (_context)
00268 #endif
00269 {
00270 __TBB_ASSERT( !*return_slot, NULL );
00271 }
00272
00273 static void run( const Range& range, Body& body, const Partitioner& partitioner
00274 #if __TBB_TASK_GROUP_CONTEXT
00275 , task_group_context& context
00276 #endif
00277 ) {
00278 if( !range.empty() ) {
00279 typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type;
00280 internal::sum_node<Range,Body>* root = NULL;
00281 typedef internal::final_sum<Range,Body> final_sum_type;
00282 #if __TBB_TASK_GROUP_CONTEXT
00283 final_sum_type* temp_body = new(task::allocate_root(context)) final_sum_type( body );
00284 start_pass1_type& pass1 = *new(task::allocate_root(context)) start_pass1_type(
00285 root,
00286 range,
00287 *temp_body,
00288 partitioner,
00289 context
00290 );
00291 #else
00292 final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body );
00293 start_pass1_type& pass1 = *new(task::allocate_root()) start_pass1_type(
00294 root,
00295 range,
00296 *temp_body,
00297 partitioner );
00298 #endif
00299 task_cleaner my_cleaner(root, temp_body, range, body, &pass1);
00300
00301 task::spawn_root_and_wait( pass1 );
00302 if( root ) {
00303 my_cleaner.do_clean = false;
00304 root->body = temp_body;
00305 root->incoming = NULL;
00306 root->stuff_last = &body;
00307 task::spawn_root_and_wait( *root );
00308 }
00309 }
00310 }
00311 };
00312
00313 template<typename Range, typename Body, typename Partitioner>
00314 task* start_scan<Range,Body,Partitioner>::execute() {
00315 typedef internal::finish_scan<Range,Body> finish_pass1_type;
00316 finish_pass1_type* p = parent_sum ? static_cast<finish_pass1_type*>( parent() ) : NULL;
00317
00318
00319
00320 bool treat_as_stolen = is_right_child && (is_stolen_task() || body!=p->result.left_sum);
00321 if( treat_as_stolen ) {
00322
00323 #if __TBB_TASK_GROUP_CONTEXT
00324 p->right_zombie = body = new( allocate_root(my_context) ) final_sum_type(body->body);
00325 #else
00326 p->right_zombie = body = new( allocate_root() ) final_sum_type(body->body);
00327 #endif
00328 is_final = false;
00329 }
00330 task* next_task = NULL;
00331 if( (is_right_child && !treat_as_stolen) || !range.is_divisible() || partition.should_execute_range(*this) ) {
00332 if( is_final )
00333 (body->body)( range, final_scan_tag() );
00334 else if( sum )
00335 (body->body)( range, pre_scan_tag() );
00336 if( sum )
00337 *sum = body;
00338 __TBB_ASSERT( !*return_slot, NULL );
00339 } else {
00340 sum_node_type* result;
00341 if( parent_sum )
00342 result = new(allocate_additional_child_of(*parent_sum)) sum_node_type(range,is_final);
00343 else
00344 #if __TBB_TASK_GROUP_CONTEXT
00345 result = new(task::allocate_root(my_context)) sum_node_type(range,is_final);
00346 #else
00347 result = new(task::allocate_root()) sum_node_type(range,is_final);
00348 #endif
00349 finish_pass1_type& c = *new( allocate_continuation()) finish_pass1_type(*return_slot,sum,*result);
00350
00351 #if __TBB_TASK_GROUP_CONTEXT
00352 start_scan& b = *new( c.allocate_child() ) start_scan( result->right, *this, result, my_context );
00353 #else
00354 start_scan& b = *new( c.allocate_child() ) start_scan( result->right, *this, result );
00355 #endif
00356 b.is_right_child = true;
00357
00358
00359
00360 recycle_as_child_of(c);
00361 c.set_ref_count(2);
00362 c.spawn(b);
00363 sum = &result->left_sum;
00364 return_slot = &result->left;
00365 is_right_child = false;
00366 next_task = this;
00367 parent_sum = result;
00368 __TBB_ASSERT( !*return_slot, NULL );
00369 }
00370 return next_task;
00371 }
00372 }
00374
00375
00376
00394
00396
00397 template<typename Range, typename Body>
00398 void parallel_scan( const Range& range, Body& body ) {
00399 #if __TBB_TASK_GROUP_CONTEXT
00400 task_group_context context;
00401 #endif // __TBB_TASK_GROUP_CONTEXT
00402 internal::start_scan<Range,Body,__TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER()
00403 #if __TBB_TASK_GROUP_CONTEXT
00404 , context
00405 #endif
00406 );
00407 }
00408
00410
00411 template<typename Range, typename Body>
00412 void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) {
00413 #if __TBB_TASK_GROUP_CONTEXT
00414 task_group_context context;
00415 #endif // __TBB_TASK_GROUP_CONTEXT
00416 internal::start_scan<Range,Body,simple_partitioner>::run(range,body,partitioner
00417 #if __TBB_TASK_GROUP_CONTEXT
00418 , context
00419 #endif
00420 );
00421 }
00422
00424
00425 template<typename Range, typename Body>
00426 void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) {
00427 #if __TBB_TASK_GROUP_CONTEXT
00428 task_group_context context;
00429 #endif // __TBB_TASK_GROUP_CONTEXT
00430 internal::start_scan<Range,Body,auto_partitioner>::run(range,body,partitioner
00431 #if __TBB_TASK_GROUP_CONTEXT
00432 , context
00433 #endif
00434 );
00435 }
00436 #if __TBB_TASK_GROUP_CONTEXT
00438
00439 template<typename Range, typename Body>
00440 void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner, tbb::task_group_context & context ) {
00441 internal::start_scan<Range,Body,simple_partitioner>::run(range,body,partitioner,context);
00442 }
00443
00445
00446 template<typename Range, typename Body>
00447 void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner, tbb::task_group_context & context ) {
00448 internal::start_scan<Range,Body,auto_partitioner>::run(range,body,partitioner,context);
00449 }
00450
00452
00453 template<typename Range, typename Body>
00454 void parallel_scan( const Range& range, Body& body, tbb::task_group_context & context ) {
00455 internal::start_scan<Range,Body,__TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER(),context);
00456 }
00457 #endif
00458
00460
00461 }
00462
00463 #endif
00464