tbb_machine.h

/*
    Copyright 2005-2010 Intel Corporation.  All Rights Reserved.

    The source code contained or described herein and all documents related
    to the source code ("Material") are owned by Intel Corporation or its
    suppliers or licensors.  Title to the Material remains with Intel
    Corporation or its suppliers and licensors.  The Material is protected
    by worldwide copyright laws and treaty provisions.  No part of the
    Material may be used, copied, reproduced, modified, published, uploaded,
    posted, transmitted, distributed, or disclosed in any way without
    Intel's prior express written permission.

    No license under any patent, copyright, trade secret or other
    intellectual property right is granted to or conferred upon you by
    disclosure or delivery of the Materials, either expressly, by
    implication, inducement, estoppel or otherwise.  Any license under such
    intellectual property rights must be express and approved by Intel in
    writing.
*/
00020 
00021 #ifndef __TBB_machine_H
00022 #define __TBB_machine_H
00023 
00024 #include "tbb_stddef.h"
00025 
00026 #if _WIN32||_WIN64
00027 
00028 #ifdef _MANAGED
00029 #pragma managed(push, off)
00030 #endif
00031 
00032 #if __MINGW32__
00033 #include "machine/linux_ia32.h"
00034 extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
00035 #define __TBB_Yield()  SwitchToThread()
00036 #elif defined(_M_IX86)
00037 #include "machine/windows_ia32.h"
00038 #elif defined(_M_AMD64) 
00039 #include "machine/windows_intel64.h"
00040 #elif _XBOX 
00041 #include "machine/xbox360_ppc.h"
00042 #else
00043 #error Unsupported platform
00044 #endif
00045 
00046 #ifdef _MANAGED
00047 #pragma managed(pop)
00048 #endif
00049 
00050 #elif __linux__ || __FreeBSD__
00051 
00052 #if __i386__
00053 #include "machine/linux_ia32.h"
00054 #elif __x86_64__
00055 #include "machine/linux_intel64.h"
00056 #elif __ia64__
00057 #include "machine/linux_ia64.h"
00058 #endif
00059 
00060 #elif __APPLE__
00061 
00062 #if __i386__
00063 #include "machine/linux_ia32.h"
00064 #elif __x86_64__
00065 #include "machine/linux_intel64.h"
00066 #elif __POWERPC__
00067 #include "machine/mac_ppc.h"
00068 #endif
00069 
00070 #elif _AIX
00071 
00072 #include "machine/ibm_aix51.h"
00073 
00074 #elif __sun || __SUNPRO_CC
00075 
00076 #define __asm__ asm 
00077 #define __volatile__ volatile
00078 #if __i386  || __i386__
00079 #include "machine/linux_ia32.h"
00080 #elif __x86_64__
00081 #include "machine/linux_intel64.h"
00082 #elif __sparc
00083 #include "machine/sunos_sparc.h"
00084 #endif
00085 
00086 #endif
00087 
00088 #if    !defined(__TBB_CompareAndSwap4) \
00089     || !defined(__TBB_CompareAndSwap8) \
00090     || !defined(__TBB_Yield)           \
00091     || !defined(__TBB_release_consistency_helper)
00092 #error Minimal requirements for tbb_machine.h not satisfied 
00093 #endif

#ifndef __TBB_load_with_acquire
    //! Load with acquire semantics; i.e., no following memory operation can move above the load.
    template<typename T>
    inline T __TBB_load_with_acquire(const volatile T& location) {
        T temp = location;
        __TBB_release_consistency_helper();
        return temp;
    }
#endif

#ifndef __TBB_store_with_release
    //! Store with release semantics; i.e., no prior memory operation can move below the store.
    template<typename T, typename V>
    inline void __TBB_store_with_release(volatile T& location, V value) {
        __TBB_release_consistency_helper();
        location = T(value);
    }
#endif
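
// Illustrative sketch (not part of the original header): pairing the release
// store with the acquire load for a one-way handoff.  The names
// example_publish, example_consume, example_payload, and example_ready are
// hypothetical.  The release store keeps the payload write visible before the
// flag is observed; the acquire load keeps the payload read from moving above
// the flag check.
inline void example_publish( int& example_payload, volatile int& example_ready, int x ) {
    example_payload = x;                           // ordinary store
    __TBB_store_with_release( example_ready, 1 );  // flag is written last
}
inline int example_consume( const int& example_payload, const volatile int& example_ready ) {
    while( !__TBB_load_with_acquire( example_ready ) ) __TBB_Yield();
    return example_payload;                        // ordered after the flag read
}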

#ifndef __TBB_Pause
    inline void __TBB_Pause(int32_t) {
        __TBB_Yield();
    }
#endif

namespace tbb {
namespace internal {

//! Class that implements exponential backoff.
/** See implementation of spin_mutex as an example. */
class atomic_backoff {
    //! Time delay, in units of "pause" instructions.
    /** Should be roughly the number of "pause" instructions that take the
        same time as a context switch. */
    static const int32_t LOOPS_BEFORE_YIELD = 16;
    int32_t count;
public:
    atomic_backoff() : count(1) {}

    //! Pause for a while.
    void pause() {
        if( count<=LOOPS_BEFORE_YIELD ) {
            __TBB_Pause(count);
            // Pause twice as long the next time.
            count*=2;
        } else {
            // Pause is so long that we might as well yield CPU to scheduler.
            __TBB_Yield();
        }
    }

    //! Pause a bounded number of times, doubling the delay; return false once the limit is reached.
    bool bounded_pause() {
        if( count<=LOOPS_BEFORE_YIELD ) {
            __TBB_Pause(count);
            // Pause twice as long the next time.
            count*=2;
            return true;
        } else {
            return false;
        }
    }

    void reset() {
        count = 1;
    }
};

//! Spin WHILE the value of the variable is equal to a given value.
/** T and U should be comparable types. */
template<typename T, typename U>
void spin_wait_while_eq( const volatile T& location, U value ) {
    atomic_backoff backoff;
    while( location==value ) backoff.pause();
}

//! Spin UNTIL the value of the variable is equal to a given value.
/** T and U should be comparable types. */
template<typename T, typename U>
void spin_wait_until_eq( const volatile T& location, const U value ) {
    atomic_backoff backoff;
    while( location!=value ) backoff.pause();
}
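
// Usage sketch (illustrative): blocking until another thread publishes a
// flag; example_initialized is a hypothetical variable set elsewhere.
//
//     volatile int example_initialized;   // set to 1 by an initializer thread
//     spin_wait_until_eq( example_initialized, 1 );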

// T should be unsigned, otherwise sign propagation will break correctness of bit manipulations.
// S should be either 1 or 2, for the mask calculation to work correctly.
// Together, these rules limit applicability of Masked CAS to unsigned char and unsigned short.
template<size_t S, typename T>
inline T __TBB_MaskedCompareAndSwap (volatile T *ptr, T value, T comparand ) {
    volatile uint32_t * base = (uint32_t*)( (uintptr_t)ptr & ~(uintptr_t)0x3 );
#if __TBB_BIG_ENDIAN
    const uint8_t bitoffset = uint8_t( 8*( 4-S - (uintptr_t(ptr) & 0x3) ) );
#else
    const uint8_t bitoffset = uint8_t( 8*((uintptr_t)ptr & 0x3) );
#endif
    const uint32_t mask = ( (1<<(S*8)) - 1 )<<bitoffset;
    atomic_backoff b;
    uint32_t result;
    for(;;) {
        result = *base; // reload the base value, which might have changed during the pause
        uint32_t old_value = ( result & ~mask ) | ( comparand << bitoffset );
        uint32_t new_value = ( result & ~mask ) | ( value << bitoffset );
        // __TBB_CompareAndSwap4 is presumed to have a full fence.
        result = __TBB_CompareAndSwap4( base, new_value, old_value );
        if(  result==old_value               // CAS succeeded
          || ((result^old_value)&mask)!=0 )  // CAS failed and the bits of interest have changed
            break;
        else                                 // CAS failed but the bits of interest were left unchanged
            b.pause();
    }
    return T((result & mask) >> bitoffset);
}
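
// Worked example (illustrative): for S==2 on a little-endian machine, a
// uint16_t at byte offset 2 within its aligned 4-byte word gives
//     bitoffset = 8*2 = 16  and  mask = 0xFFFF0000,
// so the 16-bit CAS becomes a full-word __TBB_CompareAndSwap4 that is retried
// only while the untouched low 16 bits keep changing underneath it.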

template<size_t S, typename T>
inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T comparand ) {
    return __TBB_CompareAndSwapW((T *)ptr,value,comparand);
}

template<>
inline uint8_t __TBB_CompareAndSwapGeneric <1,uint8_t> (volatile void *ptr, uint8_t value, uint8_t comparand ) {
#ifdef __TBB_CompareAndSwap1
    return __TBB_CompareAndSwap1(ptr,value,comparand);
#else
    return __TBB_MaskedCompareAndSwap<1,uint8_t>((volatile uint8_t *)ptr,value,comparand);
#endif
}

template<>
inline uint16_t __TBB_CompareAndSwapGeneric <2,uint16_t> (volatile void *ptr, uint16_t value, uint16_t comparand ) {
#ifdef __TBB_CompareAndSwap2
    return __TBB_CompareAndSwap2(ptr,value,comparand);
#else
    return __TBB_MaskedCompareAndSwap<2,uint16_t>((volatile uint16_t *)ptr,value,comparand);
#endif
}

template<>
inline uint32_t __TBB_CompareAndSwapGeneric <4,uint32_t> (volatile void *ptr, uint32_t value, uint32_t comparand ) {
    return __TBB_CompareAndSwap4(ptr,value,comparand);
}

template<>
inline uint64_t __TBB_CompareAndSwapGeneric <8,uint64_t> (volatile void *ptr, uint64_t value, uint64_t comparand ) {
    return __TBB_CompareAndSwap8(ptr,value,comparand);
}

template<size_t S, typename T>
inline T __TBB_FetchAndAddGeneric (volatile void *ptr, T addend) {
    atomic_backoff b;
    T result;
    for(;;) {
        result = *reinterpret_cast<volatile T *>(ptr);
        // __TBB_CompareAndSwapGeneric is presumed to have a full fence.
        if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, result+addend, result )==result )
            break;
        b.pause();
    }
    return result;
}

template<size_t S, typename T>
inline T __TBB_FetchAndStoreGeneric (volatile void *ptr, T value) {
    atomic_backoff b;
    T result;
    for(;;) {
        result = *reinterpret_cast<volatile T *>(ptr);
        // __TBB_CompareAndSwapGeneric is presumed to have a full fence.
        if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, value, result )==result )
            break;
        b.pause();
    }
    return result;
}
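
// Usage sketch (illustrative): a shared counter built directly on the generic
// fetch-and-add above; example_next_id is hypothetical.  The word-sized
// __TBB_FetchAndAddW macro defined later in this file resolves to this same
// routine when the port layer does not supply its own.
inline intptr_t example_next_id( volatile intptr_t& example_counter ) {
    return __TBB_FetchAndAddGeneric<sizeof(intptr_t),intptr_t>( &example_counter, 1 );  // returns the previous value
}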

// Macro __TBB_TypeWithAlignmentAtLeastAsStrict(T) should expand to a type with
// alignment at least as strict as that of T.  The type should have a trivial
// default constructor and destructor, so that arrays of it can be declared
// without initializers.
// It is correct (but perhaps a waste of space) if __TBB_TypeWithAlignmentAtLeastAsStrict(T) expands
// to a type bigger than T.
// The default definition here works on machines where integers are naturally aligned and the
// strictest alignment is 16.
#ifndef __TBB_TypeWithAlignmentAtLeastAsStrict

#if __GNUC__ || __SUNPRO_CC
struct __TBB_machine_type_with_strictest_alignment {
    int member[4];
} __attribute__((aligned(16)));
#elif _MSC_VER
__declspec(align(16)) struct __TBB_machine_type_with_strictest_alignment {
    int member[4];
};
#else
#error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T) or __TBB_machine_type_with_strictest_alignment
#endif

template<size_t N> struct type_with_alignment {__TBB_machine_type_with_strictest_alignment member;};
template<> struct type_with_alignment<1> { char member; };
template<> struct type_with_alignment<2> { uint16_t member; };
template<> struct type_with_alignment<4> { uint32_t member; };
template<> struct type_with_alignment<8> { uint64_t member; };

#if _MSC_VER||defined(__GNUC__)&&__GNUC__==3 && __GNUC_MINOR__<=2
//! Work around for a bug in GNU 3.2 and MSVC compilers.
/** The bug is that the compiler sometimes returns 0 for __alignof(T) when T
    has not yet been instantiated; computing sizeof(T) first forces instantiation. */
template<size_t Size, typename T>
struct work_around_alignment_bug {
#if _MSC_VER
    static const size_t alignment = __alignof(T);
#else
    static const size_t alignment = __alignof__(T);
#endif
};
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<tbb::internal::work_around_alignment_bug<sizeof(T),T>::alignment>
#elif __GNUC__ || __SUNPRO_CC
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<__alignof__(T)>
#else
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) __TBB_machine_type_with_strictest_alignment
#endif
#endif  /* __TBB_TypeWithAlignmentAtLeastAsStrict */
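
// Usage sketch (illustrative): declaring raw storage aligned for T without
// running T's constructor, the pattern behind TBB's aligned_space;
// example_storage is hypothetical.
template<typename T>
struct example_storage {
    typedef __TBB_TypeWithAlignmentAtLeastAsStrict(T) element_type;
    element_type array[(sizeof(T)+sizeof(element_type)-1)/sizeof(element_type)];
    T* begin() { return reinterpret_cast<T*>(this); }
};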

// Template class here is to avoid instantiation of the static data for modules that don't use it
template<typename T>
struct reverse {
    static const T byte_table[256];
};
// An efficient implementation of the reverse function utilizes a 2^8 lookup table holding the bit-reversed
// values of [0..2^8 - 1]. Those values can also be computed on the fly at a slightly higher cost.
template<typename T>
const T reverse<T>::byte_table[256] = {
    0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
    0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
    0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
    0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
    0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
    0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
    0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
    0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
    0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
    0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
    0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
    0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
    0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
    0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
    0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
    0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
};
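
// Illustrative sketch of the "computed on the fly" alternative mentioned
// above: reversing one byte with three swap steps instead of a table lookup.
// example_reverse_byte is hypothetical.
inline unsigned char example_reverse_byte( unsigned char b ) {
    b = (unsigned char)( (b & 0xF0) >> 4 | (b & 0x0F) << 4 );  // swap nibbles
    b = (unsigned char)( (b & 0xCC) >> 2 | (b & 0x33) << 2 );  // swap bit pairs
    b = (unsigned char)( (b & 0xAA) >> 1 | (b & 0x55) << 1 );  // swap adjacent bits
    return b;
}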

} // namespace internal
} // namespace tbb

#ifndef __TBB_CompareAndSwap1
#define __TBB_CompareAndSwap1 tbb::internal::__TBB_CompareAndSwapGeneric<1,uint8_t>
#endif

#ifndef __TBB_CompareAndSwap2
#define __TBB_CompareAndSwap2 tbb::internal::__TBB_CompareAndSwapGeneric<2,uint16_t>
#endif

#ifndef __TBB_CompareAndSwapW
#define __TBB_CompareAndSwapW tbb::internal::__TBB_CompareAndSwapGeneric<sizeof(ptrdiff_t),ptrdiff_t>
#endif

#ifndef __TBB_FetchAndAdd1
#define __TBB_FetchAndAdd1 tbb::internal::__TBB_FetchAndAddGeneric<1,uint8_t>
#endif

#ifndef __TBB_FetchAndAdd2
#define __TBB_FetchAndAdd2 tbb::internal::__TBB_FetchAndAddGeneric<2,uint16_t>
#endif

#ifndef __TBB_FetchAndAdd4
#define __TBB_FetchAndAdd4 tbb::internal::__TBB_FetchAndAddGeneric<4,uint32_t>
#endif

#ifndef __TBB_FetchAndAdd8
#define __TBB_FetchAndAdd8 tbb::internal::__TBB_FetchAndAddGeneric<8,uint64_t>
#endif

#ifndef __TBB_FetchAndAddW
#define __TBB_FetchAndAddW tbb::internal::__TBB_FetchAndAddGeneric<sizeof(ptrdiff_t),ptrdiff_t>
#endif

#ifndef __TBB_FetchAndStore1
#define __TBB_FetchAndStore1 tbb::internal::__TBB_FetchAndStoreGeneric<1,uint8_t>
#endif

#ifndef __TBB_FetchAndStore2
#define __TBB_FetchAndStore2 tbb::internal::__TBB_FetchAndStoreGeneric<2,uint16_t>
#endif

#ifndef __TBB_FetchAndStore4
#define __TBB_FetchAndStore4 tbb::internal::__TBB_FetchAndStoreGeneric<4,uint32_t>
#endif

#ifndef __TBB_FetchAndStore8
#define __TBB_FetchAndStore8 tbb::internal::__TBB_FetchAndStoreGeneric<8,uint64_t>
#endif

#ifndef __TBB_FetchAndStoreW
#define __TBB_FetchAndStoreW tbb::internal::__TBB_FetchAndStoreGeneric<sizeof(ptrdiff_t),ptrdiff_t>
#endif

#if __TBB_DECL_FENCED_ATOMICS

#ifndef __TBB_CompareAndSwap1__TBB_full_fence
#define __TBB_CompareAndSwap1__TBB_full_fence __TBB_CompareAndSwap1
#endif
#ifndef __TBB_CompareAndSwap1acquire
#define __TBB_CompareAndSwap1acquire __TBB_CompareAndSwap1__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap1release
#define __TBB_CompareAndSwap1release __TBB_CompareAndSwap1__TBB_full_fence
#endif

#ifndef __TBB_CompareAndSwap2__TBB_full_fence
#define __TBB_CompareAndSwap2__TBB_full_fence __TBB_CompareAndSwap2
#endif
#ifndef __TBB_CompareAndSwap2acquire
#define __TBB_CompareAndSwap2acquire __TBB_CompareAndSwap2__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap2release
#define __TBB_CompareAndSwap2release __TBB_CompareAndSwap2__TBB_full_fence
#endif

#ifndef __TBB_CompareAndSwap4__TBB_full_fence
#define __TBB_CompareAndSwap4__TBB_full_fence __TBB_CompareAndSwap4
#endif
#ifndef __TBB_CompareAndSwap4acquire
#define __TBB_CompareAndSwap4acquire __TBB_CompareAndSwap4__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap4release
#define __TBB_CompareAndSwap4release __TBB_CompareAndSwap4__TBB_full_fence
#endif

#ifndef __TBB_CompareAndSwap8__TBB_full_fence
#define __TBB_CompareAndSwap8__TBB_full_fence __TBB_CompareAndSwap8
#endif
#ifndef __TBB_CompareAndSwap8acquire
#define __TBB_CompareAndSwap8acquire __TBB_CompareAndSwap8__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap8release
#define __TBB_CompareAndSwap8release __TBB_CompareAndSwap8__TBB_full_fence
#endif

#ifndef __TBB_FetchAndAdd1__TBB_full_fence
#define __TBB_FetchAndAdd1__TBB_full_fence __TBB_FetchAndAdd1
#endif
#ifndef __TBB_FetchAndAdd1acquire
#define __TBB_FetchAndAdd1acquire __TBB_FetchAndAdd1__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd1release
#define __TBB_FetchAndAdd1release __TBB_FetchAndAdd1__TBB_full_fence
#endif

#ifndef __TBB_FetchAndAdd2__TBB_full_fence
#define __TBB_FetchAndAdd2__TBB_full_fence __TBB_FetchAndAdd2
#endif
#ifndef __TBB_FetchAndAdd2acquire
#define __TBB_FetchAndAdd2acquire __TBB_FetchAndAdd2__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd2release
#define __TBB_FetchAndAdd2release __TBB_FetchAndAdd2__TBB_full_fence
#endif

#ifndef __TBB_FetchAndAdd4__TBB_full_fence
#define __TBB_FetchAndAdd4__TBB_full_fence __TBB_FetchAndAdd4
#endif
#ifndef __TBB_FetchAndAdd4acquire
#define __TBB_FetchAndAdd4acquire __TBB_FetchAndAdd4__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd4release
#define __TBB_FetchAndAdd4release __TBB_FetchAndAdd4__TBB_full_fence
#endif

#ifndef __TBB_FetchAndAdd8__TBB_full_fence
#define __TBB_FetchAndAdd8__TBB_full_fence __TBB_FetchAndAdd8
#endif
#ifndef __TBB_FetchAndAdd8acquire
#define __TBB_FetchAndAdd8acquire __TBB_FetchAndAdd8__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd8release
#define __TBB_FetchAndAdd8release __TBB_FetchAndAdd8__TBB_full_fence
#endif

#ifndef __TBB_FetchAndStore1__TBB_full_fence
#define __TBB_FetchAndStore1__TBB_full_fence __TBB_FetchAndStore1
#endif
#ifndef __TBB_FetchAndStore1acquire
#define __TBB_FetchAndStore1acquire __TBB_FetchAndStore1__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore1release
#define __TBB_FetchAndStore1release __TBB_FetchAndStore1__TBB_full_fence
#endif

#ifndef __TBB_FetchAndStore2__TBB_full_fence
#define __TBB_FetchAndStore2__TBB_full_fence __TBB_FetchAndStore2
#endif
#ifndef __TBB_FetchAndStore2acquire
#define __TBB_FetchAndStore2acquire __TBB_FetchAndStore2__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore2release
#define __TBB_FetchAndStore2release __TBB_FetchAndStore2__TBB_full_fence
#endif

#ifndef __TBB_FetchAndStore4__TBB_full_fence
#define __TBB_FetchAndStore4__TBB_full_fence __TBB_FetchAndStore4
#endif
#ifndef __TBB_FetchAndStore4acquire
#define __TBB_FetchAndStore4acquire __TBB_FetchAndStore4__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore4release
#define __TBB_FetchAndStore4release __TBB_FetchAndStore4__TBB_full_fence
#endif

#ifndef __TBB_FetchAndStore8__TBB_full_fence
#define __TBB_FetchAndStore8__TBB_full_fence __TBB_FetchAndStore8
#endif
#ifndef __TBB_FetchAndStore8acquire
#define __TBB_FetchAndStore8acquire __TBB_FetchAndStore8__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore8release
#define __TBB_FetchAndStore8release __TBB_FetchAndStore8__TBB_full_fence
#endif

#endif // __TBB_DECL_FENCED_ATOMICS

// Special atomic functions
#ifndef __TBB_FetchAndAddWrelease
#define __TBB_FetchAndAddWrelease __TBB_FetchAndAddW
#endif

#ifndef __TBB_FetchAndIncrementWacquire
#define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1)
#endif

#ifndef __TBB_FetchAndDecrementWrelease
#define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,(-1))
#endif

#if __TBB_WORDSIZE==4
// On 32-bit platforms, "atomic.h" requires definition of __TBB_Store8 and __TBB_Load8
#ifndef __TBB_Store8
inline void __TBB_Store8 (volatile void *ptr, int64_t value) {
    tbb::internal::atomic_backoff b;
    for(;;) {
        int64_t result = *(int64_t *)ptr;
        if( __TBB_CompareAndSwap8(ptr,value,result)==result ) break;
        b.pause();
    }
}
#endif

#ifndef __TBB_Load8
inline int64_t __TBB_Load8 (const volatile void *ptr) {
    int64_t result = *(int64_t *)ptr;
    result = __TBB_CompareAndSwap8((volatile void *)ptr,result,result);
    return result;
}
#endif
#endif /* __TBB_WORDSIZE==4 */
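
// Note (illustrative): with a 4-byte word size, a plain 64-bit load or store
// may be split into two 32-bit accesses and observed half-written ("torn"),
// so the fallbacks above funnel both operations through the atomic
// __TBB_CompareAndSwap8.  The CAS in __TBB_Load8 uses equal comparand and new
// value, so it reads the location atomically without changing it.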

#ifndef __TBB_Log2
inline intptr_t __TBB_Log2( uintptr_t x ) {
    if( x==0 ) return -1;
    intptr_t result = 0;
    uintptr_t tmp;
#if __TBB_WORDSIZE>=8
    if( (tmp = x>>32) ) { x=tmp; result += 32; }
#endif
    if( (tmp = x>>16) ) { x=tmp; result += 16; }
    if( (tmp = x>>8) )  { x=tmp; result += 8; }
    if( (tmp = x>>4) )  { x=tmp; result += 4; }
    if( (tmp = x>>2) )  { x=tmp; result += 2; }
    return (x&2)? result+1: result;
}
#endif
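
// Worked example (illustrative): __TBB_Log2(40) leaves x unchanged by the
// 16- and 8-bit shifts; then 40>>4 == 2 gives x=2, result=4; 2>>2 == 0, and
// x&2 is set, so the result is 4+1 = 5, i.e. the index of the highest set
// bit (2^5 = 32 <= 40 < 64).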

#ifndef __TBB_AtomicOR
inline void __TBB_AtomicOR( volatile void *operand, uintptr_t addend ) {
    tbb::internal::atomic_backoff b;
    for(;;) {
        uintptr_t tmp = *(volatile uintptr_t *)operand;
        uintptr_t result = __TBB_CompareAndSwapW(operand, tmp|addend, tmp);
        if( result==tmp ) break;
        b.pause();
    }
}
#endif

#ifndef __TBB_AtomicAND
inline void __TBB_AtomicAND( volatile void *operand, uintptr_t addend ) {
    tbb::internal::atomic_backoff b;
    for(;;) {
        uintptr_t tmp = *(volatile uintptr_t *)operand;
        uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&addend, tmp);
        if( result==tmp ) break;
        b.pause();
    }
}
#endif

#ifndef __TBB_TryLockByte
inline bool __TBB_TryLockByte( unsigned char &flag ) {
    return __TBB_CompareAndSwap1(&flag,1,0)==0;
}
#endif

#ifndef __TBB_LockByte
inline uintptr_t __TBB_LockByte( unsigned char& flag ) {
    if ( !__TBB_TryLockByte(flag) ) {
        tbb::internal::atomic_backoff b;
        do {
            b.pause();
        } while ( !__TBB_TryLockByte(flag) );
    }
    return 0;
}
#endif
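
// Usage sketch (illustrative): a minimal byte spin lock in the style of
// tbb::spin_mutex, built on the primitives above.  example_unlock_byte is
// hypothetical; its release store pairs with the full-fence CAS performed
// inside __TBB_TryLockByte.
inline void example_unlock_byte( unsigned char& flag ) {
    __TBB_store_with_release( flag, 0 );
}
// Typical pattern:
//     static unsigned char example_lock_flag;   // zero-initialized
//     __TBB_LockByte( example_lock_flag );      // spins until acquired
//     /* ...critical section... */
//     example_unlock_byte( example_lock_flag );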

#ifndef __TBB_ReverseByte
inline unsigned char __TBB_ReverseByte(unsigned char src) {
    return tbb::internal::reverse<unsigned char>::byte_table[src];
}
#endif

template<typename T>
T __TBB_ReverseBits(T src)
{
    T dst;
    unsigned char *original = (unsigned char *) &src;
    unsigned char *reversed = (unsigned char *) &dst;

    for( int i = sizeof(T)-1; i >= 0; i-- )
        reversed[i] = __TBB_ReverseByte( original[sizeof(T)-i-1] );

    return dst;
}
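
// Usage sketch (illustrative): __TBB_ReverseBits reverses the whole value by
// reversing the byte order and the bits within each byte, so, independent of
// endianness, __TBB_ReverseBits<uint32_t>(0x00000001u) yields 0x80000000u.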

#endif /* __TBB_machine_H */
