tbb_machine.h

00001 /*
00002     Copyright 2005-2012 Intel Corporation.  All Rights Reserved.
00003 
00004     The source code contained or described herein and all documents related
00005     to the source code ("Material") are owned by Intel Corporation or its
00006     suppliers or licensors.  Title to the Material remains with Intel
00007     Corporation or its suppliers and licensors.  The Material is protected
00008     by worldwide copyright laws and treaty provisions.  No part of the
00009     Material may be used, copied, reproduced, modified, published, uploaded,
00010     posted, transmitted, distributed, or disclosed in any way without
00011     Intel's prior express written permission.
00012 
00013     No license under any patent, copyright, trade secret or other
00014     intellectual property right is granted to or conferred upon you by
00015     disclosure or delivery of the Materials, either expressly, by
00016     implication, inducement, estoppel or otherwise.  Any license under such
00017     intellectual property rights must be express and approved by Intel in
00018     writing.
00019 */
00020 
00021 #ifndef __TBB_machine_H
00022 #define __TBB_machine_H
00023 
00116 #include "tbb_stddef.h"
00117 
00118 namespace tbb {
00119 namespace internal {
00120 
00122 // Overridable helpers declarations
00123 //
00124 // A machine/*.h file may choose to define these templates, otherwise it must
00125 // request default implementation by setting appropriate __TBB_USE_GENERIC_XXX macro(s).
00126 //
00127 template <typename T, std::size_t S>
00128 struct machine_load_store;
00129 
00130 template <typename T, std::size_t S>
00131 struct machine_load_store_relaxed;
00132 
00133 template <typename T, std::size_t S>
00134 struct machine_load_store_seq_cst;
00135 //
00136 // End of overridable helpers declarations
00138 
00139 template<size_t S> struct atomic_selector;
00140 
00141 template<> struct atomic_selector<1> {
00142     typedef int8_t word;
00143     inline static word fetch_store ( volatile void* location, word value );
00144 };
00145 
00146 template<> struct atomic_selector<2> {
00147     typedef int16_t word;
00148     inline static word fetch_store ( volatile void* location, word value );
00149 };
00150 
00151 template<> struct atomic_selector<4> {
00152 #if _MSC_VER && !_WIN64
00153     // Work-around that avoids spurious /Wp64 warnings
00154     typedef intptr_t word;
00155 #else
00156     typedef int32_t word;
00157 #endif
00158     inline static word fetch_store ( volatile void* location, word value );
00159 };
00160 
00161 template<> struct atomic_selector<8> {
00162     typedef int64_t word;
00163     inline static word fetch_store ( volatile void* location, word value );
00164 };
00165 
00166 }} // namespaces internal, tbb
00167 
00168 #if _WIN32||_WIN64
00169 
00170 #ifdef _MANAGED
00171 #pragma managed(push, off)
00172 #endif
00173 
00174     #if __MINGW64__ || __MINGW32__
00175         extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
00176         #define __TBB_Yield()  SwitchToThread()
00177         #if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT)
00178             #include "machine/gcc_generic.h"
00179         #elif __MINGW64__
00180             #include "machine/linux_intel64.h"
00181         #elif __MINGW32__
00182             #include "machine/linux_ia32.h"
00183         #endif
00184     #elif defined(_M_IX86)
00185         #include "machine/windows_ia32.h"
00186     #elif defined(_M_X64) 
00187         #include "machine/windows_intel64.h"
00188     #elif _XBOX
00189         #include "machine/xbox360_ppc.h"
00190     #endif
00191 
00192 #ifdef _MANAGED
00193 #pragma managed(pop)
00194 #endif
00195 
00196 #elif __linux__ || __FreeBSD__ || __NetBSD__
00197 
00198     #if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT)
00199         #include "machine/gcc_generic.h"
00200     #elif __i386__
00201         #include "machine/linux_ia32.h"
00202     #elif __x86_64__
00203         #include "machine/linux_intel64.h"
00204     #elif __ia64__
00205         #include "machine/linux_ia64.h"
00206     #elif __powerpc__
00207         #include "machine/mac_ppc.h"
00208     #elif __TBB_GCC_BUILTIN_ATOMICS_PRESENT
00209         #include "machine/gcc_generic.h"
00210     #endif
00211     #include "machine/linux_common.h"
00212 
00213 #elif __APPLE__
00214 
00215     #if __i386__
00216         #include "machine/linux_ia32.h"
00217     #elif __x86_64__
00218         #include "machine/linux_intel64.h"
00219     #elif __POWERPC__
00220         #include "machine/mac_ppc.h"
00221     #endif
00222     #include "machine/macos_common.h"
00223 
00224 #elif _AIX
00225 
00226     #include "machine/ibm_aix51.h"
00227 
00228 #elif __sun || __SUNPRO_CC
00229 
00230     #define __asm__ asm
00231     #define __volatile__ volatile
00232 
00233     #if __i386  || __i386__
00234         #include "machine/linux_ia32.h"
00235     #elif __x86_64__
00236         #include "machine/linux_intel64.h"
00237     #elif __sparc
00238         #include "machine/sunos_sparc.h"
00239     #endif
00240     #include <sched.h>
00241 
00242     #define __TBB_Yield() sched_yield()
00243 
00244 #endif /* OS selection */
00245 
00246 #ifndef __TBB_64BIT_ATOMICS
00247     #define __TBB_64BIT_ATOMICS 1
00248 #endif
00249 
00250 // Special atomic functions
00251 #if __TBB_USE_FENCED_ATOMICS
00252     #define __TBB_machine_cmpswp1   __TBB_machine_cmpswp1full_fence
00253     #define __TBB_machine_cmpswp2   __TBB_machine_cmpswp2full_fence
00254     #define __TBB_machine_cmpswp4   __TBB_machine_cmpswp4full_fence
00255     #define __TBB_machine_cmpswp8   __TBB_machine_cmpswp8full_fence
00256 
00257     #if __TBB_WORDSIZE==8
00258         #define __TBB_machine_fetchadd8             __TBB_machine_fetchadd8full_fence
00259         #define __TBB_machine_fetchstore8           __TBB_machine_fetchstore8full_fence
00260         #define __TBB_FetchAndAddWrelease(P,V)      __TBB_machine_fetchadd8release(P,V)
00261         #define __TBB_FetchAndIncrementWacquire(P)  __TBB_machine_fetchadd8acquire(P,1)
00262         #define __TBB_FetchAndDecrementWrelease(P)  __TBB_machine_fetchadd8release(P,(-1))
00263     #else
00264         #error Define macros for 4-byte word, similarly to the above __TBB_WORDSIZE==8 branch.
00264     #endif /* __TBB_WORDSIZE==8 */
00266 #else /* !__TBB_USE_FENCED_ATOMICS */
00267     #define __TBB_FetchAndAddWrelease(P,V)      __TBB_FetchAndAddW(P,V)
00268     #define __TBB_FetchAndIncrementWacquire(P)  __TBB_FetchAndAddW(P,1)
00269     #define __TBB_FetchAndDecrementWrelease(P)  __TBB_FetchAndAddW(P,(-1))
00270 #endif /* !__TBB_USE_FENCED_ATOMICS */
00271 
00272 #if __TBB_WORDSIZE==4
00273     #define __TBB_CompareAndSwapW(P,V,C)    __TBB_machine_cmpswp4(P,V,C)
00274     #define __TBB_FetchAndAddW(P,V)         __TBB_machine_fetchadd4(P,V)
00275     #define __TBB_FetchAndStoreW(P,V)       __TBB_machine_fetchstore4(P,V)
00276 #elif  __TBB_WORDSIZE==8
00277     #if __TBB_USE_GENERIC_DWORD_LOAD_STORE || __TBB_USE_GENERIC_DWORD_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_STORE
00278         #error These macros should only be used on 32-bit platforms.
00279     #endif
00280 
00281     #define __TBB_CompareAndSwapW(P,V,C)    __TBB_machine_cmpswp8(P,V,C)
00282     #define __TBB_FetchAndAddW(P,V)         __TBB_machine_fetchadd8(P,V)
00283     #define __TBB_FetchAndStoreW(P,V)       __TBB_machine_fetchstore8(P,V)
00284 #else /* __TBB_WORDSIZE != 8 */
00285     #error Unsupported machine word size.
00286 #endif /* __TBB_WORDSIZE */
00287 
00288 #ifndef __TBB_Pause
00289     inline void __TBB_Pause(int32_t) {
00290         __TBB_Yield();
00291     }
00292 #endif
00293 
00294 namespace tbb {
00295 
00297 inline void atomic_fence () { __TBB_full_memory_fence(); }
00298 
00299 namespace internal {
00300 
00302 //! Class that implements exponential backoff.
00303 class atomic_backoff : no_copy {
00305     //! Time delay, in units of "pause" instructions.
00307     static const int32_t LOOPS_BEFORE_YIELD = 16;
00308     int32_t count;
00309 public:
00310     atomic_backoff() : count(1) {}
00311 
00313     void pause() {
00314         if( count<=LOOPS_BEFORE_YIELD ) {
00315             __TBB_Pause(count);
00316             // Pause twice as long the next time.
00317             count*=2;
00318         } else {
00319             // Pause is so long that we might as well yield CPU to scheduler.
00320             __TBB_Yield();
00321         }
00322     }
00323 
00324     // Pause and return true while the backoff has not yet saturated; otherwise return false immediately.
00325     bool bounded_pause() {
00326         if( count<=LOOPS_BEFORE_YIELD ) {
00327             __TBB_Pause(count);
00328             // Pause twice as long the next time.
00329             count*=2;
00330             return true;
00331         } else {
00332             return false;
00333         }
00334     }
00335 
00336     void reset() {
00337         count = 1;
00338     }
00339 };
00340 
00342 //! Spin WHILE the value of the variable is equal to a given value (T and U should be comparable types).
00343 template<typename T, typename U>
00344 void spin_wait_while_eq( const volatile T& location, U value ) {
00345     atomic_backoff backoff;
00346     while( location==value ) backoff.pause();
00347 }
00348 
00350 //! Spin UNTIL the value of the variable is equal to a given value (T and U should be comparable types).
00351 template<typename T, typename U>
00352 void spin_wait_until_eq( const volatile T& location, const U value ) {
00353     atomic_backoff backoff;
00354     while( location!=value ) backoff.pause();
00355 }
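// Illustrative usage sketch (hypothetical variable names, not part of this header): a thread can
// spin, with exponential backoff, until another thread publishes a flag:
//
//     volatile int ready = 0;                           // shared flag, set to 1 by another thread
//     tbb::internal::spin_wait_until_eq( ready, 1 );    // spins via atomic_backoff until ready==1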
00356 
00357 //TODO: add static_assert for the requirements stated below
00358 //TODO: check if it works with signed types
00359 
00360 // there are following restrictions/limitations for this operation:
00361 //  - T should be unsigned, otherwise sign propagation will break correctness of bit manipulations.
00362 //  - T should be integer type of at most 4 bytes, for the casts and calculations to work.
00363 //      (Together, these rules limit applicability of Masked CAS to uint8_t and uint16_t only,
00364 //      as it does nothing useful for 4 bytes).
00365 //  - The operation assumes that the architecture consistently uses either little-endian or big-endian:
00366 //      it does not support mixed-endian or page-specific bi-endian architectures.
00367 // This function is the only use of __TBB_BIG_ENDIAN.
00368 #if (__TBB_BIG_ENDIAN!=-1)
00369     #if ( __TBB_USE_GENERIC_PART_WORD_CAS)
00370         #error generic implementation of part-word CAS was explicitly disabled for this configuration
00371     #endif
00372 template<typename T>
00373 inline T __TBB_MaskedCompareAndSwap (volatile T * const ptr, const T value, const T comparand ) {
00374     struct endianness{ static bool is_big_endian(){
00375         #ifndef __TBB_BIG_ENDIAN
00376             const uint32_t probe = 0x03020100;
00377             return (((const char*)(&probe))[0]==0x03);
00378         #elif (__TBB_BIG_ENDIAN==0) || (__TBB_BIG_ENDIAN==1)
00379             return __TBB_BIG_ENDIAN;
00380         #else
00381             #error unexpected value of __TBB_BIG_ENDIAN
00382         #endif
00383     }};
00384 
00385     const uint32_t byte_offset            = (uint32_t) ((uintptr_t)ptr & 0x3);
00386     volatile uint32_t * const aligned_ptr = (uint32_t*)((uintptr_t)ptr - byte_offset );
00387 
00388     // location of T within uint32_t for a C++ shift operation
00389     const uint32_t bits_to_shift     = 8*(endianness::is_big_endian() ? (4 - sizeof(T) - (byte_offset)) : byte_offset);
00390     const uint32_t mask              = (((uint32_t)1<<(sizeof(T)*8)) - 1 )<<bits_to_shift;
00391     const uint32_t shifted_comparand = ((uint32_t)comparand << bits_to_shift)&mask;
00392     const uint32_t shifted_value     = ((uint32_t)value     << bits_to_shift)&mask;
00393 
00394     for(atomic_backoff b;;b.pause()) {
00395         const uint32_t surroundings  = *aligned_ptr & ~mask ; // reload the aligned_ptr value which might change during the pause
00396         const uint32_t big_comparand = surroundings | shifted_comparand ;
00397         const uint32_t big_value     = surroundings | shifted_value     ;
00398         // __TBB_machine_cmpswp4 presumed to have full fence.
00399         // Cast shuts up /Wp64 warning
00400         const uint32_t big_result = (uint32_t)__TBB_machine_cmpswp4( aligned_ptr, big_value, big_comparand );
00401         if( big_result == big_comparand                    // CAS succeeded
00402           || ((big_result ^ big_comparand) & mask) != 0)   // CAS failed and the bits of interest have changed
00403         {
00404             return T((big_result & mask) >> bits_to_shift);
00405         }
00406         else continue;                                     // CAS failed, but the bits of interest were left unchanged
00407     }
00408 }
00409 #endif
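// Worked example of the masking arithmetic above (illustrative only): for a uint8_t located at
// byte_offset 1 within its aligned 32-bit word on a little-endian machine,
//     bits_to_shift = 8*1 = 8
//     mask          = ((uint32_t(1)<<8)-1) << 8 = 0x0000FF00
// so the comparand and new value occupy bits 8..15, the surrounding three bytes are re-read on
// every retry, and the full-word CAS succeeds only if both the byte of interest and its
// surroundings are unchanged.
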
00410 template<size_t S, typename T>
00411 inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T comparand );
00412 
00413 template<>
00414 inline uint8_t __TBB_CompareAndSwapGeneric <1,uint8_t> (volatile void *ptr, uint8_t value, uint8_t comparand ) {
00415 #if __TBB_USE_GENERIC_PART_WORD_CAS
00416     return __TBB_MaskedCompareAndSwap<uint8_t>((volatile uint8_t *)ptr,value,comparand);
00417 #else
00418     return __TBB_machine_cmpswp1(ptr,value,comparand);
00419 #endif
00420 }
00421 
00422 template<>
00423 inline uint16_t __TBB_CompareAndSwapGeneric <2,uint16_t> (volatile void *ptr, uint16_t value, uint16_t comparand ) {
00424 #if __TBB_USE_GENERIC_PART_WORD_CAS
00425     return __TBB_MaskedCompareAndSwap<uint16_t>((volatile uint16_t *)ptr,value,comparand);
00426 #else
00427     return __TBB_machine_cmpswp2(ptr,value,comparand);
00428 #endif
00429 }
00430 
00431 template<>
00432 inline uint32_t __TBB_CompareAndSwapGeneric <4,uint32_t> (volatile void *ptr, uint32_t value, uint32_t comparand ) {
00433     // Cast shuts up /Wp64 warning
00434     return (uint32_t)__TBB_machine_cmpswp4(ptr,value,comparand);
00435 }
00436 
00437 #if __TBB_64BIT_ATOMICS
00438 template<>
00439 inline uint64_t __TBB_CompareAndSwapGeneric <8,uint64_t> (volatile void *ptr, uint64_t value, uint64_t comparand ) {
00440     return __TBB_machine_cmpswp8(ptr,value,comparand);
00441 }
00442 #endif
00443 
00444 template<size_t S, typename T>
00445 inline T __TBB_FetchAndAddGeneric (volatile void *ptr, T addend) {
00446     atomic_backoff b;
00447     T result;
00448     for(;;) {
00449         result = *reinterpret_cast<volatile T *>(ptr);
00450         // __TBB_CompareAndSwapGeneric presumed to have full fence.
00451         if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, result+addend, result )==result )
00452             break;
00453         b.pause();
00454     }
00455     return result;
00456 }
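// Note: like the machine-specific fetch-and-add primitives, the generic version returns the value
// observed before the addition; e.g. (illustrative, hypothetical pointer p) if *p holds 5, then
// __TBB_FetchAndAddGeneric<4,uint32_t>(p,3) leaves 8 at the location and returns 5.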
00457 
00458 template<size_t S, typename T>
00459 inline T __TBB_FetchAndStoreGeneric (volatile void *ptr, T value) {
00460     atomic_backoff b;
00461     T result;
00462     for(;;) {
00463         result = *reinterpret_cast<volatile T *>(ptr);
00464         // __TBB_CompareAndSwapGeneric presumed to have full fence.
00465         if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, value, result )==result )
00466             break;
00467         b.pause();
00468     }
00469     return result;
00470 }
00471 
00472 #if __TBB_USE_GENERIC_PART_WORD_CAS
00473 #define __TBB_machine_cmpswp1 tbb::internal::__TBB_CompareAndSwapGeneric<1,uint8_t>
00474 #define __TBB_machine_cmpswp2 tbb::internal::__TBB_CompareAndSwapGeneric<2,uint16_t>
00475 #endif
00476 
00477 #if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_PART_WORD_FETCH_ADD
00478 #define __TBB_machine_fetchadd1 tbb::internal::__TBB_FetchAndAddGeneric<1,uint8_t>
00479 #define __TBB_machine_fetchadd2 tbb::internal::__TBB_FetchAndAddGeneric<2,uint16_t>
00480 #endif
00481 
00482 #if __TBB_USE_GENERIC_FETCH_ADD
00483 #define __TBB_machine_fetchadd4 tbb::internal::__TBB_FetchAndAddGeneric<4,uint32_t>
00484 #endif
00485 
00486 #if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_ADD
00487 #define __TBB_machine_fetchadd8 tbb::internal::__TBB_FetchAndAddGeneric<8,uint64_t>
00488 #endif
00489 
00490 #if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_PART_WORD_FETCH_STORE
00491 #define __TBB_machine_fetchstore1 tbb::internal::__TBB_FetchAndStoreGeneric<1,uint8_t>
00492 #define __TBB_machine_fetchstore2 tbb::internal::__TBB_FetchAndStoreGeneric<2,uint16_t>
00493 #endif
00494 
00495 #if __TBB_USE_GENERIC_FETCH_STORE
00496 #define __TBB_machine_fetchstore4 tbb::internal::__TBB_FetchAndStoreGeneric<4,uint32_t>
00497 #endif
00498 
00499 #if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_DWORD_FETCH_STORE
00500 #define __TBB_machine_fetchstore8 tbb::internal::__TBB_FetchAndStoreGeneric<8,uint64_t>
00501 #endif
00502 
00503 #if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
00504 #define __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(S)                                             \
00505     atomic_selector<S>::word atomic_selector<S>::fetch_store ( volatile void* location, word value ) {  \
00506         return __TBB_machine_fetchstore##S( location, value );                                          \
00507     }
00508 
00509 __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(1)
00510 __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(2)
00511 __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(4)
00512 __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(8)
00513 
00514 #undef __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE
00515 #endif /* __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
00516 
00517 #if __TBB_USE_GENERIC_DWORD_LOAD_STORE
00518 inline void __TBB_machine_store8 (volatile void *ptr, int64_t value) {
00519     for(;;) {
00520         int64_t result = *(int64_t *)ptr;
00521         if( __TBB_machine_cmpswp8(ptr,value,result)==result ) break;
00522     }
00523 }
00524 
00525 inline int64_t __TBB_machine_load8 (const volatile void *ptr) {
00526     // Comparand and new value may be anything; they only need to be equal, and
00527     // the value should have a low probability of actually being found in 'location'.
00528     const int64_t anyvalue = 2305843009213693951LL;
00529     return __TBB_machine_cmpswp8(const_cast<volatile void *>(ptr),anyvalue,anyvalue);
00530 }
00531 #endif /* __TBB_USE_GENERIC_DWORD_LOAD_STORE */
00532 
00533 #if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE
00534 
00540 template <typename T, size_t S>
00541 struct machine_load_store {
00542     static T load_with_acquire ( const volatile T& location ) {
00543         T to_return = location;
00544         __TBB_acquire_consistency_helper();
00545         return to_return;
00546     }
00547     static void store_with_release ( volatile T &location, T value ) {
00548         __TBB_release_consistency_helper();
00549         location = value;
00550     }
00551 };
00552 
00553 // In general, plain loads and stores generated by a 32-bit compiler are not atomic for 64-bit types.
00554 #if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
00555 template <typename T>
00556 struct machine_load_store<T,8> {
00557     static T load_with_acquire ( const volatile T& location ) {
00558         return (T)__TBB_machine_load8( (const volatile void*)&location );
00559     }
00560     static void store_with_release ( volatile T& location, T value ) {
00561         __TBB_machine_store8( (volatile void*)&location, (int64_t)value );
00562     }
00563 };
00564 #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
00565 #endif /* __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE */
00566 
00567 #if __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE
00568 template <typename T, size_t S>
00569 struct machine_load_store_seq_cst {
00570     static T load ( const volatile T& location ) {
00571         __TBB_full_memory_fence();
00572         return machine_load_store<T,S>::load_with_acquire( location );
00573     }
00574 #if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
00575     static void store ( volatile T &location, T value ) {
00576         atomic_selector<S>::fetch_store( (volatile void*)&location, (typename atomic_selector<S>::word)value );
00577     }
00578 #else /* !__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
00579     static void store ( volatile T &location, T value ) {
00580         machine_load_store<T,S>::store_with_release( location, value );
00581         __TBB_full_memory_fence();
00582     }
00583 #endif /* !__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
00584 };
00585 
00586 #if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
00588 // This specialization does not use __TBB_machine_load8/store8, as those are not required to be sequentially consistent.
00589 template <typename T>
00590 struct machine_load_store_seq_cst<T,8> {
00591     static T load ( const volatile T& location ) {
00592         // Comparand and new value may be anything; they only need to be equal, and
00593         // the value should have a low probability of actually being found in 'location'.
00594         const int64_t anyvalue = 2305843009213693951LL;
00595         return __TBB_machine_cmpswp8( (volatile void*)const_cast<volatile T*>(&location), anyvalue, anyvalue );
00596     }
00597     static void store ( volatile T &location, T value ) {
00598         int64_t result = (volatile int64_t&)location;
00599         while ( __TBB_machine_cmpswp8((volatile void*)&location, (int64_t)value, result) != result )
00600             result = (volatile int64_t&)location;
00601     }
00602 };
00603 #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
00604 #endif /*__TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE */
00605 
00606 #if __TBB_USE_GENERIC_RELAXED_LOAD_STORE
00607 // Relaxed operations add volatile qualifier to prevent compiler from optimizing them out.
00611 template <typename T, size_t S>
00612 struct machine_load_store_relaxed {
00613     static inline T load ( const volatile T& location ) {
00614         return location;
00615     }
00616     static inline void store ( volatile T& location, T value ) {
00617         location = value;
00618     }
00619 };
00620 
00621 #if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
00622 template <typename T>
00623 struct machine_load_store_relaxed<T,8> {
00624     static inline T load ( const volatile T& location ) {
00625         return (T)__TBB_machine_load8( (const volatile void*)&location );
00626     }
00627     static inline void store ( volatile T& location, T value ) {
00628         __TBB_machine_store8( (volatile void*)&location, (int64_t)value );
00629     }
00630 };
00631 #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
00632 #endif /* __TBB_USE_GENERIC_RELAXED_LOAD_STORE */
00633 
00634 #undef __TBB_WORDSIZE // this macro must not be used outside of the atomic machinery
00635 
00636 template<typename T>
00637 inline T __TBB_load_with_acquire(const volatile T &location) {
00638     return machine_load_store<T,sizeof(T)>::load_with_acquire( location );
00639 }
00640 template<typename T, typename V>
00641 inline void __TBB_store_with_release(volatile T& location, V value) {
00642     machine_load_store<T,sizeof(T)>::store_with_release( location, T(value) );
00643 }
00645 inline void __TBB_store_with_release(volatile size_t& location, size_t value) {
00646     machine_load_store<size_t,sizeof(size_t)>::store_with_release( location, value );
00647 }
00648 
00649 template<typename T>
00650 inline T __TBB_load_full_fence(const volatile T &location) {
00651     return machine_load_store_seq_cst<T,sizeof(T)>::load( location );
00652 }
00653 template<typename T, typename V>
00654 inline void __TBB_store_full_fence(volatile T& location, V value) {
00655     machine_load_store_seq_cst<T,sizeof(T)>::store( location, T(value) );
00656 }
00658 inline void __TBB_store_full_fence(volatile size_t& location, size_t value) {
00659     machine_load_store_seq_cst<size_t,sizeof(size_t)>::store( location, value );
00660 }
00661 
00662 template<typename T>
00663 inline T __TBB_load_relaxed (const volatile T& location) {
00664     return machine_load_store_relaxed<T,sizeof(T)>::load( const_cast<T&>(location) );
00665 }
00666 template<typename T, typename V>
00667 inline void __TBB_store_relaxed ( volatile T& location, V value ) {
00668     machine_load_store_relaxed<T,sizeof(T)>::store( const_cast<T&>(location), T(value) );
00669 }
00671 inline void __TBB_store_relaxed ( volatile size_t& location, size_t value ) {
00672     machine_load_store_relaxed<size_t,sizeof(size_t)>::store( const_cast<size_t&>(location), value );
00673 }
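// Illustrative acquire/release pairing (hypothetical names, sketch only): a producer writes a
// payload and then publishes it with a release store; a consumer that observes the flag via an
// acquire load is guaranteed to also see the payload:
//
//     // producer thread:
//     payload = 42;
//     __TBB_store_with_release( ready, 1u );
//
//     // consumer thread:
//     while( !__TBB_load_with_acquire(ready) ) ;
//     int r = payload;   // guaranteed to observe 42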
00674 
00675 // Macro __TBB_TypeWithAlignmentAtLeastAsStrict(T) should expand to a type with alignment at least as
00676 // strict as that of type T.  The type should have a trivial default constructor and destructor, so that
00677 // arrays of that type can be declared without initializers.
00678 // It is correct (but perhaps a waste of space) if __TBB_TypeWithAlignmentAtLeastAsStrict(T) expands
00679 // to a type bigger than T.
00680 // The default definition here works on machines where integers are naturally aligned and the
00681 // strictest alignment is 64.
00682 #ifndef __TBB_TypeWithAlignmentAtLeastAsStrict
00683 
00684 #if __TBB_ATTRIBUTE_ALIGNED_PRESENT
00685 
00686 #define __TBB_DefineTypeWithAlignment(PowerOf2)       \
00687 struct __TBB_machine_type_with_alignment_##PowerOf2 { \
00688     uint32_t member[PowerOf2/sizeof(uint32_t)];       \
00689 } __attribute__((aligned(PowerOf2)));
00690 #define __TBB_alignof(T) __alignof__(T)
00691 
00692 #elif __TBB_DECLSPEC_ALIGN_PRESENT
00693 
00694 #define __TBB_DefineTypeWithAlignment(PowerOf2)       \
00695 __declspec(align(PowerOf2))                           \
00696 struct __TBB_machine_type_with_alignment_##PowerOf2 { \
00697     uint32_t member[PowerOf2/sizeof(uint32_t)];       \
00698 };
00699 #define __TBB_alignof(T) __alignof(T)
00700 
00701 #else /* A compiler with unknown syntax for data alignment */
00702 #error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T)
00703 #endif
00704 
00705 /* Now declare types aligned to useful powers of two */
00706 // TODO: Is __TBB_DefineTypeWithAlignment(8) needed on 32 bit platforms?
00707 __TBB_DefineTypeWithAlignment(16)
00708 __TBB_DefineTypeWithAlignment(32)
00709 __TBB_DefineTypeWithAlignment(64)
00710 
00711 typedef __TBB_machine_type_with_alignment_64 __TBB_machine_type_with_strictest_alignment;
00712 
00713 // Primary template is a declaration of incomplete type so that it fails with unknown alignments
00714 template<size_t N> struct type_with_alignment;
00715 
00716 // Specializations for allowed alignments
00717 template<> struct type_with_alignment<1> { char member; };
00718 template<> struct type_with_alignment<2> { uint16_t member; };
00719 template<> struct type_with_alignment<4> { uint32_t member; };
00720 template<> struct type_with_alignment<8> { uint64_t member; };
00721 template<> struct type_with_alignment<16> {__TBB_machine_type_with_alignment_16 member; };
00722 template<> struct type_with_alignment<32> {__TBB_machine_type_with_alignment_32 member; };
00723 template<> struct type_with_alignment<64> {__TBB_machine_type_with_alignment_64 member; };
00724 
00725 #if __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN
00727 //! Work-around for compilers (e.g. some GCC and MSVC versions) where __alignof(T) may return 0 if T has not yet been instantiated.
00729 template<size_t Size, typename T>
00730 struct work_around_alignment_bug {
00731     static const size_t alignment = __TBB_alignof(T);
00732 };
00733 #define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<tbb::internal::work_around_alignment_bug<sizeof(T),T>::alignment>
00734 #else
00735 #define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<__TBB_alignof(T)>
00736 #endif  /* __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN */
00737 
00738 #endif  /* __TBB_TypeWithAlignmentAtLeastAsStrict */
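// Illustrative use of the macro (sketch only, similar in spirit to tbb::aligned_space): raw storage
// aligned at least as strictly as a given type, suitable for placement new; the aligned type may be
// bigger than T, so the buffer is sized in units of the aligned type:
//
//     typedef __TBB_TypeWithAlignmentAtLeastAsStrict(double) slot_t;
//     slot_t raw[ (sizeof(double) + sizeof(slot_t) - 1) / sizeof(slot_t) ];
//     double* d = new( raw ) double( 0.0 );   // placement new; requires <new>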
00739 
00740 // A template class is used here to avoid instantiating the static table in modules that do not use it.
00741 template<typename T>
00742 struct reverse {
00743     static const T byte_table[256];
00744 };
00745 // An efficient implementation of the reverse function utilizes a 2^8 lookup table holding the bit-reversed
00746 // values of [0..2^8 - 1]. Those values can also be computed on the fly at a slightly higher cost.
00747 template<typename T>
00748 const T reverse<T>::byte_table[256] = {
00749     0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
00750     0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
00751     0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
00752     0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
00753     0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
00754     0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
00755     0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
00756     0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
00757     0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
00758     0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
00759     0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
00760     0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
00761     0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
00762     0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
00763     0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
00764     0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
00765 };
00766 
00767 } // namespace internal
00768 } // namespace tbb
00769 
00770 // Preserving access to legacy APIs
00771 using tbb::internal::__TBB_load_with_acquire;
00772 using tbb::internal::__TBB_store_with_release;
00773 
00774 // Mapping historically used names to the ones expected by atomic_load_store_traits
00775 #define __TBB_load_acquire  __TBB_load_with_acquire
00776 #define __TBB_store_release __TBB_store_with_release
00777 
00778 #ifndef __TBB_Log2
00779 inline intptr_t __TBB_Log2( uintptr_t x ) {
00780     if( x==0 ) return -1;
00781     intptr_t result = 0;
00782     uintptr_t tmp;
00783 
00784     if( sizeof(x)>4 && (tmp = ((uint64_t)x)>>32)) { x=tmp; result += 32; }
00785     if( (tmp = x>>16) ) { x=tmp; result += 16; }
00786     if( (tmp = x>>8) )  { x=tmp; result += 8; }
00787     if( (tmp = x>>4) )  { x=tmp; result += 4; }
00788     if( (tmp = x>>2) )  { x=tmp; result += 2; }
00789     return (x&2)? result+1: result;
00790 }
00791 #endif
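// __TBB_Log2 computes floor(log2(x)) for nonzero x; for example (illustrative):
//     __TBB_Log2(1) == 0,  __TBB_Log2(2) == 1,  __TBB_Log2(100) == 6,  __TBB_Log2(0) == -1.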
00792 
00793 #ifndef __TBB_AtomicOR
00794 inline void __TBB_AtomicOR( volatile void *operand, uintptr_t addend ) {
00795     tbb::internal::atomic_backoff b;
00796     for(;;) {
00797         uintptr_t tmp = *(volatile uintptr_t *)operand;
00798         uintptr_t result = __TBB_CompareAndSwapW(operand, tmp|addend, tmp);
00799         if( result==tmp ) break;
00800         b.pause();
00801     }
00802 }
00803 #endif
00804 
00805 #ifndef __TBB_AtomicAND
00806 inline void __TBB_AtomicAND( volatile void *operand, uintptr_t addend ) {
00807     tbb::internal::atomic_backoff b;
00808     for(;;) {
00809         uintptr_t tmp = *(volatile uintptr_t *)operand;
00810         uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&addend, tmp);
00811         if( result==tmp ) break;
00812         b.pause();
00813     }
00814 }
00815 #endif
00816 
00817 #ifndef __TBB_Flag
00818 typedef unsigned char __TBB_Flag;
00819 #endif
00820 typedef __TBB_atomic __TBB_Flag __TBB_atomic_flag;
00821 
00822 #ifndef __TBB_TryLockByte
00823 inline bool __TBB_TryLockByte( __TBB_atomic_flag &flag ) {
00824     return __TBB_machine_cmpswp1(&flag,1,0)==0;
00825 }
00826 #endif
00827 
00828 #ifndef __TBB_LockByte
00829 inline __TBB_Flag __TBB_LockByte( __TBB_atomic_flag& flag ) {
00830     if ( !__TBB_TryLockByte(flag) ) {
00831         tbb::internal::atomic_backoff b;
00832         do {
00833             b.pause();
00834         } while ( !__TBB_TryLockByte(flag) );
00835     }
00836     return 0;
00837 }
00838 #endif
00839 
00840 #ifndef  __TBB_UnlockByte
00841 #define __TBB_UnlockByte __TBB_store_with_release
00842 #endif
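// Illustrative spin-lock usage of the byte-lock primitives (hypothetical flag name, sketch only):
//
//     static __TBB_atomic_flag my_lock = 0;   // 0 = unlocked, 1 = locked
//     __TBB_LockByte( my_lock );              // spins with exponential backoff until acquired
//     /* ...critical section... */
//     __TBB_UnlockByte( my_lock, 0 );         // release store of 0 makes the lock available again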
00843 
00844 #ifndef __TBB_ReverseByte
00845 inline unsigned char __TBB_ReverseByte(unsigned char src) {
00846     return tbb::internal::reverse<unsigned char>::byte_table[src];
00847 }
00848 #endif
00849 
00850 template<typename T>
00851 T __TBB_ReverseBits(T src) {
00852     T dst;
00853     unsigned char *original = (unsigned char *) &src;
00854     unsigned char *reversed = (unsigned char *) &dst;
00855 
00856     for( int i = sizeof(T)-1; i >= 0; i-- )
00857         reversed[i] = __TBB_ReverseByte( original[sizeof(T)-i-1] );
00858 
00859     return dst;
00860 }
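// For example (illustrative): __TBB_ReverseByte(0x01) == 0x80, and __TBB_ReverseBits<uint16_t>(0x0001)
// == 0x8000: each byte is bit-reversed and the byte order is swapped, which together reverse all bits
// of the value regardless of host endianness.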
00861 
00862 #endif /* __TBB_machine_H */
