#if BT_USE_OPENMP && BT_THREADSAFE
#if BT_USE_PPL && BT_THREADSAFE
#if BT_USE_TBB && BT_THREADSAFE
#define __TBB_NO_IMPLICIT_LINKAGE 1
#include <tbb/task_scheduler_init.h>
#include <tbb/parallel_for.h>
#include <tbb/blocked_range.h>
#if __cplusplus >= 201103L
#define USE_CPP11_ATOMICS 1
#elif defined(_MSC_VER)
#define USE_MSVC_INTRINSICS 1
#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))
#define USE_GCC_BUILTIN_ATOMICS 1
#elif defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
#define USE_GCC_BUILTIN_ATOMICS_OLD 1
#define THREAD_LOCAL_STATIC thread_local static
std::atomic<int>* aDest = reinterpret_cast<std::atomic<int>*>(&mLock);
return std::atomic_compare_exchange_weak_explicit(aDest, &expected, int(1), std::memory_order_acq_rel, std::memory_order_acquire);
std::atomic<int>* aDest = reinterpret_cast<std::atomic<int>*>(&mLock);
std::atomic_store_explicit(aDest, int(0), std::memory_order_release);
#elif USE_MSVC_INTRINSICS
#define WIN32_LEAN_AND_MEAN
#define THREAD_LOCAL_STATIC __declspec(thread) static
volatile long* aDest = reinterpret_cast<long*>(&mLock);
return (0 == _InterlockedCompareExchange(aDest, 1, 0));
volatile long* aDest = reinterpret_cast<long*>(&mLock);
_InterlockedExchange(aDest, 0);
#elif USE_GCC_BUILTIN_ATOMICS
#define THREAD_LOCAL_STATIC static __thread
const int memOrderSuccess = __ATOMIC_ACQ_REL;
const int memOrderFail = __ATOMIC_ACQUIRE;
return __atomic_compare_exchange_n(&mLock, &expected, int(1), weak, memOrderSuccess, memOrderFail);
__atomic_store_n(&mLock, int(0), __ATOMIC_RELEASE);
#elif USE_GCC_BUILTIN_ATOMICS_OLD
#define THREAD_LOCAL_STATIC static __thread
return __sync_bool_compare_and_swap(&mLock, int(0), int(1));
__sync_fetch_and_and(&mLock, int(0));
#error "no threading primitives defined -- unknown platform"
btAssert(!"unimplemented btSpinMutex::lock() called");
btAssert(!"unimplemented btSpinMutex::unlock() called");
btAssert(!"unimplemented btSpinMutex::tryLock() called");
#define THREAD_LOCAL_STATIC static
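// --- Illustrative usage sketch (not part of btThreads.cpp; names below are hypothetical) ---
// Every platform branch above implements the same contract: btSpinMutex::tryLock()
// performs an acquire compare-and-swap on the internal int, and btSpinMutex::unlock()
// performs a release store. A caller guards a short critical section like this:
#include "LinearMath/btThreads.h"

static btSpinMutex gExampleMutex;
static int gExampleSharedCounter = 0;

void exampleIncrement()
{
    gExampleMutex.lock();     // spins via tryLock() until acquired; does not sleep the thread
    ++gExampleSharedCounter;  // keep work under a spin lock minimal
    gExampleMutex.unlock();   // release store publishes the update to other threads
}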
btAssert(!"thread counter exceeded");
#define BT_DETECT_BAD_THREAD_INDEX 0
#if BT_DETECT_BAD_THREAD_INDEX
typedef DWORD ThreadId_t;
const static ThreadId_t kInvalidThreadId = 0;
static ThreadId_t getDebugThreadId()
return GetCurrentThreadId();
const unsigned int kNullIndex = ~0U;
if (sThreadIndex == kNullIndex)
#if BT_DETECT_BAD_THREAD_INDEX
ThreadId_t tid = getDebugThreadId();
if (gDebugThreadIds[sThreadIndex] == kInvalidThreadId)
gDebugThreadIds[sThreadIndex] = tid;
if (gDebugThreadIds[sThreadIndex] != tid)
btAssert(!"there are 2 or more threads with the same thread-index!");
btAssert(!"btSetTaskScheduler must be called from the main thread!");
#if BT_DETECT_BAD_THREAD_INDEX
gDebugThreadIds[i] = kInvalidThreadId;
btAssert(!"called btParallelFor in non-threadsafe build. enable BT_THREADSAFE");
#if BT_DETECT_BAD_THREAD_INDEX
gDebugThreadIds[i] = kInvalidThreadId;
btAssert(!"called btParallelSum in non-threadsafe build. enable BT_THREADSAFE");
return body.sumLoop(iBegin, iEnd);
body.forLoop(iBegin, iEnd);
return body.sumLoop(iBegin, iEnd);
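// --- Illustrative caller sketch (hypothetical example code, not from btThreads.cpp) ---
// The sequential fallbacks above simply run the body once over the whole [iBegin, iEnd)
// range on the calling thread. Callers supply the body by deriving from
// btIParallelForBody / btIParallelSumBody (declared in LinearMath/btThreads.h):
#include "LinearMath/btThreads.h"
#include "LinearMath/btAlignedObjectArray.h"

struct ExampleScaleBody : public btIParallelForBody
{
    btAlignedObjectArray<btScalar>* mValues;
    btScalar mFactor;
    ExampleScaleBody(btAlignedObjectArray<btScalar>* values, btScalar factor)
        : mValues(values), mFactor(factor) {}
    virtual void forLoop(int iBegin, int iEnd) const BT_OVERRIDE
    {
        for (int i = iBegin; i < iEnd; ++i)
            (*mValues)[i] *= mFactor;  // each grain touches a disjoint index range
    }
};

struct ExampleSumBody : public btIParallelSumBody
{
    const btAlignedObjectArray<btScalar>* mValues;
    explicit ExampleSumBody(const btAlignedObjectArray<btScalar>* values) : mValues(values) {}
    virtual btScalar sumLoop(int iBegin, int iEnd) const BT_OVERRIDE
    {
        btScalar partialSum = btScalar(0);
        for (int i = iBegin; i < iEnd; ++i)
            partialSum += (*mValues)[i];  // partial sums are combined by the scheduler
        return partialSum;
    }
};

// usage:
//   ExampleScaleBody scaleBody(&values, btScalar(2));
//   btParallelFor(0, values.size(), 64, scaleBody);
//   ExampleSumBody sumBody(&values);
//   btScalar total = btParallelSum(0, values.size(), 64, sumBody);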
#if BT_USE_OPENMP && BT_THREADSAFE
return omp_get_max_threads();
omp_set_num_threads(1);
omp_set_num_threads(m_numThreads);
m_savedThreadCounter = 0;
#pragma omp parallel for schedule(static, 1)
for (int i = iBegin; i < iEnd; i += grainSize)
body.forLoop(i, (std::min)(i + grainSize, iEnd));
#pragma omp parallel for schedule(static, 1) reduction(+ : sum)
for (int i = iBegin; i < iEnd; i += grainSize)
sum += body.sumLoop(i, (std::min)(i + grainSize, iEnd));
#if BT_USE_TBB && BT_THREADSAFE
tbb::task_scheduler_init* m_tbbSchedulerInit;
m_tbbSchedulerInit = NULL;
~btTaskSchedulerTBB()
if (m_tbbSchedulerInit)
delete m_tbbSchedulerInit;
m_tbbSchedulerInit = NULL;
return tbb::task_scheduler_init::default_num_threads();
if (m_tbbSchedulerInit)
delete m_tbbSchedulerInit;
m_tbbSchedulerInit = NULL;
m_tbbSchedulerInit = new tbb::task_scheduler_init(m_numThreads);
m_savedThreadCounter = 0;
struct ForBodyAdapter
void operator()(const tbb::blocked_range<int>& range) const
mBody->forLoop(range.begin(), range.end());
ForBodyAdapter tbbBody(&body);
tbb::parallel_for(tbb::blocked_range<int>(iBegin, iEnd, grainSize), tbbBody, tbb::simple_partitioner());
struct SumBodyAdapter
void join(const SumBodyAdapter& src) { mSum += src.mSum; }
void operator()(const tbb::blocked_range<int>& range)
mSum += mBody->sumLoop(range.begin(), range.end());
SumBodyAdapter tbbBody(&body);
tbb::parallel_deterministic_reduce(tbb::blocked_range<int>(iBegin, iEnd, grainSize), tbbBody);
#if BT_USE_PPL && BT_THREADSAFE
concurrency::combinable<btScalar> m_sum;
return concurrency::GetProcessorCount();
using namespace concurrency;
if (CurrentScheduler::Id() != -1)
CurrentScheduler::Detach();
SchedulerPolicy policy;
policy.SetConcurrencyLimits(1, 1);
CurrentScheduler::Create(policy);
CurrentScheduler::Detach();
policy.SetConcurrencyLimits(m_numThreads, m_numThreads);
CurrentScheduler::Create(policy);
m_savedThreadCounter = 0;
struct ForBodyAdapter
ForBodyAdapter(const btIParallelForBody* body, int grainSize, int end) : mBody(body), mGrainSize(grainSize), mIndexEnd(end) {}
ForBodyAdapter pplBody(&body, grainSize, iEnd);
struct SumBodyAdapter
concurrency::combinable<btScalar>* mSum;
SumBodyAdapter(const btIParallelSumBody* body, concurrency::combinable<btScalar>* sum, int grainSize, int end) : mBody(body), mSum(sum), mGrainSize(grainSize), mIndexEnd(end) {}
mSum->local() += mBody->sumLoop(i, (std::min)(i + mGrainSize, mIndexEnd));
SumBodyAdapter pplBody(&body, &m_sum, grainSize, iEnd);
return m_sum.combine(sumFunc);
return &sTaskScheduler;
#if BT_USE_OPENMP && BT_THREADSAFE
static btTaskSchedulerOpenMP sTaskScheduler;
return &sTaskScheduler;
#if BT_USE_TBB && BT_THREADSAFE
static btTaskSchedulerTBB sTaskScheduler;
return &sTaskScheduler;
#if BT_USE_PPL && BT_THREADSAFE
static btTaskSchedulerPPL sTaskScheduler;
return &sTaskScheduler;
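// --- Illustrative startup sketch (hypothetical function, not part of btThreads.cpp) ---
// Each getter above returns a function-local static scheduler for its backend; when a
// backend is compiled out, the corresponding #else branch (not shown in this excerpt)
// returns NULL. A caller can therefore probe the getters in order of preference and
// fall back to the always-available sequential scheduler:
#include "LinearMath/btThreads.h"

void exampleInitTaskScheduler()
{
    btITaskScheduler* scheduler = btGetOpenMPTaskScheduler();
    if (!scheduler) scheduler = btGetTBBTaskScheduler();
    if (!scheduler) scheduler = btGetPPLTaskScheduler();
    if (!scheduler) scheduler = btGetSequentialTaskScheduler();
    scheduler->setNumThreads(scheduler->getMaxNumThreads());
    btSetTaskScheduler(scheduler);  // must be called from the main thread (see assert above)
}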