BeBOP Optimized Sparse Kernel Interface Library
1.0.1h
|
/* $Id: cycle.h,v 1.4 2008/06/17 04:21:57 rvuduc Exp $ */

/* machine-dependent cycle counters code. Needs to be inlined. */

/***************************************************************************/
/* To use the cycle counters in your code, simply #include "cycle.h" (this
   file), and then use the functions/macros:

                 ticks getticks(void);

   ticks is an opaque typedef defined below, representing the current time.
   You extract the elapsed time between two calls to getticks() via:

                 double elapsed(ticks t1, ticks t0);

   which returns a double-precision variable in arbitrary units.  You
   are not expected to convert this into human units like seconds; it
   is intended only for *comparisons* of time intervals.

   (In order to use some of the OS-dependent timer routines like
   Solaris' gethrtime, you need to paste the autoconf snippet below
   into your configure.ac file and #include "config.h" before cycle.h,
   or define the relevant macros manually if you are not using autoconf.)

   Exactly one platform section below is selected: the first one whose
   condition holds defines HAVE_TICK_COUNTER (which disables all later
   sections) and TIMER_DESC (a human-readable name for the timer).  If no
   section matches, neither macro is defined and no timer is available.
*/

/***************************************************************************/
/* This file uses macros like HAVE_GETHRTIME that are assumed to be
   defined according to whether the corresponding function/type/header
   is available on your system.  The necessary macros are most
   conveniently defined if you are using GNU autoconf, via the tests:

   dnl ---------------------------------------------------------------------

   AC_C_INLINE
   AC_HEADER_TIME
   AC_CHECK_HEADERS([sys/time.h c_asm.h intrinsics.h])

   AC_CHECK_TYPE([hrtime_t],[AC_DEFINE(HAVE_HRTIME_T, 1, [Define to 1 if hrtime_t is defined in <sys/time.h>])],,[#if HAVE_SYS_TIME_H
#include <sys/time.h>
#endif])

   AC_CHECK_FUNCS([gethrtime read_real_time time_base_to_time clock_gettime])

   dnl Cray UNICOS _rtc() (real-time clock) intrinsic
   AC_MSG_CHECKING([for _rtc intrinsic])
   rtc_ok=yes
   AC_TRY_LINK([#ifdef HAVE_INTRINSICS_H
#include <intrinsics.h>
#endif], [_rtc()], [AC_DEFINE(HAVE__RTC,1,[Define if you have the UNICOS _rtc() intrinsic.])], [rtc_ok=no])
   AC_MSG_RESULT($rtc_ok)

   dnl ---------------------------------------------------------------------
*/

/***************************************************************************/

#if TIME_WITH_SYS_TIME
# include <sys/time.h>
# include <time.h>
#else
# if HAVE_SYS_TIME_H
#  include <sys/time.h>
# else
#  include <time.h>
# endif
#endif

/* Defines elapsed() for any platform whose ticks type is a plain arithmetic
   type; INL is that compiler's spelling of the "inline" keyword. */
#define INLINE_ELAPSED(INL) static INL double elapsed(ticks t1, ticks t0) \
{									  \
     return (double)(t1 - t0);						  \
}

/*----------------------------------------------------------------*/
/* PAPI (Added by rich vuduc <richie@cs.berkeley.edu>, October 2004) */
#if defined(HAVE_PAPI) && !defined(HAVE_TICK_COUNTER)

#include <papi.h>

typedef long_long ticks;

/* Alias the function name only (no call parentheses), so that the
   documented usage getticks() expands to PAPI_get_real_cyc(). */
#define getticks PAPI_get_real_cyc

INLINE_ELAPSED(inline)

#define HAVE_TICK_COUNTER
#define TIMER_DESC "PAPI Cycle Counter"
#endif

/*----------------------------------------------------------------*/
/* Solaris */
#if defined(HAVE_GETHRTIME) && defined(HAVE_HRTIME_T) && !defined(HAVE_TICK_COUNTER)
typedef hrtime_t ticks;

#define getticks gethrtime

INLINE_ELAPSED(inline)

#define HAVE_TICK_COUNTER
#define TIMER_DESC "Solaris gethrtime()"
#endif

/*----------------------------------------------------------------*/
/* AIX v. 4+ routines to read the real-time clock or time-base register */
#if defined(HAVE_READ_REAL_TIME) && defined(HAVE_TIME_BASE_TO_TIME) && !defined(HAVE_TICK_COUNTER)
typedef timebasestruct_t ticks;

static inline ticks getticks(void)
{
     ticks t;
     read_real_time(&t, TIMEBASE_SZ);
     return t;
}

static inline double elapsed(ticks t1, ticks t0) /* time in nanoseconds */
{
     time_base_to_time(&t1, TIMEBASE_SZ);
     time_base_to_time(&t0, TIMEBASE_SZ);
     /* Convert each field to double BEFORE subtracting: the fields are
        unsigned, so an integer subtraction would wrap to a huge positive
        value whenever t1's field is smaller than t0's. */
     return (((double)t1.tb_high - (double)t0.tb_high) * 1.0e9 +
	     ((double)t1.tb_low - (double)t0.tb_low));
}

#define HAVE_TICK_COUNTER
#define TIMER_DESC "AIX read_real_time()"
#endif

/*----------------------------------------------------------------*/
/*
 * PowerPC ``cycle'' counter using the time base register.
 */
#if ((defined(__GNUC__) && (defined(__powerpc__) || defined(__ppc__))) || (defined(__MWERKS__) && defined(macintosh))) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long ticks;

static __inline__ ticks getticks(void)
{
     unsigned int tbl, tbu0, tbu1;

     /* Re-read until the upper half is unchanged across the read of the
        lower half, so a carry between the two reads cannot be observed. */
     do {
	  __asm__ __volatile__ ("mftbu %0" : "=r"(tbu0));
	  __asm__ __volatile__ ("mftb %0" : "=r"(tbl));
	  __asm__ __volatile__ ("mftbu %0" : "=r"(tbu1));
     } while (tbu0 != tbu1);

     return (((unsigned long long)tbu0) << 32) | tbl;
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#define TIMER_DESC "PowerPC time base register"
#endif

/*----------------------------------------------------------------*/
/*
 * Pentium cycle counter
 */
#if (defined(__GNUC__) || defined(__ICC) || defined(__PGI)) && (defined(__i386__) || defined(__i386) || defined(__x86_64) || defined(__amd64__)) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long ticks;

static __inline__ ticks getticks(void)
{
     unsigned int lo, hi;

     /* RDTSC returns the counter in EDX:EAX.  Read the two halves
        explicitly: the "=A" constraint only denotes the edx:eax pair on
        32-bit targets, and on x86-64 it would silently drop the upper
        32 bits of the counter. */
     __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
     /* no input, nothing else clobbered */
     return (((ticks)hi) << 32) | (ticks)lo;
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#define TIMER_DESC "Intel x86 RDTSC cycle counter (gcc and Intel compilers)"
#endif

/* Visual C++ -- thanks to Morten Nissov for his help with this */
#if _MSC_VER >= 1200 && _M_IX86 >= 500 && !defined(HAVE_TICK_COUNTER)
#include <windows.h>
typedef LARGE_INTEGER ticks;
#define RDTSC __asm __emit 0fh __asm __emit 031h /* hack for VC++ 5.0 */

static __inline ticks getticks(void)
{
     ticks ret;

     __asm {
	  RDTSC
	  mov ret.HighPart, edx
	  mov ret.LowPart, eax
     }
     return ret;
}

static __inline double elapsed(ticks t1, ticks t0)
{
     return (double)(t1.QuadPart - t0.QuadPart);
}

#define HAVE_TICK_COUNTER
#define TIMER_DESC "Intel x86 RDTSC cycle counter (MSC/Win32 inline assembly)"
#endif

/*----------------------------------------------------------------*/
/*
 * X86-64 cycle counter
 */
#if defined(__GNUC__) && defined(__x86_64__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long ticks;

static __inline__ ticks getticks(void)
{
     unsigned a, d;
     __asm volatile("rdtsc" : "=a" (a), "=d" (d));
     return ((ticks)a) | (((ticks)d) << 32);
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#define TIMER_DESC "Intel x86-64 cycle counter"
#endif

/* Visual C++ for x86-64: _M_AMD64 / _M_X64 identify the target, and the
   __rdtsc intrinsic is declared in <intrin.h>. */
#if _MSC_VER >= 1400 && (defined(_M_AMD64) || defined(_M_X64)) && !defined(HAVE_TICK_COUNTER)
#include <intrin.h>
#pragma intrinsic(__rdtsc)
typedef unsigned __int64 ticks;

#define getticks __rdtsc

INLINE_ELAPSED(__inline)

#define HAVE_TICK_COUNTER
#define TIMER_DESC "Intel x86-64 cycle counter (MSC/Win32 inline assembly)"
#endif

/*----------------------------------------------------------------*/
/*
 * IA64 cycle counter
 */
/* intel's ecc compiler */
#if defined(__ECC) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long ticks;
#include <ia64intrin.h>

static __inline__ ticks getticks(void)
{
     return __getReg(_IA64_REG_AR_ITC);
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#define TIMER_DESC "Intel IA-64 cycle counter (Intel compiler)"
#endif

#if defined(__GNUC__) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long ticks;

static __inline__ ticks getticks(void)
{
     ticks ret;

     __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(ret));
     return ret;
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#define TIMER_DESC "Intel IA-64 cycle counter (gcc)"
#endif

/* HP/UX IA64 compiler, courtesy Teresa L. Johnson: */
#if defined(__hpux) && defined(__ia64) && !defined(HAVE_TICK_COUNTER)
#include <machine/sys/inline.h>
typedef unsigned long ticks;

static inline ticks getticks(void)
{
     ticks ret;

     ret = _Asm_mov_from_ar (_AREG_ITC);
     return ret;
}

INLINE_ELAPSED(inline)

#define HAVE_TICK_COUNTER
#define TIMER_DESC "Intel IA-64 cycle counter (HP compiler)"
#endif

/*----------------------------------------------------------------*/
/*
 * PA-RISC cycle counter
 */
/* The parentheses matter: && binds tighter than ||, so without them this
   section would be enabled on __hppa__ even if an earlier section had
   already defined HAVE_TICK_COUNTER. */
#if (defined(__hppa__) || defined(__hppa)) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long ticks;

#  ifdef __GNUC__
static __inline__ ticks getticks(void)
{
     ticks ret;

     __asm__ __volatile__("mfctl 16, %0": "=r" (ret));
     /* no input, nothing else clobbered */
     return ret;
}

#define TIMER_DESC "PA-RISC cycle counter (gcc)"

#  else
#  include <machine/inline.h>
static inline unsigned long getticks(void)
{
     register ticks ret;
     _MFCTL(16, ret);
     return ret;
}

#define TIMER_DESC "PA-RISC cycle counter (HP compiler?)"

#  endif

INLINE_ELAPSED(inline)

#define HAVE_TICK_COUNTER
#endif

/*----------------------------------------------------------------*/
/* S390, courtesy of James Treacy */
#if defined(__GNUC__) && defined(__s390__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long ticks;

static __inline__ ticks getticks(void)
{
     ticks cycles;
     __asm__("stck 0(%0)" : : "a" (&(cycles)) : "memory", "cc");
     return cycles;
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#define TIMER_DESC "IBM S390 cycle counter"
#endif

/*----------------------------------------------------------------*/
#if defined(__GNUC__) && defined(__alpha__) && !defined(HAVE_TICK_COUNTER)
/*
 * The 32-bit cycle counter on alpha overflows pretty quickly,
 * unfortunately.  A 1GHz machine overflows in 4 seconds.
 */
typedef unsigned int ticks;

static __inline__ ticks getticks(void)
{
     unsigned long cc;
     __asm__ __volatile__ ("rpcc %0" : "=r"(cc));
     return (cc & 0xFFFFFFFF);
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#define TIMER_DESC "HP/Compaq/DEC Alpha cycle counter (32-bit)"
#endif

/*----------------------------------------------------------------*/
#if defined(__GNUC__) && defined(__sparc_v9__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long ticks;

static __inline__ ticks getticks(void)
{
     ticks ret;
     /* volatile: the asm has an output and no inputs, so without it the
        compiler may merge or hoist two reads of the counter. */
     __asm__ __volatile__("rd %%tick, %0" : "=r" (ret));
     return ret;
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#define TIMER_DESC "Sun SPARC v9 cycle counter"
#endif

/*----------------------------------------------------------------*/
#if defined(__DECC) && defined(__alpha) && defined(HAVE_C_ASM_H) && !defined(HAVE_TICK_COUNTER)
#  include <c_asm.h>
typedef unsigned int ticks;

static __inline ticks getticks(void)
{
     unsigned long cc;
     cc = asm("rpcc %v0");
     return (cc & 0xFFFFFFFF);
}

INLINE_ELAPSED(__inline)

#define HAVE_TICK_COUNTER
#define TIMER_DESC "DEC Alpha cycle counter"
#endif

/*----------------------------------------------------------------*/
/* SGI/Irix */
#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_SGI_CYCLE) && !defined(HAVE_TICK_COUNTER)
typedef struct timespec ticks;

static inline ticks getticks(void)
{
     struct timespec t;
     clock_gettime(CLOCK_SGI_CYCLE, &t);
     return t;
}

static inline double elapsed(ticks t1, ticks t0)
{
     return (double)(t1.tv_sec - t0.tv_sec) * 1.0E9 +
	  (double)(t1.tv_nsec - t0.tv_nsec);
}

#define HAVE_TICK_COUNTER
#define TIMER_DESC "SGI/Irix cycle counter"
#endif

/*----------------------------------------------------------------*/
/* Cray UNICOS _rtc() intrinsic function */
#if defined(HAVE__RTC) && !defined(HAVE_TICK_COUNTER)
#ifdef HAVE_INTRINSICS_H
#  include <intrinsics.h>
#endif

typedef long long ticks;

#define getticks _rtc

INLINE_ELAPSED(inline)

#define HAVE_TICK_COUNTER
#define TIMER_DESC "Cray UNICOS _rtc() intrinsic read-counter routine"
#endif

/* eof */