blitz Version 0.10
|
// -*- C++ -*-
/***************************************************************************
 * blitz/tuning.h      Platform-specific code tuning
 *
 * $Id: tuning.h,v 1.5 2011/03/25 22:41:16 julianc Exp $
 *
 * Copyright (C) 1997-2011 Todd Veldhuizen <tveldhui@acm.org>
 *
 * This file is a part of Blitz.
 *
 * Blitz is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * Blitz is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Blitz.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Suggestions:          blitz-devel@lists.sourceforge.net
 * Bugs:                 blitz-support@lists.sourceforge.net
 *
 * For more information, please see the Blitz++ Home Page:
 *    https://sourceforge.net/projects/blitz/
 *
 ***************************************************************************/

#ifndef BZ_TUNING_H
#define BZ_TUNING_H

// These estimates should be conservative (i.e. underestimate the
// cache sizes).
#define BZ_L1_CACHE_ESTIMATED_SIZE    8192
#define BZ_L2_CACHE_ESTIMATED_SIZE    65536

/*
 * Default tuning switches.  A switch is "on" when it is #define'd and
 * "off" when it is #undef'd; the platform-specific sections further
 * down may override these defaults.
 */
#undef  BZ_PARTIAL_LOOP_UNROLL
#define BZ_PASS_EXPR_BY_VALUE
#undef  BZ_PTR_INC_FASTER_THAN_INDIRECTION
#define BZ_MANUAL_VECEXPR_COPY_CONSTRUCTOR
#undef  BZ_KCC_COPY_PROPAGATION_KLUDGE
#undef  BZ_ALTERNATE_FORWARD_BACKWARD_TRAVERSALS
#undef  BZ_ARRAY_EXPR_PASS_INDEX_BY_VALUE
#define BZ_INLINE_GROUP1
#define BZ_INLINE_GROUP2
#define BZ_COLLAPSE_LOOPS
#define BZ_USE_FAST_READ_ARRAY_EXPR
#define BZ_ARRAY_EXPR_USE_COMMON_STRIDE
#undef  BZ_ARRAY_SPACE_FILLING_TRAVERSAL
#undef  BZ_ARRAY_FAST_TRAVERSAL_UNROLL
#undef  BZ_ARRAY_STACK_TRAVERSAL_CSE_AND_ANTIALIAS
#undef  BZ_ARRAY_STACK_TRAVERSAL_UNROLL
#define BZ_ARRAY_2D_STENCIL_TILING
#define BZ_ARRAY_2D_STENCIL_TILE_SIZE       128
#undef  BZ_INTERLACE_ARRAYS
#undef  BZ_ALIGN_BLOCKS_ON_CACHELINE_BOUNDARY
#define BZ_FAST_COMPILE

// BZ_NEW_EXPRESSION_TEMPLATES is enabled unless the user opts out by
// defining BZ_DISABLE_NEW_ET before this header is included.
#ifndef BZ_DISABLE_NEW_ET
#define BZ_NEW_EXPRESSION_TEMPLATES
#endif

// BZ_FAST_COMPILE switches on BZ_ETPARMS_CONSTREF and BZ_NO_INLINE_ET,
// which select the BZ_ETPARM and _bz_inline_et definitions below.
#ifdef BZ_FAST_COMPILE
#define BZ_ETPARMS_CONSTREF
#define BZ_NO_INLINE_ET
#endif

/*
 * Platform-specific tuning
 */

#ifdef _CRAYT3E
 // The backend compiler on the T3E does a better job of
 // loop unrolling.
#undef BZ_PARTIAL_LOOP_UNROLL
#undef BZ_ARRAY_FAST_TRAVERSAL_UNROLL
#undef BZ_ARRAY_STACK_TRAVERSAL_UNROLL
#endif

#ifdef __GNUC__
 // The egcs compiler does a good job of loop unrolling, if
 // -funroll-loops is used.
#undef BZ_PARTIAL_LOOP_UNROLL
#undef BZ_ARRAY_FAST_TRAVERSAL_UNROLL
#undef BZ_ARRAY_STACK_TRAVERSAL_UNROLL
#endif

#ifdef BZ_DISABLE_KCC_COPY_PROPAGATION_KLUDGE
#undef BZ_KCC_COPY_PROPAGATION_KLUDGE
#endif

// _bz_inline1 expands to `inline` when BZ_INLINE_GROUP1 is on,
// and to nothing otherwise.
#ifdef BZ_INLINE_GROUP1
#define _bz_inline1 inline
#else
#define _bz_inline1
#endif

// _bz_inline2 expands to `inline` when BZ_INLINE_GROUP2 is on,
// and to nothing otherwise.
#ifdef BZ_INLINE_GROUP2
#define _bz_inline2 inline
#else
#define _bz_inline2
#endif

// _bz_inline_et expands to nothing when BZ_NO_INLINE_ET is on,
// and to `inline` otherwise.
#ifdef BZ_NO_INLINE_ET
#define _bz_inline_et
#else
#define _bz_inline_et inline
#endif

// BZ_ETPARM(X) spells the parameter type for X: `const X&` when
// BZ_ETPARMS_CONSTREF is on, plain `X` (by value) otherwise.
#ifdef BZ_ETPARMS_CONSTREF
#define BZ_ETPARM(X) const X&
#else
#define BZ_ETPARM(X) X
#endif

#ifdef __DECCXX
 // The DEC cxx compiler has problems with loop unrolling
 // because of aliasing.  Loop unrolling and anti-aliasing
 // is done by Blitz++.

#define BZ_PARTIAL_LOOP_UNROLL
#define BZ_ARRAY_STACK_TRAVERSAL_CSE_AND_ANTIALIAS
#define BZ_ARRAY_STACK_TRAVERSAL_UNROLL
#endif

/*
 * BZ_NO_PROPAGATE(X) prevents the compiler from performing
 * copy propagation on a variable.  This is used for loop
 * unrolling to prevent KAI C++ from rearranging the
 * ordering of memory accesses.
 *
 * By default the macro is the identity; it is only replaced when
 * both __KCC and BZ_USE_NO_PROPAGATE are defined.
 */

#define BZ_NO_PROPAGATE(X)   X

#ifdef __KCC
#ifdef BZ_USE_NO_PROPAGATE
    extern "C" int __kai_apply(const char*, ...);

    // #undef takes the bare macro name only; a trailing "(X)" is stray
    // tokens that conforming preprocessors diagnose.
    #undef  BZ_NO_PROPAGATE
    // The argument is parenthesized so that `&` applies to the whole
    // expression passed in, not just its first operand.
    #define BZ_NO_PROPAGATE(X)  __kai_apply("(%a)",&(X))
#endif // BZ_USE_NO_PROPAGATE
#endif // __KCC

#endif // BZ_TUNING_H