blitz Version 0.10
blitz/tuning.h
Go to the documentation of this file.
00001 // -*- C++ -*-
00002 /***************************************************************************
00003  * blitz/tuning.h      Platform-specific code tuning
00004  *
00005  * $Id: tuning.h,v 1.5 2011/03/25 22:41:16 julianc Exp $
00006  *
00007  * Copyright (C) 1997-2011 Todd Veldhuizen <tveldhui@acm.org>
00008  *
00009  * This file is a part of Blitz.
00010  *
00011  * Blitz is free software: you can redistribute it and/or modify 
00012  * it under the terms of the GNU Lesser General Public License
00013  * as published by the Free Software Foundation, either version 3
00014  * of the License, or (at your option) any later version.
00015  *
00016  * Blitz is distributed in the hope that it will be useful,
00017  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00018  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00019  * GNU Lesser General Public License for more details.
00020  *
00021  * You should have received a copy of the GNU Lesser General Public 
00022  * License along with Blitz.  If not, see <http://www.gnu.org/licenses/>.
00023  * 
00024  * Suggestions:          blitz-devel@lists.sourceforge.net
00025  * Bugs:                 blitz-support@lists.sourceforge.net    
00026  *
00027  * For more information, please see the Blitz++ Home Page:
00028  *    https://sourceforge.net/projects/blitz/
00029  *
00030  ***************************************************************************/
00031 
00032 #ifndef BZ_TUNING_H
00033 #define BZ_TUNING_H
00034 
00035 // Estimated cache sizes.  These estimates should be conservative
00036 // (i.e. underestimate the cache sizes).
00037 #define BZ_L1_CACHE_ESTIMATED_SIZE    8192   // NOTE(review): presumably bytes -- confirm
00038 #define BZ_L2_CACHE_ESTIMATED_SIZE    65536  // NOTE(review): presumably bytes -- confirm
00039 
00040 // Default tuning switches: #define turns an option on, #undef leaves it off.
00041 #undef  BZ_PARTIAL_LOOP_UNROLL                      // turned back on for __DECCXX below
00042 #define BZ_PASS_EXPR_BY_VALUE
00043 #undef  BZ_PTR_INC_FASTER_THAN_INDIRECTION
00044 #define BZ_MANUAL_VECEXPR_COPY_CONSTRUCTOR
00045 #undef  BZ_KCC_COPY_PROPAGATION_KLUDGE
00046 #undef  BZ_ALTERNATE_FORWARD_BACKWARD_TRAVERSALS
00047 #undef  BZ_ARRAY_EXPR_PASS_INDEX_BY_VALUE
00048 #define BZ_INLINE_GROUP1                            // makes _bz_inline1 expand to `inline` below
00049 #define BZ_INLINE_GROUP2                            // makes _bz_inline2 expand to `inline` below
00050 #define BZ_COLLAPSE_LOOPS
00051 #define BZ_USE_FAST_READ_ARRAY_EXPR
00052 #define BZ_ARRAY_EXPR_USE_COMMON_STRIDE
00053 #undef  BZ_ARRAY_SPACE_FILLING_TRAVERSAL
00054 #undef  BZ_ARRAY_FAST_TRAVERSAL_UNROLL
00055 #undef  BZ_ARRAY_STACK_TRAVERSAL_CSE_AND_ANTIALIAS  // turned back on for __DECCXX below
00056 #undef  BZ_ARRAY_STACK_TRAVERSAL_UNROLL             // turned back on for __DECCXX below
00057 #define BZ_ARRAY_2D_STENCIL_TILING
00058 #define BZ_ARRAY_2D_STENCIL_TILE_SIZE       128     // NOTE(review): presumably the tile size used by the tiling option above -- confirm
00059 #undef  BZ_INTERLACE_ARRAYS
00060 #undef  BZ_ALIGN_BLOCKS_ON_CACHELINE_BOUNDARY
00061 #define BZ_FAST_COMPILE                             // triggers BZ_ETPARMS_CONSTREF and BZ_NO_INLINE_ET below
00062 
00063 
00064 #ifndef BZ_DISABLE_NEW_ET   // opt-out: define BZ_DISABLE_NEW_ET beforehand to skip this definition
00065  #define BZ_NEW_EXPRESSION_TEMPLATES
00066 #endif
00067 
00068 #ifdef BZ_FAST_COMPILE        // BZ_FAST_COMPILE is defined unconditionally above, so this branch is always taken
00069 #define BZ_ETPARMS_CONSTREF   // makes BZ_ETPARM(X) expand to `const X&` below
00070 #define BZ_NO_INLINE_ET       // makes _bz_inline_et expand to nothing below
00071 #endif
00072 
00073 /*
00074  * Platform-specific tuning
00075  */
00076 
00077 #ifdef _CRAYT3E
00078  // The backend compiler on the T3E does a better job of
00079  // loop unrolling, so the Blitz unrolling options are switched off.
00080  #undef BZ_PARTIAL_LOOP_UNROLL
00081  #undef BZ_ARRAY_FAST_TRAVERSAL_UNROLL
00082  #undef BZ_ARRAY_STACK_TRAVERSAL_UNROLL
00083 #endif
00084 
00085 #ifdef __GNUC__
00086  // The egcs compiler does a good job of loop unrolling, if
00087  // -funroll-loops is used, so the Blitz unrolling options are switched off.
00088  #undef BZ_PARTIAL_LOOP_UNROLL
00089  #undef BZ_ARRAY_FAST_TRAVERSAL_UNROLL
00090  #undef BZ_ARRAY_STACK_TRAVERSAL_UNROLL
00091 #endif
00092 
00093 #ifdef  BZ_DISABLE_KCC_COPY_PROPAGATION_KLUDGE   // opt-out: ensures the kludge macro is not defined
00094  #undef BZ_KCC_COPY_PROPAGATION_KLUDGE
00095 #endif
00096 
00097 #ifdef  BZ_INLINE_GROUP1       // _bz_inline1: optional `inline` specifier controlled by BZ_INLINE_GROUP1
00098  #define _bz_inline1 inline
00099 #else
00100  #define _bz_inline1           // expands to nothing
00101 #endif
00102 
00103 #ifdef  BZ_INLINE_GROUP2       // _bz_inline2: optional `inline` specifier controlled by BZ_INLINE_GROUP2
00104  #define _bz_inline2 inline
00105 #else
00106  #define _bz_inline2           // expands to nothing
00107 #endif
00108 
00109 #ifdef  BZ_NO_INLINE_ET        // _bz_inline_et: `inline` specifier unless BZ_NO_INLINE_ET is defined
00110  #define _bz_inline_et         // expands to nothing
00111 #else
00112  #define _bz_inline_et inline
00113 #endif
00114 
00115 #ifdef  BZ_ETPARMS_CONSTREF    // BZ_ETPARM(X): wraps a type X, by const reference or unchanged
00116  #define BZ_ETPARM(X) const X&   // const reference to X
00117 #else
00118  #define BZ_ETPARM(X) X          // X itself
00119 #endif
00120 
00121 #ifdef __DECCXX
00122  // The DEC cxx compiler has problems with loop unrolling
00123  // because of aliasing.  Loop unrolling and anti-aliasing
00124  // are therefore done by Blitz++ itself.
00125  // These #defines turn on options that the defaults above left #undef'd.
00126   #define  BZ_PARTIAL_LOOP_UNROLL
00127   #define  BZ_ARRAY_STACK_TRAVERSAL_CSE_AND_ANTIALIAS
00128   #define  BZ_ARRAY_STACK_TRAVERSAL_UNROLL
00129 #endif
00130 
00131 /*
00132  * BZ_NO_PROPAGATE(X) prevents the compiler from performing
00133  * copy propagation on a variable.  This is used for loop
00134  * unrolling to prevent KAI C++ from rearranging the
00135  * ordering of memory accesses.
00136  */
00137 // Default definition: a no-op that expands to X unchanged.
00138 #define BZ_NO_PROPAGATE(X)   X
00139 // Replaced by a __kai_apply call only when both __KCC and BZ_USE_NO_PROPAGATE are defined.
00140 #ifdef __KCC
00141 #ifdef BZ_USE_NO_PROPAGATE
00142     extern "C" int __kai_apply(const char*, ...);
00143     // #undef takes the bare macro name only, even for a function-like macro.
00144     #undef  BZ_NO_PROPAGATE
00145     #define BZ_NO_PROPAGATE(X)  __kai_apply("(%a)",&X)
00146 #endif
00147 #endif
00148 
00149 #endif // BZ_TUNING_H
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Defines