numpy 2.0.0
src/multiarray/nditer_impl.h
Go to the documentation of this file.
00001 /*
00002  * This is a PRIVATE INTERNAL NumPy header, intended to be used *ONLY*
00003  * by the iterator implementation code. All other internal NumPy code
00004  * should use the exposed iterator API.
00005  */
00006 #ifndef NPY_ITERATOR_IMPLEMENTATION_CODE
00007 #error "This header is intended for use ONLY by iterator implementation code."
00008 #endif
00009 
00010 #ifndef _NPY_PRIVATE__NDITER_IMPL_H_
00011 #define _NPY_PRIVATE__NDITER_IMPL_H_
00012 
00013 #define PY_SSIZE_T_CLEAN
00014 #include "Python.h"
00015 #include "structmember.h"
00016 
00017 #define NPY_NO_DEPRECATED_API
00018 #define _MULTIARRAYMODULE
00019 #include <numpy/ndarrayobject.h>
00020 #include <numpy/npy_3kcompat.h>
00021 #include "convert_datatype.h"
00022 
00023 #include "lowlevel_strided_loops.h"
00024 
00025 /********** ITERATOR CONSTRUCTION TIMING **************/
/* Set to 1 to enable the construction timing macros below. */
#define NPY_IT_CONSTRUCTION_TIMING 0

#if NPY_IT_CONSTRUCTION_TIMING
/*
 * Stores the 64-bit value returned by the `rdtsc` instruction in `var`.
 * Uses GCC-style inline assembly.
 */
#define NPY_IT_TIME_POINT(var) { \
            unsigned int hi, lo; \
            __asm__ __volatile__ ( \
                "rdtsc" \
                : "=d" (hi), "=a" (lo)); \
            var = (((unsigned long long)hi) << 32) | lo; \
        }
/*
 * Prints a "start" line labelled with the name of `var` and stores
 * `var` in `c_temp`.  `c_temp` is not declared here; it must be in
 * scope wherever the macro is expanded.
 */
#define NPY_IT_PRINT_TIME_START(var) { \
            printf("%30s: start\n", #var); \
            c_temp = var; \
        }
/*
 * Prints the difference between `var` and `c_temp`, labelled with the
 * name of `var`, then stores `var` in `c_temp`.
 */
#define NPY_IT_PRINT_TIME_VAR(var) { \
            printf("%30s: %6.0f clocks\n", #var, \
                    ((double)(var-c_temp))); \
            c_temp = var; \
        }
#else
/*
 * Timing disabled: every timing macro expands to nothing, so call
 * sites compile whether or not they are guarded by
 * NPY_IT_CONSTRUCTION_TIMING.
 */
#define NPY_IT_TIME_POINT(var)
#define NPY_IT_PRINT_TIME_START(var)
#define NPY_IT_PRINT_TIME_VAR(var)
#endif
00048 
00049 /******************************************************/
00050 
00051 /********** PRINTF DEBUG TRACING **************/
/* Set to 1 to make the NPY_IT_DBG_PRINT* macros call printf. */
#define NPY_IT_DBG_TRACING 0

#if !NPY_IT_DBG_TRACING
/* Tracing off: the macros expand to nothing; arguments are not evaluated. */
#define NPY_IT_DBG_PRINT(s)
#define NPY_IT_DBG_PRINT1(s, p1)
#define NPY_IT_DBG_PRINT2(s, p1, p2)
#define NPY_IT_DBG_PRINT3(s, p1, p2, p3)
#else
/*
 * Tracing on: NPY_IT_DBG_PRINT prints `s` as a plain string, while the
 * numbered variants use `s` as the printf format for 1 to 3 parameters.
 */
#define NPY_IT_DBG_PRINT(s) printf("%s", s)
#define NPY_IT_DBG_PRINT1(s, p1) printf(s, p1)
#define NPY_IT_DBG_PRINT2(s, p1, p2) printf(s, p1, p2)
#define NPY_IT_DBG_PRINT3(s, p1, p2, p3) printf(s, p1, p2, p3)
#endif
00065 /**********************************************/
00066 
/*
 * Rounds a number of bytes up to the next multiple of the intp size
 * (4 bytes when NPY_SIZEOF_INTP == 4, otherwise 8 bytes).
 *
 * The argument is parenthesized so that compound expressions such as
 * `a << b` or `c ? x : y` are rounded as a whole rather than being
 * split by operator precedence.
 */
#if NPY_SIZEOF_INTP == 4
#define NPY_INTP_ALIGNED(size) (((size) + 0x3) & (-0x4))
#else
#define NPY_INTP_ALIGNED(size) (((size) + 0x7) & (-0x8))
#endif
00073 
/*
 * Internal iterator flags.
 *
 * Each flag occupies its own bit so that several of them can be
 * combined in a single itflags value.
 */

/* perm is the identity permutation */
#define NPY_ITFLAG_IDENTPERM          0x0001
/* perm contains negative entries, which indicate flipped axes */
#define NPY_ITFLAG_NEGPERM            0x0002
/* an index is being tracked */
#define NPY_ITFLAG_HASINDEX           0x0004
/* a multi-index is being tracked */
#define NPY_ITFLAG_HASMULTIINDEX      0x0008
/* the iteration order was forced at construction time */
#define NPY_ITFLAG_FORCEDORDER        0x0010
/* the inner loop is handled outside of the iterator */
#define NPY_ITFLAG_EXLOOP             0x0020
/* the iterator is ranged */
#define NPY_ITFLAG_RANGE              0x0040
/* the iterator is buffered */
#define NPY_ITFLAG_BUFFER             0x0080
/* grow the buffered inner loop whenever that is possible */
#define NPY_ITFLAG_GROWINNER          0x0100
/* only one iteration exists, so iternext can be specialized for it */
#define NPY_ITFLAG_ONEITERATION       0x0200
/* buffer allocation is delayed until the first Reset* call */
#define NPY_ITFLAG_DELAYBUF           0x0400
/* iternext needs API access while iterating */
#define NPY_ITFLAG_NEEDSAPI           0x0800
/* at least one operand is being reduced */
#define NPY_ITFLAG_REDUCE             0x1000
/* the reduce loops do not need to be recalculated next time */
#define NPY_ITFLAG_REUSE_REDUCE_LOOPS 0x2000
00104 
/*
 * Internal per-operand iterator flags.
 *
 * The eight flags are distinct bits that together fill one byte.
 */

/* the operand is written to */
#define NPY_OP_ITFLAG_WRITE         0x01
/* the operand is read from */
#define NPY_OP_ITFLAG_READ          0x02
/* the operand needs type conversion, byte swapping or alignment */
#define NPY_OP_ITFLAG_CAST          0x04
/* the operand never needs buffering */
#define NPY_OP_ITFLAG_BUFNEVER      0x08
/* the operand is aligned */
#define NPY_OP_ITFLAG_ALIGNED       0x10
/* the operand is being reduced */
#define NPY_OP_ITFLAG_REDUCE        0x20
/* the operand is temporary and has no backing array */
#define NPY_OP_ITFLAG_VIRTUAL       0x40
/* copying buffer -> array for this operand requires masking */
#define NPY_OP_ITFLAG_WRITEMASKED   0x80
00123 
00124 /*
00125  * The data layout of the iterator is fully specified by
00126  * a triple (itflags, ndim, nop).  These three variables
00127  * are expected to exist in all functions calling these macros,
00128  * either as true variables initialized to the correct values
00129  * from the iterator, or as constants in the case of specialized
00130  * functions such as the various iternext functions.
00131  */
00132 
00133 struct NpyIter_InternalOnly {
00134     /* Initial fixed position data */
00135     npy_uint32 itflags;
00136     npy_uint8 ndim, nop;
00137     npy_int8 maskop;
00138     npy_uint8 unused_padding;
00139     npy_intp itersize, iterstart, iterend;
00140     /* iterindex is only used if RANGED or BUFFERED is set */
00141     npy_intp iterindex;
00142     /* The rest is variable */
00143     char iter_flexdata;
00144 };
00145 
/*
 * Short names for the buffer-data and axis-data structures.  The
 * struct tags only need to be declared at this point, which is enough
 * for the pointer casts in the NIT_* access macros.
 */
typedef struct NpyIter_BD NpyIter_BufferData;
typedef struct NpyIter_AD NpyIter_AxisData;
00148 
/*
 * Byte sizes of the iterator members.
 *
 * Every macro takes the (itflags, ndim, nop) triple.  The arguments
 * are parenthesized in the expansions so that compound expressions
 * (for example `nop + 1` or `flags_a | flags_b`) give the expected
 * result.
 */

/* perm: NPY_MAXDIMS bytes, rounded up to an intp boundary */
#define NIT_PERM_SIZEOF(itflags, ndim, nop) \
        (NPY_INTP_ALIGNED(NPY_MAXDIMS))
/* dtypes: one intp-sized slot per operand */
#define NIT_DTYPES_SIZEOF(itflags, ndim, nop) \
        ((NPY_SIZEOF_INTP)*(nop))
/* resetdataptr: nop + 1 intp-sized slots */
#define NIT_RESETDATAPTR_SIZEOF(itflags, ndim, nop) \
        ((NPY_SIZEOF_INTP)*((nop)+1))
/* baseoffsets: nop + 1 intp-sized slots */
#define NIT_BASEOFFSETS_SIZEOF(itflags, ndim, nop) \
        ((NPY_SIZEOF_INTP)*((nop)+1))
/* operands: one intp-sized slot per operand */
#define NIT_OPERANDS_SIZEOF(itflags, ndim, nop) \
        ((NPY_SIZEOF_INTP)*(nop))
/* opitflags: one byte per operand, rounded up to an intp boundary */
#define NIT_OPITFLAGS_SIZEOF(itflags, ndim, nop) \
        (NPY_INTP_ALIGNED((nop)))
/*
 * bufferdata: present only when NPY_ITFLAG_BUFFER is set.  It holds the
 * 6 fixed intp members of struct NpyIter_BD followed by the 9
 * per-operand arrays addressed by the NBF_* macros.
 */
#define NIT_BUFFERDATA_SIZEOF(itflags, ndim, nop) \
        (((itflags) & NPY_ITFLAG_BUFFER) ? \
            ((NPY_SIZEOF_INTP)*(6 + 9*(nop))) : 0)
00164 
/*
 * Byte offsets of the iterator members, measured from
 * iter->iter_flexdata.
 *
 * The members follow each other in this order: perm, dtypes,
 * resetdataptr, baseoffsets, operands, opitflags, bufferdata, axisdata.
 * The offset of each member is the size of the preceding member added
 * to that member's own offset.
 */
#define NIT_PERM_OFFSET() (0)
#define NIT_DTYPES_OFFSET(itflags, ndim, nop) ( \
        NIT_PERM_SIZEOF(itflags, ndim, nop) + \
        NIT_PERM_OFFSET())
#define NIT_RESETDATAPTR_OFFSET(itflags, ndim, nop) ( \
        NIT_DTYPES_SIZEOF(itflags, ndim, nop) + \
        NIT_DTYPES_OFFSET(itflags, ndim, nop))
#define NIT_BASEOFFSETS_OFFSET(itflags, ndim, nop) ( \
        NIT_RESETDATAPTR_SIZEOF(itflags, ndim, nop) + \
        NIT_RESETDATAPTR_OFFSET(itflags, ndim, nop))
#define NIT_OPERANDS_OFFSET(itflags, ndim, nop) ( \
        NIT_BASEOFFSETS_SIZEOF(itflags, ndim, nop) + \
        NIT_BASEOFFSETS_OFFSET(itflags, ndim, nop))
#define NIT_OPITFLAGS_OFFSET(itflags, ndim, nop) ( \
        NIT_OPERANDS_SIZEOF(itflags, ndim, nop) + \
        NIT_OPERANDS_OFFSET(itflags, ndim, nop))
#define NIT_BUFFERDATA_OFFSET(itflags, ndim, nop) ( \
        NIT_OPITFLAGS_SIZEOF(itflags, ndim, nop) + \
        NIT_OPITFLAGS_OFFSET(itflags, ndim, nop))
#define NIT_AXISDATA_OFFSET(itflags, ndim, nop) ( \
        NIT_BUFFERDATA_SIZEOF(itflags, ndim, nop) + \
        NIT_BUFFERDATA_OFFSET(itflags, ndim, nop))
00189 
/*
 * Internal-only ITERATOR DATA MEMBER ACCESS
 *
 * The first group reads the fixed members of the iterator struct and
 * can be used as lvalues.  `iter` is parenthesized in every expansion
 * so that pointer expressions such as `iters + 1` are accepted.
 */
#define NIT_ITFLAGS(iter) \
        ((iter)->itflags)
#define NIT_NDIM(iter) \
        ((iter)->ndim)
#define NIT_NOP(iter) \
        ((iter)->nop)
#define NIT_MASKOP(iter) \
        ((iter)->maskop)
#define NIT_ITERSIZE(iter) \
        ((iter)->itersize)
#define NIT_ITERSTART(iter) \
        ((iter)->iterstart)
#define NIT_ITEREND(iter) \
        ((iter)->iterend)
#define NIT_ITERINDEX(iter) \
        ((iter)->iterindex)
/*
 * The second group yields a pointer into the variable-size part of the
 * iterator: the address of iter_flexdata plus the member's byte offset.
 * Apart from NIT_PERM, these macros expect variables named itflags,
 * ndim and nop to be in scope at the point of use.
 */
#define NIT_PERM(iter)  ((npy_int8 *)( \
        &(iter)->iter_flexdata + NIT_PERM_OFFSET()))
#define NIT_DTYPES(iter) ((PyArray_Descr **)( \
        &(iter)->iter_flexdata + NIT_DTYPES_OFFSET(itflags, ndim, nop)))
#define NIT_RESETDATAPTR(iter) ((char **)( \
        &(iter)->iter_flexdata + NIT_RESETDATAPTR_OFFSET(itflags, ndim, nop)))
#define NIT_BASEOFFSETS(iter) ((npy_intp *)( \
        &(iter)->iter_flexdata + NIT_BASEOFFSETS_OFFSET(itflags, ndim, nop)))
#define NIT_OPERANDS(iter) ((PyArrayObject **)( \
        &(iter)->iter_flexdata + NIT_OPERANDS_OFFSET(itflags, ndim, nop)))
/* No cast here: the result keeps the type of &iter_flexdata (char *) */
#define NIT_OPITFLAGS(iter) ( \
        &(iter)->iter_flexdata + NIT_OPITFLAGS_OFFSET(itflags, ndim, nop))
#define NIT_BUFFERDATA(iter) ((NpyIter_BufferData *)( \
        &(iter)->iter_flexdata + NIT_BUFFERDATA_OFFSET(itflags, ndim, nop)))
#define NIT_AXISDATA(iter) ((NpyIter_AxisData *)( \
        &(iter)->iter_flexdata + NIT_AXISDATA_OFFSET(itflags, ndim, nop)))
00223 
00224 /* Internal-only BUFFERDATA MEMBER ACCESS */
00225 struct NpyIter_BD {
00226     npy_intp buffersize, size, bufiterend,
00227              reduce_pos, reduce_outersize, reduce_outerdim;
00228     npy_intp bd_flexdata;
00229 };
/* Fixed members of the buffer data; each macro is usable as an lvalue. */
#define NBF_BUFFERSIZE(bufferdata)       ((bufferdata)->buffersize)
#define NBF_SIZE(bufferdata)             ((bufferdata)->size)
#define NBF_BUFITEREND(bufferdata)       ((bufferdata)->bufiterend)
#define NBF_REDUCE_POS(bufferdata)       ((bufferdata)->reduce_pos)
#define NBF_REDUCE_OUTERSIZE(bufferdata) ((bufferdata)->reduce_outersize)
#define NBF_REDUCE_OUTERDIM(bufferdata)  ((bufferdata)->reduce_outerdim)

/*
 * Per-operand arrays stored back to back from bd_flexdata onwards.
 * Array number k (0..8) starts k*nop intp-sized slots after
 * bd_flexdata; a variable named nop must be in scope at the point of
 * use.
 */
#define NBF_STRIDES(bufferdata) \
        (&(bufferdata)->bd_flexdata)
#define NBF_PTRS(bufferdata) \
        ((char **)(&(bufferdata)->bd_flexdata + (nop)))
#define NBF_REDUCE_OUTERSTRIDES(bufferdata) \
        (&(bufferdata)->bd_flexdata + 2*(nop))
#define NBF_REDUCE_OUTERPTRS(bufferdata) \
        ((char **)(&(bufferdata)->bd_flexdata + 3*(nop)))
#define NBF_READTRANSFERFN(bufferdata) \
        ((PyArray_StridedTransferFn **)(&(bufferdata)->bd_flexdata + 4*(nop)))
#define NBF_READTRANSFERDATA(bufferdata) \
        ((NpyAuxData **)(&(bufferdata)->bd_flexdata + 5*(nop)))
#define NBF_WRITETRANSFERFN(bufferdata) \
        ((PyArray_StridedTransferFn **)(&(bufferdata)->bd_flexdata + 6*(nop)))
#define NBF_WRITETRANSFERDATA(bufferdata) \
        ((NpyAuxData **)(&(bufferdata)->bd_flexdata + 7*(nop)))
#define NBF_BUFFERS(bufferdata) \
        ((char **)(&(bufferdata)->bd_flexdata + 8*(nop)))
00254 
00255 /* Internal-only AXISDATA MEMBER ACCESS. */
00256 struct NpyIter_AD {
00257     npy_intp shape, index;
00258     npy_intp ad_flexdata;
00259 };
/* Fixed members of one AXISDATA entry; both are usable as lvalues. */
#define NAD_SHAPE(axisdata) ((axisdata)->shape)
#define NAD_INDEX(axisdata) ((axisdata)->index)
/* The strides array begins at ad_flexdata. */
#define NAD_STRIDES(axisdata) ( \
        &(axisdata)->ad_flexdata + 0)
/*
 * The pointers array begins nop + 1 pointer-sized slots after
 * ad_flexdata.  A variable named nop must be in scope at the point of
 * use.  The whole expansion is wrapped in parentheses so that postfix
 * and unary operators, e.g. NAD_PTRS(axisdata)[i], apply to the
 * resulting pointer.
 */
#define NAD_PTRS(axisdata) ( \
        (char **)&(axisdata)->ad_flexdata + ((nop) + 1))

/*
 * Number of strides: nop, plus one more when NPY_ITFLAG_HASINDEX is
 * set.  Variables named nop and itflags must be in scope at the point
 * of use.
 */
#define NAD_NSTRIDES() \
        ((nop) + (((itflags) & NPY_ITFLAG_HASINDEX) ? 1 : 0))
00269 
/*
 * Size in bytes of one AXISDATA struct within the iterator.
 *
 * Counted in intp-sized slots: 1 for shape, 1 for index, and
 * 2*(nop+1) for the stride[nop+1] and ptr[nop+1] arrays.
 */
#define NIT_AXISDATA_SIZEOF(itflags, ndim, nop) \
        ((2 + 2*((nop)+1)) * NPY_SIZEOF_INTP)
00279 
/*
 * Macros to step through AXISDATA entries.  Both require a variable
 * named sizeof_axisdata to be in scope, previously initialized to
 * NIT_AXISDATA_SIZEOF(itflags, ndim, nop).
 *
 * NIT_INDEX_AXISDATA yields a pointer to the entry `index` entries away
 * from `axisdata`, without modifying `axisdata`.
 *
 * NIT_ADVANCE_AXISDATA moves `axisdata` itself by `count` entries.
 * `axisdata` must be a modifiable lvalue and is evaluated twice.  The
 * assignment is wrapped in parentheses so that the macro can be used
 * inside a larger expression; its value is the advanced pointer.
 */
#define NIT_INDEX_AXISDATA(axisdata, index) ((NpyIter_AxisData *) \
        (((char *)(axisdata)) + (index)*sizeof_axisdata))
#define NIT_ADVANCE_AXISDATA(axisdata, count) \
        ((axisdata) = NIT_INDEX_AXISDATA(axisdata, count))
00289 
/*
 * Size in bytes of the whole iterator: the fixed header struct, the
 * variable-size members that precede the axisdata, and ndim AXISDATA
 * entries.
 */
#define NIT_SIZEOF_ITERATOR(itflags, ndim, nop) \
        (sizeof(struct NpyIter_InternalOnly) + \
         NIT_AXISDATA_OFFSET(itflags, ndim, nop) + \
         NIT_AXISDATA_SIZEOF(itflags, ndim, nop)*(ndim))
00295 
00296 /* Internal helper functions shared between implementation files */
00297 NPY_NO_EXPORT void
00298 npyiter_coalesce_axes(NpyIter *iter);
00299 NPY_NO_EXPORT int
00300 npyiter_allocate_buffers(NpyIter *iter, char **errmsg);
00301 NPY_NO_EXPORT void
00302 npyiter_goto_iterindex(NpyIter *iter, npy_intp iterindex);
00303 NPY_NO_EXPORT void
00304 npyiter_copy_from_buffers(NpyIter *iter);
00305 NPY_NO_EXPORT void
00306 npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs);
00307 
00308 
00309 #endif