numpy 2.0.0
src/multiarray/lowlevel_strided_loops.c.src File Reference
#include "Python.h"
#include "structmember.h"
#include <numpy/ndarrayobject.h>
#include <numpy/npy_cpu.h>
#include <numpy/halffloat.h>
#include "lowlevel_strided_loops.h"

Defines

#define PY_SSIZE_T_CLEAN
#define NPY_NO_DEPRECATED_API
#define _MULTIARRAYMODULE
#define NPY_USE_UNALIGNED_ACCESS   0
#define _NPY_NOP1(x)   (x)
#define _NPY_NOP2(x)   (x)
#define _NPY_NOP4(x)   (x)
#define _NPY_NOP8(x)   (x)
#define _NPY_SWAP2(x)
#define _NPY_SWAP4(x)
#define _NPY_SWAP_PAIR4(x)
#define _NPY_SWAP8(x)
#define _NPY_SWAP_PAIR8(x)
#define _NPY_SWAP_INPLACE2(x)
#define _NPY_SWAP_INPLACE4(x)
#define _NPY_SWAP_INPLACE8(x)
#define _NPY_SWAP_INPLACE16(x)

Functions

static void _strided_to_strided (char *dst, npy_intp dst_stride, char *src, npy_intp src_stride, npy_intp N, npy_intp src_itemsize, NpyAuxData *NPY_UNUSED(data))
static void _swap_strided_to_strided (char *dst, npy_intp dst_stride, char *src, npy_intp src_stride, npy_intp N, npy_intp src_itemsize, NpyAuxData *NPY_UNUSED(data))
static void _swap_pair_strided_to_strided (char *dst, npy_intp dst_stride, char *src, npy_intp src_stride, npy_intp N, npy_intp src_itemsize, NpyAuxData *NPY_UNUSED(data))
static void _contig_to_contig (char *dst, npy_intp NPY_UNUSED(dst_stride), char *src, npy_intp NPY_UNUSED(src_stride), npy_intp N, npy_intp src_itemsize, NpyAuxData *NPY_UNUSED(data))
NPY_NO_EXPORT
PyArray_StridedTransferFn
PyArray_GetStridedCopyFn (int aligned, npy_intp src_stride, npy_intp dst_stride, npy_intp itemsize)
NPY_NO_EXPORT
PyArray_StridedTransferFn
function (int aligned, npy_intp src_stride, npy_intp dst_stride, npy_intp itemsize)
NPY_NO_EXPORT
PyArray_StridedTransferFn
PyArray_GetStridedNumericCastFn (int aligned, npy_intp src_stride, npy_intp dst_stride, int src_type_num, int dst_type_num)
NPY_NO_EXPORT npy_intp PyArray_TransferNDimToStrided (npy_intp ndim, char *dst, npy_intp dst_stride, char *src, npy_intp *src_strides, npy_intp src_strides_inc, npy_intp *coords, npy_intp coords_inc, npy_intp *shape, npy_intp shape_inc, npy_intp count, npy_intp src_itemsize, PyArray_StridedTransferFn *stransfer, NpyAuxData *data)
NPY_NO_EXPORT npy_intp PyArray_TransferStridedToNDim (npy_intp ndim, char *dst, npy_intp *dst_strides, npy_intp dst_strides_inc, char *src, npy_intp src_stride, npy_intp *coords, npy_intp coords_inc, npy_intp *shape, npy_intp shape_inc, npy_intp count, npy_intp src_itemsize, PyArray_StridedTransferFn *stransfer, NpyAuxData *data)
NPY_NO_EXPORT npy_intp PyArray_TransferMaskedStridedToNDim (npy_intp ndim, char *dst, npy_intp *dst_strides, npy_intp dst_strides_inc, char *src, npy_intp src_stride, npy_uint8 *mask, npy_intp mask_stride, npy_intp *coords, npy_intp coords_inc, npy_intp *shape, npy_intp shape_inc, npy_intp count, npy_intp src_itemsize, PyArray_MaskedStridedTransferFn *stransfer, NpyAuxData *data)

Define Documentation

#define _MULTIARRAYMODULE
#define _NPY_NOP1 (   x)    (x)
#define _NPY_NOP2 (   x)    (x)
#define _NPY_NOP4 (   x)    (x)
#define _NPY_NOP8 (   x)    (x)
#define _NPY_SWAP2 (   x)
Value:
(((((npy_uint16)x)&0xffu) << 8) | \
                       (((npy_uint16)x) >> 8))
#define _NPY_SWAP4 (   x)
Value:
(((((npy_uint32)x)&0xffu) << 24) | \
                       ((((npy_uint32)x)&0xff00u) << 8) | \
                       ((((npy_uint32)x)&0xff0000u) >> 8) | \
                       (((npy_uint32)x) >> 24))
#define _NPY_SWAP8 (   x)
Value:
(((((npy_uint64)x)&0xffULL) << 56) | \
                       ((((npy_uint64)x)&0xff00ULL) << 40) | \
                       ((((npy_uint64)x)&0xff0000ULL) << 24) | \
                       ((((npy_uint64)x)&0xff000000ULL) << 8) | \
                       ((((npy_uint64)x)&0xff00000000ULL) >> 8) | \
                       ((((npy_uint64)x)&0xff0000000000ULL) >> 24) | \
                       ((((npy_uint64)x)&0xff000000000000ULL) >> 40) | \
                       (((npy_uint64)x) >> 56))
#define _NPY_SWAP_INPLACE16 (   x)
Value:
{ \
        char a = (x)[0]; (x)[0] = (x)[15]; (x)[15] = a; \
        a = (x)[1]; (x)[1] = (x)[14]; (x)[14] = a; \
        a = (x)[2]; (x)[2] = (x)[13]; (x)[13] = a; \
        a = (x)[3]; (x)[3] = (x)[12]; (x)[12] = a; \
        a = (x)[4]; (x)[4] = (x)[11]; (x)[11] = a; \
        a = (x)[5]; (x)[5] = (x)[10]; (x)[10] = a; \
        a = (x)[6]; (x)[6] = (x)[9]; (x)[9] = a; \
        a = (x)[7]; (x)[7] = (x)[8]; (x)[8] = a; \
        }
#define _NPY_SWAP_INPLACE2 (   x)
Value:
{ \
        char a = (x)[0]; (x)[0] = (x)[1]; (x)[1] = a; \
        }
#define _NPY_SWAP_INPLACE4 (   x)
Value:
{ \
        char a = (x)[0]; (x)[0] = (x)[3]; (x)[3] = a; \
        a = (x)[1]; (x)[1] = (x)[2]; (x)[2] = a; \
        }
#define _NPY_SWAP_INPLACE8 (   x)
Value:
{ \
        char a = (x)[0]; (x)[0] = (x)[7]; (x)[7] = a; \
        a = (x)[1]; (x)[1] = (x)[6]; (x)[6] = a; \
        a = (x)[2]; (x)[2] = (x)[5]; (x)[5] = a; \
        a = (x)[3]; (x)[3] = (x)[4]; (x)[4] = a; \
        }
#define _NPY_SWAP_PAIR4 (   x)
Value:
(((((npy_uint32)x)&0xffu) << 8) | \
                       ((((npy_uint32)x)&0xff00u) >> 8) | \
                       ((((npy_uint32)x)&0xff0000u) << 8) | \
                       ((((npy_uint32)x)&0xff000000u) >> 8))
#define _NPY_SWAP_PAIR8 (   x)
Value:
(((((npy_uint64)x)&0xffULL) << 24) | \
                       ((((npy_uint64)x)&0xff00ULL) << 8) | \
                       ((((npy_uint64)x)&0xff0000ULL) >> 8) | \
                       ((((npy_uint64)x)&0xff000000ULL) >> 24) | \
                       ((((npy_uint64)x)&0xff00000000ULL) << 24) | \
                       ((((npy_uint64)x)&0xff0000000000ULL) << 8) | \
                       ((((npy_uint64)x)&0xff000000000000ULL) >> 8) | \
                       ((((npy_uint64)x)&0xff00000000000000ULL) >> 24))
#define NPY_NO_DEPRECATED_API
#define NPY_USE_UNALIGNED_ACCESS   0
x86 platform may work with unaligned access, except when the compiler uses aligned SSE instructions, which gcc does in some cases. This is disabled for the time being.
#define PY_SSIZE_T_CLEAN

Function Documentation

static void _contig_to_contig ( char *  dst,
npy_intp  NPY_UNUSED(dst_stride),
char *  src,
npy_intp  NPY_UNUSED(src_stride),
npy_intp  N,
npy_intp  src_itemsize,
NpyAuxData *  NPY_UNUSED(data)
) [static]
static void _strided_to_strided ( char *  dst,
npy_intp  dst_stride,
char *  src,
npy_intp  src_stride,
npy_intp  N,
npy_intp  src_itemsize,
NpyAuxData *  NPY_UNUSED(data)
) [static]
******* STRIDED COPYING/SWAPPING SPECIALIZED FUNCTIONS ********
begin repeat
#elsize = 1, 2, 4, 8, 16# #elsize_half = 0, 1, 2, 4, 8# #type = npy_uint8, npy_uint16, npy_uint32, npy_uint64, npy_uint128#
begin repeat1
#oper = strided_to_strided, strided_to_contig, contig_to_strided, contig_to_contig#

#src_contig = 0, 0, 1 ,1# #dst_contig = 0, 1, 0 ,1#

begin repeat2
#swap = _NPY_NOP, _NPY_NOP, _NPY_SWAP_INPLACE, _NPY_SWAP, _NPY_SWAP_INPLACE, _NPY_SWAP_PAIR#

#prefix = , _aligned, _swap, _aligned_swap, _swap_pair, _aligned_swap_pair# #is_aligned = 0, 1, 0, 1, 0, 1# #minelsize = 1, 1, 2, 2, 4, 4# #is_swap = 0, 0, 1, 1, 2, 2#

<

@elsize@ >= @minelsize@
end repeat2*
end repeat1*
end repeat*

References c.

static void _swap_pair_strided_to_strided ( char *  dst,
npy_intp  dst_stride,
char *  src,
npy_intp  src_stride,
npy_intp  N,
npy_intp  src_itemsize,
NpyAuxData *  NPY_UNUSED(data)
) [static]

general in-place swap
general in-place swap

static void _swap_strided_to_strided ( char *  dst,
npy_intp  dst_stride,
char *  src,
npy_intp  src_stride,
npy_intp  N,
npy_intp  src_itemsize,
NpyAuxData *  NPY_UNUSED(data)
) [static]

general in-place swap

NPY_NO_EXPORT PyArray_StridedTransferFn* function ( int  aligned,
npy_intp  src_stride,
npy_intp  dst_stride,
npy_intp  itemsize 
)
PyArray_GetStridedCopySwapFn and PyArray_GetStridedCopySwapPairFn are nearly identical, so both are generated from a single repeat block.
begin repeat
#function = PyArray_GetStridedCopySwapFn, PyArray_GetStridedCopySwapPairFn# #tag = , _pair# #not_pair = 1, 0#

Skip the "unaligned" versions on CPUs which support unaligned memory accesses.

<

!NPY_USE_UNALIGNED_ACCESS
contiguous dst
constant src
begin repeat1
#elsize = 2, 4, 8, 16#
end repeat1*
contiguous src
begin repeat1
#elsize = 2, 4, 8, 16#
end repeat1*
general src
begin repeat1
#elsize = 2, 4, 8, 16#
end repeat1*
general dst
constant src
begin repeat1
#elsize = 2, 4, 8, 16#
end repeat1*
contiguous src
begin repeat1
#elsize = 2, 4, 8, 16#
end repeat1*
begin repeat1
#elsize = 2, 4, 8, 16#
end repeat1*
contiguous dst
contiguous src
begin repeat1
#elsize = 2, 4, 8, 16#
end repeat1*
general src
begin repeat1
#elsize = 2, 4, 8, 16#
end repeat1*
general dst
contiguous src
begin repeat1
#elsize = 2, 4, 8, 16#
end repeat1*
general src
begin repeat1
#elsize = 2, 4, 8, 16#
end repeat1*

Referenced by ufunc_frompyfunc().

NPY_NO_EXPORT PyArray_StridedTransferFn* PyArray_GetStridedCopyFn ( int  aligned,
npy_intp  src_stride,
npy_intp  dst_stride,
npy_intp  itemsize 
)
Gives back a function pointer to a specialized function for copying strided memory. Returns NULL if there is a problem with the inputs.

aligned:
Should be 1 if the src and dst pointers are always aligned, 0 otherwise.
src_stride:
Should be the src stride if it will always be the same, NPY_MAX_INTP otherwise.
dst_stride:
Should be the dst stride if it will always be the same, NPY_MAX_INTP otherwise.
itemsize:
Should be the item size if it will always be the same, 0 otherwise.

Skip the "unaligned" versions on CPUs which support unaligned memory accesses.

<

!NPY_USE_UNALIGNED_ACCESS
contiguous dst
constant src
begin repeat
#elsize = 1, 2, 4, 8, 16#
end repeat*
contiguous src
general src
begin repeat
#elsize = 1, 2, 4, 8, 16#
end repeat*
general dst
constant src
begin repeat
#elsize = 1, 2, 4, 8, 16#
end repeat*
contiguous src
begin repeat
#elsize = 1, 2, 4, 8, 16#
end repeat*
begin repeat
#elsize = 1, 2, 4, 8, 16#
end repeat*
contiguous dst
contiguous src
general src
begin repeat
#elsize = 2, 4, 8, 16#
end repeat*
general dst
contiguous src
begin repeat
#elsize = 2, 4, 8, 16#
end repeat*
general src
begin repeat
#elsize = 2, 4, 8, 16#
end repeat*

Referenced by _null_to_strided_set_bool_one().

NPY_NO_EXPORT PyArray_StridedTransferFn* PyArray_GetStridedNumericCastFn ( int  aligned,
npy_intp  src_stride,
npy_intp  dst_stride,
int  src_type_num,
int  dst_type_num 
)
end repeat*
******* STRIDED CASTING SPECIALIZED FUNCTIONS ********
begin repeat

#NAME1 = BOOL, UBYTE, USHORT, UINT, ULONG, ULONGLONG, BYTE, SHORT, INT, LONG, LONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, CFLOAT, CDOUBLE, CLONGDOUBLE#
#name1 = bool, ubyte, ushort, uint, ulong, ulonglong, byte, short, int, long, longlong, half, float, double, longdouble, cfloat, cdouble, clongdouble#
#rname1 = bool, ubyte, ushort, uint, ulong, ulonglong, byte, short, int, long, longlong, half, float, double, longdouble, float, double, longdouble#
#is_bool1 = 1, 0*17# #is_half1 = 0*11, 1, 0*6# #is_float1 = 0*12, 1, 0, 0, 1, 0, 0# #is_double1 = 0*13, 1, 0, 0, 1, 0# #is_complex1 = 0*15, 1*3#
begin repeat1

#NAME2 = BOOL, UBYTE, USHORT, UINT, ULONG, ULONGLONG, BYTE, SHORT, INT, LONG, LONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, CFLOAT, CDOUBLE, CLONGDOUBLE#
#name2 = bool, ubyte, ushort, uint, ulong, ulonglong, byte, short, int, long, longlong, half, float, double, longdouble, cfloat, cdouble, clongdouble#
#rname2 = bool, ubyte, ushort, uint, ulong, ulonglong, byte, short, int, long, longlong, half, float, double, longdouble, float, double, longdouble#
#is_bool2 = 1, 0*17# #is_half2 = 0*11, 1, 0*6# #is_float2 = 0*12, 1, 0, 0, 1, 0, 0# #is_double2 = 0*13, 1, 0, 0, 1, 0# #is_complex2 = 0*15, 1*3#
begin repeat2
#prefix = _aligned,,_aligned_contig,_contig# #aligned = 1,0,1,0# #contig = 0,0,1,1#
end repeat2*
end repeat1*
end repeat*

begin repeat <blockquote>

#NAME1 = BOOL,
UBYTE, USHORT, UINT, ULONG, ULONGLONG, BYTE, SHORT, INT, LONG, LONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, CFLOAT, CDOUBLE, CLONGDOUBLE#
#name1 = bool,
ubyte, ushort, uint, ulong, ulonglong, byte, short, int, long, longlong, half, float, double, longdouble, cfloat, cdouble, clongdouble#

</blockquote>

printf("test fn %d - second %d\n", NPY_@NAME1@, dst_type_num);
begin repeat1 <blockquote>

#NAME2 = BOOL,
UBYTE, USHORT, UINT, ULONG, ULONGLONG, BYTE, SHORT, INT, LONG, LONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, CFLOAT, CDOUBLE, CLONGDOUBLE#
#name2 = bool,
ubyte, ushort, uint, ulong, ulonglong, byte, short, int, long, longlong, half, float, double, longdouble, cfloat, cdouble, clongdouble#

</blockquote>

printf("ret fn %d %d\n", NPY_@NAME1@, NPY_@NAME2@);
end repeat1*
printf("switched test fn %d - second %d\n", NPY_@NAME1@, dst_type_num);
end repeat*

NPY_NO_EXPORT npy_intp PyArray_TransferMaskedStridedToNDim ( npy_intp  ndim,
char *  dst,
npy_intp *  dst_strides,
npy_intp  dst_strides_inc,
char *  src,
npy_intp  src_stride,
npy_uint8 *  mask,
npy_intp  mask_stride,
npy_intp *  coords,
npy_intp  coords_inc,
npy_intp *  shape,
npy_intp  shape_inc,
npy_intp  count,
npy_intp  src_itemsize,
PyArray_MaskedStridedTransferFn *  stransfer,
NpyAuxData *  data
)
See documentation of arguments in lowlevel_strided_loops.h

Finish off dimension 0
If it's 1-dimensional, there's no more to copy
Adjust the src and dst pointers
Finish off dimension 1
If it's 2-dimensional, there's no more to copy
General-case loop for everything else
Iteration structure for dimensions 2 and up
Copy the coordinates and shape
Adjust the dst pointer from the dimension 0 and 1 loop
Increment to the next coordinate
If the last dimension rolled over, we're done
A loop for dimensions 0 and 1

Referenced by npyiter_coalesce_axes().

NPY_NO_EXPORT npy_intp PyArray_TransferNDimToStrided ( npy_intp  ndim,
char *  dst,
npy_intp  dst_stride,
char *  src,
npy_intp *  src_strides,
npy_intp  src_strides_inc,
npy_intp *  coords,
npy_intp  coords_inc,
npy_intp *  shape,
npy_intp  shape_inc,
npy_intp  count,
npy_intp  src_itemsize,
PyArray_StridedTransferFn *  stransfer,
NpyAuxData *  data
)
************ PRIMITIVE FLAT TO/FROM NDIM FUNCTIONS *************
See documentation of arguments in lowlevel_strided_loops.h

Finish off dimension 0
If it's 1-dimensional, there's no more to copy
Adjust the src and dst pointers
Finish off dimension 1
If it's 2-dimensional, there's no more to copy
General-case loop for everything else
Iteration structure for dimensions 2 and up
Copy the coordinates and shape
Adjust the src pointer from the dimension 0 and 1 loop
Increment to the next coordinate
If the last dimension rolled over, we're done
A loop for dimensions 0 and 1

NPY_NO_EXPORT npy_intp PyArray_TransferStridedToNDim ( npy_intp  ndim,
char *  dst,
npy_intp *  dst_strides,
npy_intp  dst_strides_inc,
char *  src,
npy_intp  src_stride,
npy_intp *  coords,
npy_intp  coords_inc,
npy_intp *  shape,
npy_intp  shape_inc,
npy_intp  count,
npy_intp  src_itemsize,
PyArray_StridedTransferFn *  stransfer,
NpyAuxData *  data
)
See documentation of arguments in lowlevel_strided_loops.h

Finish off dimension 0
If it's 1-dimensional, there's no more to copy
Adjust the src and dst pointers
Finish off dimension 1
If it's 2-dimensional, there's no more to copy
General-case loop for everything else
Iteration structure for dimensions 2 and up
Copy the coordinates and shape
Adjust the dst pointer from the dimension 0 and 1 loop
Increment to the next coordinate
If the last dimension rolled over, we're done
A loop for dimensions 0 and 1