BeBOP Optimized Sparse Kernel Interface Library
1.0.1h
|
Conversion between CSR and VBR format. More...
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <oski/config.h>
#include <oski/common.h>
#include <oski/modloader.h>
#include <oski/matrix.h>
#include <oski/CSR/format.h>
#include <oski/VBR/format.h>
#include <oski/VBR/module.h>
#include <oski/xforms_internal.h>
Defines | |
#define | MAX(a, b) ((a) > (b) ? (a) : (b)) |
Return max of two side-effect free values. | |
Typedefs | |
typedef unsigned char | flag_t |
Flag type (takes the values 0 or 1) | |
Functions | |
static int | ExpandSymm (const oski_matCSR_t *mat, const oski_matcommon_t *props, oski_matCSR_t **p_mat_full) |
static int | TransposeSymm (const oski_matCSR_t *mat, const oski_matcommon_t *props, oski_matCSR_t **p_mat_trans) |
static void | DestroyCSR (oski_matCSR_t *mat) |
static void | SetFlags (oski_index_t n_x, const oski_index_t *restrict x, flag_t value, oski_index_t n, flag_t *restrict f) |
Scatters a specified value to all elements of a dense flag vector ![]() ![]() | |
static oski_index_t | CountUnique (oski_index_t n_x, const oski_index_t *restrict x, oski_index_t n, flag_t *restrict workspace) |
Counts the number of unique structurally non-zero elements in a sparse vector ![]() | |
static oski_index_t | CountNumCommonSpVec (oski_index_t i1, oski_index_t n1, const oski_index_t *restrict ind1, oski_index_t i2, oski_index_t n2, const oski_index_t *restrict ind2, oski_index_t b, int has_unit_diag, oski_index_t n, flag_t *restrict workspace) |
Returns the number of unique elements that two sparse vectors, ![]() ![]() | |
static oski_index_t | FindBlockPart (oski_index_t m, const oski_index_t *restrict ptr, const oski_index_t *restrict ind, oski_index_t b, int has_unit_diag, double threshold, oski_index_t *b_start, oski_index_t n_max, flag_t *workspace) |
Determine a partitioning of a CSR structure into groups of consecutive rows that have similar non-zero patterns. | |
static void | MakeBlockMap (const oski_index_t *restrict I_starts, oski_index_t M, oski_index_t *restrict map) |
Computes a row-to-block-row lookup table. | |
static int | GetPart (oski_index_t m, oski_index_t n, const oski_index_t *ptr, const oski_index_t *ind, oski_index_t b, int has_unit_diag, int is_symm, const oski_index_t *Tptr, const oski_index_t *Tind, oski_index_t b_T, int has_unit_diag_T, double threshold_r, double threshold_c, oski_index_t *p_M, oski_index_t *p_N, oski_index_t **p_brow, oski_index_t **p_bcol, flag_t *work_f) |
static void | CountVBRSize (oski_index_t m, oski_index_t n, const oski_index_t *restrict ptr, const oski_index_t *restrict ind, oski_index_t b, int has_unit_diag, oski_index_t M, oski_index_t N, oski_index_t *restrict brow, oski_index_t *restrict bcol, oski_index_t *p_nb, oski_index_t *p_nnz, flag_t *restrict f_workspace, oski_index_t *restrict i_workspace) |
Given a CSR matrix and a candidate block row/column partitioning, counts the number of non-zero blocks and stored non-zero values required to represent a VBR version of the matrix. | |
static void | CopyToVBR (oski_index_t m, oski_index_t n, const oski_index_t *restrict ptr, const oski_index_t *restrict ind, const oski_value_t *restrict val, oski_index_t b, int has_unit_diag, oski_index_t M, oski_index_t N, oski_index_t *restrict brow, oski_index_t *restrict bcol, oski_index_t nb, oski_index_t nnz, oski_index_t *V_ptr, oski_index_t *V_ind, oski_index_t *V_valptr, oski_value_t *V_val, flag_t *restrict f_workspace, oski_index_t *restrict i_workspace) |
Given a CSR matrix and a candidate block row/column partitioning, counts the number of non-zero blocks and stored non-zero values required to represent a VBR version of the matrix. | |
static int | ConvertToVBR (oski_index_t m, oski_index_t n, const oski_index_t *ptr, const oski_index_t *ind, const oski_value_t *val, oski_index_t b, int has_unit_diag, int is_symm, const oski_index_t *Tptr, const oski_index_t *Tind, oski_index_t b_T, int has_unit_diag_T, double threshold_r, double threshold_c, oski_index_t *p_M, oski_index_t *p_N, oski_index_t **p_brow, oski_index_t **p_bcol, oski_index_t **p_V_ptr, oski_index_t **p_V_ind, oski_index_t **p_V_valptr, oski_value_t **p_V_val) |
void * | oski_CreateMatReprFromCSR (const oski_matCSR_t *mat, const oski_matcommon_t *props,...) |
The variable argument list must contain the following parameters in the order listed: | |
oski_matCSR_t * | oski_ConvertMatReprToCSR (const void *mat, const oski_matcommon_t *props) |
Method: Convert to CSR format. | |
void | oski_DestroyMatRepr (void *mat) |
Method: Destroy matrix type-specific representation. | |
void * | oski_CopyMatRepr (const void *mat, const oski_matcommon_t *props) |
Method: Duplicate a matrix representation. | |
int | oski_CreateLuaMatReprFromCSR (lua_State *L) |
The VBR implementation expects the following arguments on the stack: |
Conversion between CSR and VBR format.
static void CopyToVBR | ( | oski_index_t | m, |
oski_index_t | n, | ||
const oski_index_t *restrict | ptr, | ||
const oski_index_t *restrict | ind, | ||
const oski_value_t *restrict | val, | ||
oski_index_t | b, | ||
int | has_unit_diag, | ||
oski_index_t | M, | ||
oski_index_t | N, | ||
oski_index_t *restrict | brow, | ||
oski_index_t *restrict | bcol, | ||
oski_index_t | nb, | ||
oski_index_t | nnz, | ||
oski_index_t * | V_ptr, | ||
oski_index_t * | V_ind, | ||
oski_index_t * | V_valptr, | ||
oski_value_t * | V_val, | ||
flag_t *restrict | f_workspace, | ||
oski_index_t *restrict | i_workspace | ||
) | [static] |
Given a CSR matrix and a candidate block row/column partitioning, counts the number of non-zero blocks and stored non-zero values required to represent a VBR version of the matrix.
[in] | m | No. of rows. |
[in] | n | No. of columns. |
[in] | ptr | Row pointers, of size m+1. |
[in] | ind | Column indices, of size equal to the number of stored structural non-zero entries. |
[in] | b | Index base of CSR representation (0- or 1-based). |
[in] | has_unit_diag | Non-zero if the CSR representation assumes an implicit unit diagonal. |
[in] | M | No. of block rows in the VBR representation. |
[in] | N | No. of block columns in the VBR representation. |
[in] | brow | Starting row of each of the M block rows in the VBR representation. brow is of size M+1, and 0 = brow[0] < brow[1] < ... < brow[M] = m. |
[in] | bcol | Starting column of each of the M block columns in the VBR representation. bcol is of size M+1, and 0 = bcol[0] < bcol[1] < ... < bcol[N] = n. |
[out] | nb | Number of blocks needed to store the CSR matrix in VBR format. |
[out] | nnz | Number of non-zero elements needed to store the CSR matrix in VBR format. |
[in,out] | f_workspace | Temporary workspace, of size at least N. This workspace must be initialized to zero on entry, and will be returned as all zero on exit. |
[in,out] | i_workspace | Additional temporary workspace, of size at least 2*n. The values on entry are not used. On exit, the values are undefined. |
References MakeBlockMap(), oski_ZeroMem(), VAL_INC, and VAL_SET_ONE.
static oski_index_t CountNumCommonSpVec | ( | oski_index_t | i1, |
oski_index_t | n1, | ||
const oski_index_t *restrict | ind1, | ||
oski_index_t | i2, | ||
oski_index_t | n2, | ||
const oski_index_t *restrict | ind2, | ||
oski_index_t | b, | ||
int | has_unit_diag, | ||
oski_index_t | n, | ||
flag_t *restrict | workspace | ||
) | [static] |
Returns the number of unique elements that two sparse vectors, and
, have in common.
These vectors are assumed to be row (or column) vectors of a larger matrix, located at positions row (column) and
, respectively.
[in] | i1 | Index of vector ![]() |
[in] | n1 | No. of elements stored with ![]() |
[in] | ind1 | Non-zero indices associated with ![]() |
[in] | i2 | Index of vector ![]() |
[in] | n2 | No. of elements stored with ![]() |
[in] | ind2 | Non-zero indices associated with ![]() |
[in] | b | Index base (0- or 1-based) for i1, i2, and all values in ind1 and ind2. |
[in] | n | A value greater than or equal to the maximum value in either ind1 or ind2. That is, for all 0 <= k1 < n1, n >= ind1[k1] and for all 0 <= k2 < n2, n >= ind2[k2]. |
[in,out] | workspace | A buffer of size 2*(n+1), all of whose elements must be zero. |
References SetFlags().
Referenced by FindBlockPart().
static oski_index_t CountUnique | ( | oski_index_t | n_x, |
const oski_index_t *restrict | x, | ||
oski_index_t | n, | ||
flag_t *restrict | workspace | ||
) | [static] |
Counts the number of unique structurally non-zero elements in a sparse vector .
[in] | n_x | Number ![]() ![]() |
[in] | x | Sparse vector indices of ![]() |
[in] | n | Value >= maximum index in ![]() |
[out] | workspace | Buffer of size at least n+1, initialized to all zeros on entry. |
References SetFlags().
Referenced by FindBlockPart().
static void CountVBRSize | ( | oski_index_t | m, |
oski_index_t | n, | ||
const oski_index_t *restrict | ptr, | ||
const oski_index_t *restrict | ind, | ||
oski_index_t | b, | ||
int | has_unit_diag, | ||
oski_index_t | M, | ||
oski_index_t | N, | ||
oski_index_t *restrict | brow, | ||
oski_index_t *restrict | bcol, | ||
oski_index_t * | p_nb, | ||
oski_index_t * | p_nnz, | ||
flag_t *restrict | f_workspace, | ||
oski_index_t *restrict | i_workspace | ||
) | [static] |
Given a CSR matrix and a candidate block row/column partitioning, counts the number of non-zero blocks and stored non-zero values required to represent a VBR version of the matrix.
[in] | m | No. of rows. |
[in] | n | No. of columns. |
[in] | ptr | Row pointers, of size m+1. |
[in] | ind | Column indices, of size equal to the number of stored structural non-zero entries. |
[in] | b | Index base of CSR representation (0- or 1-based). |
[in] | has_unit_diag | Non-zero if the CSR representation assumes an implicit unit diagonal. |
[in] | M | No. of block rows in the VBR representation. |
[in] | N | No. of block columns in the VBR representation. |
[in] | brow | Starting row of each of the M block rows in the VBR representation. brow is of size M+1, and 0 = brow[0] < brow[1] < ... < brow[M] = m. |
[in] | bcol | Starting column of each of the M block columns in the VBR representation. bcol is of size M+1, and 0 = bcol[0] < bcol[1] < ... < bcol[N] = n. |
[out] | p_nb | Pointer to the number of non-zero blocks needed to store the CSR matrix in VBR format. If p_nb == NULL, then this value is not returned. |
[out] | p_nnz | Pointer to the number of non-zero elements needed to store the CSR matrix in VBR format. If p_nnz == NULL, then this value is not returned. |
[in,out] | f_workspace | Temporary workspace, of size at least N. This workspace must be initialized to zero on entry, and will be returned as all zero on exit. |
[in,out] | i_workspace | Additional temporary workspace, of size at least n. The values on entry are not used. On exit, the values are undefined. |
References MakeBlockMap().
static oski_index_t FindBlockPart | ( | oski_index_t | m, |
const oski_index_t *restrict | ptr, | ||
const oski_index_t *restrict | ind, | ||
oski_index_t | b, | ||
int | has_unit_diag, | ||
double | threshold, | ||
oski_index_t * | b_start, | ||
oski_index_t | n_max, | ||
flag_t * | workspace | ||
) | [static] |
Determine a partitioning of a CSR structure into groups of consecutive rows that have similar non-zero patterns.
This routine finds a block row partitioning, defined as a sequence of block row starting positions
, where the
block row consists of all rows starting at
and ending at
. The block rows satisfy the following property.
[in] | m | Number of rows, ![]() |
[in] | ptr | Row pointers, of size ![]() |
[in] | ind | Column indices, of size ptr[m]. |
[in] | b | Index base (0- or 1-based) |
[in] | has_unit_diag | Non-zero if matrix has an implicit unit diagonal. |
[in] | threshold | The similarity threshold, ![]() |
[out] | b_start | Buffer in which to store the block row starts, of size at least ![]() |
[in] | n_max | Maximum value of any column index in ind. |
[in,out] | workspace | A buffer of size at least 2*(n_max+1) to be used as temporary storage. The workspace must be all zero initially, and if so, will be returned in an all-zero state. |
The algorithm is greedy, starting at row 0 and growing each block row until the similarity property no longer holds.
References CountNumCommonSpVec(), CountUnique(), and MAX.
static void MakeBlockMap | ( | const oski_index_t *restrict | I_starts, |
oski_index_t | M, | ||
oski_index_t *restrict | map | ||
) | [static] |
Computes a row-to-block-row lookup table.
[in] | I_starts | Start of each row of a consecutive block row partitioning. |
[in] | M | Number of block rows. |
[in,out] | map | An array of size at least I_starts[M] to store the lookup table. |
Referenced by CopyToVBR(), and CountVBRSize().
int oski_CreateLuaMatReprFromCSR | ( | lua_State * | L | ) |
The VBR implementation expects the following arguments on the stack:
Matrix-type specific method to convert from a CSR matrix, with arguments passed on the Lua stack.
static void SetFlags | ( | oski_index_t | n_x, |
const oski_index_t *restrict | x, | ||
flag_t | value, | ||
oski_index_t | n, | ||
flag_t *restrict | f | ||
) | [static] |
Scatters a specified value to all elements of a dense flag vector corresponding to the structurally non-zero elements of a sparse vector
.
More specifically, sets for all
such that
.
[in] | n_x | Number ![]() ![]() |
[in] | x | Sparse vector indices of ![]() |
[in] | value | Value ![]() |
[in] | n | Maximum length of ![]() |
[out] | f | Flag vector ![]() |
Referenced by CountNumCommonSpVec(), and CountUnique().