BeBOP Optimized Sparse Kernel Interface Library
1.0.1h
|
Tuning module implementation. More...
#include <math.h>
#include <oski/common.h>
#include <oski/simplelist.h>
#include <oski/heur_internal.h>
#include <oski/heur_typedep.h>
#include <oski/heurexport.h>
#include <oski/trace.h>
#include <oski/tune.h>
#include <oski/timer.h>
#include <oski/matrix.h>
#include <oski/vecview.h>
#include <oski/xforms.h>
Defines
#define BEST_FRAC_OBSERVED .25
Fraction of observed workload available for tuning.
#define BEST_FRAC_WORKLOAD .25
Fraction of hint workload available for tuning.
Functions
static double ComputeTuningBudget (oski_matrix_t A_tunable)
Returns the estimated number of seconds available for tuning.
static int ChooseFastest (oski_matrix_t A_tunable)
Determines whether the heuristic-selected data structure leads to faster execution times than the input data structure.
static void FreeTunedMat (oski_matrix_t A_tunable)
int oski_TuneMat (oski_matrix_t A_tunable)
Basic outline of this routine's implementation:
Tuning module implementation.
Current list of heuristics:
Basic structure of a heuristic:
FUNCTION HeurType :: EvaluateHeuristic( heur:HeurType, A:matrix )
    IF this heuristic does not apply to A and its trace THEN
        RETURN NULL;
    ENDIF
    LET results = Choose tuning parameters for A
    RETURN results
FUNCTION RBSpMV :: EvaluateHeuristic( heur:HeurType, A:matrix )
    IF A->trace not "dominated" by calls to SpMV THEN
        RETURN NULL;
    ENDIF
    LET EstFill[MAX_R, MAX_C] = EstimateFill( A )
    LET Mflops_dense[MAX_R, MAX_C] = LoadRegProfile( A )
    LET r, c = max_{r,c} Mflops_dense[r,c] / EstFill[r, c]
    IF r == 1 AND c == 1 THEN
        RETURN NULL;
    ELSE
        LET results = new RbSpMV_results( r, c )
        RETURN results
    ENDIF
#define BEST_FRAC_OBSERVED .25
Fraction of observed workload available for tuning.
Referenced by ComputeTuningBudget().
#define BEST_FRAC_WORKLOAD .25
Fraction of hint workload available for tuning.
Referenced by ComputeTuningBudget().
static int ChooseFastest (oski_matrix_t A_tunable) [static]
Determines whether the heuristic-selected data structure leads to faster execution times than the input data structure.
References oski_vecstruct_t::colinc, INVALID_ID, INVALID_MAT, INVALID_TIMER, LAYOUT_ROWMAJ, oski_matcommon_t::num_cols, oski_vecstruct_t::num_cols, oski_matcommon_t::num_rows, oski_vecstruct_t::num_rows, OP_NORMAL, oski_vecstruct_t::orient, oski_CreateTimer(), oski_DestroyTimer(), oski_FreeAll(), oski_MultiMalloc(), oski_PrintDebugMessage(), oski_ReadElapsedTime(), oski_RestartTimer(), oski_StopTimer(), oski_ZeroMem(), oski_matstruct_t::props, oski_vecstruct_t::rowinc, oski_vecstruct_t::stride, oski_matstruct_t::tuned_mat, oski_matspecific_t::type_id, and oski_vecstruct_t::val.
Referenced by oski_TuneMat().
static double ComputeTuningBudget (oski_matrix_t A_tunable) [static]
Returns the estimated number of seconds available for tuning.
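The estimate is based on the larger of the following two quantities: the fraction BEST_FRAC_OBSERVED of the observed workload, and the fraction BEST_FRAC_WORKLOAD of the workload specified by hints.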
References BEST_FRAC_OBSERVED, BEST_FRAC_WORKLOAD, INVALID_MAT, oski_PrintDebugMessage(), oski_matstruct_t::time_stream, oski_matstruct_t::trace, and oski_matstruct_t::workhints.
Referenced by oski_TuneMat().