libflame
Functions
FLA_Error FLA_Apply_pivots_ln_blk_var1 (FLA_Obj p, FLA_Obj A, fla_appiv_t *cntl)
FLA_Error FLA_Apply_pivots_ln_blk_var2 (FLA_Obj p, FLA_Obj A, fla_appiv_t *cntl)
FLA_Error FLA_Apply_pivots_ln_opt_var1 (FLA_Obj p, FLA_Obj A)
FLA_Error FLA_Apply_pivots_ln_ops_var1 (int n, float *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
FLA_Error FLA_Apply_pivots_ln_opd_var1 (int n, double *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
FLA_Error FLA_Apply_pivots_ln_opc_var1 (int n, scomplex *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
FLA_Error FLA_Apply_pivots_ln_opz_var1 (int n, dcomplex *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
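The typed kernels listed above share the same calling convention: the n columns of a, with row stride a_rs and column stride a_cs, have rows k1 through k2 interchanged according to the pivot vector p (read with increment incp). Each pivot entry is stored as an offset relative to its own row, so row i is exchanged with row p[i] + i. The driver below is a minimal sketch, not part of libflame, of how the double-precision kernel might be called; it assumes only that FLAME.h declares FLA_Apply_pivots_ln_opd_var1() as documented on this page, and the matrix and pivot values are purely illustrative.

#include <stdio.h>
#include "FLAME.h"

int main( void )
{
  // 3x3 matrix stored in column-major order (row stride 1, column stride 3).
  double A[9] = { 1.0, 4.0, 7.0,    // column 0
                  2.0, 5.0, 8.0,    // column 1
                  3.0, 6.0, 9.0 };  // column 2

  // Pivots are relative offsets: row i is swapped with row p[i] + i.
  int p[3] = { 2, 1, 0 };

  // Apply row swaps 0..2 from the left, without transposition ("ln").
  FLA_Apply_pivots_ln_opd_var1( 3, A, 1, 3, 0, 2, p, 1 );

  // Rows are now (original row 2, original row 0, original row 1).
  for ( int i = 0; i < 3; i++ )
    printf( "%4.1f %4.1f %4.1f\n", A[ i + 0*3 ], A[ i + 1*3 ], A[ i + 2*3 ] );

  return 0;
}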
FLA_Error FLA_Apply_pivots_ln_blk_var1 ( FLA_Obj p, FLA_Obj A, fla_appiv_t * cntl )
References FLA_Apply_pivots_internal(), FLA_Cont_with_1x3_to_1x2(), FLA_Determine_blocksize(), FLA_Obj_width(), FLA_Part_1x2(), and FLA_Repart_1x2_to_1x3().
Referenced by FLA_Apply_pivots_ln().
{
  FLA_Obj AL,    AR,       A0,  A1,  A2;

  dim_t   b;

  FLA_Part_1x2( A,    &AL,  &AR,      0, FLA_LEFT );

  while ( FLA_Obj_width( AL ) < FLA_Obj_width( A ) )
  {
    b = FLA_Determine_blocksize( AR, FLA_RIGHT, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_1x2_to_1x3( AL,  /**/ AR,        &A0, /**/ &A1, &A2,
                           b, FLA_RIGHT );

    /*------------------------------------------------------------*/

    /* Apply pivots to each column panel */
    FLA_Apply_pivots_internal( FLA_LEFT, FLA_NO_TRANSPOSE, p, A1,
                               FLA_Cntl_sub_appiv( cntl ) );

    /*------------------------------------------------------------*/

    FLA_Cont_with_1x3_to_1x2( &AL,  /**/ &AR,        A0, A1, /**/ A2,
                              FLA_LEFT );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_pivots_ln_blk_var2 ( FLA_Obj p, FLA_Obj A, fla_appiv_t * cntl )
References FLA_Apply_pivots_internal(), FLA_Cont_with_3x1_to_2x1(), FLA_Determine_blocksize(), FLA_Obj_length(), FLA_Part_2x1(), and FLA_Repart_2x1_to_3x1().
Referenced by FLA_Apply_pivots_ln().
{
  FLA_Obj AT,              A0,
          AB,              A1,
                           A2;

  FLA_Obj pT,              p0,
          pB,              pi1,
                           p2;

  dim_t   b;

  FLA_Part_2x1( A,    &AT,
                      &AB,            0, FLA_TOP );

  FLA_Part_2x1( p,    &pT,
                      &pB,            0, FLA_TOP );

  while ( FLA_Obj_length( AT ) < FLA_Obj_length( A ) )
  {
    b = FLA_Determine_blocksize( AB, FLA_BOTTOM, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_2x1_to_3x1( AT,                &A0,
                        /* ** */            /* ** */
                                              &A1,
                           AB,                &A2,        b, FLA_BOTTOM );

    FLA_Repart_2x1_to_3x1( pT,                &p0,
                        /* ** */            /* ** */
                                              &pi1,
                           pB,                &p2,        b, FLA_BOTTOM );

    /*------------------------------------------------------------*/

    /* Apply the current block of pivots to the rest of the matrix. */
    FLA_Apply_pivots_internal( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB,
                               FLA_Cntl_sub_appiv( cntl ) );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x1_to_2x1( &AT,                A0,
                                                  A1,
                            /* ** */           /* ** */
                              &AB,                A2,     FLA_TOP );

    FLA_Cont_with_3x1_to_2x1( &pT,                p0,
                                                  pi1,
                            /* ** */           /* ** */
                              &pB,                p2,     FLA_TOP );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_pivots_ln_opc_var1 ( int n, scomplex * a, int a_rs, int a_cs, int k1, int k2, int * p, int incp )
Referenced by FLA_Apply_pivots_ln_opt_var1(), FLA_LU_piv_opc_var3(), FLA_LU_piv_opc_var4(), and FLA_LU_piv_opc_var5().
{
  scomplex  temp;
  scomplex* a_i_0;
  scomplex* a_pi_0;
  scomplex* a_0_j;
  scomplex* a_i_j;
  scomplex* a_pi_j;
  int       i, j;
  int       i_begin, i_bound, i_inc;
  int       p_inc;

  // Handle both positive and negative increments for the pivot vector.
  if ( incp > 0 )
  {
    i_begin = k1;
    i_bound = k2 + 1;
    i_inc   = 1;
    p_inc   = 1*incp;
  }
  else // if ( incp < 0 )
  {
    i_begin = k2;
    i_bound = k1 - 1;
    i_inc   = -1;
    p_inc   = -1*incp;
  }

  // Optimize memory accesses depending on whether A is stored in
  // column-major or row-major order. That is, for column-major
  // matrices, we interchange all the elements in a single column
  // at a time. But for row-major matrices, we perform an entire
  // row interchange before moving to the next interchange. For
  // general storage, we decide based on which stride is closer
  // to one.
  if ( a_rs == 1 || a_rs < a_cs )
  {
    for ( j = 0; j < n; j++ )
    {
      a_0_j = a + j*a_cs;

      for ( i = i_begin; i != i_bound; i += i_inc )
      {
        a_i_j  = a_0_j + ( i )*a_rs;

        // Add i to shift from relative to absolute index.
        a_pi_j = a_0_j + ( p[i*p_inc] + i )*a_rs;

        temp    = *a_pi_j;
        *a_pi_j = *a_i_j;
        *a_i_j  = temp;
      }
    }
  }
  else // if ( a_cs == 1 || a_cs < a_rs )
  {
    for ( i = i_begin; i != i_bound; i += i_inc )
    {
      a_i_0  = a + ( i )*a_rs;

      // Add i to shift from relative to absolute index.
      a_pi_0 = a + ( p[i*p_inc] + i )*a_rs;

      for ( j = 0; j < n; j++ )
      {
        a_i_j  = a_i_0 + j*a_cs;
        a_pi_j = a_pi_0 + j*a_cs;

        temp    = *a_pi_j;
        *a_pi_j = *a_i_j;
        *a_i_j  = temp;
      }
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_pivots_ln_opd_var1 ( int n, double * a, int a_rs, int a_cs, int k1, int k2, int * p, int incp )
Referenced by FLA_Apply_pivots_ln_opt_var1(), FLA_LU_piv_opd_var3(), FLA_LU_piv_opd_var4(), and FLA_LU_piv_opd_var5().
{
  double  temp;
  double* a_i_0;
  double* a_pi_0;
  double* a_0_j;
  double* a_i_j;
  double* a_pi_j;
  int     i, j;
  int     i_begin, i_bound, i_inc;
  int     p_inc;

  // Handle both positive and negative increments for the pivot vector.
  if ( incp > 0 )
  {
    i_begin = k1;
    i_bound = k2 + 1;
    i_inc   = 1;
    p_inc   = 1*incp;
  }
  else // if ( incp < 0 )
  {
    i_begin = k2;
    i_bound = k1 - 1;
    i_inc   = -1;
    p_inc   = -1*incp;
  }

  // Optimize memory accesses depending on whether A is stored in
  // column-major or row-major order. That is, for column-major
  // matrices, we interchange all the elements in a single column
  // at a time. But for row-major matrices, we perform an entire
  // row interchange before moving to the next interchange. For
  // general storage, we decide based on which stride is closer
  // to one.
  if ( a_rs == 1 || a_rs < a_cs )
  {
    for ( j = 0; j < n; j++ )
    {
      a_0_j = a + j*a_cs;

      for ( i = i_begin; i != i_bound; i += i_inc )
      {
        a_i_j  = a_0_j + ( i )*a_rs;

        // Add i to shift from relative to absolute index.
        a_pi_j = a_0_j + ( p[i*p_inc] + i )*a_rs;

        temp    = *a_pi_j;
        *a_pi_j = *a_i_j;
        *a_i_j  = temp;
      }
    }
  }
  else // if ( a_cs == 1 || a_cs < a_rs )
  {
    for ( i = i_begin; i != i_bound; i += i_inc )
    {
      a_i_0  = a + ( i )*a_rs;

      // Add i to shift from relative to absolute index.
      a_pi_0 = a + ( p[i*p_inc] + i )*a_rs;

      for ( j = 0; j < n; j++ )
      {
        a_i_j  = a_i_0 + j*a_cs;
        a_pi_j = a_pi_0 + j*a_cs;

        temp    = *a_pi_j;
        *a_pi_j = *a_i_j;
        *a_i_j  = temp;
      }
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_pivots_ln_ops_var1 ( int n, float * a, int a_rs, int a_cs, int k1, int k2, int * p, int incp )
Referenced by FLA_Apply_pivots_ln_opt_var1(), FLA_LU_piv_ops_var3(), FLA_LU_piv_ops_var4(), and FLA_LU_piv_ops_var5().
{
  float  temp;
  float* a_i_0;
  float* a_pi_0;
  float* a_0_j;
  float* a_i_j;
  float* a_pi_j;
  int    i, j;
  int    i_begin, i_bound, i_inc;
  int    p_inc;

  // Handle both positive and negative increments for the pivot vector.
  if ( incp > 0 )
  {
    i_begin = k1;
    i_bound = k2 + 1;
    i_inc   = 1;
    p_inc   = 1*incp;
  }
  else // if ( incp < 0 )
  {
    i_begin = k2;
    i_bound = k1 - 1;
    i_inc   = -1;
    p_inc   = -1*incp;
  }

  // Optimize memory accesses depending on whether A is stored in
  // column-major or row-major order. That is, for column-major
  // matrices, we interchange all the elements in a single column
  // at a time. But for row-major matrices, we perform an entire
  // row interchange before moving to the next interchange. For
  // general storage, we decide based on which stride is closer
  // to one.
  if ( a_rs == 1 || a_rs < a_cs )
  {
    for ( j = 0; j < n; j++ )
    {
      a_0_j = a + j*a_cs;

      for ( i = i_begin; i != i_bound; i += i_inc )
      {
        a_i_j  = a_0_j + ( i )*a_rs;

        // Add i to shift from relative to absolute index.
        a_pi_j = a_0_j + ( p[i*p_inc] + i )*a_rs;

        temp    = *a_pi_j;
        *a_pi_j = *a_i_j;
        *a_i_j  = temp;
      }
    }
  }
  else // if ( a_cs == 1 || a_cs < a_rs )
  {
    for ( i = i_begin; i != i_bound; i += i_inc )
    {
      a_i_0  = a + ( i )*a_rs;

      // Add i to shift from relative to absolute index.
      a_pi_0 = a + ( p[i*p_inc] + i )*a_rs;

      for ( j = 0; j < n; j++ )
      {
        a_i_j  = a_i_0 + j*a_cs;
        a_pi_j = a_pi_0 + j*a_cs;

        temp    = *a_pi_j;
        *a_pi_j = *a_i_j;
        *a_i_j  = temp;
      }
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_pivots_ln_opt_var1 ( FLA_Obj p, FLA_Obj A )
References FLA_Apply_pivots_ln_opc_var1(), FLA_Apply_pivots_ln_opd_var1(), FLA_Apply_pivots_ln_ops_var1(), FLA_Apply_pivots_ln_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_row_stride(), FLA_Obj_vector_dim(), FLA_Obj_vector_inc(), and FLA_Obj_width().
Referenced by FLA_Apply_pivots_ln().
{
  FLA_Datatype datatype;
  int          n_A;
  int          rs_A, cs_A;
  int          inc_p;
  int          k1_0, k2_0;

  datatype = FLA_Obj_datatype( A );

  n_A      = FLA_Obj_width( A );
  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  inc_p    = FLA_Obj_vector_inc( p );

  // Use zero-based indices.
  k1_0     = 0;
  k2_0     = ( int ) FLA_Obj_vector_dim( p ) - 1;

  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* buff_A = FLA_FLOAT_PTR( A );
      int*   buff_p = FLA_INT_PTR( p );

      FLA_Apply_pivots_ln_ops_var1( n_A,
                                    buff_A, rs_A, cs_A,
                                    k1_0, k2_0,
                                    buff_p, inc_p );

      break;
    }

    case FLA_DOUBLE:
    {
      double* buff_A = FLA_DOUBLE_PTR( A );
      int*    buff_p = FLA_INT_PTR( p );

      FLA_Apply_pivots_ln_opd_var1( n_A,
                                    buff_A, rs_A, cs_A,
                                    k1_0, k2_0,
                                    buff_p, inc_p );

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A = FLA_COMPLEX_PTR( A );
      int*      buff_p = FLA_INT_PTR( p );

      FLA_Apply_pivots_ln_opc_var1( n_A,
                                    buff_A, rs_A, cs_A,
                                    k1_0, k2_0,
                                    buff_p, inc_p );

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
      int*      buff_p = FLA_INT_PTR( p );

      FLA_Apply_pivots_ln_opz_var1( n_A,
                                    buff_A, rs_A, cs_A,
                                    k1_0, k2_0,
                                    buff_p, inc_p );

      break;
    }
  }

  return FLA_SUCCESS;
}
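As the dispatcher above shows, FLA_Apply_pivots_ln_opt_var1() applies every pivot stored in p (indices 0 through length(p) - 1) to the columns of A, selecting the typed kernel from the datatype of A. The sketch below is one hypothetical way to drive it at the object level; it is not part of libflame, it assumes the usual libflame object API (FLA_Init(), FLA_Obj_create(), FLA_Obj_buffer_at_view(), FLA_Obj_free(), FLA_Finalize()) with default column-major strides, and the dimensions and pivot values are illustrative only.

#include "FLAME.h"

int main( void )
{
  FLA_Obj A, p;
  double* buff_A;
  int*    buff_p;
  int     i, j;

  FLA_Init();

  // A 4x4 double-precision matrix and a length-4 integer pivot vector;
  // passing 0 for the strides requests the default (column-major) layout.
  FLA_Obj_create( FLA_DOUBLE, 4, 4, 0, 0, &A );
  FLA_Obj_create( FLA_INT,    4, 1, 0, 0, &p );

  buff_A = ( double* ) FLA_Obj_buffer_at_view( A );
  buff_p = ( int*    ) FLA_Obj_buffer_at_view( p );

  // Fill A with arbitrary values.
  for ( j = 0; j < 4; j++ )
    for ( i = 0; i < 4; i++ )
      buff_A[ i * FLA_Obj_row_stride( A ) + j * FLA_Obj_col_stride( A ) ] = i + 0.1 * j;

  // Relative pivots: row 0 is swapped with row 0 + 3; the remaining rows stay put.
  buff_p[0] = 3;
  buff_p[1] = 0;
  buff_p[2] = 0;
  buff_p[3] = 0;

  FLA_Apply_pivots_ln_opt_var1( p, A );

  FLA_Obj_free( &A );
  FLA_Obj_free( &p );
  FLA_Finalize();

  return 0;
}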
FLA_Error FLA_Apply_pivots_ln_opz_var1 ( int n, dcomplex * a, int a_rs, int a_cs, int k1, int k2, int * p, int incp )
Referenced by FLA_Apply_pivots_ln_opt_var1(), FLA_LU_piv_opz_var3(), FLA_LU_piv_opz_var4(), and FLA_LU_piv_opz_var5().
{
  dcomplex  temp;
  dcomplex* a_i_0;
  dcomplex* a_pi_0;
  dcomplex* a_0_j;
  dcomplex* a_i_j;
  dcomplex* a_pi_j;
  int       i, j;
  int       i_begin, i_bound, i_inc;
  int       p_inc;

  // Handle both positive and negative increments for the pivot vector.
  if ( incp > 0 )
  {
    i_begin = k1;
    i_bound = k2 + 1;
    i_inc   = 1;
    p_inc   = 1*incp;
  }
  else // if ( incp < 0 )
  {
    i_begin = k2;
    i_bound = k1 - 1;
    i_inc   = -1;
    p_inc   = -1*incp;
  }

  // Optimize memory accesses depending on whether A is stored in
  // column-major or row-major order. That is, for column-major
  // matrices, we interchange all the elements in a single column
  // at a time. But for row-major matrices, we perform an entire
  // row interchange before moving to the next interchange. For
  // general storage, we decide based on which stride is closer
  // to one.
  if ( a_rs == 1 || a_rs < a_cs )
  {
    for ( j = 0; j < n; j++ )
    {
      a_0_j = a + j*a_cs;

      for ( i = i_begin; i != i_bound; i += i_inc )
      {
        a_i_j  = a_0_j + ( i )*a_rs;

        // Add i to shift from relative to absolute index.
        a_pi_j = a_0_j + ( p[i*p_inc] + i )*a_rs;

        temp    = *a_pi_j;
        *a_pi_j = *a_i_j;
        *a_i_j  = temp;
      }
    }
  }
  else // if ( a_cs == 1 || a_cs < a_rs )
  {
    for ( i = i_begin; i != i_bound; i += i_inc )
    {
      a_i_0  = a + ( i )*a_rs;

      // Add i to shift from relative to absolute index.
      a_pi_0 = a + ( p[i*p_inc] + i )*a_rs;

      for ( j = 0; j < n; j++ )
      {
        a_i_j  = a_i_0 + j*a_cs;
        a_pi_j = a_pi_0 + j*a_cs;

        temp    = *a_pi_j;
        *a_pi_j = *a_i_j;
        *a_i_j  = temp;
      }
    }
  }

  return FLA_SUCCESS;
}