libflame revision_anchor
|
Functions | |
FLA_Error | FLA_SA_LU_unb (FLA_Obj U, FLA_Obj D, FLA_Obj p, FLA_Obj L) |
References bli_camax(), bli_ccopy(), bli_cger(), bli_cscal(), bli_cswap(), bli_damax(), bli_dcopy(), bli_dger(), bli_dscal(), bli_dswap(), bli_samax(), bli_scopy(), bli_sger(), bli_sscal(), bli_sswap(), bli_zamax(), bli_zcopy(), bli_zger(), bli_zscal(), bli_zswap(), FLA_Copy_external(), FLA_MINUS_ONE, FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Triangularize(), dcomplex::imag, scomplex::imag, dcomplex::real, and scomplex::real.
Referenced by FLA_SA_LU_blk().
{ FLA_Datatype datatype; int m_U, cs_U; int m_D, cs_D; int cs_L; // int rs_U; int rs_D; // int rs_L; int m_U_min_j, m_U_min_j_min_1; int j, ipiv; int* buff_p; if ( FLA_Obj_has_zero_dim( U ) ) return FLA_SUCCESS; datatype = FLA_Obj_datatype( U ); m_U = FLA_Obj_length( U ); // rs_U = FLA_Obj_row_stride( U ); cs_U = FLA_Obj_col_stride( U ); m_D = FLA_Obj_length( D ); rs_D = FLA_Obj_row_stride( D ); cs_D = FLA_Obj_col_stride( D ); // rs_L = FLA_Obj_row_stride( L ); cs_L = FLA_Obj_col_stride( L ); FLA_Copy_external( U, L ); FLA_Triangularize( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, L ); buff_p = ( int * ) FLA_INT_PTR( p ); switch ( datatype ){ case FLA_FLOAT: { float* buff_U = ( float * ) FLA_FLOAT_PTR( U ); float* buff_D = ( float * ) FLA_FLOAT_PTR( D ); float* buff_L = ( float * ) FLA_FLOAT_PTR( L ); float* buff_minus1 = ( float * ) FLA_FLOAT_PTR( FLA_MINUS_ONE ); float L_tmp; float D_tmp; float d_inv_Ljj; for ( j = 0; j < m_U; ++j ) { bli_samax( m_D, buff_D + j*cs_D + 0*rs_D, rs_D, &ipiv ); L_tmp = buff_L[ j*cs_L + j ]; D_tmp = buff_D[ j*cs_D + ipiv ]; if ( dabs( L_tmp ) < dabs( D_tmp ) ) { bli_sswap( m_U, buff_L + 0*cs_L + j, cs_L, buff_D + 0*cs_D + ipiv, cs_D ); buff_p[ j ] = ipiv + m_U - j; } else { buff_p[ j ] = 0; } d_inv_Ljj = 1.0F / buff_L[ j*cs_L + j ]; bli_sscal( m_D, &d_inv_Ljj, buff_D + j*cs_D + 0, rs_D ); m_U_min_j_min_1 = m_U - j - 1; if ( m_U_min_j_min_1 > 0 ) { bli_sger( BLIS_NO_CONJUGATE, BLIS_NO_CONJUGATE, m_D, m_U_min_j_min_1, buff_minus1, buff_D + (j+0)*cs_D + 0, rs_D, buff_L + (j+1)*cs_L + j, cs_L, buff_D + (j+1)*cs_D + 0, rs_D, cs_D ); } m_U_min_j = m_U - j; if ( m_U_min_j > 0 ) { bli_scopy( m_U_min_j, buff_L + j*cs_L + j, cs_L, buff_U + j*cs_U + j, cs_U ); } } break; } case FLA_DOUBLE: { double* buff_U = ( double * ) FLA_DOUBLE_PTR( U ); double* buff_D = ( double * ) FLA_DOUBLE_PTR( D ); double* buff_L = ( double * ) FLA_DOUBLE_PTR( L ); double* buff_minus1 = ( double * ) FLA_DOUBLE_PTR( FLA_MINUS_ONE ); double L_tmp; double D_tmp; double d_inv_Ljj; for ( j = 0; j < m_U; ++j ) { bli_damax( m_D, buff_D + j*cs_D + 0*rs_D, rs_D, &ipiv ); L_tmp = buff_L[ j*cs_L + j ]; D_tmp = buff_D[ j*cs_D + ipiv ]; if ( dabs( L_tmp ) < dabs( D_tmp ) ) { bli_dswap( m_U, buff_L + 0*cs_L + j, cs_L, buff_D + 0*cs_D + ipiv, cs_D ); buff_p[ j ] = ipiv + m_U - j; } else { buff_p[ j ] = 0; } d_inv_Ljj = 1.0 / buff_L[ j*cs_L + j ]; bli_dscal( m_D, &d_inv_Ljj, buff_D + j*cs_D + 0, rs_D ); m_U_min_j_min_1 = m_U - j - 1; if ( m_U_min_j_min_1 > 0 ) { bli_dger( BLIS_NO_CONJUGATE, BLIS_NO_CONJUGATE, m_D, m_U_min_j_min_1, buff_minus1, buff_D + (j+0)*cs_D + 0, rs_D, buff_L + (j+1)*cs_L + j, cs_L, buff_D + (j+1)*cs_D + 0, rs_D, cs_D ); } m_U_min_j = m_U - j; if ( m_U_min_j > 0 ) { bli_dcopy( m_U_min_j, buff_L + j*cs_L + j, cs_L, buff_U + j*cs_U + j, cs_U ); } } break; } case FLA_COMPLEX: { scomplex* buff_U = ( scomplex * ) FLA_COMPLEX_PTR( U ); scomplex* buff_D = ( scomplex * ) FLA_COMPLEX_PTR( D ); scomplex* buff_L = ( scomplex * ) FLA_COMPLEX_PTR( L ); scomplex* buff_minus1 = ( scomplex * ) FLA_COMPLEX_PTR( FLA_MINUS_ONE ); scomplex L_tmp; scomplex D_tmp; scomplex d_inv_Ljj; scomplex Ljj; float temp; for ( j = 0; j < m_U; ++j ) { bli_camax( m_D, buff_D + j*cs_D + 0*rs_D, rs_D, &ipiv ); L_tmp = buff_L[ j*cs_L + j ]; D_tmp = buff_D[ j*cs_D + ipiv ]; if ( dabs( L_tmp.real + L_tmp.imag ) < dabs( D_tmp.real + D_tmp.imag ) ) { bli_cswap( m_U, buff_L + 0*cs_L + j, cs_L, buff_D + 0*cs_D + ipiv, cs_D ); buff_p[ j ] = ipiv + m_U - j; } else { buff_p[ j ] = 0; } Ljj = buff_L[ j*cs_L + j ]; // d_inv_Ljj = 1.0 / Ljj temp = 1.0F / ( Ljj.real * Ljj.real + Ljj.imag * Ljj.imag ); d_inv_Ljj.real = Ljj.real * temp; d_inv_Ljj.imag = Ljj.imag * -temp; bli_cscal( m_D, &d_inv_Ljj, buff_D + j*cs_D + 0, rs_D ); m_U_min_j_min_1 = m_U - j - 1; if ( m_U_min_j_min_1 > 0 ) { bli_cger( BLIS_NO_CONJUGATE, BLIS_NO_CONJUGATE, m_D, m_U_min_j_min_1, buff_minus1, buff_D + (j+0)*cs_D + 0, rs_D, buff_L + (j+1)*cs_L + j, cs_L, buff_D + (j+1)*cs_D + 0, rs_D, cs_D ); } m_U_min_j = m_U - j; if ( m_U_min_j > 0 ) { bli_ccopy( m_U_min_j, buff_L + j*cs_L + j, cs_L, buff_U + j*cs_U + j, cs_U ); } } break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_U = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( U ); dcomplex* buff_D = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( D ); dcomplex* buff_L = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( L ); dcomplex* buff_minus1 = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE ); dcomplex L_tmp; dcomplex D_tmp; dcomplex d_inv_Ljj; dcomplex Ljj; double temp; for ( j = 0; j < m_U; ++j ) { bli_zamax( m_D, buff_D + j*cs_D + 0*rs_D, rs_D, &ipiv ); L_tmp = buff_L[ j*cs_L + j ]; D_tmp = buff_D[ j*cs_D + ipiv ]; if ( dabs( L_tmp.real + L_tmp.imag ) < dabs( D_tmp.real + D_tmp.imag ) ) { bli_zswap( m_U, buff_L + 0*cs_L + j, cs_L, buff_D + 0*cs_D + ipiv, cs_D ); buff_p[ j ] = ipiv + m_U - j; } else { buff_p[ j ] = 0; } Ljj = buff_L[ j*cs_L + j ]; // d_inv_Ljj = 1.0 / Ljj temp = 1.0 / ( Ljj.real * Ljj.real + Ljj.imag * Ljj.imag ); d_inv_Ljj.real = Ljj.real * temp; d_inv_Ljj.imag = Ljj.imag * -temp; bli_zscal( m_D, &d_inv_Ljj, buff_D + j*cs_D + 0, rs_D ); m_U_min_j_min_1 = m_U - j - 1; if ( m_U_min_j_min_1 > 0 ) { bli_zger( BLIS_NO_CONJUGATE, BLIS_NO_CONJUGATE, m_D, m_U_min_j_min_1, buff_minus1, buff_D + (j+0)*cs_D + 0, rs_D, buff_L + (j+1)*cs_L + j, cs_L, buff_D + (j+1)*cs_D + 0, rs_D, cs_D ); } m_U_min_j = m_U - j; if ( m_U_min_j > 0 ) { bli_zcopy( m_U_min_j, buff_L + j*cs_L + j, cs_L, buff_U + j*cs_U + j, cs_U ); } } break; } } return FLA_SUCCESS; }