libflame revision_anchor
Functions
FLA_LU_incpiv_aux.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_SA_Apply_pivots (FLA_Obj C, FLA_Obj E, FLA_Obj p)
FLA_Error FLA_SA_LU_blk (FLA_Obj U, FLA_Obj D, FLA_Obj p, FLA_Obj L, dim_t nb_alg)
FLA_Error FLA_SA_LU_unb (FLA_Obj U, FLA_Obj D, FLA_Obj p, FLA_Obj L)
FLA_Error FLA_SA_FS_blk (FLA_Obj L, FLA_Obj D, FLA_Obj p, FLA_Obj C, FLA_Obj E, dim_t nb_alg)
FLA_Error FLASH_LU_incpiv_var1 (FLA_Obj A, FLA_Obj p, FLA_Obj L, dim_t nb_alg, fla_lu_t *cntl)
FLA_Error FLASH_LU_incpiv_var2 (FLA_Obj A, FLA_Obj p, FLA_Obj L, FLA_Obj U, dim_t nb_alg, fla_lu_t *cntl)
FLA_Error FLASH_Trsm_piv (FLA_Obj A, FLA_Obj B, FLA_Obj p, fla_trsm_t *cntl)
FLA_Error FLASH_SA_LU (FLA_Obj B, FLA_Obj C, FLA_Obj D, FLA_Obj E, FLA_Obj p, FLA_Obj L, dim_t nb_alg, fla_lu_t *cntl)
FLA_Error FLASH_SA_FS (FLA_Obj L, FLA_Obj D, FLA_Obj p, FLA_Obj C, FLA_Obj E, dim_t nb_alg, fla_gemm_t *cntl)
FLA_Error FLASH_FS_incpiv_aux1 (FLA_Obj A, FLA_Obj p, FLA_Obj L, FLA_Obj b, dim_t nb_alg)
FLA_Error FLASH_FS_incpiv_aux2 (FLA_Obj L, FLA_Obj D, FLA_Obj p, FLA_Obj C, FLA_Obj E, dim_t nb_alg)

Function Documentation

FLA_Error FLA_SA_Apply_pivots ( FLA_Obj  C,
FLA_Obj  E,
FLA_Obj  p 
)

References bli_cswap(), bli_dswap(), bli_sswap(), bli_zswap(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), and FLA_Obj_width().

Referenced by FLA_SA_FS_blk(), and FLA_SA_LU_blk().

{
  // Exchanges rows between C and E as directed by the pivot vector p.
  // Entry p[k] == 0 means "leave row k of C alone"; any other value selects
  // row p[k] - ( length(C) - k ) of E to be swapped with row k of C.
  // Row elements are addressed through the column strides only, so this
  // presumably expects unit row stride (column-major storage) -- TODO confirm.
  FLA_Datatype dt_C;
  int          num_rows_C, num_cols_C;
  int          ldim_C, ldim_E;
  int          num_pivots;
  int          k, row_E;
  int*         pivots;

  // Nothing to exchange when C has a zero dimension.
  if ( FLA_Obj_has_zero_dim( C ) )
    return FLA_SUCCESS;

  dt_C       = FLA_Obj_datatype( C );

  num_rows_C = FLA_Obj_length( C );
  num_cols_C = FLA_Obj_width( C );
  ldim_C     = FLA_Obj_col_stride( C );

  ldim_E     = FLA_Obj_col_stride( E );

  num_pivots = FLA_Obj_length( p );

  pivots     = ( int * ) FLA_INT_PTR( p );

  switch ( dt_C )
  {
    case FLA_FLOAT:
    {
      float* c_base = ( float * ) FLA_FLOAT_PTR( C );
      float* e_base = ( float * ) FLA_FLOAT_PTR( E );

      for ( k = 0; k < num_pivots; ++k )
      {
        if ( pivots[ k ] == 0 ) continue;

        row_E = pivots[ k ] - ( num_rows_C - k );

        bli_sswap( num_cols_C,
                   c_base + k,     ldim_C,
                   e_base + row_E, ldim_E );
      }
      break;
    }

    case FLA_DOUBLE:
    {
      double* c_base = ( double * ) FLA_DOUBLE_PTR( C );
      double* e_base = ( double * ) FLA_DOUBLE_PTR( E );

      for ( k = 0; k < num_pivots; ++k )
      {
        if ( pivots[ k ] == 0 ) continue;

        row_E = pivots[ k ] - ( num_rows_C - k );

        bli_dswap( num_cols_C,
                   c_base + k,     ldim_C,
                   e_base + row_E, ldim_E );
      }
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* c_base = ( scomplex * ) FLA_COMPLEX_PTR( C );
      scomplex* e_base = ( scomplex * ) FLA_COMPLEX_PTR( E );

      for ( k = 0; k < num_pivots; ++k )
      {
        if ( pivots[ k ] == 0 ) continue;

        row_E = pivots[ k ] - ( num_rows_C - k );

        bli_cswap( num_cols_C,
                   c_base + k,     ldim_C,
                   e_base + row_E, ldim_E );
      }
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* c_base = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( C );
      dcomplex* e_base = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( E );

      for ( k = 0; k < num_pivots; ++k )
      {
        if ( pivots[ k ] == 0 ) continue;

        row_E = pivots[ k ] - ( num_rows_C - k );

        bli_zswap( num_cols_C,
                   c_base + k,     ldim_C,
                   e_base + row_E, ldim_E );
      }
      break;
    }

    default:
      // Any other datatype is left untouched.
      break;
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_SA_FS_blk ( FLA_Obj  L,
FLA_Obj  D,
FLA_Obj  p,
FLA_Obj  C,
FLA_Obj  E,
dim_t  nb_alg 
)

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Gemm_external(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_SA_Apply_pivots(), and FLA_Trsm_external().

Referenced by FLA_SA_FS_task(), and FLASH_FS_incpiv_aux2().

{
  // Blocked sweep over the rows of L (and, in lock step, the rows of p and C
  // and the columns of D). Each iteration peels off a panel of at most nb_alg
  // rows/columns, applies that panel's pivots to C1/E, performs a unit
  // lower-triangular solve on C1 with the leading square part of L1, and then
  // updates E with D1 and C1 through a Gemm scaled by FLA_MINUS_ONE.

  // Row views of L, p and C; column views of D.
  FLA_Obj LT, LB, L0, L1, L2;
  FLA_Obj DL, DR, D0, D1, D2;
  FLA_Obj pT, pB, p0, p1, p2;
  FLA_Obj CT, CB, C0, C1, C2;

  // L1 split into its first panel-width columns and whatever remains.
  FLA_Obj L1_lead, L1_tail;

  // Start with empty "done" partitions (top / left).
  FLA_Part_2x1( L, &LT, &LB, 0, FLA_TOP );
  FLA_Part_1x2( D, &DL, &DR, 0, FLA_LEFT );
  FLA_Part_2x1( p, &pT, &pB, 0, FLA_TOP );
  FLA_Part_2x1( C, &CT, &CB, 0, FLA_TOP );

  while ( FLA_Obj_length( LT ) < FLA_Obj_length( L ) )
  {
    // Panel size: nb_alg, clipped to what is left of L.
    dim_t nb_cur = min( FLA_Obj_length( LB ), nb_alg );

    // Expose the current panel in each operand.
    FLA_Repart_2x1_to_3x1( LT, &L0, &L1, LB, &L2, nb_cur, FLA_BOTTOM );
    FLA_Repart_1x2_to_1x3( DL, DR, &D0, &D1, &D2, nb_cur, FLA_RIGHT );
    FLA_Repart_2x1_to_3x1( pT, &p0, &p1, pB, &p2, nb_cur, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( CT, &C0, &C1, CB, &C2, nb_cur, FLA_BOTTOM );

    // ---- panel update ---------------------------------------------------

    FLA_Part_1x2( L1, &L1_lead, &L1_tail, nb_cur, FLA_LEFT );

    // Row interchanges between C1 and E recorded in p1.
    FLA_SA_Apply_pivots( C1, E, p1 );

    // Triangular solve on C1 with the unit lower-triangular L1_lead.
    FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
                       FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
                       FLA_ONE, L1_lead, C1 );

    // Gemm with alpha = FLA_MINUS_ONE, beta = FLA_ONE on ( D1, C1, E ).
    FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
                       FLA_MINUS_ONE, D1, C1, FLA_ONE, E );

    // ---- advance --------------------------------------------------------

    FLA_Cont_with_3x1_to_2x1( &LT, L0, L1, &LB, L2, FLA_TOP );
    FLA_Cont_with_1x3_to_1x2( &DL, &DR, D0, D1, D2, FLA_LEFT );
    FLA_Cont_with_3x1_to_2x1( &pT, p0, p1, &pB, p2, FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &CT, C0, C1, &CB, C2, FLA_TOP );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_SA_LU_blk ( FLA_Obj  U,
FLA_Obj  D,
FLA_Obj  p,
FLA_Obj  L,
dim_t  nb_alg 
)

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Gemm_external(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_SA_Apply_pivots(), FLA_SA_LU_unb(), and FLA_Trsm_external().

Referenced by FLA_SA_LU_task().

{
  // Blocked driver around FLA_SA_LU_unb. U is traversed along its diagonal
  // while D is traversed by columns and p / L by rows, all with the same
  // panel size. Per panel: factor ( U11, D1 ) with the unblocked routine,
  // apply the resulting pivots to ( U12, D2 ), do a unit lower-triangular
  // solve on U12, and update D2 through a Gemm scaled by FLA_MINUS_ONE.

  // Quadrant views of U.
  FLA_Obj UTL, UTR, UBL, UBR;
  FLA_Obj U00, U01, U02;
  FLA_Obj U10, U11, U12;
  FLA_Obj U20, U21, U22;

  // Column views of D; row views of p and L.
  FLA_Obj DL, DR, D0, D1, D2;
  FLA_Obj pT, pB, p0, p1, p2;
  FLA_Obj LT, LB, L0, L1, L2;

  // L1 split into its first panel-width columns and whatever remains.
  FLA_Obj L1_lead, L1_tail;

  // Start with empty "done" partitions.
  FLA_Part_2x2( U, &UTL, &UTR, &UBL, &UBR, 0, 0, FLA_TL );
  FLA_Part_1x2( D, &DL, &DR, 0, FLA_LEFT );
  FLA_Part_2x1( p, &pT, &pB, 0, FLA_TOP );
  FLA_Part_2x1( L, &LT, &LB, 0, FLA_TOP );

  while ( FLA_Obj_length( UTL ) < FLA_Obj_length( U ) )
  {
    // Panel size: nb_alg, clipped to what is left of U.
    dim_t nb_cur = min( FLA_Obj_length( UBR ), nb_alg );

    // Expose the current panel in each operand.
    FLA_Repart_2x2_to_3x3( UTL, UTR, &U00, &U01, &U02,
                                     &U10, &U11, &U12,
                           UBL, UBR, &U20, &U21, &U22,
                           nb_cur, nb_cur, FLA_BR );
    FLA_Repart_1x2_to_1x3( DL, DR, &D0, &D1, &D2, nb_cur, FLA_RIGHT );
    FLA_Repart_2x1_to_3x1( pT, &p0, &p1, pB, &p2, nb_cur, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( LT, &L0, &L1, LB, &L2, nb_cur, FLA_BOTTOM );

    // ---- panel update ---------------------------------------------------

    FLA_Part_1x2( L1, &L1_lead, &L1_tail, nb_cur, FLA_LEFT );

    // Unblocked factorization of the current panel.
    FLA_SA_LU_unb( U11, D1, p1, L1_lead );

    // Row interchanges between U12 and D2 recorded in p1.
    FLA_SA_Apply_pivots( U12, D2, p1 );

    // Triangular solve on U12 with the unit lower-triangular L1_lead.
    FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
                       FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
                       FLA_ONE, L1_lead, U12 );

    // Gemm with alpha = FLA_MINUS_ONE, beta = FLA_ONE on ( D1, U12, D2 ).
    FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
                       FLA_MINUS_ONE, D1, U12, FLA_ONE, D2 );

    // ---- advance --------------------------------------------------------

    FLA_Cont_with_3x3_to_2x2( &UTL, &UTR, U00, U01, U02,
                                          U10, U11, U12,
                              &UBL, &UBR, U20, U21, U22,
                              FLA_TL );
    FLA_Cont_with_1x3_to_1x2( &DL, &DR, D0, D1, D2, FLA_LEFT );
    FLA_Cont_with_3x1_to_2x1( &pT, p0, p1, &pB, p2, FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &LT, L0, L1, &LB, L2, FLA_TOP );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_SA_LU_unb ( FLA_Obj  U,
FLA_Obj  D,
FLA_Obj  p,
FLA_Obj  L 
)

References bli_camax(), bli_ccopy(), bli_cger(), bli_cscal(), bli_cswap(), bli_damax(), bli_dcopy(), bli_dger(), bli_dscal(), bli_dswap(), bli_samax(), bli_scopy(), bli_sger(), bli_sscal(), bli_sswap(), bli_zamax(), bli_zcopy(), bli_zger(), bli_zscal(), bli_zswap(), FLA_Copy_external(), FLA_MINUS_ONE, FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Triangularize(), dcomplex::imag, scomplex::imag, dcomplex::real, and scomplex::real.

Referenced by FLA_SA_LU_blk().

{
  // Unblocked factorization step on the pair ( U, D ), recording row
  // interchanges in p and using L as workspace.
  //
  // Outline, for each column j of U (j = 0 .. length(U)-1):
  //   1. Find ipiv, the index of the largest entry in column j of D.
  //   2. If the diagonal entry L(j,j) is smaller in magnitude than
  //      D(ipiv,j), swap row j of L with row ipiv of D and store
  //      ipiv + length(U) - j in p[j]; otherwise store 0 in p[j].
  //   3. Scale column j of D by 1 / L(j,j).
  //   4. Rank-1 update of the columns of D to the right of j, using column j
  //      of D and the part of row j of L to the right of the diagonal.
  //   5. Copy row j of L, from the diagonal rightwards, back into U.
  //
  // L starts out as a copy of U that is then passed through
  // FLA_Triangularize( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, ... ).
  //
  // Rows of U, L and D are addressed as base + row with the column stride
  // as increment, i.e. the code presumably expects unit row stride
  // (column-major storage) -- TODO confirm against callers.
  //
  // NOTE(review): there is no guard against L(j,j) == 0 after pivoting; a
  // column that is entirely zero in both U and D leads to a division by zero.
  FLA_Datatype datatype;
  int          m_U, cs_U;
  int          m_D, cs_D;
  int               cs_L;
  // int               rs_U;
  int               rs_D;
  // int               rs_L;
  int          m_U_min_j, m_U_min_j_min_1;
  int          j, ipiv;
  int*         buff_p;

  // Nothing to factor when U has a zero dimension.
  if ( FLA_Obj_has_zero_dim( U ) ) return FLA_SUCCESS;

  datatype = FLA_Obj_datatype( U );

  m_U      = FLA_Obj_length( U );
  // rs_U     = FLA_Obj_row_stride( U );
  cs_U     = FLA_Obj_col_stride( U );

  m_D      = FLA_Obj_length( D );
  rs_D     = FLA_Obj_row_stride( D );
  cs_D     = FLA_Obj_col_stride( D );

  // rs_L     = FLA_Obj_row_stride( L );
  cs_L     = FLA_Obj_col_stride( L );

  // Work on a triangularized copy of U held in L.
  FLA_Copy_external( U, L );
  FLA_Triangularize( FLA_UPPER_TRIANGULAR, FLA_NONUNIT_DIAG, L );

  buff_p     = ( int * ) FLA_INT_PTR( p );

  switch ( datatype ){

  case FLA_FLOAT:
  {
    float* buff_U      = ( float * ) FLA_FLOAT_PTR( U );
    float* buff_D      = ( float * ) FLA_FLOAT_PTR( D );
    float* buff_L      = ( float * ) FLA_FLOAT_PTR( L );
    float* buff_minus1 = ( float * ) FLA_FLOAT_PTR( FLA_MINUS_ONE );
    float  L_tmp;
    float  D_tmp;
    float  d_inv_Ljj;

    for ( j = 0; j < m_U; ++j )
    {
      // Largest entry of column j of D.
      bli_samax( m_D,
                 buff_D + j*cs_D + 0*rs_D,
                 rs_D,
                 &ipiv );

      L_tmp = buff_L[ j*cs_L + j    ];
      D_tmp = buff_D[ j*cs_D + ipiv ];

      // Pivot only if D offers a strictly larger candidate.
      if ( dabs( L_tmp ) < dabs( D_tmp ) )
      {
        // Swap row j of L with row ipiv of D.
        bli_sswap( m_U,
                   buff_L + 0*cs_L + j,    cs_L,
                   buff_D + 0*cs_D + ipiv, cs_D );

        // Non-zero encoding of the interchange (decoded by
        // FLA_SA_Apply_pivots as p[j] - ( m - j )).
        buff_p[ j ] = ipiv + m_U - j;
      }
      else
      {
        // Zero means "no interchange for this column".
        buff_p[ j ] = 0;
      }

      d_inv_Ljj = 1.0F / buff_L[ j*cs_L + j ];

      // Scale column j of D by 1 / L(j,j).
      bli_sscal( m_D,
                 &d_inv_Ljj,
                 buff_D + j*cs_D + 0, rs_D );

      m_U_min_j_min_1 = m_U - j - 1;

      if ( m_U_min_j_min_1 > 0  )
      {
        // Rank-1 update of the trailing columns of D.
        bli_sger( BLIS_NO_CONJUGATE,
                  BLIS_NO_CONJUGATE,
                  m_D,
                  m_U_min_j_min_1,
                  buff_minus1,
                  buff_D + (j+0)*cs_D + 0, rs_D,
                  buff_L + (j+1)*cs_L + j, cs_L,
                  buff_D + (j+1)*cs_D + 0, rs_D, cs_D );
      }

      m_U_min_j = m_U - j;

      if ( m_U_min_j > 0 )
      {
        // Write row j of L (diagonal onwards) back into U.
        bli_scopy( m_U_min_j,
                   buff_L + j*cs_L + j, cs_L,
                   buff_U + j*cs_U + j, cs_U );
      }
    }
    break;
  }

  case FLA_DOUBLE:
  {
    double* buff_U      = ( double * ) FLA_DOUBLE_PTR( U );
    double* buff_D      = ( double * ) FLA_DOUBLE_PTR( D );
    double* buff_L      = ( double * ) FLA_DOUBLE_PTR( L );
    double* buff_minus1 = ( double * ) FLA_DOUBLE_PTR( FLA_MINUS_ONE );
    double  L_tmp;
    double  D_tmp;
    double  d_inv_Ljj;

    for ( j = 0; j < m_U; ++j )
    {
      // Largest entry of column j of D.
      bli_damax( m_D,
                 buff_D + j*cs_D + 0*rs_D,
                 rs_D,
                 &ipiv );

      L_tmp = buff_L[ j*cs_L + j    ];
      D_tmp = buff_D[ j*cs_D + ipiv ];

      // Pivot only if D offers a strictly larger candidate.
      if ( dabs( L_tmp ) < dabs( D_tmp ) )
      {
        // Swap row j of L with row ipiv of D.
        bli_dswap( m_U,
                   buff_L + 0*cs_L + j,    cs_L,
                   buff_D + 0*cs_D + ipiv, cs_D );

        buff_p[ j ] = ipiv + m_U - j;
      }
      else
      {
        buff_p[ j ] = 0;
      }

      d_inv_Ljj = 1.0 / buff_L[ j*cs_L + j ];

      // Scale column j of D by 1 / L(j,j).
      bli_dscal( m_D,
                 &d_inv_Ljj,
                 buff_D + j*cs_D + 0, rs_D );

      m_U_min_j_min_1 = m_U - j - 1;

      if ( m_U_min_j_min_1 > 0  )
      {
        // Rank-1 update of the trailing columns of D.
        bli_dger( BLIS_NO_CONJUGATE,
                  BLIS_NO_CONJUGATE,
                  m_D,
                  m_U_min_j_min_1,
                  buff_minus1,
                  buff_D + (j+0)*cs_D + 0, rs_D,
                  buff_L + (j+1)*cs_L + j, cs_L,
                  buff_D + (j+1)*cs_D + 0, rs_D, cs_D );
      }

      m_U_min_j = m_U - j;

      if ( m_U_min_j > 0 )
      {
        // Write row j of L (diagonal onwards) back into U.
        bli_dcopy( m_U_min_j,
                   buff_L + j*cs_L + j, cs_L,
                   buff_U + j*cs_U + j, cs_U );
      }
    }
    break;
  }

  case FLA_COMPLEX:
  {
    scomplex* buff_U      = ( scomplex * ) FLA_COMPLEX_PTR( U );
    scomplex* buff_D      = ( scomplex * ) FLA_COMPLEX_PTR( D );
    scomplex* buff_L      = ( scomplex * ) FLA_COMPLEX_PTR( L );
    scomplex* buff_minus1 = ( scomplex * ) FLA_COMPLEX_PTR( FLA_MINUS_ONE );
    scomplex  L_tmp;
    scomplex  D_tmp;
    scomplex  d_inv_Ljj;
    scomplex  Ljj;
    float     temp;

    for ( j = 0; j < m_U; ++j )
    {
      // Largest entry of column j of D.
      bli_camax( m_D,
                 buff_D + j*cs_D + 0*rs_D,
                 rs_D,
                 &ipiv );

      L_tmp = buff_L[ j*cs_L + j    ];
      D_tmp = buff_D[ j*cs_D + ipiv ];

      // Compare magnitudes as |re| + |im|. Taking the absolute value of
      // ( re + im ) instead lets parts of opposite sign cancel (1 - 1i
      // would measure as 0), which can keep a zero diagonal in place even
      // though D holds a usable pivot.
      if ( dabs( L_tmp.real ) + dabs( L_tmp.imag ) <
           dabs( D_tmp.real ) + dabs( D_tmp.imag ) )
      {
        // Swap row j of L with row ipiv of D.
        bli_cswap( m_U,
                   buff_L + 0*cs_L + j,    cs_L,
                   buff_D + 0*cs_D + ipiv, cs_D );

        buff_p[ j ] = ipiv + m_U - j;
      }
      else
      {
        buff_p[ j ] = 0;
      }

      Ljj = buff_L[ j*cs_L + j ];

      // d_inv_Ljj = 1.0 / Ljj, computed as conj( Ljj ) / |Ljj|^2.
      temp = 1.0F / ( Ljj.real * Ljj.real +
                      Ljj.imag * Ljj.imag );
      d_inv_Ljj.real = Ljj.real *  temp;
      d_inv_Ljj.imag = Ljj.imag * -temp;

      // Scale column j of D by 1 / L(j,j).
      bli_cscal( m_D,
                 &d_inv_Ljj,
                 buff_D + j*cs_D + 0, rs_D );

      m_U_min_j_min_1 = m_U - j - 1;

      if ( m_U_min_j_min_1 > 0  )
      {
        // Rank-1 update of the trailing columns of D.
        bli_cger( BLIS_NO_CONJUGATE,
                  BLIS_NO_CONJUGATE,
                  m_D,
                  m_U_min_j_min_1,
                  buff_minus1,
                  buff_D + (j+0)*cs_D + 0, rs_D,
                  buff_L + (j+1)*cs_L + j, cs_L,
                  buff_D + (j+1)*cs_D + 0, rs_D, cs_D );
      }

      m_U_min_j = m_U - j;

      if ( m_U_min_j > 0 )
      {
        // Write row j of L (diagonal onwards) back into U.
        bli_ccopy( m_U_min_j,
                   buff_L + j*cs_L + j, cs_L,
                   buff_U + j*cs_U + j, cs_U );
      }
    }
    break;
  }

  case FLA_DOUBLE_COMPLEX:
  {
    dcomplex* buff_U      = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( U );
    dcomplex* buff_D      = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( D );
    dcomplex* buff_L      = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( L );
    dcomplex* buff_minus1 = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
    dcomplex  L_tmp;
    dcomplex  D_tmp;
    dcomplex  d_inv_Ljj;
    dcomplex  Ljj;
    double    temp;

    for ( j = 0; j < m_U; ++j )
    {
      // Largest entry of column j of D.
      bli_zamax( m_D,
                 buff_D + j*cs_D + 0*rs_D,
                 rs_D,
                 &ipiv );

      L_tmp = buff_L[ j*cs_L + j    ];
      D_tmp = buff_D[ j*cs_D + ipiv ];

      // Compare magnitudes as |re| + |im|. Taking the absolute value of
      // ( re + im ) instead lets parts of opposite sign cancel (1 - 1i
      // would measure as 0), which can keep a zero diagonal in place even
      // though D holds a usable pivot.
      if ( dabs( L_tmp.real ) + dabs( L_tmp.imag ) <
           dabs( D_tmp.real ) + dabs( D_tmp.imag ) )
      {
        // Swap row j of L with row ipiv of D.
        bli_zswap( m_U,
                   buff_L + 0*cs_L + j,    cs_L,
                   buff_D + 0*cs_D + ipiv, cs_D );

        buff_p[ j ] = ipiv + m_U - j;
      }
      else
      {
        buff_p[ j ] = 0;
      }

      Ljj = buff_L[ j*cs_L + j ];

      // d_inv_Ljj = 1.0 / Ljj, computed as conj( Ljj ) / |Ljj|^2.
      temp = 1.0  / ( Ljj.real * Ljj.real +
                      Ljj.imag * Ljj.imag );
      d_inv_Ljj.real = Ljj.real *  temp;
      d_inv_Ljj.imag = Ljj.imag * -temp;

      // Scale column j of D by 1 / L(j,j).
      bli_zscal( m_D,
                 &d_inv_Ljj,
                 buff_D + j*cs_D + 0, rs_D );

      m_U_min_j_min_1 = m_U - j - 1;

      if ( m_U_min_j_min_1 > 0  )
      {
        // Rank-1 update of the trailing columns of D.
        bli_zger( BLIS_NO_CONJUGATE,
                  BLIS_NO_CONJUGATE,
                  m_D,
                  m_U_min_j_min_1,
                  buff_minus1,
                  buff_D + (j+0)*cs_D + 0, rs_D,
                  buff_L + (j+1)*cs_L + j, cs_L,
                  buff_D + (j+1)*cs_D + 0, rs_D, cs_D );
      }

      m_U_min_j = m_U - j;

      if ( m_U_min_j > 0 )
      {
        // Write row j of L (diagonal onwards) back into U.
        bli_zcopy( m_U_min_j,
                   buff_L + j*cs_L + j, cs_L,
                   buff_U + j*cs_U + j, cs_U );
      }
    }
    break;
  }

  }

  return FLA_SUCCESS;
}
FLA_Error FLASH_FS_incpiv_aux1 ( FLA_Obj  A,
FLA_Obj  p,
FLA_Obj  L,
FLA_Obj  b,
dim_t  nb_alg 
)

References FLA_Apply_pivots(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Obj_length(), FLA_Obj_width(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Trsv_external(), and FLASH_FS_incpiv_aux2().

Referenced by FLASH_FS_incpiv().

{
   // Walks the hierarchical operands A, p and L along their diagonals and b
   // from top to bottom, one block at a time. For each diagonal block the
   // leaf pivots of p11 (trimmed to the length of the leaf b1) are applied
   // to b1, a unit lower-triangular solve with the leaf A11 is done on b1,
   // and the blocks below the diagonal are handed to FLASH_FS_incpiv_aux2.

   // Quadrant views of A, p and L.
   FLA_Obj ATL, ATR, ABL, ABR;
   FLA_Obj A00, A01, A02, A10, A11, A12, A20, A21, A22;

   FLA_Obj pTL, pTR, pBL, pBR;
   FLA_Obj p00, p01, p02, p10, p11, p12, p20, p21, p22;

   FLA_Obj LTL, LTR, LBL, LBR;
   FLA_Obj L00, L01, L02, L10, L11, L12, L20, L21, L22;

   // Row views of b.
   FLA_Obj bT, bB, b0, b1, b2;

   // Leaf pivot vector of p11, split at the length of the leaf b1.
   FLA_Obj p11_conf, p11_rest;

   // Start with empty "done" partitions.
   FLA_Part_2x2( A, &ATL, &ATR, &ABL, &ABR, 0, 0, FLA_TL );
   FLA_Part_2x2( p, &pTL, &pTR, &pBL, &pBR, 0, 0, FLA_TL );
   FLA_Part_2x2( L, &LTL, &LTR, &LBL, &LBR, 0, 0, FLA_TL );
   FLA_Part_2x1( b, &bT, &bB, 0, FLA_TOP );

   while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
           FLA_Obj_width ( ATL ) < FLA_Obj_width ( A ) )
   {
      // Expose one block per operand.
      FLA_Repart_2x2_to_3x3( ATL, ATR, &A00, &A01, &A02,
                                       &A10, &A11, &A12,
                             ABL, ABR, &A20, &A21, &A22,
                             1, 1, FLA_BR );
      FLA_Repart_2x2_to_3x3( pTL, pTR, &p00, &p01, &p02,
                                       &p10, &p11, &p12,
                             pBL, pBR, &p20, &p21, &p22,
                             1, 1, FLA_BR );
      FLA_Repart_2x2_to_3x3( LTL, LTR, &L00, &L01, &L02,
                                       &L10, &L11, &L12,
                             LBL, LBR, &L20, &L21, &L22,
                             1, 1, FLA_BR );
      FLA_Repart_2x1_to_3x1( bT, &b0, &b1, bB, &b2, 1, FLA_BOTTOM );

      // ---- block update -------------------------------------------------
      {
         // Leaf-level view of the current right-hand-side block.
         FLA_Obj b1_leaf = *FLASH_OBJ_PTR_AT( b1 );

         // Keep only as many pivots as b1_leaf has rows.
         FLA_Part_2x1( *FLASH_OBJ_PTR_AT( p11 ), &p11_conf, &p11_rest,
                       FLA_Obj_length( b1_leaf ), FLA_TOP );

         FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p11_conf, b1_leaf );

         FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
                            FLA_UNIT_DIAG,
                            *FLASH_OBJ_PTR_AT( A11 ), b1_leaf );
      }

      // Process the blocks below the diagonal against b1 / b2.
      FLASH_FS_incpiv_aux2( L21, A21, p21, b1, b2, nb_alg );

      // ---- advance ------------------------------------------------------

      FLA_Cont_with_3x3_to_2x2( &ATL, &ATR, A00, A01, A02,
                                            A10, A11, A12,
                                &ABL, &ABR, A20, A21, A22,
                                FLA_TL );
      FLA_Cont_with_3x3_to_2x2( &pTL, &pTR, p00, p01, p02,
                                            p10, p11, p12,
                                &pBL, &pBR, p20, p21, p22,
                                FLA_TL );
      FLA_Cont_with_3x3_to_2x2( &LTL, &LTR, L00, L01, L02,
                                            L10, L11, L12,
                                &LBL, &LBR, L20, L21, L22,
                                FLA_TL );
      FLA_Cont_with_3x1_to_2x1( &bT, b0, b1, &bB, b2, FLA_TOP );
   }

   return FLA_SUCCESS;
}
FLA_Error FLASH_FS_incpiv_aux2 ( FLA_Obj  L,
FLA_Obj  D,
FLA_Obj  p,
FLA_Obj  C,
FLA_Obj  E,
dim_t  nb_alg 
)

References FLA_Cont_with_3x1_to_2x1(), FLA_Obj_length(), FLA_Part_2x1(), FLA_Repart_2x1_to_3x1(), and FLA_SA_FS_blk().

Referenced by FLASH_FS_incpiv_aux1().

{
   // Steps through the hierarchical operands L, D, p and E one block-row at
   // a time and calls FLA_SA_FS_blk on the leaf objects of the current
   // blocks. C is not partitioned: its leaf object is passed unchanged in
   // every iteration.

   // Row views of L, D, p and E.
   FLA_Obj LT, LB, L0, L1, L2;
   FLA_Obj DT, DB, D0, D1, D2;
   FLA_Obj pT, pB, p0, p1, p2;
   FLA_Obj ET, EB, E0, E1, E2;

   // Start with empty "done" partitions on top.
   FLA_Part_2x1( L, &LT, &LB, 0, FLA_TOP );
   FLA_Part_2x1( D, &DT, &DB, 0, FLA_TOP );
   FLA_Part_2x1( p, &pT, &pB, 0, FLA_TOP );
   FLA_Part_2x1( E, &ET, &EB, 0, FLA_TOP );

   while ( FLA_Obj_length( DT ) < FLA_Obj_length( D ) )
   {
      // Expose one block-row per operand.
      FLA_Repart_2x1_to_3x1( LT, &L0, &L1, LB, &L2, 1, FLA_BOTTOM );
      FLA_Repart_2x1_to_3x1( DT, &D0, &D1, DB, &D2, 1, FLA_BOTTOM );
      FLA_Repart_2x1_to_3x1( pT, &p0, &p1, pB, &p2, 1, FLA_BOTTOM );
      FLA_Repart_2x1_to_3x1( ET, &E0, &E1, EB, &E2, 1, FLA_BOTTOM );

      // ---- block update -------------------------------------------------

      FLA_SA_FS_blk( *FLASH_OBJ_PTR_AT( L1 ), *FLASH_OBJ_PTR_AT( D1 ),
                     *FLASH_OBJ_PTR_AT( p1 ), *FLASH_OBJ_PTR_AT( C ),
                     *FLASH_OBJ_PTR_AT( E1 ), nb_alg );

      // ---- advance ------------------------------------------------------

      FLA_Cont_with_3x1_to_2x1( &LT, L0, L1, &LB, L2, FLA_TOP );
      FLA_Cont_with_3x1_to_2x1( &DT, D0, D1, &DB, D2, FLA_TOP );
      FLA_Cont_with_3x1_to_2x1( &pT, p0, p1, &pB, p2, FLA_TOP );
      FLA_Cont_with_3x1_to_2x1( &ET, E0, E1, &EB, E2, FLA_TOP );
   }

   return FLA_SUCCESS;
}
FLA_Error FLASH_LU_incpiv_var1 ( FLA_Obj  A,
FLA_Obj  p,
FLA_Obj  L,
dim_t  nb_alg,
fla_lu_t *cntl 
)

References FLA_Cont_with_3x3_to_2x2(), FLA_LU_piv_task(), FLA_Obj_length(), FLA_Obj_width(), FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), FLASH_Queue_get_enabled(), FLASH_SA_LU(), and FLASH_Trsm_piv().

Referenced by FLASH_LU_incpiv_noopt().

{
   // Walks the hierarchical operands A, p and L along their diagonals, one
   // block at a time. For each diagonal block: run (or enqueue, when the
   // FLASH queue is enabled) a pivoted LU task on the leaf A11 / p11, call
   // FLASH_Trsm_piv on ( A11, A12, p11 ), and then FLASH_SA_LU on the blocks
   // below and to the right.

   // Quadrant views of A, p and L.
   FLA_Obj ATL, ATR, ABL, ABR;
   FLA_Obj A00, A01, A02, A10, A11, A12, A20, A21, A22;

   FLA_Obj pTL, pTR, pBL, pBR;
   FLA_Obj p00, p01, p02, p10, p11, p12, p20, p21, p22;

   FLA_Obj LTL, LTR, LBL, LBR;
   FLA_Obj L00, L01, L02, L10, L11, L12, L20, L21, L22;

   // Start with empty "done" partitions in the top-left corner.
   FLA_Part_2x2( A, &ATL, &ATR, &ABL, &ABR, 0, 0, FLA_TL );
   FLA_Part_2x2( p, &pTL, &pTR, &pBL, &pBR, 0, 0, FLA_TL );
   FLA_Part_2x2( L, &LTL, &LTR, &LBL, &LBR, 0, 0, FLA_TL );

   while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
           FLA_Obj_width ( ATL ) < FLA_Obj_width ( A ) )
   {
      // Expose one block per operand.
      FLA_Repart_2x2_to_3x3( ATL, ATR, &A00, &A01, &A02,
                                       &A10, &A11, &A12,
                             ABL, ABR, &A20, &A21, &A22,
                             1, 1, FLA_BR );
      FLA_Repart_2x2_to_3x3( pTL, pTR, &p00, &p01, &p02,
                                       &p10, &p11, &p12,
                             pBL, pBR, &p20, &p21, &p22,
                             1, 1, FLA_BR );
      FLA_Repart_2x2_to_3x3( LTL, LTR, &L00, &L01, &L02,
                                       &L10, &L11, &L12,
                             LBL, LBR, &L20, &L21, &L22,
                             1, 1, FLA_BR );

      // ---- block update -------------------------------------------------

      if ( !FLASH_Queue_get_enabled( ) )
      {
         // Queue disabled: run the leaf LU task right away.
         FLA_LU_piv_task( *FLASH_OBJ_PTR_AT( A11 ),
                          *FLASH_OBJ_PTR_AT( p11 ),
                          FLA_Cntl_sub_lu( cntl ) );
      }
      else
      {
         // Queue enabled: enqueue the same leaf LU task instead.
         ENQUEUE_FLASH_LU_piv( *FLASH_OBJ_PTR_AT( A11 ),
                               *FLASH_OBJ_PTR_AT( p11 ),
                               FLA_Cntl_sub_lu( cntl ) );
      }

      FLASH_Trsm_piv( A11, A12, p11, FLA_Cntl_sub_trsm1( cntl ) );

      FLASH_SA_LU( A11, A12, A21, A22, p21, L21, nb_alg, cntl );

      // ---- advance ------------------------------------------------------

      FLA_Cont_with_3x3_to_2x2( &ATL, &ATR, A00, A01, A02,
                                            A10, A11, A12,
                                &ABL, &ABR, A20, A21, A22,
                                FLA_TL );
      FLA_Cont_with_3x3_to_2x2( &pTL, &pTR, p00, p01, p02,
                                            p10, p11, p12,
                                &pBL, &pBR, p20, p21, p22,
                                FLA_TL );
      FLA_Cont_with_3x3_to_2x2( &LTL, &LTR, L00, L01, L02,
                                            L10, L11, L12,
                                &LBL, &LBR, L20, L21, L22,
                                FLA_TL );
   }

   return FLA_SUCCESS;
}
FLA_Error FLASH_LU_incpiv_var2 ( FLA_Obj  A,
FLA_Obj  p,
FLA_Obj  L,
FLA_Obj  U,
dim_t  nb_alg,
fla_lu_t *cntl 
)

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_LU_piv_copy_task(), FLA_Obj_length(), FLA_Obj_width(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), FLASH_Queue_get_enabled(), FLASH_SA_LU(), and FLASH_Trsm_piv().

Referenced by FLASH_LU_incpiv_opt1().

{
   // Walks A, p and L along their diagonals (top-left to bottom-right) and
   // U from left to right, exposing one new row/column of each object per
   // iteration (the `1, 1` / `1` repartition arguments). In every iteration:
   //   1. the LU_piv_copy operation is applied to ( A11, p11, U1 ) -- it is
   //      enqueued when FLASH_Queue_get_enabled() is true, otherwise run
   //      immediately as a task;
   //   2. FLASH_Trsm_piv() is called with U1 (not A11), A12 and p11;
   //   3. FLASH_SA_LU() is called with A11, A12, A21, A22, p21 and L21.
   // Return values of the calls are not inspected; the function always
   // returns FLA_SUCCESS.
   FLA_Obj ATL,   ATR,      A00, A01, A02,
           ABL,   ABR,      A10, A11, A12,
                            A20, A21, A22;

   FLA_Obj pTL,   pTR,      p00, p01, p02,
           pBL,   pBR,      p10, p11, p12,
                            p20, p21, p22;

   FLA_Obj LTL,   LTR,      L00, L01, L02,
           LBL,   LBR,      L10, L11, L12,
                            L20, L21, L22;

   FLA_Obj UL,    UR,       U0,  U1,  U2;

   // Initial partitionings: the top-left quadrants (and UL) start empty.
   FLA_Part_2x2( A,    &ATL, &ATR,
                       &ABL, &ABR,     0, 0, FLA_TL );

   FLA_Part_2x2( p,    &pTL, &pTR,
                       &pBL, &pBR,     0, 0, FLA_TL );

   FLA_Part_2x2( L,    &LTL, &LTR,
                       &LBL, &LBR,     0, 0, FLA_TL );

   FLA_Part_1x2( U,    &UL,  &UR,      0, FLA_LEFT );

   // Stop as soon as ATL spans either the full length or the full width of A.
   while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
           FLA_Obj_width ( ATL ) < FLA_Obj_width ( A ) )
   {
      // Expose the next 1x1 piece (A11, p11, L11) from the bottom-right
      // quadrants, and the next piece U1 from the right part of U.
      FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,
                          /* ************* */   /* ******************** */
                                                  &A10, /**/ &A11, &A12,
                             ABL, /**/ ABR,       &A20, /**/ &A21, &A22,
                             1, 1, FLA_BR );

      FLA_Repart_2x2_to_3x3( pTL, /**/ pTR,       &p00, /**/ &p01, &p02,
                          /* ************* */   /* ******************** */
                                                  &p10, /**/ &p11, &p12,
                             pBL, /**/ pBR,       &p20, /**/ &p21, &p22,
                             1, 1, FLA_BR );

      FLA_Repart_2x2_to_3x3( LTL, /**/ LTR,       &L00, /**/ &L01, &L02,
                          /* ************* */   /* ******************** */
                                                  &L10, /**/ &L11, &L12,
                             LBL, /**/ LBR,       &L20, /**/ &L21, &L22,
                             1, 1, FLA_BR );

      FLA_Repart_1x2_to_1x3( UL,  /**/ UR,        &U0,  /**/ &U1,  &U2,
                             1, FLA_RIGHT );

      /*------------------------------------------------------------*/

      // Both branches receive the same operands; only the dispatch differs.
      // NOTE(review): FLASH_OBJ_PTR_AT presumably yields the child object
      // stored at the given 1x1 view -- confirm against its definition.
      if ( FLASH_Queue_get_enabled( ) )
      {
         // Enqueue
         ENQUEUE_FLASH_LU_piv_copy( *FLASH_OBJ_PTR_AT( A11 ),
                                    *FLASH_OBJ_PTR_AT( p11 ),
                                    *FLASH_OBJ_PTR_AT( U1 ),
                                    FLA_Cntl_sub_lu( cntl ) );
      }
      else
      {
         // Execute leaf
         FLA_LU_piv_copy_task( *FLASH_OBJ_PTR_AT( A11 ), 
                               *FLASH_OBJ_PTR_AT( p11 ),
                               *FLASH_OBJ_PTR_AT( U1 ),
                               FLA_Cntl_sub_lu( cntl ) );
      }

      // U1 (rather than A11) is passed as the first operand here, together
      // with the pivots p11 and the row panel A12.
      FLASH_Trsm_piv( U1, A12, p11,
                      FLA_Cntl_sub_trsm1( cntl ) );

      // The full cntl (not a sub-node) is forwarded to FLASH_SA_LU.
      FLASH_SA_LU( A11, A12, 
                   A21, A22, p21, L21, nb_alg, cntl );

      /*------------------------------------------------------------*/

      // Fold the pieces just processed back into the top-left quadrants
      // (and into UL) so the loop makes progress.
      FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, A01, /**/ A02,
                                                       A10, A11, /**/ A12,
                             /* ************** */  /* ****************** */
                                &ABL, /**/ &ABR,       A20, A21, /**/ A22,
                                FLA_TL );

      FLA_Cont_with_3x3_to_2x2( &pTL, /**/ &pTR,       p00, p01, /**/ p02,
                                                       p10, p11, /**/ p12,
                             /* ************** */  /* ****************** */
                                &pBL, /**/ &pBR,       p20, p21, /**/ p22,
                                FLA_TL );

      FLA_Cont_with_3x3_to_2x2( &LTL, /**/ &LTR,       L00, L01, /**/ L02,
                                                       L10, L11, /**/ L12,
                             /* ************** */  /* ****************** */
                                &LBL, /**/ &LBR,       L20, L21, /**/ L22,
                                FLA_TL );

      FLA_Cont_with_1x3_to_1x2( &UL,  /**/ &UR,        U0,  U1,  /**/ U2,
                                FLA_LEFT );
   }
   
   return FLA_SUCCESS;
}
FLA_Error FLASH_SA_FS ( FLA_Obj  L,
FLA_Obj  D,
FLA_Obj  p,
FLA_Obj  C,
FLA_Obj  E,
dim_t  nb_alg,
fla_gemm_t *  cntl 
)

References FLA_Cont_with_1x3_to_1x2(), FLA_Obj_width(), FLA_Part_1x2(), FLA_Repart_1x2_to_1x3(), FLA_SA_FS_task(), and FLASH_Queue_get_enabled().

Referenced by FLASH_SA_LU().

{
   // Sweeps C and E from left to right in lockstep, exposing one column
   // piece (C1, E1) per iteration, and applies the SA_FS operation to
   // ( L, D, p, C1, E1 ). The operation is enqueued when
   // FLASH_Queue_get_enabled() is true and executed directly otherwise.
   // L, D and p are passed unpartitioned on every iteration.
   // Always returns FLA_SUCCESS.
   FLA_Obj CL,    CR,       C0,  C1,  C2;

   FLA_Obj EL,    ER,       E0,  E1,  E2;

   // The left parts start empty.
   FLA_Part_1x2( C,    &CL,  &CR,      0, FLA_LEFT );

   FLA_Part_1x2( E,    &EL,  &ER,      0, FLA_LEFT );

   // Only C's width drives the loop; E is advanced alongside it.
   // NOTE(review): this assumes E has at least as many columns as C -- confirm
   // with the caller.
   while ( FLA_Obj_width( CL ) < FLA_Obj_width( C ) )
   {
      FLA_Repart_1x2_to_1x3( CL,  /**/ CR,        &C0, /**/ &C1, &C2,
                             1, FLA_RIGHT );

      FLA_Repart_1x2_to_1x3( EL,  /**/ ER,        &E0, /**/ &E1, &E2,
                             1, FLA_RIGHT );

      /*------------------------------------------------------------*/

      // Both branches receive identical operands; only the dispatch differs.
      if ( FLASH_Queue_get_enabled( ) )
      {
         // Enqueue
         ENQUEUE_FLASH_SA_FS( *FLASH_OBJ_PTR_AT( L ),
                              *FLASH_OBJ_PTR_AT( D ),
                              *FLASH_OBJ_PTR_AT( p ),
                              *FLASH_OBJ_PTR_AT( C1 ),
                              *FLASH_OBJ_PTR_AT( E1 ),
                              nb_alg,
                              FLA_Cntl_sub_gemm( cntl ) );
      }
      else
      {
         // Execute leaf
         FLA_SA_FS_task( *FLASH_OBJ_PTR_AT( L ),
                         *FLASH_OBJ_PTR_AT( D ),
                         *FLASH_OBJ_PTR_AT( p ),
                         *FLASH_OBJ_PTR_AT( C1 ),
                         *FLASH_OBJ_PTR_AT( E1 ),
                         nb_alg,
                         FLA_Cntl_sub_gemm( cntl ) );
      }
      
      /*------------------------------------------------------------*/

      // Move the processed column pieces into the left parts.
      FLA_Cont_with_1x3_to_1x2( &CL,  /**/ &CR,        C0, C1, /**/ C2,
                                FLA_LEFT );

      FLA_Cont_with_1x3_to_1x2( &EL,  /**/ &ER,        E0, E1, /**/ E2,
                                FLA_LEFT );
   }
   
   return FLA_SUCCESS;
}
FLA_Error FLASH_SA_LU ( FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  D,
FLA_Obj  E,
FLA_Obj  p,
FLA_Obj  L,
dim_t  nb_alg,
fla_lu_t *  cntl 
)

References FLA_Cont_with_3x1_to_2x1(), FLA_Obj_length(), FLA_Part_2x1(), FLA_Repart_2x1_to_3x1(), FLA_SA_LU_task(), FLASH_Queue_get_enabled(), and FLASH_SA_FS().

Referenced by FLASH_LU_incpiv_var1(), and FLASH_LU_incpiv_var2().

{
   // Sweeps D, E, p and L from top to bottom in lockstep, exposing one row
   // piece (D1, E1, p1, L1) per iteration. In every iteration:
   //   1. the SA_LU operation is applied to ( B, D1, p1, L1 ) -- enqueued
   //      when FLASH_Queue_get_enabled() is true, otherwise run directly;
   //   2. FLASH_SA_FS() is called with L1, D1, p1, the whole of C, and E1.
   // B and C are passed unpartitioned on every iteration.
   // Always returns FLA_SUCCESS.
   FLA_Obj DT,              D0,
           DB,              D1,
                            D2;

   FLA_Obj ET,              E0,
           EB,              E1,
                            E2;

   FLA_Obj pT,              p0,
           pB,              p1,
                            p2;

   FLA_Obj LT,              L0,
           LB,              L1,
                            L2;

   // The top parts start empty.
   FLA_Part_2x1( D,    &DT,
                       &DB,            0, FLA_TOP );

   FLA_Part_2x1( E,    &ET,
                       &EB,            0, FLA_TOP );

   FLA_Part_2x1( p,    &pT,
                       &pB,            0, FLA_TOP );

   FLA_Part_2x1( L,    &LT,
                       &LB,            0, FLA_TOP );

   // Only D's length drives the loop; E, p and L are advanced alongside it.
   while ( FLA_Obj_length( DT ) < FLA_Obj_length( D ) )
   {
      FLA_Repart_2x1_to_3x1( DT,                &D0,
                          /* ** */            /* ** */
                                                &D1,
                             DB,                &D2,        1, FLA_BOTTOM );

      FLA_Repart_2x1_to_3x1( ET,                &E0,
                          /* ** */            /* ** */
                                                &E1,
                             EB,                &E2,        1, FLA_BOTTOM );

      FLA_Repart_2x1_to_3x1( pT,                &p0,
                          /* ** */            /* ** */
                                                &p1,
                             pB,                &p2,        1, FLA_BOTTOM );

      FLA_Repart_2x1_to_3x1( LT,                &L0,
                          /* ** */            /* ** */
                                                &L1,
                             LB,                &L2,        1, FLA_BOTTOM );

      /*------------------------------------------------------------*/

      // Both branches receive identical operands; only the dispatch differs.
      if ( FLASH_Queue_get_enabled( ) )
      {
         // Enqueue
         ENQUEUE_FLASH_SA_LU( *FLASH_OBJ_PTR_AT( B ),
                              *FLASH_OBJ_PTR_AT( D1 ),
                              *FLASH_OBJ_PTR_AT( p1 ),
                              *FLASH_OBJ_PTR_AT( L1 ),
                              nb_alg,
                              FLA_Cntl_sub_lu( cntl ) );
      }
      else
      {
         // Execute leaf
         FLA_SA_LU_task( *FLASH_OBJ_PTR_AT( B ),
                         *FLASH_OBJ_PTR_AT( D1 ),
                         *FLASH_OBJ_PTR_AT( p1 ),
                         *FLASH_OBJ_PTR_AT( L1 ),
                         nb_alg,
                         FLA_Cntl_sub_lu( cntl ) );
      }
      
      // Follow-up sweep over the columns of C and E1, using the L1/D1/p1
      // pieces from this iteration and the gemm1 sub-node of cntl.
      FLASH_SA_FS( L1,
                   D1, p1, C,
                           E1, nb_alg, FLA_Cntl_sub_gemm1( cntl ) );

      /*------------------------------------------------------------*/

      // Move the processed row pieces into the top parts.
      FLA_Cont_with_3x1_to_2x1( &DT,                D0,
                                                    D1,
                              /* ** */           /* ** */
                                &DB,                D2,     FLA_TOP );

      FLA_Cont_with_3x1_to_2x1( &ET,                E0,
                                                    E1,
                              /* ** */           /* ** */
                                &EB,                E2,     FLA_TOP );

      FLA_Cont_with_3x1_to_2x1( &pT,                p0,
                                                    p1,
                              /* ** */           /* ** */
                                &pB,                p2,     FLA_TOP );

      FLA_Cont_with_3x1_to_2x1( &LT,                L0,
                                                    L1,
                              /* ** */           /* ** */
                                &LB,                L2,     FLA_TOP );
   }
   
   return FLA_SUCCESS;
}
FLA_Error FLASH_Trsm_piv ( FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  p,
fla_trsm_t *  cntl 
)

References FLA_Cont_with_1x3_to_1x2(), FLA_Obj_width(), FLA_Part_1x2(), FLA_Repart_1x2_to_1x3(), FLA_Trsm_piv_task(), and FLASH_Queue_get_enabled().

Referenced by FLASH_LU_incpiv_var1(), and FLASH_LU_incpiv_var2().

{
   // Sweeps B from left to right, exposing one column piece B1 per
   // iteration, and applies the Trsm_piv operation to ( A, B1, p ).
   // The operation is enqueued when FLASH_Queue_get_enabled() is true and
   // executed directly otherwise. A and p are passed unpartitioned on every
   // iteration. Always returns FLA_SUCCESS.
   FLA_Obj BL,    BR,       B0,  B1,  B2;

   // The left part starts empty.
   FLA_Part_1x2( B,    &BL,  &BR,      0, FLA_LEFT );

   while ( FLA_Obj_width( BL ) < FLA_Obj_width( B ) )
   {
      FLA_Repart_1x2_to_1x3( BL,  /**/ BR,        &B0, /**/ &B1, &B2,
                             1, FLA_RIGHT );

      /*------------------------------------------------------------*/

      // Both branches receive identical operands; only the dispatch differs.
      if ( FLASH_Queue_get_enabled( ) )
      {
         // Enqueue
         ENQUEUE_FLASH_Trsm_piv( *FLASH_OBJ_PTR_AT( A ),
                                 *FLASH_OBJ_PTR_AT( B1 ),
                                 *FLASH_OBJ_PTR_AT( p ),
                                 FLA_Cntl_sub_trsm( cntl ) );
      }
      else
      {
         // Execute leaf
         FLA_Trsm_piv_task( *FLASH_OBJ_PTR_AT( A ),
                            *FLASH_OBJ_PTR_AT( B1 ),
                            *FLASH_OBJ_PTR_AT( p ),
                            FLA_Cntl_sub_trsm( cntl ) );
      }

      /*------------------------------------------------------------*/

      // Move the processed column piece into the left part.
      FLA_Cont_with_1x3_to_1x2( &BL,  /**/ &BR,        B0, B1, /**/ B2,
                                FLA_LEFT );
   }
   
   return FLA_SUCCESS;
}