libflame  revision_anchor
Functions
FLA_Apply_H2_UT_l.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_Apply_H2_UT_l_unb_var1 (FLA_Obj tau, FLA_Obj u2, FLA_Obj a1t, FLA_Obj A2)
FLA_Error FLA_Apply_H2_UT_l_opt_var1 (FLA_Obj tau, FLA_Obj u2, FLA_Obj a1t, FLA_Obj A2)
FLA_Error FLA_Apply_H2_UT_l_ops_var1 (int m_u2_A2, int n_a1t, float *tau, float *u2, int inc_u2, float *a1t, int inc_a1t, float *A2, int rs_A2, int cs_A2)
FLA_Error FLA_Apply_H2_UT_l_opd_var1 (int m_u2_A2, int n_a1t, double *tau, double *u2, int inc_u2, double *a1t, int inc_a1t, double *A2, int rs_A2, int cs_A2)
FLA_Error FLA_Apply_H2_UT_l_opc_var1 (int m_u2_A2, int n_a1t, scomplex *tau, scomplex *u2, int inc_u2, scomplex *a1t, int inc_a1t, scomplex *A2, int rs_A2, int cs_A2)
FLA_Error FLA_Apply_H2_UT_l_opz_var1 (int m_u2_A2, int n_a1t, dcomplex *tau, dcomplex *u2, int inc_u2, dcomplex *a1t, int inc_a1t, dcomplex *A2, int rs_A2, int cs_A2)

Function Documentation

FLA_Error FLA_Apply_H2_UT_l_opc_var1 ( int  m_u2_A2,
int  n_a1t,
scomplex *  tau,
scomplex *  u2,
int  inc_u2,
scomplex *  a1t,
int  inc_a1t,
scomplex *  A2,
int  rs_A2,
int  cs_A2 
)

References bli_caxpyv(), bli_ccopyv(), bli_cgemv(), bli_cger(), bli_cinvscalv(), BLIS_CONJUGATE, BLIS_NO_CONJUGATE, BLIS_TRANSPOSE, FLA_free(), FLA_malloc(), FLA_MINUS_ONE, and FLA_ONE.

Referenced by FLA_Apply_H2_UT_l_opt_var1(), FLA_Bidiag_UT_u_step_opc_var1(), FLA_CAQR2_UT_opc_var1(), FLA_Hess_UT_step_opc_var1(), FLA_QR2_UT_opc_var1(), FLA_QR_UT_form_Q_opc_var1(), FLA_QR_UT_opc_var1(), and FLA_QR_UT_opc_var2().

{
  // scomplex kernel operating on raw pointers and strides. Following the
  // step comments below, it builds
  //   w1t = ( a1t + u2' * A2 ) / tau
  // in a temporary buffer and then updates, in place,
  //   a1t = a1t - w1t
  //   A2  = A2  - u2 * w1t
  // a1t is read/written as n_a1t elements with stride inc_a1t, u2 is passed
  // with stride inc_u2, and A2 is passed as m_u2_A2 x n_a1t with row/column
  // strides rs_A2/cs_A2. Always returns FLA_SUCCESS.
  //
  // Each commented-out FLA_* line records the FLA_Obj-based call that the
  // typed statement following it stands in for.

  // Library constants 1 and -1 viewed as scomplex; passed as the scalar
  // arguments of the gemv/axpy/ger calls below.
  scomplex* one_p       = FLA_COMPLEX_PTR( FLA_ONE );
  scomplex* minus_one_p = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
  int       inc_w1t;

  // FLA_Obj w1t;
  scomplex* w1t;

  // Nothing to update when a1t is empty; returns before any allocation.
  // if ( FLA_Obj_has_zero_dim( a1t ) ) return FLA_SUCCESS;
  if ( n_a1t == 0 ) return FLA_SUCCESS;

  // Temporary vector with n_a1t elements of a1t's element type, stored with
  // unit stride.
  // NOTE(review): the pointer is used without a NULL check in this function;
  // confirm FLA_malloc handles allocation failure itself.
  // FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, a1t, &w1t );
  w1t = ( scomplex* ) FLA_malloc( n_a1t * sizeof( *a1t ) );
  inc_w1t = 1;

  // // w1t = a1t;
  // FLA_Copy_external( a1t, w1t );
  bli_ccopyv( BLIS_NO_CONJUGATE,
              n_a1t,
              a1t, inc_a1t, 
              w1t, inc_w1t ); 

  // This step must run before the rank-1 update at the end, because it reads
  // A2 and that update overwrites A2.
  // // w1t = w1t + u2' * A2;
  // // w1t = w1t + A2^T * conj(u2);
  // FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A2, u2, FLA_ONE, w1t );
  bli_cgemv( BLIS_TRANSPOSE,
             BLIS_CONJUGATE,
             m_u2_A2,
             n_a1t,
             one_p,
             A2, rs_A2, cs_A2,
             u2, inc_u2,
             one_p,
             w1t, inc_w1t );

  // tau is passed by pointer and is not checked against zero here.
  // // w1t = w1t / tau;
  // FLA_Inv_scalc_external( FLA_NO_CONJUGATE, tau, w1t );
  bli_cinvscalv( BLIS_NO_CONJUGATE,
                 n_a1t,
                 tau,
                 w1t, inc_w1t );

  // // a1t = - w1t + a1t;
  // FLA_Axpy_external( FLA_MINUS_ONE, w1t, a1t );
  bli_caxpyv( BLIS_NO_CONJUGATE,
              n_a1t,
              minus_one_p,
              w1t, inc_w1t,
              a1t, inc_a1t );

  // // A2 = - u2 * w1t + A2;
  // FLA_Ger_external( FLA_MINUS_ONE, u2, w1t, A2 );
  bli_cger( BLIS_NO_CONJUGATE,
            BLIS_NO_CONJUGATE,
            m_u2_A2,
            n_a1t,
            minus_one_p,
            u2, inc_u2,
            w1t, inc_w1t,
            A2, rs_A2, cs_A2 );

  // Release the temporary; w1t is not used after this point.
  // FLA_Obj_free( &w1t );
  FLA_free( w1t );

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_H2_UT_l_opd_var1 ( int  m_u2_A2,
int  n_a1t,
double *  tau,
double *  u2,
int  inc_u2,
double *  a1t,
int  inc_a1t,
double *  A2,
int  rs_A2,
int  cs_A2 
)

References bli_daxpyv(), bli_dcopyv(), bli_dgemv(), bli_dger(), bli_dinvscalv(), BLIS_CONJUGATE, BLIS_NO_CONJUGATE, BLIS_TRANSPOSE, FLA_free(), FLA_malloc(), FLA_MINUS_ONE, and FLA_ONE.

Referenced by FLA_Apply_H2_UT_l_opt_var1(), FLA_Bidiag_UT_u_step_opd_var1(), FLA_CAQR2_UT_opd_var1(), FLA_Hess_UT_step_opd_var1(), FLA_QR2_UT_opd_var1(), FLA_QR_UT_form_Q_opd_var1(), FLA_QR_UT_opd_var1(), and FLA_QR_UT_opd_var2().

{
  // double kernel operating on raw pointers and strides. Following the step
  // comments below, it builds
  //   w1t = ( a1t + u2' * A2 ) / tau
  // in a temporary buffer and then updates, in place,
  //   a1t = a1t - w1t
  //   A2  = A2  - u2 * w1t
  // a1t is read/written as n_a1t elements with stride inc_a1t, u2 is passed
  // with stride inc_u2, and A2 is passed as m_u2_A2 x n_a1t with row/column
  // strides rs_A2/cs_A2. Always returns FLA_SUCCESS.
  //
  // Each commented-out FLA_* line records the FLA_Obj-based call that the
  // typed statement following it stands in for.

  // Library constants 1 and -1 viewed as double; passed as the scalar
  // arguments of the gemv/axpy/ger calls below.
  double*   one_p       = FLA_DOUBLE_PTR( FLA_ONE );
  double*   minus_one_p = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
  int       inc_w1t;

  // FLA_Obj w1t;
  double*   w1t;

  // Nothing to update when a1t is empty; returns before any allocation.
  // if ( FLA_Obj_has_zero_dim( a1t ) ) return FLA_SUCCESS;
  if ( n_a1t == 0 ) return FLA_SUCCESS;

  // Temporary vector with n_a1t elements of a1t's element type, stored with
  // unit stride.
  // NOTE(review): the pointer is used without a NULL check in this function;
  // confirm FLA_malloc handles allocation failure itself.
  // FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, a1t, &w1t );
  w1t = ( double* ) FLA_malloc( n_a1t * sizeof( *a1t ) );
  inc_w1t = 1;

  // // w1t = a1t;
  // FLA_Copy_external( a1t, w1t );
  bli_dcopyv( BLIS_NO_CONJUGATE,
              n_a1t,
              a1t, inc_a1t, 
              w1t, inc_w1t ); 

  // This step must run before the rank-1 update at the end, because it reads
  // A2 and that update overwrites A2.
  // NOTE(review): BLIS_CONJUGATE is passed although the data is real;
  // presumably it has no effect for this type — confirm.
  // // w1t = w1t + u2' * A2;
  // // w1t = w1t + A2^T * conj(u2);
  // FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A2, u2, FLA_ONE, w1t );
  bli_dgemv( BLIS_TRANSPOSE,
             BLIS_CONJUGATE,
             m_u2_A2,
             n_a1t,
             one_p,
             A2, rs_A2, cs_A2,
             u2, inc_u2,
             one_p,
             w1t, inc_w1t );

  // tau is passed by pointer and is not checked against zero here.
  // // w1t = w1t / tau;
  // FLA_Inv_scalc_external( FLA_NO_CONJUGATE, tau, w1t );
  bli_dinvscalv( BLIS_NO_CONJUGATE,
                 n_a1t,
                 tau,
                 w1t, inc_w1t );

  // // a1t = - w1t + a1t;
  // FLA_Axpy_external( FLA_MINUS_ONE, w1t, a1t );
  bli_daxpyv( BLIS_NO_CONJUGATE,
              n_a1t,
              minus_one_p,
              w1t, inc_w1t,
              a1t, inc_a1t );

  // // A2 = - u2 * w1t + A2;
  // FLA_Ger_external( FLA_MINUS_ONE, u2, w1t, A2 );
  bli_dger( BLIS_NO_CONJUGATE,
            BLIS_NO_CONJUGATE,
            m_u2_A2,
            n_a1t,
            minus_one_p,
            u2, inc_u2,
            w1t, inc_w1t,
            A2, rs_A2, cs_A2 );

  // Release the temporary; w1t is not used after this point.
  // FLA_Obj_free( &w1t );
  FLA_free( w1t );

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_H2_UT_l_ops_var1 ( int  m_u2_A2,
int  n_a1t,
float *  tau,
float *  u2,
int  inc_u2,
float *  a1t,
int  inc_a1t,
float *  A2,
int  rs_A2,
int  cs_A2 
)

References bli_saxpyv(), bli_scopyv(), bli_sgemv(), bli_sger(), bli_sinvscalv(), BLIS_CONJUGATE, BLIS_NO_CONJUGATE, BLIS_TRANSPOSE, FLA_free(), FLA_malloc(), FLA_MINUS_ONE, and FLA_ONE.

Referenced by FLA_Apply_H2_UT_l_opt_var1(), FLA_Bidiag_UT_u_step_ops_var1(), FLA_CAQR2_UT_ops_var1(), FLA_Hess_UT_step_ops_var1(), FLA_QR2_UT_ops_var1(), FLA_QR_UT_form_Q_ops_var1(), FLA_QR_UT_ops_var1(), and FLA_QR_UT_ops_var2().

{
  // float kernel operating on raw pointers and strides. Following the step
  // comments below, it builds
  //   w1t = ( a1t + u2' * A2 ) / tau
  // in a temporary buffer and then updates, in place,
  //   a1t = a1t - w1t
  //   A2  = A2  - u2 * w1t
  // a1t is read/written as n_a1t elements with stride inc_a1t, u2 is passed
  // with stride inc_u2, and A2 is passed as m_u2_A2 x n_a1t with row/column
  // strides rs_A2/cs_A2. Always returns FLA_SUCCESS.
  //
  // Each commented-out FLA_* line records the FLA_Obj-based call that the
  // typed statement following it stands in for.

  // Library constants 1 and -1 viewed as float; passed as the scalar
  // arguments of the gemv/axpy/ger calls below.
  float*    one_p       = FLA_FLOAT_PTR( FLA_ONE );
  float*    minus_one_p = FLA_FLOAT_PTR( FLA_MINUS_ONE );
  int       inc_w1t;

  // FLA_Obj w1t;
  float*    w1t;

  // Nothing to update when a1t is empty; returns before any allocation.
  // if ( FLA_Obj_has_zero_dim( a1t ) ) return FLA_SUCCESS;
  if ( n_a1t == 0 ) return FLA_SUCCESS;

  // Temporary vector with n_a1t elements of a1t's element type, stored with
  // unit stride.
  // NOTE(review): the pointer is used without a NULL check in this function;
  // confirm FLA_malloc handles allocation failure itself.
  // FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, a1t, &w1t );
  w1t = ( float* ) FLA_malloc( n_a1t * sizeof( *a1t ) );
  inc_w1t = 1;

  // // w1t = a1t;
  // FLA_Copy_external( a1t, w1t );
  bli_scopyv( BLIS_NO_CONJUGATE,
              n_a1t,
              a1t, inc_a1t, 
              w1t, inc_w1t ); 

  // This step must run before the rank-1 update at the end, because it reads
  // A2 and that update overwrites A2.
  // NOTE(review): BLIS_CONJUGATE is passed although the data is real;
  // presumably it has no effect for this type — confirm.
  // // w1t = w1t + u2' * A2;
  // // w1t = w1t + A2^T * conj(u2);
  // FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A2, u2, FLA_ONE, w1t );
  bli_sgemv( BLIS_TRANSPOSE,
             BLIS_CONJUGATE,
             m_u2_A2,
             n_a1t,
             one_p,
             A2, rs_A2, cs_A2,
             u2, inc_u2,
             one_p,
             w1t, inc_w1t );

  // tau is passed by pointer and is not checked against zero here.
  // // w1t = w1t / tau;
  // FLA_Inv_scalc_external( FLA_NO_CONJUGATE, tau, w1t );
  bli_sinvscalv( BLIS_NO_CONJUGATE,
                 n_a1t,
                 tau,
                 w1t, inc_w1t );

  // // a1t = - w1t + a1t;
  // FLA_Axpy_external( FLA_MINUS_ONE, w1t, a1t );
  bli_saxpyv( BLIS_NO_CONJUGATE,
              n_a1t,
              minus_one_p,
              w1t, inc_w1t,
              a1t, inc_a1t );

  // // A2 = - u2 * w1t + A2;
  // FLA_Ger_external( FLA_MINUS_ONE, u2, w1t, A2 );
  bli_sger( BLIS_NO_CONJUGATE,
            BLIS_NO_CONJUGATE,
            m_u2_A2,
            n_a1t,
            minus_one_p,
            u2, inc_u2,
            w1t, inc_w1t,
            A2, rs_A2, cs_A2 );

  // Release the temporary; w1t is not used after this point.
  // FLA_Obj_free( &w1t );
  FLA_free( w1t );

  return FLA_SUCCESS;
}

FLA_Error FLA_Apply_H2_UT_l_opt_var1 ( FLA_Obj  tau,
FLA_Obj  u2,
FLA_Obj  a1t,
FLA_Obj  A2 
)

References FLA_Apply_H2_UT_l_opc_var1(), FLA_Apply_H2_UT_l_opd_var1(), FLA_Apply_H2_UT_l_ops_var1(), FLA_Apply_H2_UT_l_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

Referenced by FLA_Apply_H2_UT_internal().

{
  // Dispatcher from FLA_Obj arguments to the typed kernels: it reads the
  // dimensions, vector increments and matrix strides from the objects, then
  // forwards raw typed pointers to the ops/opd/opc/opz kernel selected by the
  // datatype of A2. Always returns FLA_SUCCESS; the kernels' return values
  // are not inspected.
  FLA_Datatype datatype;
  int          m_u2_A2;
  int          n_a1t;
  int          inc_u2;
  int          inc_a1t;
  int          rs_A2;
  int          cs_A2;

  // Nothing to do when a1t has a zero dimension.
  if ( FLA_Obj_has_zero_dim( a1t ) ) return FLA_SUCCESS;

  // Only A2's datatype is queried; tau, u2 and a1t are read through the same
  // typed-pointer macro as A2 in each case below.
  // NOTE(review): this assumes all four objects share A2's datatype —
  // confirm callers guarantee it.
  datatype = FLA_Obj_datatype( A2 );

  // m_u2_A2 comes from A2's length and n_a1t from a1t's width; A2's own width
  // and u2's length are not queried here.
  m_u2_A2  = FLA_Obj_length( A2 );
  n_a1t    = FLA_Obj_width( a1t );
  inc_u2   = FLA_Obj_vector_inc( u2 );
  inc_a1t  = FLA_Obj_vector_inc( a1t );
  rs_A2    = FLA_Obj_row_stride( A2 );
  cs_A2    = FLA_Obj_col_stride( A2 );

  // There is no default case: for any datatype other than the four handled
  // below, nothing is computed and FLA_SUCCESS is still returned.
  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* tau_p = ( float* ) FLA_FLOAT_PTR( tau );
      float* u2_p  = ( float* ) FLA_FLOAT_PTR( u2 );
      float* a1t_p = ( float* ) FLA_FLOAT_PTR( a1t );
      float* A2_p  = ( float* ) FLA_FLOAT_PTR( A2 );

      FLA_Apply_H2_UT_l_ops_var1( m_u2_A2, n_a1t,
                                  tau_p,
                                  u2_p, inc_u2,
                                  a1t_p, inc_a1t,
                                  A2_p, rs_A2, cs_A2 );
      break;
    }

    case FLA_DOUBLE:
    {
      double* tau_p = ( double* ) FLA_DOUBLE_PTR( tau );
      double* u2_p  = ( double* ) FLA_DOUBLE_PTR( u2 );
      double* a1t_p = ( double* ) FLA_DOUBLE_PTR( a1t );
      double* A2_p  = ( double* ) FLA_DOUBLE_PTR( A2 );

      FLA_Apply_H2_UT_l_opd_var1( m_u2_A2, n_a1t,
                                  tau_p,
                                  u2_p, inc_u2,
                                  a1t_p, inc_a1t,
                                  A2_p, rs_A2, cs_A2 );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* tau_p = ( scomplex* ) FLA_COMPLEX_PTR( tau );
      scomplex* u2_p  = ( scomplex* ) FLA_COMPLEX_PTR( u2 );
      scomplex* a1t_p = ( scomplex* ) FLA_COMPLEX_PTR( a1t );
      scomplex* A2_p  = ( scomplex* ) FLA_COMPLEX_PTR( A2 );

      FLA_Apply_H2_UT_l_opc_var1( m_u2_A2, n_a1t,
                                  tau_p,
                                  u2_p, inc_u2,
                                  a1t_p, inc_a1t,
                                  A2_p, rs_A2, cs_A2 );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* tau_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( tau );
      dcomplex* u2_p  = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( u2 );
      dcomplex* a1t_p = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( a1t );
      dcomplex* A2_p  = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A2 );

      FLA_Apply_H2_UT_l_opz_var1( m_u2_A2, n_a1t,
                                  tau_p,
                                  u2_p, inc_u2,
                                  a1t_p, inc_a1t,
                                  A2_p, rs_A2, cs_A2 );
      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_H2_UT_l_opz_var1 ( int  m_u2_A2,
int  n_a1t,
dcomplex *  tau,
dcomplex *  u2,
int  inc_u2,
dcomplex *  a1t,
int  inc_a1t,
dcomplex *  A2,
int  rs_A2,
int  cs_A2 
)

References bli_zaxpyv(), bli_zcopyv(), bli_zgemv(), bli_zger(), bli_zinvscalv(), BLIS_CONJUGATE, BLIS_NO_CONJUGATE, BLIS_TRANSPOSE, FLA_free(), FLA_malloc(), FLA_MINUS_ONE, and FLA_ONE.

Referenced by FLA_Apply_H2_UT_l_opt_var1(), FLA_Bidiag_UT_u_step_opz_var1(), FLA_CAQR2_UT_opz_var1(), FLA_Hess_UT_step_opz_var1(), FLA_QR2_UT_opz_var1(), FLA_QR_UT_form_Q_opz_var1(), FLA_QR_UT_opz_var1(), and FLA_QR_UT_opz_var2().

{
  // dcomplex kernel operating on raw pointers and strides. Following the
  // step comments below, it builds
  //   w1t = ( a1t + u2' * A2 ) / tau
  // in a temporary buffer and then updates, in place,
  //   a1t = a1t - w1t
  //   A2  = A2  - u2 * w1t
  // a1t is read/written as n_a1t elements with stride inc_a1t, u2 is passed
  // with stride inc_u2, and A2 is passed as m_u2_A2 x n_a1t with row/column
  // strides rs_A2/cs_A2. Always returns FLA_SUCCESS.
  //
  // Each commented-out FLA_* line records the FLA_Obj-based call that the
  // typed statement following it stands in for.

  // Library constants 1 and -1 viewed as dcomplex; passed as the scalar
  // arguments of the gemv/axpy/ger calls below.
  dcomplex* one_p       = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
  dcomplex* minus_one_p = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
  int       inc_w1t;

  // FLA_Obj w1t;
  dcomplex* w1t;

  // Nothing to update when a1t is empty; returns before any allocation.
  // if ( FLA_Obj_has_zero_dim( a1t ) ) return FLA_SUCCESS;
  if ( n_a1t == 0 ) return FLA_SUCCESS;

  // Temporary vector with n_a1t elements of a1t's element type, stored with
  // unit stride.
  // NOTE(review): the pointer is used without a NULL check in this function;
  // confirm FLA_malloc handles allocation failure itself.
  // FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, a1t, &w1t );
  w1t = ( dcomplex* ) FLA_malloc( n_a1t * sizeof( *a1t ) );
  inc_w1t = 1;

  // // w1t = a1t;
  // FLA_Copy_external( a1t, w1t );
  bli_zcopyv( BLIS_NO_CONJUGATE,
              n_a1t,
              a1t, inc_a1t, 
              w1t, inc_w1t ); 

  // This step must run before the rank-1 update at the end, because it reads
  // A2 and that update overwrites A2.
  // // w1t = w1t + u2' * A2;
  // // w1t = w1t + A2^T * conj(u2);
  // FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A2, u2, FLA_ONE, w1t );
  bli_zgemv( BLIS_TRANSPOSE,
             BLIS_CONJUGATE,
             m_u2_A2,
             n_a1t,
             one_p,
             A2, rs_A2, cs_A2,
             u2, inc_u2,
             one_p,
             w1t, inc_w1t );

  // tau is passed by pointer and is not checked against zero here.
  // // w1t = w1t / tau;
  // FLA_Inv_scalc_external( FLA_NO_CONJUGATE, tau, w1t );
  bli_zinvscalv( BLIS_NO_CONJUGATE,
                 n_a1t,
                 tau,
                 w1t, inc_w1t );

  // // a1t = - w1t + a1t;
  // FLA_Axpy_external( FLA_MINUS_ONE, w1t, a1t );
  bli_zaxpyv( BLIS_NO_CONJUGATE,
              n_a1t,
              minus_one_p,
              w1t, inc_w1t,
              a1t, inc_a1t );

  // // A2 = - u2 * w1t + A2;
  // FLA_Ger_external( FLA_MINUS_ONE, u2, w1t, A2 );
  bli_zger( BLIS_NO_CONJUGATE,
            BLIS_NO_CONJUGATE,
            m_u2_A2,
            n_a1t,
            minus_one_p,
            u2, inc_u2,
            w1t, inc_w1t,
            A2, rs_A2, cs_A2 );

  // Release the temporary; w1t is not used after this point.
  // FLA_Obj_free( &w1t );
  FLA_free( w1t );

  return FLA_SUCCESS;
}

FLA_Error FLA_Apply_H2_UT_l_unb_var1 ( FLA_Obj  tau,
FLA_Obj  u2,
FLA_Obj  a1t,
FLA_Obj  A2 
)

References FLA_Axpy_external(), FLA_Copy_external(), FLA_Gemvc_external(), FLA_Ger_external(), FLA_Inv_scalc_external(), FLA_MINUS_ONE, FLA_Obj_create_conf_to(), FLA_Obj_free(), FLA_Obj_has_zero_dim(), and FLA_ONE.

{
  // FLA_Obj-based variant. Following the step comments below, it builds
  //   w1t = ( a1t + u2' * A2 ) / tau
  // in a temporary object and then updates a1t and A2 in place.
  // Always returns FLA_SUCCESS; the return values of the FLA_* calls are not
  // inspected.

  // Temporary object; created below and released before returning.
  FLA_Obj w1t;

  // Nothing to do when a1t has a zero dimension; returns before w1t is
  // created, so there is nothing to free on this path.
  if ( FLA_Obj_has_zero_dim( a1t ) ) return FLA_SUCCESS;

  // w1t = a1t;
  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, a1t, &w1t );
  FLA_Copy_external( a1t, w1t );

  // This step must run before the rank-1 update below, because it reads A2
  // and that update overwrites A2.
  // w1t = w1t + u2' * A2;
  // w1t = w1t + A2^T * conj(u2);
  FLA_Gemvc_external( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A2, u2, FLA_ONE, w1t );

  // tau is not checked against zero here.
  // w1t = w1t / tau;
  FLA_Inv_scalc_external( FLA_NO_CONJUGATE, tau, w1t );

  // a1t = a1t - w1t;
  FLA_Axpy_external( FLA_MINUS_ONE, w1t, a1t );

  // A2 = A2 - u2 * w1t;
  FLA_Ger_external( FLA_MINUS_ONE, u2, w1t, A2 );

  // Release the temporary; w1t is not used after this point.
  FLA_Obj_free( &w1t );

  return FLA_SUCCESS;
}