libflame revision_anchor
Functions
FLA_Fused_Gerc2_opt_var1.c File Reference

(r)

Functions

FLA_Error FLA_Fused_Gerc2_opt_var1 (FLA_Obj alpha, FLA_Obj u, FLA_Obj y, FLA_Obj z, FLA_Obj v, FLA_Obj A)
FLA_Error FLA_Fused_Gerc2_ops_var1 (int m_A, int n_A, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A)
FLA_Error FLA_Fused_Gerc2_opd_var1 (int m_A, int n_A, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A)
FLA_Error FLA_Fused_Gerc2_opc_var1 (int m_A, int n_A, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A)
FLA_Error FLA_Fused_Gerc2_opz_var1 (int m_A, int n_A, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A)

Function Documentation

FLA_Error FLA_Fused_Gerc2_opc_var1 ( int  m_A,
int  n_A,
scomplex *  buff_alpha,
scomplex *  buff_u,
int  inc_u,
scomplex *  buff_y,
int  inc_y,
scomplex *  buff_z,
int  inc_z,
scomplex *  buff_v,
int  inc_v,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)

References F77_caxpy().

Referenced by FLA_Bidiag_UT_u_step_ofc_var2() and FLA_Fused_Gerc2_opt_var1().

{
  /*
     Single-precision complex kernel. Walks the n_A columns of A and, for
     each column, issues two F77_caxpy calls of length m_A that accumulate
     scaled copies of u and z into that column.
     Always returns FLA_SUCCESS.
  */
  int       i;

  for ( i = 0; i < n_A; ++i )
  {
    // a1:   first element of column i of A (row offset 0, column offset i*cs_A).
    // psi1: i-th element of y (stride inc_y); nu1: i-th element of v (stride inc_v).
    // u, z and alpha do not depend on i; they are plain aliases of the inputs.
    scomplex* a1       = buff_A + (i  )*cs_A + (0  )*rs_A;
    scomplex* u        = buff_u;
    scomplex* psi1     = buff_y + (i  )*inc_y;
    scomplex* z        = buff_z;
    scomplex* nu1      = buff_v + (i  )*inc_v;
    scomplex* alpha    = buff_alpha;
    scomplex  psi1_conj;
    scomplex  nu1_conj;
    scomplex  temp1;
    scomplex  temp2;

    /*------------------------------------------------------------*/

    // NOTE(review): bli_ccopyconj / bli_cmult3 are defined outside this file;
    // presumably temp1 = alpha * conj(psi1) -- confirm against their definitions.
    bli_ccopyconj( psi1, &psi1_conj );
    bli_cmult3( alpha, &psi1_conj, &temp1 );

    // Same pattern for v: presumably temp2 = alpha * conj(nu1).
    bli_ccopyconj( nu1, &nu1_conj );
    bli_cmult3( alpha, &nu1_conj, &temp2 );

    // Both scalars are formed before either axpy call writes to a1.

    // First update of the column: a1 += temp1 * u (elements of a1 are rs_A apart).
    // bli_caxpyv( BLIS_NO_CONJUGATE,
    //             m_A,
    //             &temp1,
    //             u,  inc_u,
    //             a1, rs_A );
    F77_caxpy( &m_A,
               &temp1,
               u,  &inc_u,
               a1, &rs_A );

    // Second update of the same column: a1 += temp2 * z.
    // bli_caxpyv( BLIS_NO_CONJUGATE,
    //             m_A,
    //             &temp2,
    //             z,  inc_z,
    //             a1, rs_A );
    F77_caxpy( &m_A,
               &temp2,
               z,  &inc_z,
               a1, &rs_A );

    /*------------------------------------------------------------*/

  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Fused_Gerc2_opd_var1 ( int  m_A,
int  n_A,
double *  buff_alpha,
double *  buff_u,
int  inc_u,
double *  buff_y,
int  inc_y,
double *  buff_z,
int  inc_z,
double *  buff_v,
int  inc_v,
double *  buff_A,
int  rs_A,
int  cs_A 
)

References F77_daxpy().

Referenced by FLA_Bidiag_UT_u_step_ofd_var2() and FLA_Fused_Gerc2_opt_var1().

{
  /*
     Double-precision real kernel. For every column j of A:
       A(:,j) += ( alpha * y(j) ) * u
       A(:,j) += ( alpha * v(j) ) * z
     Each update is one F77_daxpy call of length m_A.
     Always returns FLA_SUCCESS.
  */
  int j;

  for ( j = 0; j < n_A; ++j )
  {
    // Top of column j; consecutive rows of the column are rs_A elements apart.
    double* col_j   = buff_A + j * cs_A;

    // Per-column scalars, both read before either axpy touches the column.
    double  scale_u = *buff_alpha * buff_y[ j * inc_y ];
    double  scale_z = *buff_alpha * buff_v[ j * inc_v ];

    // col_j := col_j + scale_u * u
    F77_daxpy( &m_A, &scale_u, buff_u, &inc_u, col_j, &rs_A );

    // col_j := col_j + scale_z * z
    F77_daxpy( &m_A, &scale_z, buff_z, &inc_z, col_j, &rs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Fused_Gerc2_ops_var1 ( int  m_A,
int  n_A,
float *  buff_alpha,
float *  buff_u,
int  inc_u,
float *  buff_y,
int  inc_y,
float *  buff_z,
int  inc_z,
float *  buff_v,
int  inc_v,
float *  buff_A,
int  rs_A,
int  cs_A 
)

References F77_saxpy().

Referenced by FLA_Bidiag_UT_u_step_ofs_var2() and FLA_Fused_Gerc2_opt_var1().

{
  /*
     Single-precision real kernel. For every column j of A:
       A(:,j) += ( alpha * y(j) ) * u
       A(:,j) += ( alpha * v(j) ) * z
     Each update is one F77_saxpy call of length m_A.
     Always returns FLA_SUCCESS.
  */
  int j;

  for ( j = 0; j < n_A; ++j )
  {
    // Top of column j; consecutive rows of the column are rs_A elements apart.
    float* col_j   = buff_A + j * cs_A;

    // Per-column scalars, both read before either axpy touches the column.
    float  scale_u = *buff_alpha * buff_y[ j * inc_y ];
    float  scale_z = *buff_alpha * buff_v[ j * inc_v ];

    // col_j := col_j + scale_u * u
    F77_saxpy( &m_A, &scale_u, buff_u, &inc_u, col_j, &rs_A );

    // col_j := col_j + scale_z * z
    F77_saxpy( &m_A, &scale_z, buff_z, &inc_z, col_j, &rs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Fused_Gerc2_opt_var1 ( FLA_Obj  alpha,
FLA_Obj  u,
FLA_Obj  y,
FLA_Obj  z,
FLA_Obj  v,
FLA_Obj  A 
)

References FLA_Fused_Gerc2_opc_var1(), FLA_Fused_Gerc2_opd_var1(), FLA_Fused_Gerc2_ops_var1(), FLA_Fused_Gerc2_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

{
/*
   Effective computation:
   A = A + alpha * ( u * y' + z * v' );

   Object-level front end: extracts dimensions, strides and raw buffers
   from the FLA_Obj arguments, then calls the typed kernel
   (ops / opd / opc / opz) selected by the datatype of A.
   Always returns FLA_SUCCESS.
*/
  FLA_Datatype datatype;
  int          m_A, n_A;
  int          rs_A, cs_A;
  int          inc_u, inc_y, inc_z, inc_v;

  // Only A's datatype is queried; the same type is then used for the
  // buffers of u, y, z, v and alpha in the matching case below.
  datatype = FLA_Obj_datatype( A );

  // Dimensions of A: m_A rows (length) by n_A columns (width).
  m_A      = FLA_Obj_length( A );
  n_A      = FLA_Obj_width( A );

  // Row and column strides of A, forwarded unchanged to the kernels.
  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  // Element increments of the four vector operands.
  inc_u    = FLA_Obj_vector_inc( u );
  inc_y    = FLA_Obj_vector_inc( y );
  inc_z    = FLA_Obj_vector_inc( z );
  inc_v    = FLA_Obj_vector_inc( v );
  

  // There is no default case: a datatype other than the four handled here
  // performs no update, and the function still returns FLA_SUCCESS.
  switch ( datatype )
  {
    // Single-precision real.
    case FLA_FLOAT:
    {
      float* buff_A = FLA_FLOAT_PTR( A );
      float* buff_u = FLA_FLOAT_PTR( u );
      float* buff_y = FLA_FLOAT_PTR( y );
      float* buff_z = FLA_FLOAT_PTR( z );
      float* buff_v = FLA_FLOAT_PTR( v );
      float* buff_alpha = FLA_FLOAT_PTR( alpha );

      FLA_Fused_Gerc2_ops_var1( m_A,
                                n_A,
                                buff_alpha,
                                buff_u, inc_u,
                                buff_y, inc_y,
                                buff_z, inc_z,
                                buff_v, inc_v,
                                buff_A, rs_A, cs_A );

      break;
    }

    // Double-precision real.
    case FLA_DOUBLE:
    {
      double* buff_A = FLA_DOUBLE_PTR( A );
      double* buff_u = FLA_DOUBLE_PTR( u );
      double* buff_y = FLA_DOUBLE_PTR( y );
      double* buff_z = FLA_DOUBLE_PTR( z );
      double* buff_v = FLA_DOUBLE_PTR( v );
      double* buff_alpha = FLA_DOUBLE_PTR( alpha );

      FLA_Fused_Gerc2_opd_var1( m_A,
                                n_A,
                                buff_alpha,
                                buff_u, inc_u,
                                buff_y, inc_y,
                                buff_z, inc_z,
                                buff_v, inc_v,
                                buff_A, rs_A, cs_A );

      break;
    }

    // Single-precision complex.
    case FLA_COMPLEX:
    {
      scomplex* buff_A = FLA_COMPLEX_PTR( A );
      scomplex* buff_u = FLA_COMPLEX_PTR( u );
      scomplex* buff_y = FLA_COMPLEX_PTR( y );
      scomplex* buff_z = FLA_COMPLEX_PTR( z );
      scomplex* buff_v = FLA_COMPLEX_PTR( v );
      scomplex* buff_alpha = FLA_COMPLEX_PTR( alpha );

      FLA_Fused_Gerc2_opc_var1( m_A,
                                n_A,
                                buff_alpha,
                                buff_u, inc_u,
                                buff_y, inc_y,
                                buff_z, inc_z,
                                buff_v, inc_v,
                                buff_A, rs_A, cs_A );

      break;
    }

    // Double-precision complex.
    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex* buff_u = FLA_DOUBLE_COMPLEX_PTR( u );
      dcomplex* buff_y = FLA_DOUBLE_COMPLEX_PTR( y );
      dcomplex* buff_z = FLA_DOUBLE_COMPLEX_PTR( z );
      dcomplex* buff_v = FLA_DOUBLE_COMPLEX_PTR( v );
      dcomplex* buff_alpha = FLA_DOUBLE_COMPLEX_PTR( alpha );

      FLA_Fused_Gerc2_opz_var1( m_A,
                                n_A,
                                buff_alpha,
                                buff_u, inc_u,
                                buff_y, inc_y,
                                buff_z, inc_z,
                                buff_v, inc_v,
                                buff_A, rs_A, cs_A );

      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Fused_Gerc2_opz_var1 ( int  m_A,
int  n_A,
dcomplex *  buff_alpha,
dcomplex *  buff_u,
int  inc_u,
dcomplex *  buff_y,
int  inc_y,
dcomplex *  buff_z,
int  inc_z,
dcomplex *  buff_v,
int  inc_v,
dcomplex *  buff_A,
int  rs_A,
int  cs_A 
)

References F77_zaxpy().

Referenced by FLA_Bidiag_UT_u_step_ofz_var2() and FLA_Fused_Gerc2_opt_var1().

{
  /*
     Double-precision complex kernel. Walks the n_A columns of A and, for
     each column, issues two F77_zaxpy calls of length m_A that accumulate
     scaled copies of u and z into that column.
     Always returns FLA_SUCCESS.
  */
  int       i;

  for ( i = 0; i < n_A; ++i )
  {
    // a1:   first element of column i of A (row offset 0, column offset i*cs_A).
    // psi1: i-th element of y (stride inc_y); nu1: i-th element of v (stride inc_v).
    // u, z and alpha do not depend on i; they are plain aliases of the inputs.
    dcomplex* a1       = buff_A + (i  )*cs_A + (0  )*rs_A;
    dcomplex* u        = buff_u;
    dcomplex* psi1     = buff_y + (i  )*inc_y;
    dcomplex* z        = buff_z;
    dcomplex* nu1      = buff_v + (i  )*inc_v;
    dcomplex* alpha    = buff_alpha;
    dcomplex  psi1_conj;
    dcomplex  nu1_conj;
    dcomplex  temp1;
    dcomplex  temp2;

    /*------------------------------------------------------------*/

    // NOTE(review): bli_zcopyconj / bli_zmult3 are defined outside this file;
    // presumably temp1 = alpha * conj(psi1) -- confirm against their definitions.
    bli_zcopyconj( psi1, &psi1_conj );
    bli_zmult3( alpha, &psi1_conj, &temp1 );

    // Same pattern for v: presumably temp2 = alpha * conj(nu1).
    bli_zcopyconj( nu1, &nu1_conj );
    bli_zmult3( alpha, &nu1_conj, &temp2 );

    // Both scalars are formed before either axpy call writes to a1.

    // First update of the column: a1 += temp1 * u (elements of a1 are rs_A apart).
    // bli_zaxpyv( BLIS_NO_CONJUGATE,
    //             m_A,
    //             &temp1,
    //             u,  inc_u,
    //             a1, rs_A );
    F77_zaxpy( &m_A,
               &temp1,
               u,  &inc_u,
               a1, &rs_A );

    // Second update of the same column: a1 += temp2 * z.
    // bli_zaxpyv( BLIS_NO_CONJUGATE,
    //             m_A,
    //             &temp2,
    //             z,  inc_z,
    //             a1, rs_A );
    F77_zaxpy( &m_A,
               &temp2,
               z,  &inc_z,
               a1, &rs_A );

    /*------------------------------------------------------------*/

  }

  return FLA_SUCCESS;
}