libflame revision_anchor
Functions
FLA_Fused_UZhu_ZUhu_opt_var1.c File Reference

(r)

Functions

FLA_Error FLA_Fused_UZhu_ZUhu_opt_var1 (FLA_Obj delta, FLA_Obj U, FLA_Obj Z, FLA_Obj u, FLA_Obj w)
FLA_Error FLA_Fused_UZhu_ZUhu_ops_var1 (int m_U, int n_U, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Z, int rs_Z, int cs_Z, float *buff_u, int inc_u, float *buff_w, int inc_w)
FLA_Error FLA_Fused_UZhu_ZUhu_opd_var1 (int m_U, int n_U, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Z, int rs_Z, int cs_Z, double *buff_u, int inc_u, double *buff_w, int inc_w)
FLA_Error FLA_Fused_UZhu_ZUhu_opc_var1 (int m_U, int n_U, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_u, int inc_u, scomplex *buff_w, int inc_w)
FLA_Error FLA_Fused_UZhu_ZUhu_opz_var1 (int m_U, int n_U, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_u, int inc_u, dcomplex *buff_w, int inc_w)

Function Documentation

FLA_Error FLA_Fused_UZhu_ZUhu_opc_var1 ( int  m_U,
int  n_U,
scomplex *  buff_delta,
scomplex *  buff_U,
int  rs_U,
int  cs_U,
scomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
scomplex *  buff_u,
int  inc_u,
scomplex *  buff_w,
int  inc_w 
)

References bli_cdot(), and F77_caxpy().

Referenced by FLA_Fused_UZhu_ZUhu_opt_var1(), and FLA_Tridiag_UT_l_step_ofc_var3().

{
  /*
     Single-precision complex kernel.

     Walks the n_U columns of U and Z together. For every column j it
     forms two scalars from dot products against u, scales both by the
     scalar stored at buff_delta, and then accumulates
         (first scalar)  * column j of U
         (second scalar) * column j of Z
     into w, in that order. Always returns FLA_SUCCESS.
  */
  int j;

  for ( j = 0; j < n_U; ++j )
  {
    /* First element of column j in each matrix (row offset is zero). */
    scomplex* col_U = buff_U + j * cs_U;
    scomplex* col_Z = buff_Z + j * cs_Z;

    /* coef_U multiplies the U column, coef_Z multiplies the Z column. */
    scomplex  coef_U;
    scomplex  coef_Z;

    /* Dot products of length m_U against u, requested with
       BLIS_CONJUGATE (presumably conjugating the matrix column, i.e.
       a Hermitian inner product -- confirm against bli_cdot). */
    bli_cdot( BLIS_CONJUGATE, m_U, col_Z, rs_Z, buff_u, inc_u, &coef_U );
    bli_cdot( BLIS_CONJUGATE, m_U, col_U, rs_U, buff_u, inc_u, &coef_Z );

    /* Fold delta into both coefficients (bli_cscals presumably scales
       its second argument in place by the first -- confirm). */
    bli_cscals( buff_delta, &coef_U );
    bli_cscals( buff_delta, &coef_Z );

    /* Accumulate into w through the Fortran-style axpy wrapper, which
       takes its integer arguments by address. The U column goes first,
       then the Z column; each column is stepped with its own row stride. */
    F77_caxpy( &m_U, &coef_U, col_U, &rs_U, buff_w, &inc_w );
    F77_caxpy( &m_U, &coef_Z, col_Z, &rs_Z, buff_w, &inc_w );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Fused_UZhu_ZUhu_opd_var1 ( int  m_U,
int  n_U,
double *  buff_delta,
double *  buff_U,
int  rs_U,
int  cs_U,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_u,
int  inc_u,
double *  buff_w,
int  inc_w 
)

References F77_daxpy(), and F77_ddot().

Referenced by FLA_Fused_UZhu_ZUhu_opt_var1(), and FLA_Tridiag_UT_l_step_ofd_var3().

{
  /*
     Double-precision real kernel.

     Walks the n_U columns of U and Z together. For every column j it
     takes the dot product of the Z column with u and of the U column
     with u, multiplies both results by *buff_delta, and then adds
         (Z-column dot * delta) * column j of U
         (U-column dot * delta) * column j of Z
     into w, in that order. Always returns FLA_SUCCESS.
  */
  int j;

  for ( j = 0; j < n_U; ++j )
  {
    /* First element of column j in each matrix (row offset is zero). */
    double* col_U = buff_U + j * cs_U;
    double* col_Z = buff_Z + j * cs_Z;

    /* Length-m_U dot products against u. The F77_ wrappers take their
       integer arguments by address. */
    double  coef_U = F77_ddot( &m_U, col_Z, &rs_Z, buff_u, &inc_u );
    double  coef_Z = F77_ddot( &m_U, col_U, &rs_U, buff_u, &inc_u );

    /* Fold delta into both coefficients. Kept as separate statements so
       the stored dot product is what gets scaled. */
    coef_U *= *buff_delta;
    coef_Z *= *buff_delta;

    /* Accumulate into w: U column first, then Z column, each stepped
       with its own row stride (F77_daxpy is presumably the BLAS
       y := a*x + y update -- confirm). */
    F77_daxpy( &m_U, &coef_U, col_U, &rs_U, buff_w, &inc_w );
    F77_daxpy( &m_U, &coef_Z, col_Z, &rs_Z, buff_w, &inc_w );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Fused_UZhu_ZUhu_ops_var1 ( int  m_U,
int  n_U,
float *  buff_delta,
float *  buff_U,
int  rs_U,
int  cs_U,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_u,
int  inc_u,
float *  buff_w,
int  inc_w 
)

References F77_saxpy(), and F77_sdot().

Referenced by FLA_Fused_UZhu_ZUhu_opt_var1(), and FLA_Tridiag_UT_l_step_ofs_var3().

{
  /*
     Single-precision real kernel.

     Walks the n_U columns of U and Z together. For every column j it
     takes the dot product of the Z column with u and of the U column
     with u, multiplies both results by *buff_delta, and then adds
         (Z-column dot * delta) * column j of U
         (U-column dot * delta) * column j of Z
     into w, in that order. Always returns FLA_SUCCESS.
  */
  int j;

  for ( j = 0; j < n_U; ++j )
  {
    /* First element of column j in each matrix (row offset is zero). */
    float* col_U = buff_U + j * cs_U;
    float* col_Z = buff_Z + j * cs_Z;

    /* Length-m_U dot products against u. The F77_ wrappers take their
       integer arguments by address. */
    float  coef_U = F77_sdot( &m_U, col_Z, &rs_Z, buff_u, &inc_u );
    float  coef_Z = F77_sdot( &m_U, col_U, &rs_U, buff_u, &inc_u );

    /* Fold delta into both coefficients. Kept as separate statements so
       the stored dot product is what gets scaled. */
    coef_U *= *buff_delta;
    coef_Z *= *buff_delta;

    /* Accumulate into w: U column first, then Z column, each stepped
       with its own row stride (F77_saxpy is presumably the BLAS
       y := a*x + y update -- confirm). */
    F77_saxpy( &m_U, &coef_U, col_U, &rs_U, buff_w, &inc_w );
    F77_saxpy( &m_U, &coef_Z, col_Z, &rs_Z, buff_w, &inc_w );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Fused_UZhu_ZUhu_opt_var1 ( FLA_Obj  delta,
FLA_Obj  U,
FLA_Obj  Z,
FLA_Obj  u,
FLA_Obj  w 
)

References FLA_Fused_UZhu_ZUhu_opc_var1(), FLA_Fused_UZhu_ZUhu_opd_var1(), FLA_Fused_UZhu_ZUhu_ops_var1(), FLA_Fused_UZhu_ZUhu_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

{
  /*
     Object-level front end: unpacks the FLA_Obj arguments and forwards
     to the kernel matching U's datatype.

     Effective computation:
       w = w + delta * ( U ( Z' u ) + Z ( U' u ) );

     The element type and both dimensions are read from U alone; Z
     contributes only its strides, and u and w only their increments.
     A datatype other than the four handled below does no work, and the
     function still returns FLA_SUCCESS.
  */
  FLA_Datatype datatype = FLA_Obj_datatype( U );

  int          m_U      = FLA_Obj_length( U );
  int          n_U      = FLA_Obj_width( U );

  int          rs_U     = FLA_Obj_row_stride( U );
  int          cs_U     = FLA_Obj_col_stride( U );

  int          rs_Z     = FLA_Obj_row_stride( Z );
  int          cs_Z     = FLA_Obj_col_stride( Z );

  int          inc_u    = FLA_Obj_vector_inc( u );
  int          inc_w    = FLA_Obj_vector_inc( w );

  if ( datatype == FLA_FLOAT )
  {
    /* Single-precision real. */
    float*    ptr_U     = FLA_FLOAT_PTR( U );
    float*    ptr_Z     = FLA_FLOAT_PTR( Z );
    float*    ptr_u     = FLA_FLOAT_PTR( u );
    float*    ptr_w     = FLA_FLOAT_PTR( w );
    float*    ptr_delta = FLA_FLOAT_PTR( delta );

    FLA_Fused_UZhu_ZUhu_ops_var1( m_U, n_U,
                                  ptr_delta,
                                  ptr_U, rs_U, cs_U,
                                  ptr_Z, rs_Z, cs_Z,
                                  ptr_u, inc_u,
                                  ptr_w, inc_w );
  }
  else if ( datatype == FLA_DOUBLE )
  {
    /* Double-precision real. */
    double*   ptr_U     = FLA_DOUBLE_PTR( U );
    double*   ptr_Z     = FLA_DOUBLE_PTR( Z );
    double*   ptr_u     = FLA_DOUBLE_PTR( u );
    double*   ptr_w     = FLA_DOUBLE_PTR( w );
    double*   ptr_delta = FLA_DOUBLE_PTR( delta );

    FLA_Fused_UZhu_ZUhu_opd_var1( m_U, n_U,
                                  ptr_delta,
                                  ptr_U, rs_U, cs_U,
                                  ptr_Z, rs_Z, cs_Z,
                                  ptr_u, inc_u,
                                  ptr_w, inc_w );
  }
  else if ( datatype == FLA_COMPLEX )
  {
    /* Single-precision complex. */
    scomplex* ptr_U     = FLA_COMPLEX_PTR( U );
    scomplex* ptr_Z     = FLA_COMPLEX_PTR( Z );
    scomplex* ptr_u     = FLA_COMPLEX_PTR( u );
    scomplex* ptr_w     = FLA_COMPLEX_PTR( w );
    scomplex* ptr_delta = FLA_COMPLEX_PTR( delta );

    FLA_Fused_UZhu_ZUhu_opc_var1( m_U, n_U,
                                  ptr_delta,
                                  ptr_U, rs_U, cs_U,
                                  ptr_Z, rs_Z, cs_Z,
                                  ptr_u, inc_u,
                                  ptr_w, inc_w );
  }
  else if ( datatype == FLA_DOUBLE_COMPLEX )
  {
    /* Double-precision complex. */
    dcomplex* ptr_U     = FLA_DOUBLE_COMPLEX_PTR( U );
    dcomplex* ptr_Z     = FLA_DOUBLE_COMPLEX_PTR( Z );
    dcomplex* ptr_u     = FLA_DOUBLE_COMPLEX_PTR( u );
    dcomplex* ptr_w     = FLA_DOUBLE_COMPLEX_PTR( w );
    dcomplex* ptr_delta = FLA_DOUBLE_COMPLEX_PTR( delta );

    FLA_Fused_UZhu_ZUhu_opz_var1( m_U, n_U,
                                  ptr_delta,
                                  ptr_U, rs_U, cs_U,
                                  ptr_Z, rs_Z, cs_Z,
                                  ptr_u, inc_u,
                                  ptr_w, inc_w );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Fused_UZhu_ZUhu_opz_var1 ( int  m_U,
int  n_U,
dcomplex *  buff_delta,
dcomplex *  buff_U,
int  rs_U,
int  cs_U,
dcomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex *  buff_u,
int  inc_u,
dcomplex *  buff_w,
int  inc_w 
)

References bli_zdot(), and F77_zaxpy().

Referenced by FLA_Fused_UZhu_ZUhu_opt_var1(), and FLA_Tridiag_UT_l_step_ofz_var3().

{
  /*
     Double-precision complex kernel.

     Walks the n_U columns of U and Z together. For every column j it
     forms two scalars from dot products against u, scales both by the
     scalar stored at buff_delta, and then accumulates
         (first scalar)  * column j of U
         (second scalar) * column j of Z
     into w, in that order. Always returns FLA_SUCCESS.
  */
  int j;

  for ( j = 0; j < n_U; ++j )
  {
    /* First element of column j in each matrix (row offset is zero). */
    dcomplex* col_U = buff_U + j * cs_U;
    dcomplex* col_Z = buff_Z + j * cs_Z;

    /* coef_U multiplies the U column, coef_Z multiplies the Z column. */
    dcomplex  coef_U;
    dcomplex  coef_Z;

    /* Dot products of length m_U against u, requested with
       BLIS_CONJUGATE (presumably conjugating the matrix column, i.e.
       a Hermitian inner product -- confirm against bli_zdot). */
    bli_zdot( BLIS_CONJUGATE, m_U, col_Z, rs_Z, buff_u, inc_u, &coef_U );
    bli_zdot( BLIS_CONJUGATE, m_U, col_U, rs_U, buff_u, inc_u, &coef_Z );

    /* Fold delta into both coefficients (bli_zscals presumably scales
       its second argument in place by the first -- confirm). */
    bli_zscals( buff_delta, &coef_U );
    bli_zscals( buff_delta, &coef_Z );

    /* Accumulate into w through the Fortran-style axpy wrapper, which
       takes its integer arguments by address. The U column goes first,
       then the Z column; each column is stepped with its own row stride. */
    F77_zaxpy( &m_U, &coef_U, col_U, &rs_U, buff_w, &inc_w );
    F77_zaxpy( &m_U, &coef_Z, col_Z, &rs_Z, buff_w, &inc_w );
  }

  return FLA_SUCCESS;
}