libflame  revision_anchor
Functions
FLA_Apply_G_rf_opt_var2.c File Reference

(r)

Functions

FLA_Error FLA_Apply_G_rf_opt_var2 (FLA_Obj G, FLA_Obj A)
FLA_Error FLA_Apply_G_rf_ops_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
FLA_Error FLA_Apply_G_rf_opd_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
FLA_Error FLA_Apply_G_rf_opc_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
FLA_Error FLA_Apply_G_rf_opz_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)

Function Documentation

FLA_Error FLA_Apply_G_rf_opc_var2 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)

References bli_s0(), bli_s1(), FLA_Apply_G_rf_opc_var1(), scomplex::imag, and scomplex::real.

Referenced by FLA_Apply_G_rf_opt_var2().

{
    // Single-precision complex kernel: walks the entries of G and, for each
    // entry (g, k), updates columns g and g+1 of A via MAC_Apply_G_mx2_opc.
    //
    //   k_G            number of columns of G that are visited (k = 0..k_G-1)
    //   m_A, n_A       row count passed to the update macro / column count of A
    //   buff_G         G entries; .real is read as gamma, .imag as sigma
    //   rs_G, cs_G     strides used to address G(g, k)
    //   buff_A         first element of A (scomplex)
    //   rs_A, cs_A     strides of A; cs_A selects a column, rs_A is handed to
    //                  the update macro
    //
    // Entries are visited along anti-diagonals of G (g + k constant), split
    // into a start-up, a pipeline and a shutdown phase. Always returns
    // FLA_SUCCESS.
    // NOTE(review): MAC_Apply_G_mx2_opc is defined elsewhere; presumably it
    // applies the rotation (gamma, sigma) to the m_A x 2 column pair -- confirm.

    // Reference values for the identity-rotation test below.
    float     one  = bli_s1();
    float     zero = bli_s0();
    float     gamma;
    float     sigma;
    scomplex* a1;
    scomplex* a2;
    scomplex* g11;
    int       j, g, k;
    int       nG, nG_app;
    int       k_minus_1;

    // nG is the number of adjacent column pairs (g, g+1) in A, i.e. the
    // number of rows of G that are addressed.
    k_minus_1 = k_G - 1;
    nG        = n_A - 1;

    // Use the simple variant when nG < k - 1 or k == 1.
    // NOTE(review): this comment used to read "nG < 2(k - 1)", which does not
    // match the condition actually tested below -- confirm intended threshold.
    if ( nG < k_minus_1 || k_G == 1 )
    {
        FLA_Apply_G_rf_opc_var1( k_G,
                                 m_A,
                                 n_A,
                                 buff_G, rs_G, cs_G,
                                 buff_A, rs_A, cs_A );
        return FLA_SUCCESS;
    }


    // Start-up phase.
    // Anti-diagonals j = 0 .. k_G-2: only the first j+1 columns of G have an
    // entry on the anti-diagonal, so nG_app grows by one each iteration.

    for ( j = 0; j < k_minus_1; ++j )
    {
        nG_app = j + 1;

        // k ascends while g descends from j, keeping g + k == j.
        for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
        {
            g11   = buff_G + (g    )*rs_G + (k  )*cs_G;
            a1    = buff_A + (g    )*cs_A;
            a2    = buff_A + (g + 1)*cs_A;

            gamma = g11->real;
            sigma = g11->imag;

            // Skip the current iteration if the rotation is identity.
            if ( gamma == one && sigma == zero ) continue;

            MAC_Apply_G_mx2_opc( m_A,
                                 &gamma,
                                 &sigma,
                                 a1, rs_A,
                                 a2, rs_A );
        }
    }

    // Pipeline stage
    // Anti-diagonals j = k_G-1 .. nG-1: every one of the k_G columns of G
    // contributes an entry, so nG_app is fixed at k_G.

    for ( j = k_minus_1; j < nG; ++j )
    {
        nG_app = k_G;

        // k ascends while g descends from j, keeping g + k == j.
        for ( k = 0, g = j; k < nG_app; ++k, --g )
        {
            g11   = buff_G + (g    )*rs_G + (k  )*cs_G;
            a1    = buff_A + (g    )*cs_A;
            a2    = buff_A + (g + 1)*cs_A;

            gamma = g11->real;
            sigma = g11->imag;

            // Skip the current iteration if the rotation is identity.
            if ( gamma == one && sigma == zero ) continue;

            MAC_Apply_G_mx2_opc( m_A,
                                 &gamma,
                                 &sigma,
                                 a1, rs_A,
                                 a2, rs_A );
        }
    }

    // Shutdown stage
    // Remaining anti-diagonals (g + k == j + k_G - 1): the leading columns of
    // G are exhausted, so nG_app shrinks from k_G-1 down to 1.

    for ( j = nG - k_minus_1; j < nG; ++j )
    {
        nG_app = nG - j;

        // Start at the last row of G (g = nG-1) in the first column that
        // still has work, and finish at the last column k_G-1.
        for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
        {
            g11   = buff_G + (g    )*rs_G + (k  )*cs_G;
            a1    = buff_A + (g    )*cs_A;
            a2    = buff_A + (g + 1)*cs_A;

            gamma = g11->real;
            sigma = g11->imag;

            // Skip the current iteration if the rotation is identity.
            if ( gamma == one && sigma == zero ) continue;

            MAC_Apply_G_mx2_opc( m_A,
                                 &gamma,
                                 &sigma,
                                 a1, rs_A,
                                 a2, rs_A );
        }
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_opd_var2 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)

References bli_d0(), bli_d1(), FLA_Apply_G_rf_opd_var1(), dcomplex::imag, and dcomplex::real.

Referenced by FLA_Apply_G_rf_opt_var2().

{
    // Double-precision real kernel: walks the entries of G and, for each
    // entry (g, k), updates columns g and g+1 of A via MAC_Apply_G_mx2_opd.
    //
    //   k_G            number of columns of G that are visited (k = 0..k_G-1)
    //   m_A, n_A       row count passed to the update macro / column count of A
    //   buff_G         G entries; .real is read as gamma, .imag as sigma
    //   rs_G, cs_G     strides used to address G(g, k)
    //   buff_A         first element of A (double)
    //   rs_A, cs_A     strides of A; cs_A selects a column, rs_A is handed to
    //                  the update macro
    //
    // Entries are visited along anti-diagonals of G (g + k constant), split
    // into a start-up, a pipeline and a shutdown phase. Always returns
    // FLA_SUCCESS.
    // NOTE(review): MAC_Apply_G_mx2_opd is defined elsewhere; presumably it
    // applies the rotation (gamma, sigma) to the m_A x 2 column pair -- confirm.

    // Reference values for the identity-rotation test below.
    double    one  = bli_d1();
    double    zero = bli_d0();
    double    gamma;
    double    sigma;
    double*   a1;
    double*   a2;
    dcomplex* g11;
    int       j, g, k;
    int       nG, nG_app;
    int       k_minus_1;

    // nG is the number of adjacent column pairs (g, g+1) in A, i.e. the
    // number of rows of G that are addressed.
    k_minus_1 = k_G - 1;
    nG        = n_A - 1;

    // Use the simple variant when nG < k - 1 or k == 1.
    // NOTE(review): this comment used to read "nG < 2(k - 1)", which does not
    // match the condition actually tested below -- confirm intended threshold.
    if ( nG < k_minus_1 || k_G == 1 )
    {
        FLA_Apply_G_rf_opd_var1( k_G,
                                 m_A,
                                 n_A,
                                 buff_G, rs_G, cs_G,
                                 buff_A, rs_A, cs_A );
        return FLA_SUCCESS;
    }


    // Start-up phase.
    // Anti-diagonals j = 0 .. k_G-2: only the first j+1 columns of G have an
    // entry on the anti-diagonal, so nG_app grows by one each iteration.

    for ( j = 0; j < k_minus_1; ++j )
    {
        nG_app = j + 1;

        // k ascends while g descends from j, keeping g + k == j.
        for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
        {
            g11   = buff_G + (g    )*rs_G + (k  )*cs_G;
            a1    = buff_A + (g    )*cs_A;
            a2    = buff_A + (g + 1)*cs_A;

            gamma = g11->real;
            sigma = g11->imag;

            // Skip the current iteration if the rotation is identity.
            if ( gamma == one && sigma == zero ) continue;

            MAC_Apply_G_mx2_opd( m_A,
                                 &gamma,
                                 &sigma,
                                 a1, rs_A,
                                 a2, rs_A );
        }
    }

    // Pipeline stage
    // Anti-diagonals j = k_G-1 .. nG-1: every one of the k_G columns of G
    // contributes an entry, so nG_app is fixed at k_G.

    for ( j = k_minus_1; j < nG; ++j )
    {
        nG_app = k_G;

        // k ascends while g descends from j, keeping g + k == j.
        for ( k = 0, g = j; k < nG_app; ++k, --g )
        {
            g11   = buff_G + (g    )*rs_G + (k  )*cs_G;
            a1    = buff_A + (g    )*cs_A;
            a2    = buff_A + (g + 1)*cs_A;

            gamma = g11->real;
            sigma = g11->imag;

            // Skip the current iteration if the rotation is identity.
            if ( gamma == one && sigma == zero ) continue;

            MAC_Apply_G_mx2_opd( m_A,
                                 &gamma,
                                 &sigma,
                                 a1, rs_A,
                                 a2, rs_A );
        }
    }

    // Shutdown stage
    // Remaining anti-diagonals (g + k == j + k_G - 1): the leading columns of
    // G are exhausted, so nG_app shrinks from k_G-1 down to 1.

    for ( j = nG - k_minus_1; j < nG; ++j )
    {
        nG_app = nG - j;

        // Start at the last row of G (g = nG-1) in the first column that
        // still has work, and finish at the last column k_G-1.
        for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
        {
            g11   = buff_G + (g    )*rs_G + (k  )*cs_G;
            a1    = buff_A + (g    )*cs_A;
            a2    = buff_A + (g + 1)*cs_A;

            gamma = g11->real;
            sigma = g11->imag;

            // Skip the current iteration if the rotation is identity.
            if ( gamma == one && sigma == zero ) continue;

            MAC_Apply_G_mx2_opd( m_A,
                                 &gamma,
                                 &sigma,
                                 a1, rs_A,
                                 a2, rs_A );
        }
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_ops_var2 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)

References bli_s0(), bli_s1(), FLA_Apply_G_rf_ops_var1(), scomplex::imag, and scomplex::real.

Referenced by FLA_Apply_G_rf_opt_var2().

{
    // Single-precision real kernel: walks the entries of G and, for each
    // entry (g, k), updates columns g and g+1 of A via MAC_Apply_G_mx2_ops.
    //
    //   k_G            number of columns of G that are visited (k = 0..k_G-1)
    //   m_A, n_A       row count passed to the update macro / column count of A
    //   buff_G         G entries; .real is read as gamma, .imag as sigma
    //   rs_G, cs_G     strides used to address G(g, k)
    //   buff_A         first element of A (float)
    //   rs_A, cs_A     strides of A; cs_A selects a column, rs_A is handed to
    //                  the update macro
    //
    // Entries are visited along anti-diagonals of G (g + k constant), split
    // into a start-up, a pipeline and a shutdown phase. Always returns
    // FLA_SUCCESS.
    // NOTE(review): MAC_Apply_G_mx2_ops is defined elsewhere; presumably it
    // applies the rotation (gamma, sigma) to the m_A x 2 column pair -- confirm.

    // Reference values for the identity-rotation test below.
    float     one  = bli_s1();
    float     zero = bli_s0();
    float     gamma;
    float     sigma;
    float*    a1;
    float*    a2;
    scomplex* g11;
    int       j, g, k;
    int       nG, nG_app;
    int       k_minus_1;

    // nG is the number of adjacent column pairs (g, g+1) in A, i.e. the
    // number of rows of G that are addressed.
    k_minus_1 = k_G - 1;
    nG        = n_A - 1;

    // Use the simple variant when nG < k - 1 or k == 1.
    // NOTE(review): this comment used to read "nG < 2(k - 1)", which does not
    // match the condition actually tested below -- confirm intended threshold.
    if ( nG < k_minus_1 || k_G == 1 )
    {
        FLA_Apply_G_rf_ops_var1( k_G,
                                 m_A,
                                 n_A,
                                 buff_G, rs_G, cs_G,
                                 buff_A, rs_A, cs_A );
        return FLA_SUCCESS;
    }


    // Start-up phase.
    // Anti-diagonals j = 0 .. k_G-2: only the first j+1 columns of G have an
    // entry on the anti-diagonal, so nG_app grows by one each iteration.

    for ( j = 0; j < k_minus_1; ++j )
    {
        nG_app = j + 1;

        // k ascends while g descends from j, keeping g + k == j.
        for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
        {
            g11   = buff_G + (g    )*rs_G + (k  )*cs_G;
            a1    = buff_A + (g    )*cs_A;
            a2    = buff_A + (g + 1)*cs_A;

            gamma = g11->real;
            sigma = g11->imag;

            // Skip the current iteration if the rotation is identity.
            if ( gamma == one && sigma == zero ) continue;

            MAC_Apply_G_mx2_ops( m_A,
                                 &gamma,
                                 &sigma,
                                 a1, rs_A,
                                 a2, rs_A );
        }
    }

    // Pipeline stage
    // Anti-diagonals j = k_G-1 .. nG-1: every one of the k_G columns of G
    // contributes an entry, so nG_app is fixed at k_G.

    for ( j = k_minus_1; j < nG; ++j )
    {
        nG_app = k_G;

        // k ascends while g descends from j, keeping g + k == j.
        for ( k = 0, g = j; k < nG_app; ++k, --g )
        {
            g11   = buff_G + (g    )*rs_G + (k  )*cs_G;
            a1    = buff_A + (g    )*cs_A;
            a2    = buff_A + (g + 1)*cs_A;

            gamma = g11->real;
            sigma = g11->imag;

            // Skip the current iteration if the rotation is identity.
            if ( gamma == one && sigma == zero ) continue;

            MAC_Apply_G_mx2_ops( m_A,
                                 &gamma,
                                 &sigma,
                                 a1, rs_A,
                                 a2, rs_A );
        }
    }

    // Shutdown stage
    // Remaining anti-diagonals (g + k == j + k_G - 1): the leading columns of
    // G are exhausted, so nG_app shrinks from k_G-1 down to 1.

    for ( j = nG - k_minus_1; j < nG; ++j )
    {
        nG_app = nG - j;

        // Start at the last row of G (g = nG-1) in the first column that
        // still has work, and finish at the last column k_G-1.
        for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
        {
            g11   = buff_G + (g    )*rs_G + (k  )*cs_G;
            a1    = buff_A + (g    )*cs_A;
            a2    = buff_A + (g + 1)*cs_A;

            gamma = g11->real;
            sigma = g11->imag;

            // Skip the current iteration if the rotation is identity.
            if ( gamma == one && sigma == zero ) continue;

            MAC_Apply_G_mx2_ops( m_A,
                                 &gamma,
                                 &sigma,
                                 a1, rs_A,
                                 a2, rs_A );
        }
    }

    return FLA_SUCCESS;
}

FLA_Error FLA_Apply_G_rf_opt_var2 ( FLA_Obj  G,
FLA_Obj  A 
)

References FLA_Apply_G_rf_opc_var2(), FLA_Apply_G_rf_opd_var2(), FLA_Apply_G_rf_ops_var2(), FLA_Apply_G_rf_opz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

{
    // Object-level front end: reads the datatype, dimensions and strides out
    // of the G and A objects, then forwards raw buffers to the typed kernel
    // that matches A's datatype. G is always accessed through a complex
    // pointer (single-precision complex for float/scomplex A, double-precision
    // complex for double/dcomplex A). A datatype outside the four handled
    // below falls through without calling any kernel. Always returns
    // FLA_SUCCESS.

    // The datatype of A selects the kernel.
    const FLA_Datatype dt_A = FLA_Obj_datatype( A );

    // Number of columns of G, and the dimensions of A.
    const int num_cols_G = FLA_Obj_width( G );
    const int num_rows_A = FLA_Obj_length( A );
    const int num_cols_A = FLA_Obj_width( A );

    // Row/column strides of G, then of A.
    const int row_inc_G = FLA_Obj_row_stride( G );
    const int col_inc_G = FLA_Obj_col_stride( G );

    const int row_inc_A = FLA_Obj_row_stride( A );
    const int col_inc_A = FLA_Obj_col_stride( A );

    if ( dt_A == FLA_FLOAT )
    {
        // Real single precision A, single-precision complex G.
        scomplex* g_ptr = ( scomplex* ) FLA_COMPLEX_PTR( G );
        float*    a_ptr = ( float*    ) FLA_FLOAT_PTR( A );

        FLA_Apply_G_rf_ops_var2( num_cols_G, num_rows_A, num_cols_A,
                                 g_ptr, row_inc_G, col_inc_G,
                                 a_ptr, row_inc_A, col_inc_A );
    }
    else if ( dt_A == FLA_DOUBLE )
    {
        // Real double precision A, double-precision complex G.
        dcomplex* g_ptr = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        double*   a_ptr = ( double*   ) FLA_DOUBLE_PTR( A );

        FLA_Apply_G_rf_opd_var2( num_cols_G, num_rows_A, num_cols_A,
                                 g_ptr, row_inc_G, col_inc_G,
                                 a_ptr, row_inc_A, col_inc_A );
    }
    else if ( dt_A == FLA_COMPLEX )
    {
        // Single-precision complex A and G.
        scomplex* g_ptr = ( scomplex* ) FLA_COMPLEX_PTR( G );
        scomplex* a_ptr = ( scomplex* ) FLA_COMPLEX_PTR( A );

        FLA_Apply_G_rf_opc_var2( num_cols_G, num_rows_A, num_cols_A,
                                 g_ptr, row_inc_G, col_inc_G,
                                 a_ptr, row_inc_A, col_inc_A );
    }
    else if ( dt_A == FLA_DOUBLE_COMPLEX )
    {
        // Double-precision complex A and G.
        dcomplex* g_ptr = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        dcomplex* a_ptr = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

        FLA_Apply_G_rf_opz_var2( num_cols_G, num_rows_A, num_cols_A,
                                 g_ptr, row_inc_G, col_inc_G,
                                 a_ptr, row_inc_A, col_inc_A );
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_opz_var2 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A 
)

References bli_d0(), bli_d1(), FLA_Apply_G_rf_opz_var1(), dcomplex::imag, and dcomplex::real.

Referenced by FLA_Apply_G_rf_opt_var2().

{
    // Double-precision complex kernel: walks the entries of G and, for each
    // entry (g, k), updates columns g and g+1 of A via MAC_Apply_G_mx2_opz.
    //
    //   k_G            number of columns of G that are visited (k = 0..k_G-1)
    //   m_A, n_A       row count passed to the update macro / column count of A
    //   buff_G         G entries; .real is read as gamma, .imag as sigma
    //   rs_G, cs_G     strides used to address G(g, k)
    //   buff_A         first element of A (dcomplex)
    //   rs_A, cs_A     strides of A; cs_A selects a column, rs_A is handed to
    //                  the update macro
    //
    // Entries are visited along anti-diagonals of G (g + k constant), split
    // into a start-up, a pipeline and a shutdown phase. Always returns
    // FLA_SUCCESS.
    // NOTE(review): MAC_Apply_G_mx2_opz is defined elsewhere; presumably it
    // applies the rotation (gamma, sigma) to the m_A x 2 column pair -- confirm.

    // Reference values for the identity-rotation test below.
    double    one  = bli_d1();
    double    zero = bli_d0();
    double    gamma;
    double    sigma;
    dcomplex* a1;
    dcomplex* a2;
    dcomplex* g11;
    int       j, g, k;
    int       nG, nG_app;
    int       k_minus_1;

    // nG is the number of adjacent column pairs (g, g+1) in A, i.e. the
    // number of rows of G that are addressed.
    k_minus_1 = k_G - 1;
    nG        = n_A - 1;

    // Use the simple variant when nG < k - 1 or k == 1.
    // NOTE(review): this comment used to read "nG < 2(k - 1)", which does not
    // match the condition actually tested below -- confirm intended threshold.
    if ( nG < k_minus_1 || k_G == 1 )
    {
        FLA_Apply_G_rf_opz_var1( k_G,
                                 m_A,
                                 n_A,
                                 buff_G, rs_G, cs_G,
                                 buff_A, rs_A, cs_A );
        return FLA_SUCCESS;
    }


    // Start-up phase.
    // Anti-diagonals j = 0 .. k_G-2: only the first j+1 columns of G have an
    // entry on the anti-diagonal, so nG_app grows by one each iteration.

    for ( j = 0; j < k_minus_1; ++j )
    {
        nG_app = j + 1;

        // k ascends while g descends from j, keeping g + k == j.
        for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
        {
            g11   = buff_G + (g    )*rs_G + (k  )*cs_G;
            a1    = buff_A + (g    )*cs_A;
            a2    = buff_A + (g + 1)*cs_A;

            gamma = g11->real;
            sigma = g11->imag;

            // Skip the current iteration if the rotation is identity.
            if ( gamma == one && sigma == zero ) continue;

            MAC_Apply_G_mx2_opz( m_A,
                                 &gamma,
                                 &sigma,
                                 a1, rs_A,
                                 a2, rs_A );
        }
    }

    // Pipeline stage
    // Anti-diagonals j = k_G-1 .. nG-1: every one of the k_G columns of G
    // contributes an entry, so nG_app is fixed at k_G.

    for ( j = k_minus_1; j < nG; ++j )
    {
        nG_app = k_G;

        // k ascends while g descends from j, keeping g + k == j.
        for ( k = 0, g = j; k < nG_app; ++k, --g )
        {
            g11   = buff_G + (g    )*rs_G + (k  )*cs_G;
            a1    = buff_A + (g    )*cs_A;
            a2    = buff_A + (g + 1)*cs_A;

            gamma = g11->real;
            sigma = g11->imag;

            // Skip the current iteration if the rotation is identity.
            if ( gamma == one && sigma == zero ) continue;

            MAC_Apply_G_mx2_opz( m_A,
                                 &gamma,
                                 &sigma,
                                 a1, rs_A,
                                 a2, rs_A );
        }
    }

    // Shutdown stage
    // Remaining anti-diagonals (g + k == j + k_G - 1): the leading columns of
    // G are exhausted, so nG_app shrinks from k_G-1 down to 1.

    for ( j = nG - k_minus_1; j < nG; ++j )
    {
        nG_app = nG - j;

        // Start at the last row of G (g = nG-1) in the first column that
        // still has work, and finish at the last column k_G-1.
        for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
        {
            g11   = buff_G + (g    )*rs_G + (k  )*cs_G;
            a1    = buff_A + (g    )*cs_A;
            a2    = buff_A + (g + 1)*cs_A;

            gamma = g11->real;
            sigma = g11->imag;

            // Skip the current iteration if the rotation is identity.
            if ( gamma == one && sigma == zero ) continue;

            MAC_Apply_G_mx2_opz( m_A,
                                 &gamma,
                                 &sigma,
                                 a1, rs_A,
                                 a2, rs_A );
        }
    }

    return FLA_SUCCESS;
}