libflame  revision_anchor
Functions
FLA_Apply_G_rf_asm_var3.c File Reference

(r)

Functions

FLA_Error FLA_Apply_G_rf_asm_var3 (FLA_Obj G, FLA_Obj A)
FLA_Error FLA_Apply_G_rf_ass_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
FLA_Error FLA_Apply_G_rf_asd_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
FLA_Error FLA_Apply_G_rf_asc_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
FLA_Error FLA_Apply_G_rf_asz_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)

Function Documentation

FLA_Error FLA_Apply_G_rf_asc_var3 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)

References bli_s0(), bli_s1(), FLA_Apply_G_rf_asc_var1(), scomplex::imag, and scomplex::real.

Referenced by FLA_Apply_G_rf_asm_var3(), and FLA_Apply_G_rf_blc_var3().

{
    // Applies the rotations stored in buff_G to the columns of the scomplex
    // matrix buff_A, fusing up to four rotations (two adjacent columns of G,
    // two adjacent rows of G each) into a single macro call.
    //
    // Parameters, as used by the code below:
    //   k_G          - number of columns of G that are traversed (index k).
    //   m_A          - length handed to every MAC_Apply_G_* macro.
    //   n_A          - column count of A; nG = n_A - 1 rotations per column
    //                  of G.
    //   buff_G,
    //   rs_G, cs_G   - rotation (g,k) lives at buff_G + g*rs_G + k*cs_G; its
    //                  .real field is read as gamma and .imag as sigma.
    //   buff_A, cs_A - column c of A starts at buff_A + c*cs_A.
    //   rs_A         - only forwarded to the var1 fallback; the fused paths
    //                  always pass a stride of 1 to the macros.
    //
    // Always returns FLA_SUCCESS.
    //
    // NOTE(review): the MAC_Apply_G_mx2/mx3/mx4s_asc macros are defined
    // outside this view; presumably each applies Givens rotation(s) to the
    // given adjacent columns -- confirm against their definitions.
    float     one  = bli_s1();
    float     zero = bli_s0();
    float     gamma23_k1;
    float     sigma23_k1;
    float     gamma34_k1;
    float     sigma34_k1;
    float     gamma12_k2;
    float     sigma12_k2;
    float     gamma23_k2;
    float     sigma23_k2;
    scomplex* a1;
    scomplex* a2;
    scomplex* a3;
    scomplex* a4;
    scomplex* g23_k1;
    scomplex* g34_k1;
    scomplex* g12_k2;
    scomplex* g23_k2;
    int       i, j, g, k;
    int       nG, nG_app;
    int       n_iter;
    int       n_left;
    int       k_minus_1;
    int       n_fuse;
    int       k_fuse;
    int       is_ident23_k1, is_ident34_k1;
    int       is_ident12_k2, is_ident23_k2;
    int       has_ident;

    k_minus_1 = k_G - 1;
    nG        = n_A - 1;
    // Fusing factors: the outer index j and the row index g move in steps of
    // n_fuse; the column index k into G moves in steps of k_fuse.
    n_fuse    = 2;
    k_fuse    = 2;

    // Use the simple variant for nG < 2*(k - 1) or k == 1.
    if ( nG < 2*k_minus_1 || k_G == 1 )
    {
        FLA_Apply_G_rf_asc_var1( k_G,
                                 m_A,
                                 n_A,
                                 buff_G, rs_G, cs_G,
                                 buff_A, rs_A, cs_A );
        return FLA_SUCCESS;
    }


    // Start-up phase.
    // j starts at -1 and advances by n_fuse while j < k_G - 1. Each pass
    // performs (j + 2) / k_fuse fused steps followed by exactly one single
    // rotation (n_left is hard-wired to 1 here).

    for ( j = -1; j < k_minus_1; j += n_fuse )
    {
        nG_app = j + 2;
        n_iter = nG_app / k_fuse;
        //n_iter = nG_app % k_fuse;
        n_left = 1;

        // Each fused step moves two columns to the right in G (k += k_fuse)
        // and two rows up (g -= n_fuse).
        for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
        {
            // Rotations (g,k), (g+1,k) from column k of G and (g-1,k+1),
            // (g,k+1) from column k+1; together they touch columns
            // g-1 .. g+2 of A.
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
            a1     = buff_A + (g - 1)*cs_A;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;
            gamma12_k2 = g12_k2->real;
            sigma12_k2 = g12_k2->imag;
            gamma23_k2 = g23_k2->real;
            sigma23_k2 = g23_k2->imag;

            // A rotation with gamma == 1 and sigma == 0 is treated as the
            // identity and is skipped.
            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
            is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
            is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
            has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                              is_ident12_k2 || is_ident23_k2 );

            if      ( has_ident )
            {
                // Apply to pairs of columns as needed.

                if ( !is_ident23_k1 )
                    MAC_Apply_G_mx2_asc( m_A,
                                         &gamma23_k1,
                                         &sigma23_k1,
                                         a2, 1,
                                         a3, 1 );

                if ( !is_ident34_k1 )
                    MAC_Apply_G_mx2_asc( m_A,
                                         &gamma34_k1,
                                         &sigma34_k1,
                                         a3, 1,
                                         a4, 1 );

                if ( !is_ident12_k2 )
                    MAC_Apply_G_mx2_asc( m_A,
                                         &gamma12_k2,
                                         &sigma12_k2,
                                         a1, 1,
                                         a2, 1 );

                if ( !is_ident23_k2 )
                    MAC_Apply_G_mx2_asc( m_A,
                                         &gamma23_k2,
                                         &sigma23_k2,
                                         a2, 1,
                                         a3, 1 );
            }
            else
            {
                // Apply to all four columns.

                MAC_Apply_G_mx4s_asc( m_A,
                                      &gamma23_k1,
                                      &sigma23_k1,
                                      &gamma34_k1,
                                      &sigma34_k1,
                                      &gamma12_k2,
                                      &sigma12_k2,
                                      &gamma23_k2,
                                      &sigma23_k2,
                                      a1, 1,
                                      a2, 1,
                                      a3, 1,
                                      a4, 1 );
            }
        }

        // Trailing single rotation (g+1,k), applied to columns g+1 and g+2
        // using the g and k values left by the loop above.
        if ( n_left == 1 )
        {
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;

            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

            if ( !is_ident34_k1 )
                MAC_Apply_G_mx2_asc( m_A,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a3, 1,
                                     a4, 1 );
        }
    }

    // Pipeline stage
    // Continues from the j reached by the start-up loop. Every pass now
    // sweeps all k_G columns of G: k_G / k_fuse fused steps plus, when k_G
    // is odd, one leftover column handled after the loop.

    for ( ; j < nG - 1; j += n_fuse )
    {
        nG_app = k_G;
        n_iter = nG_app / k_fuse;
        n_left = nG_app % k_fuse;

        for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
            a1     = buff_A + (g - 1)*cs_A;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;
            gamma12_k2 = g12_k2->real;
            sigma12_k2 = g12_k2->imag;
            gamma23_k2 = g23_k2->real;
            sigma23_k2 = g23_k2->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
            is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
            is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
            has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                              is_ident12_k2 || is_ident23_k2 );

            if      ( has_ident )
            {
                // Apply to pairs of columns as needed.

                if ( !is_ident23_k1 )
                    MAC_Apply_G_mx2_asc( m_A,
                                         &gamma23_k1,
                                         &sigma23_k1,
                                         a2, 1,
                                         a3, 1 );

                if ( !is_ident34_k1 )
                    MAC_Apply_G_mx2_asc( m_A,
                                         &gamma34_k1,
                                         &sigma34_k1,
                                         a3, 1,
                                         a4, 1 );

                if ( !is_ident12_k2 )
                    MAC_Apply_G_mx2_asc( m_A,
                                         &gamma12_k2,
                                         &sigma12_k2,
                                         a1, 1,
                                         a2, 1 );

                if ( !is_ident23_k2 )
                    MAC_Apply_G_mx2_asc( m_A,
                                         &gamma23_k2,
                                         &sigma23_k2,
                                         a2, 1,
                                         a3, 1 );
            }
            else
            {
                // Apply to all four columns.

                MAC_Apply_G_mx4s_asc( m_A,
                                      &gamma23_k1,
                                      &sigma23_k1,
                                      &gamma34_k1,
                                      &sigma34_k1,
                                      &gamma12_k2,
                                      &sigma12_k2,
                                      &gamma23_k2,
                                      &sigma23_k2,
                                      a1, 1,
                                      a2, 1,
                                      a3, 1,
                                      a4, 1 );
            }
        }

        // Leftover column of G (k_G odd): rotations (g,k) and (g+1,k)
        // acting on columns g .. g+2 of A.
        if ( n_left == 1 )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

            if ( !is_ident23_k1 && is_ident34_k1 )
            {
                MAC_Apply_G_mx2_asc( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     a2, 1,
                                     a3, 1 );
            }
            else if ( is_ident23_k1 && !is_ident34_k1 )
            {
                MAC_Apply_G_mx2_asc( m_A,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a3, 1,
                                     a4, 1 );
            }
            else
            {
                // Reached when neither rotation is the identity, and also
                // when both are: the three-column macro is invoked in
                // either case.
                MAC_Apply_G_mx3_asc( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a2, 1,
                                     a3, 1,
                                     a4, 1 );
            }
        }
    }

    // Shutdown stage
    // j restarts at nG % n_fuse and now selects the first column of G for
    // each pass; every pass begins at the last rotation row, g = nG - 1.

    for ( j = nG % n_fuse; j < k_G; j += n_fuse )
    {
        g = nG - 1;
        k = j;

        // Unconditional single rotation (nG-1, j) on the last two columns
        // of A; afterwards step to the next column of G and one row up.
        //n_left = 1;
        //if ( n_left == 1 )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );

            if ( !is_ident23_k1 )
                MAC_Apply_G_mx2_asc( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     a2, 1,
                                     a3, 1 );
            ++k;
            --g;
        }

        // Columns of G still to be processed in this pass: k_G - 1 - j.
        nG_app = k_minus_1 - j;
        n_iter = nG_app / k_fuse;
        n_left = nG_app % k_fuse;

        // k and g carry over from the single rotation above, so the loop
        // header does not reinitialize them.
        for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
            a1     = buff_A + (g - 1)*cs_A;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;
            gamma12_k2 = g12_k2->real;
            sigma12_k2 = g12_k2->imag;
            gamma23_k2 = g23_k2->real;
            sigma23_k2 = g23_k2->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
            is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
            is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
            has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                              is_ident12_k2 || is_ident23_k2 );

            if      ( has_ident )
            {
                // Apply to pairs of columns as needed.

                if ( !is_ident23_k1 )
                    MAC_Apply_G_mx2_asc( m_A,
                                         &gamma23_k1,
                                         &sigma23_k1,
                                         a2, 1,
                                         a3, 1 );

                if ( !is_ident34_k1 )
                    MAC_Apply_G_mx2_asc( m_A,
                                         &gamma34_k1,
                                         &sigma34_k1,
                                         a3, 1,
                                         a4, 1 );

                if ( !is_ident12_k2 )
                    MAC_Apply_G_mx2_asc( m_A,
                                         &gamma12_k2,
                                         &sigma12_k2,
                                         a1, 1,
                                         a2, 1 );

                if ( !is_ident23_k2 )
                    MAC_Apply_G_mx2_asc( m_A,
                                         &gamma23_k2,
                                         &sigma23_k2,
                                         a2, 1,
                                         a3, 1 );
            }
            else
            {
                // Apply to all four columns.

                MAC_Apply_G_mx4s_asc( m_A,
                                      &gamma23_k1,
                                      &sigma23_k1,
                                      &gamma34_k1,
                                      &sigma34_k1,
                                      &gamma12_k2,
                                      &sigma12_k2,
                                      &gamma23_k2,
                                      &sigma23_k2,
                                      a1, 1,
                                      a2, 1,
                                      a3, 1,
                                      a4, 1 );
            }
        }

        // Leftover column of G for this pass: rotations (g,k) and (g+1,k)
        // acting on columns g .. g+2 of A.
        if ( n_left == 1 )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

            if ( !is_ident23_k1 && is_ident34_k1 )
            {
                MAC_Apply_G_mx2_asc( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     a2, 1,
                                     a3, 1 );
            }
            else if ( is_ident23_k1 && !is_ident34_k1 )
            {
                MAC_Apply_G_mx2_asc( m_A,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a3, 1,
                                     a4, 1 );
            }
            else
            {
                // Reached when neither rotation is the identity, and also
                // when both are: the three-column macro is invoked in
                // either case.
                MAC_Apply_G_mx3_asc( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a2, 1,
                                     a3, 1,
                                     a4, 1 );
            }
        }
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asd_var3 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)

References bli_d0(), bli_d1(), FLA_Apply_G_rf_asd_var1(), dcomplex::imag, and dcomplex::real.

Referenced by FLA_Apply_G_rf_asm_var3(), and FLA_Apply_G_rf_bld_var3().

{
    // Applies the rotations stored in buff_G to the columns of the real
    // double-precision matrix buff_A, fusing up to four rotations (two
    // adjacent columns of G, two adjacent rows of G each) into a single
    // macro call.
    //
    // Parameters, as used by the code below:
    //   k_G          - number of columns of G that are traversed (index k).
    //   m_A          - length handed to every MAC_Apply_G_* macro.
    //   n_A          - column count of A; nG = n_A - 1 rotations per column
    //                  of G.
    //   buff_G,
    //   rs_G, cs_G   - rotation (g,k) lives at buff_G + g*rs_G + k*cs_G; its
    //                  .real field is read as gamma and .imag as sigma.
    //   buff_A, cs_A - column c of A starts at buff_A + c*cs_A.
    //   rs_A         - only forwarded to the var1 fallback; the fused paths
    //                  always pass a stride of 1 to the macros.
    //
    // Always returns FLA_SUCCESS.
    //
    // NOTE(review): the MAC_Apply_G_mx2/mx3/mx4s_asd macros are defined
    // outside this view; presumably each applies Givens rotation(s) to the
    // given adjacent columns -- confirm against their definitions.
    double    one  = bli_d1();
    double    zero = bli_d0();
    double    gamma23_k1;
    double    sigma23_k1;
    double    gamma34_k1;
    double    sigma34_k1;
    double    gamma12_k2;
    double    sigma12_k2;
    double    gamma23_k2;
    double    sigma23_k2;
    double*   a1;
    double*   a2;
    double*   a3;
    double*   a4;
    dcomplex* g23_k1;
    dcomplex* g34_k1;
    dcomplex* g12_k2;
    dcomplex* g23_k2;
    int       i, j, g, k;
    int       nG, nG_app;
    int       n_iter;
    int       n_left;
    int       k_minus_1;
    int       n_fuse;
    int       k_fuse;
    int       is_ident23_k1, is_ident34_k1;
    int       is_ident12_k2, is_ident23_k2;
    int       has_ident;

    k_minus_1 = k_G - 1;
    nG        = n_A - 1;
    // Fusing factors: the outer index j and the row index g move in steps of
    // n_fuse; the column index k into G moves in steps of k_fuse.
    n_fuse    = 2;
    k_fuse    = 2;

    // Use the simple variant for nG < 2*(k - 1) or k == 1.
    if ( nG < 2*k_minus_1 || k_G == 1 )
    {
        FLA_Apply_G_rf_asd_var1( k_G,
                                 m_A,
                                 n_A,
                                 buff_G, rs_G, cs_G,
                                 buff_A, rs_A, cs_A );
        return FLA_SUCCESS;
    }


    // Start-up phase.
    // j starts at -1 and advances by n_fuse while j < k_G - 1. Each pass
    // performs (j + 2) / k_fuse fused steps followed by exactly one single
    // rotation (n_left is hard-wired to 1 here).

    for ( j = -1; j < k_minus_1; j += n_fuse )
    {
        nG_app = j + 2;
        n_iter = nG_app / k_fuse;
        n_left = 1;

        // Each fused step moves two columns to the right in G (k += k_fuse)
        // and two rows up (g -= n_fuse).
        for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
        {
            // Rotations (g,k), (g+1,k) from column k of G and (g-1,k+1),
            // (g,k+1) from column k+1; together they touch columns
            // g-1 .. g+2 of A.
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
            a1     = buff_A + (g - 1)*cs_A;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;
            gamma12_k2 = g12_k2->real;
            sigma12_k2 = g12_k2->imag;
            gamma23_k2 = g23_k2->real;
            sigma23_k2 = g23_k2->imag;

            // A rotation with gamma == 1 and sigma == 0 is treated as the
            // identity and is skipped.
            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
            is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
            is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
            has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                              is_ident12_k2 || is_ident23_k2 );

            if      ( has_ident )
            {
                // Apply to pairs of columns as needed.

                if ( !is_ident23_k1 )
                    MAC_Apply_G_mx2_asd( m_A,
                                         &gamma23_k1,
                                         &sigma23_k1,
                                         a2, 1,
                                         a3, 1 );

                if ( !is_ident34_k1 )
                    MAC_Apply_G_mx2_asd( m_A,
                                         &gamma34_k1,
                                         &sigma34_k1,
                                         a3, 1,
                                         a4, 1 );

                if ( !is_ident12_k2 )
                    MAC_Apply_G_mx2_asd( m_A,
                                         &gamma12_k2,
                                         &sigma12_k2,
                                         a1, 1,
                                         a2, 1 );

                if ( !is_ident23_k2 )
                    MAC_Apply_G_mx2_asd( m_A,
                                         &gamma23_k2,
                                         &sigma23_k2,
                                         a2, 1,
                                         a3, 1 );
            }
            else
            {
                // Apply to all four columns.

                MAC_Apply_G_mx4s_asd( m_A,
                                      &gamma23_k1,
                                      &sigma23_k1,
                                      &gamma34_k1,
                                      &sigma34_k1,
                                      &gamma12_k2,
                                      &sigma12_k2,
                                      &gamma23_k2,
                                      &sigma23_k2,
                                      a1, 1,
                                      a2, 1,
                                      a3, 1,
                                      a4, 1 );
            }
        }

        // Trailing single rotation (g+1,k), applied to columns g+1 and g+2
        // using the g and k values left by the loop above.
        if ( n_left == 1 )
        {
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;

            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

            if ( !is_ident34_k1 )
                MAC_Apply_G_mx2_asd( m_A,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a3, 1,
                                     a4, 1 );
        }
    }

    // Pipeline stage
    // Continues from the j reached by the start-up loop. Every pass now
    // sweeps all k_G columns of G: k_G / k_fuse fused steps plus, when k_G
    // is odd, one leftover column handled after the loop.

    for ( ; j < nG - 1; j += n_fuse )
    {
        nG_app = k_G;
        n_iter = nG_app / k_fuse;
        n_left = nG_app % k_fuse;

        for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
            a1     = buff_A + (g - 1)*cs_A;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;
            gamma12_k2 = g12_k2->real;
            sigma12_k2 = g12_k2->imag;
            gamma23_k2 = g23_k2->real;
            sigma23_k2 = g23_k2->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
            is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
            is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
            has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                              is_ident12_k2 || is_ident23_k2 );

            if      ( has_ident )
            {
                // Apply to pairs of columns as needed.

                if ( !is_ident23_k1 )
                    MAC_Apply_G_mx2_asd( m_A,
                                         &gamma23_k1,
                                         &sigma23_k1,
                                         a2, 1,
                                         a3, 1 );

                if ( !is_ident34_k1 )
                    MAC_Apply_G_mx2_asd( m_A,
                                         &gamma34_k1,
                                         &sigma34_k1,
                                         a3, 1,
                                         a4, 1 );

                if ( !is_ident12_k2 )
                    MAC_Apply_G_mx2_asd( m_A,
                                         &gamma12_k2,
                                         &sigma12_k2,
                                         a1, 1,
                                         a2, 1 );

                if ( !is_ident23_k2 )
                    MAC_Apply_G_mx2_asd( m_A,
                                         &gamma23_k2,
                                         &sigma23_k2,
                                         a2, 1,
                                         a3, 1 );
            }
            else
            {
                // Apply to all four columns.

                MAC_Apply_G_mx4s_asd( m_A,
                                      &gamma23_k1,
                                      &sigma23_k1,
                                      &gamma34_k1,
                                      &sigma34_k1,
                                      &gamma12_k2,
                                      &sigma12_k2,
                                      &gamma23_k2,
                                      &sigma23_k2,
                                      a1, 1,
                                      a2, 1,
                                      a3, 1,
                                      a4, 1 );
            }
        }

        // Leftover column of G (k_G odd): rotations (g,k) and (g+1,k)
        // acting on columns g .. g+2 of A.
        if ( n_left == 1 )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

            if ( !is_ident23_k1 && is_ident34_k1 )
            {
                MAC_Apply_G_mx2_asd( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     a2, 1,
                                     a3, 1 );
            }
            else if ( is_ident23_k1 && !is_ident34_k1 )
            {
                MAC_Apply_G_mx2_asd( m_A,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a3, 1,
                                     a4, 1 );
            }
            else
            {
                // Reached when neither rotation is the identity, and also
                // when both are: the three-column macro is invoked in
                // either case.
                MAC_Apply_G_mx3_asd( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a2, 1,
                                     a3, 1,
                                     a4, 1 );
            }
        }
    }

    // Shutdown stage
    // j restarts at nG % n_fuse and now selects the first column of G for
    // each pass; every pass begins at the last rotation row, g = nG - 1.

    for ( j = nG % n_fuse; j < k_G; j += n_fuse )
    {
        g = nG - 1;
        k = j;

        // Unconditional single rotation (nG-1, j) on the last two columns
        // of A; afterwards step to the next column of G and one row up.
        //n_left = 1;
        //if ( n_left == 1 )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );

            if ( !is_ident23_k1 )
                MAC_Apply_G_mx2_asd( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     a2, 1,
                                     a3, 1 );
            ++k;
            --g;
        }

        // Columns of G still to be processed in this pass: k_G - 1 - j.
        nG_app = k_minus_1 - j;
        n_iter = nG_app / k_fuse;
        n_left = nG_app % k_fuse;

        // k and g carry over from the single rotation above, so the loop
        // header does not reinitialize them.
        for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
            a1     = buff_A + (g - 1)*cs_A;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;
            gamma12_k2 = g12_k2->real;
            sigma12_k2 = g12_k2->imag;
            gamma23_k2 = g23_k2->real;
            sigma23_k2 = g23_k2->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
            is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
            is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
            has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                              is_ident12_k2 || is_ident23_k2 );

            if      ( has_ident )
            {
                // Apply to pairs of columns as needed.

                if ( !is_ident23_k1 )
                    MAC_Apply_G_mx2_asd( m_A,
                                         &gamma23_k1,
                                         &sigma23_k1,
                                         a2, 1,
                                         a3, 1 );

                if ( !is_ident34_k1 )
                    MAC_Apply_G_mx2_asd( m_A,
                                         &gamma34_k1,
                                         &sigma34_k1,
                                         a3, 1,
                                         a4, 1 );

                if ( !is_ident12_k2 )
                    MAC_Apply_G_mx2_asd( m_A,
                                         &gamma12_k2,
                                         &sigma12_k2,
                                         a1, 1,
                                         a2, 1 );

                if ( !is_ident23_k2 )
                    MAC_Apply_G_mx2_asd( m_A,
                                         &gamma23_k2,
                                         &sigma23_k2,
                                         a2, 1,
                                         a3, 1 );
            }
            else
            {
                // Apply to all four columns.

                MAC_Apply_G_mx4s_asd( m_A,
                                      &gamma23_k1,
                                      &sigma23_k1,
                                      &gamma34_k1,
                                      &sigma34_k1,
                                      &gamma12_k2,
                                      &sigma12_k2,
                                      &gamma23_k2,
                                      &sigma23_k2,
                                      a1, 1,
                                      a2, 1,
                                      a3, 1,
                                      a4, 1 );
            }
        }

        // Leftover column of G for this pass: rotations (g,k) and (g+1,k)
        // acting on columns g .. g+2 of A.
        if ( n_left == 1 )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

            if ( !is_ident23_k1 && is_ident34_k1 )
            {
                MAC_Apply_G_mx2_asd( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     a2, 1,
                                     a3, 1 );
            }
            else if ( is_ident23_k1 && !is_ident34_k1 )
            {
                MAC_Apply_G_mx2_asd( m_A,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a3, 1,
                                     a4, 1 );
            }
            else
            {
                // Reached when neither rotation is the identity, and also
                // when both are: the three-column macro is invoked in
                // either case.
                MAC_Apply_G_mx3_asd( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a2, 1,
                                     a3, 1,
                                     a4, 1 );
            }
        }
    }

    return FLA_SUCCESS;
}

References FLA_Apply_G_rf_asc_var3(), FLA_Apply_G_rf_asd_var3(), FLA_Apply_G_rf_ass_var3(), FLA_Apply_G_rf_asz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

{
    // Collect what every typed kernel needs: the datatype of A picks the
    // kernel, while the dimensions and strides are forwarded unchanged.
    FLA_Datatype dt_A     = FLA_Obj_datatype( A );

    int          width_G  = FLA_Obj_width( G );
    int          length_A = FLA_Obj_length( A );
    int          width_A  = FLA_Obj_width( A );

    int          rstr_G   = FLA_Obj_row_stride( G );
    int          cstr_G   = FLA_Obj_col_stride( G );

    int          rstr_A   = FLA_Obj_row_stride( A );
    int          cstr_A   = FLA_Obj_col_stride( A );

    // In every branch G is accessed through a complex pointer whose
    // precision matches A (single for float/scomplex, double otherwise).
    // A datatype not listed here falls through without calling any kernel.
    if ( dt_A == FLA_FLOAT )
    {
        scomplex* g_ptr = ( scomplex* ) FLA_COMPLEX_PTR( G );
        float*    a_ptr = ( float*    ) FLA_FLOAT_PTR( A );

        FLA_Apply_G_rf_ass_var3( width_G, length_A, width_A,
                                 g_ptr, rstr_G, cstr_G,
                                 a_ptr, rstr_A, cstr_A );
    }
    else if ( dt_A == FLA_DOUBLE )
    {
        dcomplex* g_ptr = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        double*   a_ptr = ( double*   ) FLA_DOUBLE_PTR( A );

        FLA_Apply_G_rf_asd_var3( width_G, length_A, width_A,
                                 g_ptr, rstr_G, cstr_G,
                                 a_ptr, rstr_A, cstr_A );
    }
    else if ( dt_A == FLA_COMPLEX )
    {
        scomplex* g_ptr = ( scomplex* ) FLA_COMPLEX_PTR( G );
        scomplex* a_ptr = ( scomplex* ) FLA_COMPLEX_PTR( A );

        FLA_Apply_G_rf_asc_var3( width_G, length_A, width_A,
                                 g_ptr, rstr_G, cstr_G,
                                 a_ptr, rstr_A, cstr_A );
    }
    else if ( dt_A == FLA_DOUBLE_COMPLEX )
    {
        dcomplex* g_ptr = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        dcomplex* a_ptr = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

        FLA_Apply_G_rf_asz_var3( width_G, length_A, width_A,
                                 g_ptr, rstr_G, cstr_G,
                                 a_ptr, rstr_A, cstr_A );
    }

    // The status is FLA_SUCCESS regardless of which branch (if any) ran.
    return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_ass_var3 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)

References bli_s0(), bli_s1(), FLA_Apply_G_rf_ass_var1(), scomplex::imag, and scomplex::real.

Referenced by FLA_Apply_G_rf_asm_var3(), and FLA_Apply_G_rf_bls_var3().

{
    // Overview
    // --------
    // Walks the k_G columns of G and, for every entry that is not an
    // identity, hands it to one of the MAC_Apply_G_* macros together with
    // pointers to adjacent columns of the real (float) matrix A.
    //
    //   - Entry (g, k) of G lives at buff_G + g*rs_G + k*cs_G. Its .real
    //     field is read as "gamma" and its .imag field as "sigma".
    //   - Column c of A starts at buff_A + c*cs_A; m_A is passed to the
    //     macros as the first argument.
    //   - An entry with gamma == 1 and sigma == 0 is treated as an identity
    //     and is skipped where possible.
    //   - rs_A is only forwarded to the var1 fallback; the fused path passes
    //     the literal 1 alongside each column pointer (presumably the row
    //     increment -- confirm against the macro definitions).
    //
    // The work is organised in three loops (start-up, pipeline, shutdown)
    // that apply entries from two G columns at a time (k_fuse = 2) while
    // advancing two rows of G per outer pass (n_fuse = 2).
    //
    // Always returns FLA_SUCCESS.

    float     one  = bli_s1();
    float     zero = bli_s0();
    // gammaXY_kZ / sigmaXY_kZ: real / imag parts of the G entry that is
    // applied to columns aX and aY, taken from the first (k1) or second
    // (k2) G column of the current fused pair.
    float     gamma23_k1;
    float     sigma23_k1;
    float     gamma34_k1;
    float     sigma34_k1;
    float     gamma12_k2;
    float     sigma12_k2;
    float     gamma23_k2;
    float     sigma23_k2;
    // a1..a4: four consecutive columns of A (g-1, g, g+1, g+2).
    float*    a1;
    float*    a2;
    float*    a3;
    float*    a4;
    // Pointers to the G entries whose fields are loaded into gamma/sigma.
    scomplex* g23_k1;
    scomplex* g34_k1;
    scomplex* g12_k2;
    scomplex* g23_k2;
    // i: fused-iteration counter; j: outer position; g: current row of G
    // (and column of A); k: current column of G.
    int       i, j, g, k;
    int       nG, nG_app;
    int       n_iter;
    int       n_left;
    int       k_minus_1;
    int       n_fuse;
    int       k_fuse;
    int       is_ident23_k1, is_ident34_k1;
    int       is_ident12_k2, is_ident23_k2;
    int       has_ident;

    k_minus_1 = k_G - 1;
    // nG is one less than the number of columns of A.
    nG        = n_A - 1;
    n_fuse    = 2;
    k_fuse    = 2;

    // Use the simple variant for nG < 2*(k - 1) or k == 1.
    if ( nG < 2*k_minus_1 || k_G == 1 )
    {
        FLA_Apply_G_rf_ass_var1( k_G,
                                 m_A,
                                 n_A,
                                 buff_G, rs_G, cs_G,
                                 buff_A, rs_A, cs_A );
        return FLA_SUCCESS;
    }


    // Start-up phase.
    //
    // j starts at -1 and advances by n_fuse while j < k_G - 1. Each pass
    // runs (j + 2) / k_fuse fused iterations and then always one leftover
    // application (n_left is fixed at 1 here).

    for ( j = -1; j < k_minus_1; j += n_fuse )
    {
        nG_app = j + 2;
        n_iter = nG_app / k_fuse;
        n_left = 1;

        // Fused iterations: k moves forward by k_fuse columns of G while g
        // moves back by n_fuse rows. On the first pass (j == -1) n_iter is
        // 0, so only the initialisers k = 0, g = -1 take effect.
        for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
        {
            // Rows g and g+1 of G column k, rows g-1 and g of G column k+1.
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
            // Columns g-1 .. g+2 of A.
            a1     = buff_A + (g - 1)*cs_A;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;
            gamma12_k2 = g12_k2->real;
            sigma12_k2 = g12_k2->imag;
            gamma23_k2 = g23_k2->real;
            sigma23_k2 = g23_k2->imag;

            // Exact floating-point comparison against 1 and 0 marks an
            // entry as an identity.
            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
            is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
            is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
            has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                              is_ident12_k2 || is_ident23_k2 );

            if      ( has_ident )
            {
                // Apply to pairs of columns as needed.
                // The order below (23_k1, 34_k1, 12_k2, 23_k2) matches the
                // argument order given to the fused macro in the else branch.

                if ( !is_ident23_k1 )
                    MAC_Apply_G_mx2_ass( m_A,
                                         &gamma23_k1,
                                         &sigma23_k1,
                                         a2, 1,
                                         a3, 1 );

                if ( !is_ident34_k1 )
                    MAC_Apply_G_mx2_ass( m_A,
                                         &gamma34_k1,
                                         &sigma34_k1,
                                         a3, 1,
                                         a4, 1 );

                if ( !is_ident12_k2 )
                    MAC_Apply_G_mx2_ass( m_A,
                                         &gamma12_k2,
                                         &sigma12_k2,
                                         a1, 1,
                                         a2, 1 );

                if ( !is_ident23_k2 )
                    MAC_Apply_G_mx2_ass( m_A,
                                         &gamma23_k2,
                                         &sigma23_k2,
                                         a2, 1,
                                         a3, 1 );
            }
            else
            {
                // Apply to all four columns.

                MAC_Apply_G_mx4s_ass( m_A,
                                      &gamma23_k1,
                                      &sigma23_k1,
                                      &gamma34_k1,
                                      &sigma34_k1,
                                      &gamma12_k2,
                                      &sigma12_k2,
                                      &gamma23_k2,
                                      &sigma23_k2,
                                      a1, 1,
                                      a2, 1,
                                      a3, 1,
                                      a4, 1 );
            }
        }

        // Leftover for the start-up phase: only the (g + 1, k) entry is
        // applied, to columns g+1 and g+2, using the g and k values left
        // by the loop above. On the first pass this is entry (0, 0) of G
        // applied to columns 0 and 1 of A.
        if ( n_left == 1 )
        {
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;

            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

            if ( !is_ident34_k1 )
                MAC_Apply_G_mx2_ass( m_A,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a3, 1,
                                     a4, 1 );
        }
    }

    // Pipeline stage
    //
    // j continues from the value the start-up loop ended on and advances
    // by n_fuse while j < nG - 1. Every pass covers all k_G columns of G:
    // k_G / k_fuse fused iterations plus one leftover when k_G is odd.

    for ( ; j < nG - 1; j += n_fuse )
    {
        nG_app = k_G;
        n_iter = nG_app / k_fuse;
        n_left = nG_app % k_fuse;

        for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
        {
            // Same addressing as in the start-up phase.
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
            a1     = buff_A + (g - 1)*cs_A;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;
            gamma12_k2 = g12_k2->real;
            sigma12_k2 = g12_k2->imag;
            gamma23_k2 = g23_k2->real;
            sigma23_k2 = g23_k2->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
            is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
            is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
            has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                              is_ident12_k2 || is_ident23_k2 );

            if      ( has_ident )
            {
                // Apply to pairs of columns as needed.

                if ( !is_ident23_k1 )
                    MAC_Apply_G_mx2_ass( m_A,
                                         &gamma23_k1,
                                         &sigma23_k1,
                                         a2, 1,
                                         a3, 1 );

                if ( !is_ident34_k1 )
                    MAC_Apply_G_mx2_ass( m_A,
                                         &gamma34_k1,
                                         &sigma34_k1,
                                         a3, 1,
                                         a4, 1 );

                if ( !is_ident12_k2 )
                    MAC_Apply_G_mx2_ass( m_A,
                                         &gamma12_k2,
                                         &sigma12_k2,
                                         a1, 1,
                                         a2, 1 );

                if ( !is_ident23_k2 )
                    MAC_Apply_G_mx2_ass( m_A,
                                         &gamma23_k2,
                                         &sigma23_k2,
                                         a2, 1,
                                         a3, 1 );
            }
            else
            {
                // Apply to all four columns.

                MAC_Apply_G_mx4s_ass( m_A,
                                      &gamma23_k1,
                                      &sigma23_k1,
                                      &gamma34_k1,
                                      &sigma34_k1,
                                      &gamma12_k2,
                                      &sigma12_k2,
                                      &gamma23_k2,
                                      &sigma23_k2,
                                      a1, 1,
                                      a2, 1,
                                      a3, 1,
                                      a4, 1 );
            }
        }

        // Leftover G column (k_G odd): entries (g, k) and (g + 1, k) act on
        // columns g, g+1, g+2.
        if ( n_left == 1 )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

            // Exactly one non-identity entry: apply just that one.
            if ( !is_ident23_k1 && is_ident34_k1 )
            {
                MAC_Apply_G_mx2_ass( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     a2, 1,
                                     a3, 1 );
            }
            else if ( is_ident23_k1 && !is_ident34_k1 )
            {
                MAC_Apply_G_mx2_ass( m_A,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a3, 1,
                                     a4, 1 );
            }
            else
            {
                // NOTE(review): this branch is reached when neither entry
                // is an identity, but also when both are; in the latter
                // case the three-column macro is still invoked.
                MAC_Apply_G_mx3_ass( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a2, 1,
                                     a3, 1,
                                     a4, 1 );
            }
        }
    }

    // Shutdown stage
    //
    // j restarts at nG % n_fuse and advances by n_fuse while j < k_G. Each
    // pass begins at row g = nG - 1 of G in column k = j.

    for ( j = nG % n_fuse; j < k_G; j += n_fuse )
    {
        g = nG - 1;
        k = j;

        // Unconditional single application: entry (g, k) on columns g and
        // g + 1 (g + 1 == nG == n_A - 1). Afterwards step to the next G
        // column and the previous row.
        //n_left = 1;
        //if ( n_left == 1 )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );

            if ( !is_ident23_k1 )
                MAC_Apply_G_mx2_ass( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     a2, 1,
                                     a3, 1 );
            ++k;
            --g;
        }

        // The remaining (k_G - 1 - j) columns of G are processed in fused
        // pairs, with one leftover column when that count is odd.
        nG_app = k_minus_1 - j;
        n_iter = nG_app / k_fuse;
        n_left = nG_app % k_fuse;

        // k and g carry over from the single application above, so the
        // loop initialiser only resets i.
        for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
            a1     = buff_A + (g - 1)*cs_A;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;
            gamma12_k2 = g12_k2->real;
            sigma12_k2 = g12_k2->imag;
            gamma23_k2 = g23_k2->real;
            sigma23_k2 = g23_k2->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
            is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
            is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
            has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                              is_ident12_k2 || is_ident23_k2 );

            if      ( has_ident )
            {
                // Apply to pairs of columns as needed.

                if ( !is_ident23_k1 )
                    MAC_Apply_G_mx2_ass( m_A,
                                         &gamma23_k1,
                                         &sigma23_k1,
                                         a2, 1,
                                         a3, 1 );

                if ( !is_ident34_k1 )
                    MAC_Apply_G_mx2_ass( m_A,
                                         &gamma34_k1,
                                         &sigma34_k1,
                                         a3, 1,
                                         a4, 1 );

                if ( !is_ident12_k2 )
                    MAC_Apply_G_mx2_ass( m_A,
                                         &gamma12_k2,
                                         &sigma12_k2,
                                         a1, 1,
                                         a2, 1 );

                if ( !is_ident23_k2 )
                    MAC_Apply_G_mx2_ass( m_A,
                                         &gamma23_k2,
                                         &sigma23_k2,
                                         a2, 1,
                                         a3, 1 );
            }
            else
            {
                // Apply to all four columns.

                MAC_Apply_G_mx4s_ass( m_A,
                                      &gamma23_k1,
                                      &sigma23_k1,
                                      &gamma34_k1,
                                      &sigma34_k1,
                                      &gamma12_k2,
                                      &sigma12_k2,
                                      &gamma23_k2,
                                      &sigma23_k2,
                                      a1, 1,
                                      a2, 1,
                                      a3, 1,
                                      a4, 1 );
            }
        }

        // Leftover G column for this shutdown pass; same three-way choice
        // as the pipeline leftover (the final else also covers the case
        // where both entries are identities).
        if ( n_left == 1 )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

            if ( !is_ident23_k1 && is_ident34_k1 )
            {
                MAC_Apply_G_mx2_ass( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     a2, 1,
                                     a3, 1 );
            }
            else if ( is_ident23_k1 && !is_ident34_k1 )
            {
                MAC_Apply_G_mx2_ass( m_A,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a3, 1,
                                     a4, 1 );
            }
            else
            {
                MAC_Apply_G_mx3_ass( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a2, 1,
                                     a3, 1,
                                     a4, 1 );
            }
        }
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asz_var3 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A 
)

References bli_d0(), bli_d1(), FLA_Apply_G_rf_asz_var1(), dcomplex::imag, and dcomplex::real.

Referenced by FLA_Apply_G_rf_asm_var3(), and FLA_Apply_G_rf_blz_var3().

{
    double    one  = bli_d1();
    double    zero = bli_d0();
    double    gamma23_k1;
    double    sigma23_k1;
    double    gamma34_k1;
    double    sigma34_k1;
    double    gamma12_k2;
    double    sigma12_k2;
    double    gamma23_k2;
    double    sigma23_k2;
    dcomplex* a1;
    dcomplex* a2;
    dcomplex* a3;
    dcomplex* a4;
    dcomplex* g23_k1;
    dcomplex* g34_k1;
    dcomplex* g12_k2;
    dcomplex* g23_k2;
    int       i, j, g, k;
    int       nG, nG_app;
    int       n_iter;
    int       n_left;
    int       k_minus_1;
    int       n_fuse;
    int       k_fuse;
    int       is_ident23_k1, is_ident34_k1;
    int       is_ident12_k2, is_ident23_k2;
    int       has_ident;

    k_minus_1 = k_G - 1;
    nG        = n_A - 1;
    n_fuse    = 2;
    k_fuse    = 2;

    // Use the simple variant for nG < (k - 1) or k == 1.
    if ( nG < 2*k_minus_1 || k_G == 1 )
    {
        FLA_Apply_G_rf_asz_var1( k_G,
                                 m_A,
                                 n_A,
                                 buff_G, rs_G, cs_G,
                                 buff_A, rs_A, cs_A );
        return FLA_SUCCESS;
    }


    // Start-up phase.

    for ( j = -1; j < k_minus_1; j += n_fuse )
    {
        nG_app = j + 2;
        n_iter = nG_app / k_fuse;
        //n_iter = nG_app % k_fuse;
        n_left = 1;

        for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
            a1     = buff_A + (g - 1)*cs_A;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;
            gamma12_k2 = g12_k2->real;
            sigma12_k2 = g12_k2->imag;
            gamma23_k2 = g23_k2->real;
            sigma23_k2 = g23_k2->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
            is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
            is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
            has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                              is_ident12_k2 || is_ident23_k2 );

            if      ( has_ident )
            {
                // Apply to pairs of columns as needed.

                if ( !is_ident23_k1 )
                    MAC_Apply_G_mx2_asz( m_A,
                                         &gamma23_k1,
                                         &sigma23_k1,
                                         a2, 1,
                                         a3, 1 );

                if ( !is_ident34_k1 )
                    MAC_Apply_G_mx2_asz( m_A,
                                         &gamma34_k1,
                                         &sigma34_k1,
                                         a3, 1,
                                         a4, 1 );

                if ( !is_ident12_k2 )
                    MAC_Apply_G_mx2_asz( m_A,
                                         &gamma12_k2,
                                         &sigma12_k2,
                                         a1, 1,
                                         a2, 1 );

                if ( !is_ident23_k2 )
                    MAC_Apply_G_mx2_asz( m_A,
                                         &gamma23_k2,
                                         &sigma23_k2,
                                         a2, 1,
                                         a3, 1 );
            }
            else
            {
                // Apply to all four columns.

                MAC_Apply_G_mx4s_asz( m_A,
                                      &gamma23_k1,
                                      &sigma23_k1,
                                      &gamma34_k1,
                                      &sigma34_k1,
                                      &gamma12_k2,
                                      &sigma12_k2,
                                      &gamma23_k2,
                                      &sigma23_k2,
                                      a1, 1,
                                      a2, 1,
                                      a3, 1,
                                      a4, 1 );
            }
        }

        if ( n_left == 1 )
        {
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;

            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

            if ( !is_ident34_k1 )
                MAC_Apply_G_mx2_asz( m_A,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a3, 1,
                                     a4, 1 );
        }
    }

    // Pipeline stage

    for ( ; j < nG - 1; j += n_fuse )
    {
        nG_app = k_G;
        n_iter = nG_app / k_fuse;
        n_left = nG_app % k_fuse;

        for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
            a1     = buff_A + (g - 1)*cs_A;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;
            gamma12_k2 = g12_k2->real;
            sigma12_k2 = g12_k2->imag;
            gamma23_k2 = g23_k2->real;
            sigma23_k2 = g23_k2->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
            is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
            is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
            has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                              is_ident12_k2 || is_ident23_k2 );

            if      ( has_ident )
            {
                // Apply to pairs of columns as needed.

                if ( !is_ident23_k1 )
                    MAC_Apply_G_mx2_asz( m_A,
                                         &gamma23_k1,
                                         &sigma23_k1,
                                         a2, 1,
                                         a3, 1 );

                if ( !is_ident34_k1 )
                    MAC_Apply_G_mx2_asz( m_A,
                                         &gamma34_k1,
                                         &sigma34_k1,
                                         a3, 1,
                                         a4, 1 );

                if ( !is_ident12_k2 )
                    MAC_Apply_G_mx2_asz( m_A,
                                         &gamma12_k2,
                                         &sigma12_k2,
                                         a1, 1,
                                         a2, 1 );

                if ( !is_ident23_k2 )
                    MAC_Apply_G_mx2_asz( m_A,
                                         &gamma23_k2,
                                         &sigma23_k2,
                                         a2, 1,
                                         a3, 1 );
            }
            else
            {
                // Apply to all four columns.

                MAC_Apply_G_mx4s_asz( m_A,
                                      &gamma23_k1,
                                      &sigma23_k1,
                                      &gamma34_k1,
                                      &sigma34_k1,
                                      &gamma12_k2,
                                      &sigma12_k2,
                                      &gamma23_k2,
                                      &sigma23_k2,
                                      a1, 1,
                                      a2, 1,
                                      a3, 1,
                                      a4, 1 );
            }
        }

        if ( n_left == 1 )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

            if ( !is_ident23_k1 && is_ident34_k1 )
            {
                MAC_Apply_G_mx2_asz( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     a2, 1,
                                     a3, 1 );
            }
            else if ( is_ident23_k1 && !is_ident34_k1 )
            {
                MAC_Apply_G_mx2_asz( m_A,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a3, 1,
                                     a4, 1 );
            }
            else
            {
                MAC_Apply_G_mx3_asz( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a2, 1,
                                     a3, 1,
                                     a4, 1 );
            }
        }
    }

    // Shutdown stage

    for ( j = nG % n_fuse; j < k_G; j += n_fuse )
    {
        g = nG - 1;
        k = j;

        //n_left = 1;
        //if ( n_left == 1 )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );

            if ( !is_ident23_k1 )
                MAC_Apply_G_mx2_asz( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     a2, 1,
                                     a3, 1 );
            ++k;
            --g;
        }

        nG_app = k_minus_1 - j;
        n_iter = nG_app / k_fuse;
        n_left = nG_app % k_fuse;

        for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
            a1     = buff_A + (g - 1)*cs_A;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;
            gamma12_k2 = g12_k2->real;
            sigma12_k2 = g12_k2->imag;
            gamma23_k2 = g23_k2->real;
            sigma23_k2 = g23_k2->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
            is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
            is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
            has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                              is_ident12_k2 || is_ident23_k2 );

            if      ( has_ident )
            {
                // Apply to pairs of columns as needed.

                if ( !is_ident23_k1 )
                    MAC_Apply_G_mx2_asz( m_A,
                                         &gamma23_k1,
                                         &sigma23_k1,
                                         a2, 1,
                                         a3, 1 );

                if ( !is_ident34_k1 )
                    MAC_Apply_G_mx2_asz( m_A,
                                         &gamma34_k1,
                                         &sigma34_k1,
                                         a3, 1,
                                         a4, 1 );

                if ( !is_ident12_k2 )
                    MAC_Apply_G_mx2_asz( m_A,
                                         &gamma12_k2,
                                         &sigma12_k2,
                                         a1, 1,
                                         a2, 1 );

                if ( !is_ident23_k2 )
                    MAC_Apply_G_mx2_asz( m_A,
                                         &gamma23_k2,
                                         &sigma23_k2,
                                         a2, 1,
                                         a3, 1 );
            }
            else
            {
                // Apply to all four columns.

                MAC_Apply_G_mx4s_asz( m_A,
                                      &gamma23_k1,
                                      &sigma23_k1,
                                      &gamma34_k1,
                                      &sigma34_k1,
                                      &gamma12_k2,
                                      &sigma12_k2,
                                      &gamma23_k2,
                                      &sigma23_k2,
                                      a1, 1,
                                      a2, 1,
                                      a3, 1,
                                      a4, 1 );
            }
        }

        if ( n_left == 1 )
        {
            g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a2     = buff_A + (g    )*cs_A;
            a3     = buff_A + (g + 1)*cs_A;
            a4     = buff_A + (g + 2)*cs_A;

            gamma23_k1 = g23_k1->real;
            sigma23_k1 = g23_k1->imag;
            gamma34_k1 = g34_k1->real;
            sigma34_k1 = g34_k1->imag;

            is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
            is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

            if ( !is_ident23_k1 && is_ident34_k1 )
            {
                MAC_Apply_G_mx2_asz( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     a2, 1,
                                     a3, 1 );
            }
            else if ( is_ident23_k1 && !is_ident34_k1 )
            {
                MAC_Apply_G_mx2_asz( m_A,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a3, 1,
                                     a4, 1 );
            }
            else
            {
                MAC_Apply_G_mx3_asz( m_A,
                                     &gamma23_k1,
                                     &sigma23_k1,
                                     &gamma34_k1,
                                     &sigma34_k1,
                                     a2, 1,
                                     a3, 1,
                                     a4, 1 );
            }
        }
    }

    return FLA_SUCCESS;
}