libflame revision_anchor
Functions
bli_copymt.c File Reference

(r)

Functions

void bli_icopymt (char trans, int m, int n, int *a, int a_rs, int a_cs, int *b, int b_rs, int b_cs)
void bli_scopymt (char trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bli_dcopymt (char trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bli_ccopymt (char trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bli_zcopymt (char trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bli_sscopymt (char trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bli_sdcopymt (char trans, int m, int n, float *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bli_dscopymt (char trans, int m, int n, double *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bli_sccopymt (char trans, int m, int n, float *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bli_cscopymt (char trans, int m, int n, scomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bli_szcopymt (char trans, int m, int n, float *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bli_zscopymt (char trans, int m, int n, dcomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bli_ddcopymt (char trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bli_dccopymt (char trans, int m, int n, double *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bli_cdcopymt (char trans, int m, int n, scomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bli_dzcopymt (char trans, int m, int n, double *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bli_zdcopymt (char trans, int m, int n, dcomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bli_cccopymt (char trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bli_czcopymt (char trans, int m, int n, scomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bli_zccopymt (char trans, int m, int n, dcomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bli_zzcopymt (char trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)

Function Documentation

void bli_cccopymt ( char  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_ccopyv(), bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), and bli_zero_dim2().

{
    // Copies a matrix from A into B one strided vector at a time, handing each
    // vector to bli_ccopyv(). B is addressed as m rows (stride b_rs) by n
    // columns (stride b_cs); when trans requests a transpose, A's two strides
    // are exchanged so that A is read transposed.
    scomplex* a_begin;   // start of the current vector within A
    scomplex* b_begin;   // start of the current vector within B
    int       lda, inca; // A: stride between vectors, stride between elements
    int       ldb, incb; // B: stride between vectors, stride between elements
    int       n_iter;    // number of vector copies to perform
    int       n_elem;    // number of elements in each vector copy
    int       j;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        // A's increment is selected with trans taken into account; B's is
        // always selected as if untransposed.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;
        
        // Handle the transposition of A.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        // The iteration counts and both stride pairs are swapped together, so
        // the same elements of A and B are still paired up.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
            bli_swap_ints( ldb, incb );
        }
    }

    // One vector copy per iteration; j*lda and j*ldb locate the j-th vector.
    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        // trans is forwarded unchanged; how bli_ccopyv() uses it is not
        // visible here (presumably to handle conjugation -- confirm).
        bli_ccopyv( trans,
                    n_elem,
                    a_begin, inca,
                    b_begin, incb );
    }
}
void bli_ccopymt ( char  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_cconjv(), bli_ccopy(), bli_does_conj(), bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), and bli_zero_dim2().

Referenced by bli_ccreate_contigm(), bli_ccreate_contigmt(), bli_cfree_saved_contigm(), bli_cfree_saved_contigmsr(), bli_cgemm(), bli_chemm(), bli_cher2k(), bli_csymm(), bli_csyr2k(), bli_ctrmmsx(), bli_ctrsmsx(), FLA_Copy_external(), and FLA_Copyt_external().

{
    // Copies a matrix from A into B one strided vector at a time using
    // bli_ccopy(), then conjugates the copied vector in B when trans requests
    // conjugation. B is addressed as m rows (stride b_rs) by n columns
    // (stride b_cs); when trans requests a transpose, A's two strides are
    // exchanged so that A is read transposed.
    scomplex* a_begin;   // start of the current vector within A
    scomplex* b_begin;   // start of the current vector within B
    int       lda, inca; // A: stride between vectors, stride between elements
    int       ldb, incb; // B: stride between vectors, stride between elements
    int       n_iter;    // number of vector copies to perform
    int       n_elem;    // number of elements in each vector copy
    int       j;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        // A's increment is selected with trans taken into account; B's is
        // always selected as if untransposed.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;
        
        // Handle the transposition of A.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major and if A is effectively row-major
        // after a possible transposition, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
                 ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) )
            {
                // The iteration counts and both stride pairs are swapped
                // together, so the same elements of A and B are still paired.
                bli_swap_ints( n_iter, n_elem );
                bli_swap_ints( lda, inca );
                bli_swap_ints( ldb, incb );
            }
        }
    }

    // One vector copy per iteration; j*lda and j*ldb locate the j-th vector.
    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;
        
        bli_ccopy( n_elem,
                   a_begin, inca, 
                   b_begin, incb );

        // bli_ccopy() is not given trans, so conjugation is applied to the
        // freshly written vector of B as a separate in-place step.
        if ( bli_does_conj( trans ) )
            bli_cconjv( n_elem,
                        b_begin, incb );
    }
}
void bli_cdcopymt ( char  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bli_cdcopyv(), bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), and bli_zero_dim2().

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    // Copies a matrix from A (scomplex) into B (double) one strided vector at
    // a time, handing each vector to bli_cdcopyv(). B is addressed as m rows
    // (stride b_rs) by n columns (stride b_cs); when trans requests a
    // transpose, A's two strides are exchanged so that A is read transposed.
    scomplex* a_begin;   // start of the current vector within A
    double*   b_begin;   // start of the current vector within B
    int       lda, inca; // A: stride between vectors, stride between elements
    int       ldb, incb; // B: stride between vectors, stride between elements
    int       n_iter;    // number of vector copies to perform
    int       n_elem;    // number of elements in each vector copy
    int       j;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        // A's increment is selected with trans taken into account; B's is
        // always selected as if untransposed.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;
        
        // Handle the transposition of A.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        // The iteration counts and both stride pairs are swapped together, so
        // the same elements of A and B are still paired up.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
            bli_swap_ints( ldb, incb );
        }
    }

    // One vector copy per iteration; j*lda and j*ldb locate the j-th vector.
    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        // trans is forwarded unchanged; how bli_cdcopyv() uses it, and how it
        // converts complex values to real ones, is not visible here.
        bli_cdcopyv( trans,
                     n_elem,
                     a_begin, inca,
                     b_begin, incb );
    }
}
void bli_cscopymt ( char  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bli_cscopyv(), bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), and bli_zero_dim2().

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    // Copies a matrix from A (scomplex) into B (float) one strided vector at
    // a time, handing each vector to bli_cscopyv(). B is addressed as m rows
    // (stride b_rs) by n columns (stride b_cs); when trans requests a
    // transpose, A's two strides are exchanged so that A is read transposed.
    scomplex* a_begin;   // start of the current vector within A
    float*    b_begin;   // start of the current vector within B
    int       lda, inca; // A: stride between vectors, stride between elements
    int       ldb, incb; // B: stride between vectors, stride between elements
    int       n_iter;    // number of vector copies to perform
    int       n_elem;    // number of elements in each vector copy
    int       j;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        // A's increment is selected with trans taken into account; B's is
        // always selected as if untransposed.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;
        
        // Handle the transposition of A.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        // The iteration counts and both stride pairs are swapped together, so
        // the same elements of A and B are still paired up.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
            bli_swap_ints( ldb, incb );
        }
    }

    // One vector copy per iteration; j*lda and j*ldb locate the j-th vector.
    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        // trans is forwarded unchanged; how bli_cscopyv() uses it, and how it
        // converts complex values to real ones, is not visible here.
        bli_cscopyv( trans,
                     n_elem,
                     a_begin, inca,
                     b_begin, incb );
    }
}
void bli_czcopymt ( char  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_czcopyv(), bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), and bli_zero_dim2().

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    // Copies a matrix from A (scomplex) into B (dcomplex) one strided vector
    // at a time, handing each vector to bli_czcopyv(). B is addressed as m
    // rows (stride b_rs) by n columns (stride b_cs); when trans requests a
    // transpose, A's two strides are exchanged so that A is read transposed.
    scomplex* a_begin;   // start of the current vector within A
    dcomplex* b_begin;   // start of the current vector within B
    int       lda, inca; // A: stride between vectors, stride between elements
    int       ldb, incb; // B: stride between vectors, stride between elements
    int       n_iter;    // number of vector copies to perform
    int       n_elem;    // number of elements in each vector copy
    int       j;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        // A's increment is selected with trans taken into account; B's is
        // always selected as if untransposed.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;
        
        // Handle the transposition of A.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        // The iteration counts and both stride pairs are swapped together, so
        // the same elements of A and B are still paired up.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
            bli_swap_ints( ldb, incb );
        }
    }

    // One vector copy per iteration; j*lda and j*ldb locate the j-th vector.
    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        // trans is forwarded unchanged; how bli_czcopyv() uses it is not
        // visible here (presumably to handle conjugation -- confirm).
        bli_czcopyv( trans,
                     n_elem,
                     a_begin, inca,
                     b_begin, incb );
    }
}
void bli_dccopymt ( char  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_dccopyv(), bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), and bli_zero_dim2().

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    // Copies a matrix from A (double) into B (scomplex) one strided vector at
    // a time, handing each vector to bli_dccopyv(). B is addressed as m rows
    // (stride b_rs) by n columns (stride b_cs); when trans requests a
    // transpose, A's two strides are exchanged so that A is read transposed.
    double*   a_begin;   // start of the current vector within A
    scomplex* b_begin;   // start of the current vector within B
    int       lda, inca; // A: stride between vectors, stride between elements
    int       ldb, incb; // B: stride between vectors, stride between elements
    int       n_iter;    // number of vector copies to perform
    int       n_elem;    // number of elements in each vector copy
    int       j;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        // A's increment is selected with trans taken into account; B's is
        // always selected as if untransposed.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;
        
        // Handle the transposition of A.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        // The iteration counts and both stride pairs are swapped together, so
        // the same elements of A and B are still paired up.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
            bli_swap_ints( ldb, incb );
        }
    }

    // One vector copy per iteration; j*lda and j*ldb locate the j-th vector.
    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        // trans is forwarded unchanged; how bli_dccopyv() uses it, and how it
        // fills the imaginary parts of B, is not visible here.
        bli_dccopyv( trans,
                     n_elem,
                     a_begin, inca,
                     b_begin, incb );
    }
}
void bli_dcopymt ( char  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bli_dcopy(), bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), and bli_zero_dim2().

Referenced by bli_dcreate_contigm(), bli_dcreate_contigmt(), bli_dfree_saved_contigm(), bli_dfree_saved_contigmsr(), bli_dsymm(), bli_dsyr2k(), bli_dtrmmsx(), bli_dtrsmsx(), FLA_Copy_external(), and FLA_Copyt_external().

{
    // Copies a matrix from A into B one strided vector at a time using
    // bli_dcopy(). B is addressed as m rows (stride b_rs) by n columns
    // (stride b_cs); when trans requests a transpose, A's two strides are
    // exchanged so that A is read transposed.
    double*   a_begin;   // start of the current vector within A
    double*   b_begin;   // start of the current vector within B
    int       lda, inca; // A: stride between vectors, stride between elements
    int       ldb, incb; // B: stride between vectors, stride between elements
    int       n_iter;    // number of vector copies to perform
    int       n_elem;    // number of elements in each vector copy
    int       j;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        // A's increment is selected with trans taken into account; B's is
        // always selected as if untransposed.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;
        
        // Handle the transposition of A.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major and if A is effectively row-major
        // after a possible transposition, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
                 ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) )
            {
                // The iteration counts and both stride pairs are swapped
                // together, so the same elements of A and B are still paired.
                bli_swap_ints( n_iter, n_elem );
                bli_swap_ints( lda, inca );
                bli_swap_ints( ldb, incb );
            }
        }
    }

    // One vector copy per iteration; j*lda and j*ldb locate the j-th vector.
    // trans is not passed to bli_dcopy(); in this function it only influences
    // the stride selection above.
    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;
        
        bli_dcopy( n_elem,
                   a_begin, inca, 
                   b_begin, incb );
    }
}
void bli_ddcopymt ( char  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bli_dcopyv(), bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), and bli_zero_dim2().

{
    // Copies a matrix from A into B one strided vector at a time, handing
    // each vector to bli_dcopyv(). B is addressed as m rows (stride b_rs) by
    // n columns (stride b_cs); when trans requests a transpose, A's two
    // strides are exchanged so that A is read transposed.
    double*   a_begin;   // start of the current vector within A
    double*   b_begin;   // start of the current vector within B
    int       lda, inca; // A: stride between vectors, stride between elements
    int       ldb, incb; // B: stride between vectors, stride between elements
    int       n_iter;    // number of vector copies to perform
    int       n_elem;    // number of elements in each vector copy
    int       j;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        // A's increment is selected with trans taken into account; B's is
        // always selected as if untransposed.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;
        
        // Handle the transposition of A.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        // The iteration counts and both stride pairs are swapped together, so
        // the same elements of A and B are still paired up.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
            bli_swap_ints( ldb, incb );
        }
    }

    // One vector copy per iteration; j*lda and j*ldb locate the j-th vector.
    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        // trans is forwarded unchanged; how bli_dcopyv() uses it is not
        // visible here.
        bli_dcopyv( trans,
                    n_elem,
                    a_begin, inca,
                    b_begin, incb );
    }
}
void bli_dscopymt ( char  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_dscopyv(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), and bli_zero_dim2().

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    // Copies a matrix from A (double) into B (float) one strided vector at a
    // time, handing each vector to bli_dscopyv(). B is addressed as m rows
    // (stride b_rs) by n columns (stride b_cs); when trans requests a
    // transpose, A's two strides are exchanged so that A is read transposed.
    double*   a_begin;   // start of the current vector within A
    float*    b_begin;   // start of the current vector within B
    int       lda, inca; // A: stride between vectors, stride between elements
    int       ldb, incb; // B: stride between vectors, stride between elements
    int       n_iter;    // number of vector copies to perform
    int       n_elem;    // number of elements in each vector copy
    int       j;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        // A's increment is selected with trans taken into account; B's is
        // always selected as if untransposed.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;
        
        // Handle the transposition of A.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        // The iteration counts and both stride pairs are swapped together, so
        // the same elements of A and B are still paired up.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
            bli_swap_ints( ldb, incb );
        }
    }

    // One vector copy per iteration; j*lda and j*ldb locate the j-th vector.
    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        // trans is forwarded unchanged; how bli_dscopyv() uses it, and how it
        // narrows double values to float, is not visible here.
        bli_dscopyv( trans,
                     n_elem,
                     a_begin, inca,
                     b_begin, incb );
    }
}
void bli_dzcopymt ( char  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_dzcopyv(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), and bli_zero_dim2().

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    // Copies a matrix from A (double) into B (dcomplex) one strided vector at
    // a time, handing each vector to bli_dzcopyv(). B is addressed as m rows
    // (stride b_rs) by n columns (stride b_cs); when trans requests a
    // transpose, A's two strides are exchanged so that A is read transposed.
    double*   a_begin;   // start of the current vector within A
    dcomplex* b_begin;   // start of the current vector within B
    int       lda, inca; // A: stride between vectors, stride between elements
    int       ldb, incb; // B: stride between vectors, stride between elements
    int       n_iter;    // number of vector copies to perform
    int       n_elem;    // number of elements in each vector copy
    int       j;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        // A's increment is selected with trans taken into account; B's is
        // always selected as if untransposed.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;
        
        // Handle the transposition of A.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        // The iteration counts and both stride pairs are swapped together, so
        // the same elements of A and B are still paired up.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
            bli_swap_ints( ldb, incb );
        }
    }

    // One vector copy per iteration; j*lda and j*ldb locate the j-th vector.
    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        // trans is forwarded unchanged; how bli_dzcopyv() uses it, and how it
        // fills the imaginary parts of B, is not visible here.
        bli_dzcopyv( trans,
                     n_elem,
                     a_begin, inca,
                     b_begin, incb );
    }
}
void bli_icopymt ( char  trans,
int  m,
int  n,
int *  a,
int  a_rs,
int  a_cs,
int *  b,
int  b_rs,
int  b_cs 
)

References bli_does_notrans(), bli_does_trans(), bli_icopyv(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), and bli_zero_dim2().

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    // Copies an integer matrix from A into B one strided vector at a time,
    // handing each vector to bli_icopyv(). B is addressed as m rows (stride
    // b_rs) by n columns (stride b_cs); when trans requests a transpose, A's
    // two strides are exchanged so that A is read transposed.
    int*      a_begin;   // start of the current vector within A
    int*      b_begin;   // start of the current vector within B
    int       lda, inca; // A: stride between vectors, stride between elements
    int       ldb, incb; // B: stride between vectors, stride between elements
    int       n_iter;    // number of vector copies to perform
    int       n_elem;    // number of elements in each vector copy
    int       j;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        // A's increment is selected with trans taken into account; B's is
        // always selected as if untransposed.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;
        
        // Handle the transposition of A.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major and if A is effectively row-major
        // after a possible transposition, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
                 ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) )
            {
                // The iteration counts and both stride pairs are swapped
                // together, so the same elements of A and B are still paired.
                bli_swap_ints( n_iter, n_elem );
                bli_swap_ints( lda, inca );
                bli_swap_ints( ldb, incb );
            }
        }
    }

    // One vector copy per iteration; j*lda and j*ldb locate the j-th vector.
    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;
        
        // trans is forwarded unchanged; how bli_icopyv() uses it is not
        // visible here.
        bli_icopyv( trans,
                    n_elem,
                    a_begin, inca, 
                    b_begin, incb );
    }
}
void bli_sccopymt ( char  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_sccopyv(), bli_vector_dim(), bli_vector_inc(), and bli_zero_dim2().

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    // Copies a matrix from A (float) into B (scomplex) one strided vector at
    // a time, handing each vector to bli_sccopyv(). B is addressed as m rows
    // (stride b_rs) by n columns (stride b_cs); when trans requests a
    // transpose, A's two strides are exchanged so that A is read transposed.
    float*    a_begin;   // start of the current vector within A
    scomplex* b_begin;   // start of the current vector within B
    int       lda, inca; // A: stride between vectors, stride between elements
    int       ldb, incb; // B: stride between vectors, stride between elements
    int       n_iter;    // number of vector copies to perform
    int       n_elem;    // number of elements in each vector copy
    int       j;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        // A's increment is selected with trans taken into account; B's is
        // always selected as if untransposed.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;
        
        // Handle the transposition of A.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        // The iteration counts and both stride pairs are swapped together, so
        // the same elements of A and B are still paired up.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
            bli_swap_ints( ldb, incb );
        }
    }

    // One vector copy per iteration; j*lda and j*ldb locate the j-th vector.
    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        // trans is forwarded unchanged; how bli_sccopyv() uses it, and how it
        // fills the imaginary parts of B, is not visible here.
        bli_sccopyv( trans,
                     n_elem,
                     a_begin, inca,
                     b_begin, incb );
    }
}
void bli_scopymt ( char  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_scopy(), bli_vector_dim(), bli_vector_inc(), and bli_zero_dim2().

Referenced by bli_screate_contigm(), bli_screate_contigmt(), bli_sfree_saved_contigm(), bli_sfree_saved_contigmsr(), bli_ssymm(), bli_ssyr2k(), bli_strmmsx(), bli_strsmsx(), FLA_Copy_external(), and FLA_Copyt_external().

{
    // Copies a matrix from A into B one strided vector at a time using
    // bli_scopy(). B is addressed as m rows (stride b_rs) by n columns
    // (stride b_cs); when trans requests a transpose, A's two strides are
    // exchanged so that A is read transposed.
    float*    a_begin;   // start of the current vector within A
    float*    b_begin;   // start of the current vector within B
    int       lda, inca; // A: stride between vectors, stride between elements
    int       ldb, incb; // B: stride between vectors, stride between elements
    int       n_iter;    // number of vector copies to perform
    int       n_elem;    // number of elements in each vector copy
    int       j;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        // A's increment is selected with trans taken into account; B's is
        // always selected as if untransposed.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;
        
        // Handle the transposition of A.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major and if A is effectively row-major
        // after a possible transposition, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
                 ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) )
            {
                // The iteration counts and both stride pairs are swapped
                // together, so the same elements of A and B are still paired.
                bli_swap_ints( n_iter, n_elem );
                bli_swap_ints( lda, inca );
                bli_swap_ints( ldb, incb );
            }
        }
    }

    // One vector copy per iteration; j*lda and j*ldb locate the j-th vector.
    // trans is not passed to bli_scopy(); in this function it only influences
    // the stride selection above.
    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;
        
        bli_scopy( n_elem,
                   a_begin, inca, 
                   b_begin, incb );
    }
}
// Copy the single-precision real matrix A into the double-precision real
// matrix B, reading A with its strides exchanged when trans requests a
// transposition. Both operands are addressed through a row stride (*_rs)
// and a column stride (*_cs); B is traversed as m rows by n columns. The
// element-wise work is delegated to bli_sdcopyv(), one sweep at a time.
void bli_sdcopymt ( char  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_sdcopyv(), bli_vector_dim(), bli_vector_inc(), and bli_zero_dim2().

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    int       num_sweeps;
    int       sweep_len;
    int       a_sweep_stride, a_elem_stride;
    int       b_sweep_stride, b_elem_stride;
    int       sweep;

    // Nothing to do when bli_zero_dim2() flags the dimensions.
    if ( bli_zero_dim2( m, n ) )
    {
        return;
    }

    if ( !bli_is_vector( m, n ) )
    {
        // General matrix: pick the traversal direction from B's storage and
        // derive A's strides from that direction plus the transposition.
        const int a_is_transposed = bli_does_trans( trans );
        const int b_by_rows       = bli_is_row_storage( b_rs, b_cs );

        if ( b_by_rows )
        {
            // Row-major B: sweep along rows for better spatial locality.
            num_sweeps     = m;
            sweep_len      = n;
            b_sweep_stride = b_rs;
            b_elem_stride  = b_cs;
        }
        else
        {
            // Otherwise sweep along the columns of B.
            num_sweeps     = n;
            sweep_len      = m;
            b_sweep_stride = b_cs;
            b_elem_stride  = b_rs;
        }

        // A transposition and a row-wise traversal each exchange A's
        // strides once; when both (or neither) apply the exchanges cancel.
        if ( !a_is_transposed == !b_by_rows )
        {
            a_sweep_stride = a_cs;
            a_elem_stride  = a_rs;
        }
        else
        {
            a_sweep_stride = a_rs;
            a_elem_stride  = a_cs;
        }
    }
    else
    {
        // Vector operands: a single sweep covers everything, so the sweep
        // strides are only ever multiplied by zero.
        num_sweeps     = 1;
        sweep_len      = bli_vector_dim( m, n );
        a_sweep_stride = 1;
        a_elem_stride  = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        b_sweep_stride = 1;
        b_elem_stride  = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    sweep = 0;
    while ( sweep < num_sweeps )
    {
        bli_sdcopyv( trans,
                     sweep_len,
                     a + sweep*a_sweep_stride, a_elem_stride,
                     b + sweep*b_sweep_stride, b_elem_stride );
        ++sweep;
    }
}
// Copy the single-precision real matrix A into the single-precision real
// matrix B through the type-generic vector kernel bli_scopyv(). A is read
// with its row and column strides exchanged when trans requests a
// transposition; B is traversed as m rows by n columns.
void bli_sscopymt ( char  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_scopyv(), bli_vector_dim(), bli_vector_inc(), and bli_zero_dim2().

{
    float*    src;
    float*    dst;
    int       src_outer, src_inner;
    int       dst_outer, dst_inner;
    int       outer_count;
    int       inner_count;
    int       k;

    // Leave immediately when bli_zero_dim2() flags the dimensions.
    if ( bli_zero_dim2( m, n ) )
    {
        return;
    }

    if ( bli_is_vector( m, n ) )
    {
        // Vector operands need exactly one kernel call; the outer strides
        // are placeholders because they are only scaled by k == 0.
        outer_count = 1;
        inner_count = bli_vector_dim( m, n );
        src_outer   = 1;
        src_inner   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        dst_outer   = 1;
        dst_inner   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // Matrix operands: walk B by rows when it is row-major (better
        // spatial locality), by columns otherwise.
        const int flip_for_trans = bli_does_trans( trans );
        const int flip_for_rows  = bli_is_row_storage( b_rs, b_cs );

        // Exactly one of the two conditions exchanges A's strides; both
        // together cancel out.
        const int src_flipped    = ( !flip_for_trans != !flip_for_rows );

        outer_count = flip_for_rows ? m    : n;
        inner_count = flip_for_rows ? n    : m;
        dst_outer   = flip_for_rows ? b_rs : b_cs;
        dst_inner   = flip_for_rows ? b_cs : b_rs;
        src_outer   = src_flipped   ? a_rs : a_cs;
        src_inner   = src_flipped   ? a_cs : a_rs;
    }

    for ( k = 0; k < outer_count; k++ )
    {
        src = &a[ k*src_outer ];
        dst = &b[ k*dst_outer ];

        bli_scopyv( trans, inner_count, src, src_inner, dst, dst_inner );
    }
}
// Copy the single-precision real matrix A into the double-precision complex
// matrix B, one column (or row) of B at a time, via bli_szcopyv().
//
//   trans          - selects whether A is read transposed; also forwarded
//                    unchanged as the first argument of bli_szcopyv()
//   m, n           - row and column dimensions of B
//   a, a_rs, a_cs  - base pointer, row stride and column stride of A
//   b, b_rs, b_cs  - base pointer, row stride and column stride of B
void bli_szcopymt ( char  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_szcopyv(), bli_vector_dim(), bli_vector_inc(), and bli_zero_dim2().

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    float*    a_begin;
    dcomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B:
        // iterate over the n columns, copying m elements per column.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;
        
        // Handle the transposition of A by exchanging its two strides.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality. All three
        // pairs must be exchanged together to keep A and B in step.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
            bli_swap_ints( ldb, incb );
        }
    }

    // One vector copy per column (or row) of B.
    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bli_szcopyv( trans,
                     n_elem,
                     a_begin, inca,
                     b_begin, incb );
    }
}
// Copy the double-precision complex matrix A into the single-precision
// complex matrix B, one column (or row) of B at a time, via bli_zccopyv().
//
//   trans          - selects whether A is read transposed; also forwarded
//                    unchanged as the first argument of bli_zccopyv()
//   m, n           - row and column dimensions of B
//   a, a_rs, a_cs  - base pointer, row stride and column stride of A
//   b, b_rs, b_cs  - base pointer, row stride and column stride of B
void bli_zccopymt ( char  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zccopyv(), and bli_zero_dim2().

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    dcomplex* a_begin;
    scomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B:
        // iterate over the n columns, copying m elements per column.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;
        
        // Handle the transposition of A by exchanging its two strides.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality. All three
        // pairs must be exchanged together to keep A and B in step.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
            bli_swap_ints( ldb, incb );
        }
    }

    // One vector copy per column (or row) of B.
    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bli_zccopyv( trans,
                     n_elem,
                     a_begin, inca,
                     b_begin, incb );
    }
}
// Copy the double-precision complex matrix A into the double-precision
// complex matrix B, one column (or row) of B at a time, via bli_zcopy().
// When bli_does_conj( trans ) holds, each freshly copied column (or row) of
// B is additionally conjugated in place with bli_zconjv().
//
//   trans          - selects whether A is read transposed and whether the
//                    copied elements are conjugated
//   m, n           - row and column dimensions of B
//   a, a_rs, a_cs  - base pointer, row stride and column stride of A
//   b, b_rs, b_cs  - base pointer, row stride and column stride of B
void bli_zcopymt ( char  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_does_conj(), bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zconjv(), bli_zcopy(), and bli_zero_dim2().

Referenced by bli_zcreate_contigm(), bli_zcreate_contigmt(), bli_zfree_saved_contigm(), bli_zgemm(), bli_zhemm(), bli_zher2k(), bli_zsymm(), bli_zsyr2k(), bli_ztrmmsx(), bli_ztrsmsx(), FLA_Copy_external(), and FLA_Copyt_external().

{
    dcomplex* a_begin;
    dcomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage:
        // iterate over the n columns, copying m elements per column.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;
        
        // Handle the transposition of A by exchanging its two strides.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major and if A is effectively row-major
        // after a possible transposition, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
                 ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) )
            {
                // All three pairs must be exchanged together to keep A and
                // B in step.
                bli_swap_ints( n_iter, n_elem );
                bli_swap_ints( lda, inca );
                bli_swap_ints( ldb, incb );
            }
        }
    }

    // One vector copy per column (or row) of B.
    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;
        
        bli_zcopy( n_elem,
                   a_begin, inca, 
                   b_begin, incb );

        // The conjugation is applied to the destination after the copy, so
        // A itself is never modified.
        if ( bli_does_conj( trans ) )
            bli_zconjv( n_elem,
                        b_begin, incb );
    }
}
// Copy the double-precision complex matrix A into the double-precision real
// matrix B, one column (or row) of B at a time, via bli_zdcopyv().
//
//   trans          - selects whether A is read transposed; also forwarded
//                    unchanged as the first argument of bli_zdcopyv()
//   m, n           - row and column dimensions of B
//   a, a_rs, a_cs  - base pointer, row stride and column stride of A
//   b, b_rs, b_cs  - base pointer, row stride and column stride of B
void bli_zdcopymt ( char  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zdcopyv(), and bli_zero_dim2().

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    dcomplex* a_begin;
    double*   b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B:
        // iterate over the n columns, copying m elements per column.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;
        
        // Handle the transposition of A by exchanging its two strides.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality. All three
        // pairs must be exchanged together to keep A and B in step.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
            bli_swap_ints( ldb, incb );
        }
    }

    // One vector copy per column (or row) of B.
    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bli_zdcopyv( trans,
                     n_elem,
                     a_begin, inca,
                     b_begin, incb );
    }
}
// Copy the double-precision complex matrix A into the single-precision real
// matrix B, one column (or row) of B at a time, via bli_zscopyv().
//
//   trans          - selects whether A is read transposed; also forwarded
//                    unchanged as the first argument of bli_zscopyv()
//   m, n           - row and column dimensions of B
//   a, a_rs, a_cs  - base pointer, row stride and column stride of A
//   b, b_rs, b_cs  - base pointer, row stride and column stride of B
void bli_zscopymt ( char  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and bli_zscopyv().

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    dcomplex* a_begin;
    float*    b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B:
        // iterate over the n columns, copying m elements per column.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;
        
        // Handle the transposition of A by exchanging its two strides.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality. All three
        // pairs must be exchanged together to keep A and B in step.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
            bli_swap_ints( ldb, incb );
        }
    }

    // One vector copy per column (or row) of B.
    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bli_zscopyv( trans,
                     n_elem,
                     a_begin, inca,
                     b_begin, incb );
    }
}
// Copy the double-precision complex matrix A into the double-precision
// complex matrix B, one column (or row) of B at a time, via the
// type-generic vector kernel bli_zcopyv().
//
//   trans          - selects whether A is read transposed; also forwarded
//                    unchanged as the first argument of bli_zcopyv()
//   m, n           - row and column dimensions of B
//   a, a_rs, a_cs  - base pointer, row stride and column stride of A
//   b, b_rs, b_cs  - base pointer, row stride and column stride of B
void bli_zzcopymt ( char  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zcopyv(), and bli_zero_dim2().

{
    dcomplex* a_begin;
    dcomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B:
        // iterate over the n columns, copying m elements per column.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;
        
        // Handle the transposition of A by exchanging its two strides.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality. All three
        // pairs must be exchanged together to keep A and B in step.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
            bli_swap_ints( ldb, incb );
        }
    }

    // One vector copy per column (or row) of B.
    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bli_zcopyv( trans,
                    n_elem,
                    a_begin, inca,
                    b_begin, incb );
    }
}