libflame  revision_anchor
Functions
FLA_Bidiag_UT_u_opt_var4.c File Reference

(r)

Functions

FLA_Error FLA_Bidiag_UT_u_opt_var4 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_step_opt_var4 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ops_var4 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opd_var4 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opc_var4 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opz_var4 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 

Function Documentation

◆ FLA_Bidiag_UT_u_opt_var4()

FLA_Error FLA_Bidiag_UT_u_opt_var4 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)
14 {
15  FLA_Error r_val;
16  FLA_Obj Y, Z;
17  FLA_Datatype datatype_A;
18  dim_t m_A, n_A;
19 
20  datatype_A = FLA_Obj_datatype( A );
21  m_A = FLA_Obj_length( A );
22  n_A = FLA_Obj_width( A );
23 
24  FLA_Obj_create( datatype_A, n_A, n_A, 0, 0, &Y );
25  FLA_Obj_create( datatype_A, m_A, n_A, 0, 0, &Z );
26 
27  r_val = FLA_Bidiag_UT_u_step_opt_var4( A, Y, Z, TU, TV );
28 
29  FLA_Obj_free( &Y );
30  FLA_Obj_free( &Z );
31 
32  return r_val;
33 }
FLA_Error FLA_Bidiag_UT_u_step_opt_var4(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_opt_var4.c:35
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition: FLA_Obj.c:55
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Error
Definition: FLA_type_defs.h:47
int FLA_Datatype
Definition: FLA_type_defs.h:49
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Obj_view
Definition: FLA_type_defs.h:159

References FLA_Bidiag_UT_u_step_opt_var4(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u().

◆ FLA_Bidiag_UT_u_step_opc_var4()

FLA_Error FLA_Bidiag_UT_u_step_opc_var4 ( int  m_A,
int  n_A,
int  m_TS,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_Y,
int  rs_Y,
int  cs_Y,
scomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
scomplex *  buff_T,
int  rs_T,
int  cs_T,
scomplex *  buff_S,
int  rs_S,
int  cs_S 
)
1259 {
1260  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
1261  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
1262  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
1263 
1264  scomplex alpha12;
1265  scomplex minus_conj_alpha12;
1266  scomplex psi11_minus_alpha12;
1267  scomplex minus_inv_tau11;
1268  scomplex beta;
1269  scomplex last_elem;
1270  int i;
1271 
1272  // b_alg = FLA_Obj_length( T );
1273  int b_alg = m_TS;
1274 
1275  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
1276  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
1277  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
1278  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1279  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
1280  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
1281  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
1282  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
1283  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
1284  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
1285  scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1286  scomplex* buff_al = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1287  scomplex* buff_ap = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1288  scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1289  scomplex* buff_up = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1290  scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1291  scomplex* buff_d = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1292  scomplex* buff_e = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1293  scomplex* buff_f = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1294  scomplex* buff_g = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1295  int inc_w = 1;
1296  int inc_al = 1;
1297  int inc_ap = 1;
1298  int inc_u = 1;
1299  int inc_up = 1;
1300  int inc_v = 1;
1301  int inc_d = 1;
1302  int inc_e = 1;
1303  int inc_f = 1;
1304  int inc_g = 1;
1305 
1306  // FLA_Set( FLA_ZERO, Y );
1307  // FLA_Set( FLA_ZERO, Z );
1308  bl1_csetm( n_A,
1309  b_alg,
1310  buff_0,
1311  buff_Y, rs_Y, cs_Y );
1312  bl1_csetm( m_A,
1313  b_alg,
1314  buff_0,
1315  buff_Z, rs_Z, cs_Z );
1316 
1317  for ( i = 0; i < b_alg; ++i )
1318  {
1319  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1320  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1321  scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
1322  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1323  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1324  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1325  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1326  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1327 
1328  scomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1329  scomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1330  scomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1331 
1332  scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1333  scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1334  scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1335 
1336  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1337  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1338 
1339  scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1340  scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1341 
1342  scomplex* w21 = buff_w + (i+1)*inc_w;
1343 
1344  scomplex* a22l = buff_al + (i+1)*inc_al;
1345 
1346  scomplex* a12p = buff_ap + (i+1)*inc_ap;
1347 
1348  scomplex* u21 = buff_u + (i+1)*inc_u;
1349 
1350  scomplex* u21p = buff_up + (i+1)*inc_up;
1351 
1352  scomplex* v21 = buff_v + (i+1)*inc_v;
1353 
1354  scomplex* d0 = buff_d + (0 )*inc_d;
1355 
1356  scomplex* e0 = buff_e + (0 )*inc_e;
1357 
1358  scomplex* f0 = buff_f + (0 )*inc_f;
1359 
1360  scomplex* g0 = buff_g + (0 )*inc_g;
1361 
1362  scomplex* a12p_t = a12p + (0 )*inc_ap;
1363  scomplex* a12p_b = a12p + (1 )*inc_ap;
1364 
1365  scomplex* v21_t = v21 + (0 )*inc_v;
1366  scomplex* v21_b = v21 + (1 )*inc_v;
1367 
1368  scomplex* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
1369 
1370  scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1371  scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1372 
1373  scomplex* A02_l = A02 + (0 )*cs_A + (0 )*rs_A;
1374 
1375  scomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
1376 
1377  scomplex* Y20_t = Y20 + (0 )*cs_Y + (0 )*rs_Y;
1378 
1379  scomplex* ABL = a10t;
1380  scomplex* ZBL = z10t;
1381 
1382  scomplex* a2 = alpha11;
1383 
1384  int m_ahead = m_A - i - 1;
1385  int n_ahead = n_A - i - 1;
1386  int m_behind = i;
1387  int n_behind = i;
1388 
1389  /*------------------------------------------------------------*/
1390 
1391  if ( m_behind > 0 )
1392  {
1393  // FLA_Copy( a01_b, last_elem );
1394  // FLA_Set( FLA_ONE, a01_b );
1395  last_elem = *a01_b;
1396  *a01_b = *buff_1;
1397  }
1398 
1399  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1400  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
1401  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1402  BLIS1_CONJUGATE,
1403  m_ahead + 1,
1404  n_behind,
1405  buff_m1,
1406  ABL, rs_A, cs_A,
1407  y10t, cs_Y,
1408  buff_1,
1409  a2, rs_A );
1410  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1411  BLIS1_CONJUGATE,
1412  m_ahead + 1,
1413  n_behind,
1414  buff_m1,
1415  ZBL, rs_Z, cs_Z,
1416  a01, rs_A,
1417  buff_1,
1418  a2, rs_A );
1419 
1420  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1421  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
1422  bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
1423  BLIS1_NO_CONJUGATE,
1424  n_ahead,
1425  n_behind,
1426  buff_m1,
1427  Y20, rs_Y, cs_Y,
1428  a10t, cs_A,
1429  buff_1,
1430  a12t, cs_A );
1431  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1432  BLIS1_NO_CONJUGATE,
1433  m_behind,
1434  n_ahead,
1435  buff_m1,
1436  A02, rs_A, cs_A,
1437  z10t, cs_Z,
1438  buff_1,
1439  a12t, cs_A );
1440 
1441  if ( m_behind > 0 )
1442  {
1443  // FLA_Copy( last_elem, a01_b );
1444  *a01_b = last_elem;
1445  }
1446 
1447  // FLA_Househ2_UT( FLA_LEFT,
1448  // alpha11,
1449  // a21, tau11 );
1450  // FLA_Copy( a21, u21p );
1451  FLA_Househ2_UT_l_opc( m_ahead,
1452  alpha11,
1453  a21, rs_A,
1454  tau11 );
1455  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1456  m_ahead,
1457  a21, rs_A,
1458  u21p, inc_up );
1459 
1460  if ( n_ahead > 0 )
1461  {
1462  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
1463  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
1464  bl1_cdiv3( buff_m1, tau11, &minus_inv_tau11 );
1465 
1466  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
1467  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
1468  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1469  n_ahead,
1470  a12t, cs_A,
1471  a12p, inc_ap );
1472  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1473  n_ahead,
1474  &minus_inv_tau11,
1475  a12t, cs_A,
1476  a12p, inc_ap );
1477 
1478  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
1479  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
1480  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1481  BLIS1_NO_CONJUGATE,
1482  m_ahead,
1483  n_behind,
1484  buff_1,
1485  A20, rs_A, cs_A,
1486  u21p, inc_up,
1487  buff_0,
1488  d0, inc_d );
1489  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1490  BLIS1_NO_CONJUGATE,
1491  m_ahead,
1492  n_behind,
1493  buff_1,
1494  Z20, rs_Z, cs_Z,
1495  u21p, inc_up,
1496  buff_0,
1497  e0, inc_e );
1498 
1499  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1500  // FLA_Axpy( FLA_ONE, d0, t01 );
1501  bl1_ccopyv( BLIS1_CONJUGATE,
1502  n_behind,
1503  a10t, cs_A,
1504  t01, rs_T );
1505  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1506  n_behind,
1507  buff_1,
1508  d0, inc_d,
1509  t01, rs_T );
1510 
1511  // FLA_Set( FLA_ZERO, y21 );
1512  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1513  // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
1514  bl1_csetv( n_ahead,
1515  buff_0,
1516  y21, rs_Y );
1517  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1518  BLIS1_NO_CONJUGATE,
1519  n_ahead,
1520  n_behind,
1521  buff_m1,
1522  Y20, rs_Y, cs_Y,
1523  d0, inc_d,
1524  buff_1,
1525  y21, rs_Y );
1526  bl1_cgemv( BLIS1_TRANSPOSE,
1527  BLIS1_NO_CONJUGATE,
1528  m_behind,
1529  n_ahead,
1530  buff_m1,
1531  A02, rs_A, cs_A,
1532  e0, inc_e,
1533  buff_1,
1534  y21, rs_Y );
1535 
1536  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
1537  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1538  BLIS1_NO_CONJUGATE,
1539  m_ahead,
1540  n_ahead,
1541  buff_1,
1542  A22, rs_A, cs_A,
1543  u21p, inc_up,
1544  buff_1,
1545  y21, rs_Y );
1546 
1547  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1548  bl1_caxpyv( BLIS1_CONJUGATE,
1549  n_ahead,
1550  &minus_inv_tau11,
1551  y21, rs_Y,
1552  a12p, inc_ap );
1553 
1554  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1555  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1556  BLIS1_CONJUGATE,
1557  m_ahead,
1558  n_ahead,
1559  buff_1,
1560  A22, rs_A, cs_A,
1561  a12p, inc_ap,
1562  buff_0,
1563  w21, inc_w );
1564 
1565  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
1566  // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
1567  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1568  BLIS1_CONJUGATE,
1569  n_ahead,
1570  n_behind,
1571  buff_1,
1572  Y20, rs_Y, cs_Y,
1573  a12p, inc_ap,
1574  buff_0,
1575  f0, inc_f );
1576  bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
1577  BLIS1_CONJUGATE,
1578  m_behind,
1579  n_ahead,
1580  buff_1,
1581  A02, rs_A, cs_A,
1582  a12p, inc_ap,
1583  buff_0,
1584  g0, inc_g );
1585 
1586  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
1587  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
1588  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1589  BLIS1_NO_CONJUGATE,
1590  m_ahead,
1591  n_behind,
1592  buff_m1,
1593  A20, rs_A, cs_A,
1594  f0, inc_f,
1595  buff_1,
1596  w21, inc_w );
1597  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1598  BLIS1_NO_CONJUGATE,
1599  m_ahead,
1600  n_behind,
1601  buff_m1,
1602  Z20, rs_Z, cs_Z,
1603  g0, inc_g,
1604  buff_1,
1605  w21, inc_w );
1606 
1607  // FLA_Copy( A22_l, a22l );
1608  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
1609  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
1610  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1611  m_ahead,
1612  A22_l, rs_A,
1613  a22l, inc_al );
1614  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1615  BLIS1_CONJUGATE,
1616  m_ahead,
1617  n_behind,
1618  buff_m1,
1619  A20, rs_A, cs_A,
1620  Y20_t, cs_Y,
1621  buff_1,
1622  a22l, inc_al );
1623  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1624  BLIS1_CONJUGATE,
1625  m_ahead,
1626  n_behind,
1627  buff_m1,
1628  Z20, rs_Z, cs_Z,
1629  A02_l, rs_A,
1630  buff_1,
1631  a22l, inc_al );
1632 
1633  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1634  bl1_caxpyv( BLIS1_CONJUGATE,
1635  n_ahead,
1636  buff_1,
1637  a12t, cs_A,
1638  y21, rs_Y );
1639 
1640  // FLA_Househ2s_UT( FLA_RIGHT,
1641  // a12p_t,
1642  // a12p_b,
1643  // alpha12, psi11_minus_alpha12, sigma11 );
1644  FLA_Househ2s_UT_r_opc( n_ahead - 1,
1645  a12p_t,
1646  a12p_b, inc_ap,
1647  &alpha12,
1648  &psi11_minus_alpha12,
1649  sigma11 );
1650 
1651  // FLA_Copy( a12p, v21 );
1652  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1653  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1654  // FLA_Conjugate( v21_b );
1655  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1656  n_ahead,
1657  a12p, inc_ap,
1658  v21, inc_v );
1659  bl1_cmult4( buff_m1, &alpha12, v21_t, v21_t );
1660  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
1661  n_ahead,
1662  &psi11_minus_alpha12,
1663  v21, inc_v );
1664  bl1_cconjv( n_ahead - 1,
1665  v21_b, inc_v );
1666 
1667  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1668  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1669  bl1_ccopyconj( &alpha12, &minus_conj_alpha12 );
1670  bl1_cneg1( &minus_conj_alpha12 );
1671 
1672  // FLA_Copy( g0, s01 );
1673  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
1674  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
1675  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1676  n_behind,
1677  g0, inc_g,
1678  s01, rs_S );
1679  bl1_caxpyv( BLIS1_CONJUGATE,
1680  n_behind,
1681  &minus_conj_alpha12,
1682  A02_l, rs_A,
1683  s01, rs_S );
1684  bl1_cinvscalv( BLIS1_CONJUGATE,
1685  n_behind,
1686  &psi11_minus_alpha12,
1687  s01, rs_S );
1688 
1689  // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
1690  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1691  *a12t_l = alpha12;
1692  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1693  n_ahead - 1,
1694  v21_b, inc_v,
1695  a12t_r, cs_A );
1696  }
1697 
1698  // FLA_Copy( u21p, u21 );
1699  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1700  m_ahead,
1701  u21p, inc_up,
1702  u21, inc_u );
1703 
1704  if ( n_ahead > 0 )
1705  {
1706  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1707  // FLA_Scal( FLA_MINUS_ONE, beta );
1708  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1709  bl1_cdot( BLIS1_CONJUGATE,
1710  n_ahead,
1711  y21, rs_Y,
1712  v21, inc_v,
1713  &beta );
1714  bl1_cscals( &minus_inv_tau11, &beta );
1715 
1716  // FLA_Copy( w21, z21 );
1717  // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
1718  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1719  // FLA_Axpy( beta, u21, z21 );
1720  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1721  m_ahead,
1722  w21, inc_w,
1723  z21, rs_Z );
1724  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1725  m_ahead,
1726  &minus_conj_alpha12,
1727  a22l, inc_al,
1728  z21, rs_Z );
1729  bl1_cinvscalv( BLIS1_CONJUGATE,
1730  m_ahead,
1731  &psi11_minus_alpha12,
1732  z21, rs_Z );
1733  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1734  m_ahead,
1735  &beta,
1736  u21, inc_u,
1737  z21, rs_Z );
1738 
1739  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1740  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1741  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
1742  n_ahead,
1743  tau11,
1744  y21, rs_Y );
1745  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
1746  m_ahead,
1747  sigma11,
1748  z21, rs_Z );
1749  }
1750  else // if ( n_ahead == 0 )
1751  {
1752  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1753  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1754  bl1_ccopyv( BLIS1_CONJUGATE,
1755  n_behind,
1756  a10t, cs_A,
1757  t01, rs_T );
1758  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1759  BLIS1_NO_CONJUGATE,
1760  m_ahead,
1761  n_behind,
1762  buff_1,
1763  A20, rs_A, cs_A,
1764  u21, inc_u,
1765  buff_1,
1766  t01, rs_T );
1767  }
1768 
1769  /*------------------------------------------------------------*/
1770 
1771  }
1772 
1773  // FLA_Obj_free( &w );
1774  // FLA_Obj_free( &al );
1775  // FLA_Obj_free( &ap );
1776  // FLA_Obj_free( &u );
1777  // FLA_Obj_free( &up );
1778  // FLA_Obj_free( &v );
1779  // FLA_Obj_free( &d );
1780  // FLA_Obj_free( &e );
1781  // FLA_Obj_free( &f );
1782  // FLA_Obj_free( &g );
1783  FLA_free( buff_w );
1784  FLA_free( buff_al );
1785  FLA_free( buff_ap );
1786  FLA_free( buff_u );
1787  FLA_free( buff_up );
1788  FLA_free( buff_v );
1789  FLA_free( buff_d );
1790  FLA_free( buff_e );
1791  FLA_free( buff_f );
1792  FLA_free( buff_g );
1793 
1794  return FLA_SUCCESS;
1795 }
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2s_UT_r_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *alpha, scomplex *chi_1_minus_alpha, scomplex *tau)
Definition: FLA_Househ2s_UT.c:589
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
int i
Definition: bl1_axmyv2.c:145
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
void bl1_cconjv(int m, scomplex *x, int incx)
Definition: bl1_conjv.c:23
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
void bl1_cinvscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_invscalv.c:52
void bl1_csetv(int m, scomplex *sigma, scomplex *x, int incx)
Definition: bl1_setv.c:52
void bl1_csetm(int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:61
@ BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54
@ BLIS1_CONJ_TRANSPOSE
Definition: blis_type_defs.h:57
@ BLIS1_TRANSPOSE
Definition: blis_type_defs.h:55
@ BLIS1_CONJ_NO_TRANSPOSE
Definition: blis_type_defs.h:56
@ BLIS1_CONJUGATE
Definition: blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
scomplex
Definition: blis_type_defs.h:133

References bl1_caxpyv(), bl1_cconjv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cinvscalv(), bl1_csetm(), bl1_csetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_Househ2s_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var4().

◆ FLA_Bidiag_UT_u_step_opd_var4()

FLA_Error FLA_Bidiag_UT_u_step_opd_var4 ( int  m_A,
int  n_A,
int  m_TS,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_Y,
int  rs_Y,
int  cs_Y,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_T,
int  rs_T,
int  cs_T,
double *  buff_S,
int  rs_S,
int  cs_S 
)
711 {
712  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
713  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
714  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
715 
716  double alpha12;
717  double minus_conj_alpha12;
718  double psi11_minus_alpha12;
719  double minus_inv_tau11;
720  double beta;
721  double last_elem;
722  int i;
723 
724  // b_alg = FLA_Obj_length( T );
725  int b_alg = m_TS;
726 
727  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
728  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
729  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
730  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
731  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
732  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
733  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
734  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
735  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
736  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
737  double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
738  double* buff_al = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
739  double* buff_ap = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
740  double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
741  double* buff_up = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
742  double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
743  double* buff_d = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
744  double* buff_e = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
745  double* buff_f = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
746  double* buff_g = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
747  int inc_w = 1;
748  int inc_al = 1;
749  int inc_ap = 1;
750  int inc_u = 1;
751  int inc_up = 1;
752  int inc_v = 1;
753  int inc_d = 1;
754  int inc_e = 1;
755  int inc_f = 1;
756  int inc_g = 1;
757 
758  // FLA_Set( FLA_ZERO, Y );
759  // FLA_Set( FLA_ZERO, Z );
760  bl1_dsetm( n_A,
761  b_alg,
762  buff_0,
763  buff_Y, rs_Y, cs_Y );
764  bl1_dsetm( m_A,
765  b_alg,
766  buff_0,
767  buff_Z, rs_Z, cs_Z );
768 
769  for ( i = 0; i < b_alg; ++i )
770  {
771  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
772  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
773  double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
774  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
775  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
776  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
777  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
778  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
779 
780  double* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
781  double* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
782  double* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
783 
784  double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
785  double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
786  double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
787 
788  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
789  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
790 
791  double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
792  double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
793 
794  double* w21 = buff_w + (i+1)*inc_w;
795 
796  double* a22l = buff_al + (i+1)*inc_al;
797 
798  double* a12p = buff_ap + (i+1)*inc_ap;
799 
800  double* u21 = buff_u + (i+1)*inc_u;
801 
802  double* u21p = buff_up + (i+1)*inc_up;
803 
804  double* v21 = buff_v + (i+1)*inc_v;
805 
806  double* d0 = buff_d + (0 )*inc_d;
807 
808  double* e0 = buff_e + (0 )*inc_e;
809 
810  double* f0 = buff_f + (0 )*inc_f;
811 
812  double* g0 = buff_g + (0 )*inc_g;
813 
814  double* a12p_t = a12p + (0 )*inc_ap;
815  double* a12p_b = a12p + (1 )*inc_ap;
816 
817  double* v21_t = v21 + (0 )*inc_v;
818  double* v21_b = v21 + (1 )*inc_v;
819 
820  double* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
821 
822  double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
823  double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
824 
825  double* A02_l = A02 + (0 )*cs_A + (0 )*rs_A;
826 
827  double* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
828 
829  double* Y20_t = Y20 + (0 )*cs_Y + (0 )*rs_Y;
830 
831  double* ABL = a10t;
832  double* ZBL = z10t;
833 
834  double* a2 = alpha11;
835 
836  int m_ahead = m_A - i - 1;
837  int n_ahead = n_A - i - 1;
838  int m_behind = i;
839  int n_behind = i;
840 
841  /*------------------------------------------------------------*/
842 
843  if ( m_behind > 0 )
844  {
845  // FLA_Copy( a01_b, last_elem );
846  // FLA_Set( FLA_ONE, a01_b );
847  last_elem = *a01_b;
848  *a01_b = *buff_1;
849  }
850 
851  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
852  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
853  bl1_dgemv( BLIS1_NO_TRANSPOSE,
854  BLIS1_CONJUGATE,
855  m_ahead + 1,
856  n_behind,
857  buff_m1,
858  ABL, rs_A, cs_A,
859  y10t, cs_Y,
860  buff_1,
861  a2, rs_A );
862  bl1_dgemv( BLIS1_NO_TRANSPOSE,
863  BLIS1_CONJUGATE,
864  m_ahead + 1,
865  n_behind,
866  buff_m1,
867  ZBL, rs_Z, cs_Z,
868  a01, rs_A,
869  buff_1,
870  a2, rs_A );
871 
872  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
873  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
874  bl1_dgemv( BLIS1_CONJ_NO_TRANSPOSE,
875  BLIS1_NO_CONJUGATE,
876  n_ahead,
877  n_behind,
878  buff_m1,
879  Y20, rs_Y, cs_Y,
880  a10t, cs_A,
881  buff_1,
882  a12t, cs_A );
883  bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
884  BLIS1_NO_CONJUGATE,
885  m_behind,
886  n_ahead,
887  buff_m1,
888  A02, rs_A, cs_A,
889  z10t, cs_Z,
890  buff_1,
891  a12t, cs_A );
892 
893  if ( m_behind > 0 )
894  {
895  // FLA_Copy( last_elem, a01_b );
896  *a01_b = last_elem;
897  }
898 
899  // FLA_Househ2_UT( FLA_LEFT,
900  // alpha11,
901  // a21, tau11 );
902  // FLA_Copy( a21, u21p );
903  FLA_Househ2_UT_l_opd( m_ahead,
904  alpha11,
905  a21, rs_A,
906  tau11 );
907  bl1_dcopyv( BLIS1_NO_CONJUGATE,
908  m_ahead,
909  a21, rs_A,
910  u21p, inc_up );
911 
912  if ( n_ahead > 0 )
913  {
914  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
915  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
916  bl1_ddiv3( buff_m1, tau11, &minus_inv_tau11 );
917 
918  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
919  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
920  bl1_dcopyv( BLIS1_NO_CONJUGATE,
921  n_ahead,
922  a12t, cs_A,
923  a12p, inc_ap );
924  bl1_daxpyv( BLIS1_NO_CONJUGATE,
925  n_ahead,
926  &minus_inv_tau11,
927  a12t, cs_A,
928  a12p, inc_ap );
929 
930  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
931  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
932  bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
933  BLIS1_NO_CONJUGATE,
934  m_ahead,
935  n_behind,
936  buff_1,
937  A20, rs_A, cs_A,
938  u21p, inc_up,
939  buff_0,
940  d0, inc_d );
941  bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
942  BLIS1_NO_CONJUGATE,
943  m_ahead,
944  n_behind,
945  buff_1,
946  Z20, rs_Z, cs_Z,
947  u21p, inc_up,
948  buff_0,
949  e0, inc_e );
950 
951  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
952  // FLA_Axpy( FLA_ONE, d0, t01 );
953  bl1_dcopyv( BLIS1_CONJUGATE,
954  n_behind,
955  a10t, cs_A,
956  t01, rs_T );
957  bl1_daxpyv( BLIS1_NO_CONJUGATE,
958  n_behind,
959  buff_1,
960  d0, inc_d,
961  t01, rs_T );
962 
963  // FLA_Set( FLA_ZERO, y21 );
964  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
965  // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
966  bl1_dsetv( n_ahead,
967  buff_0,
968  y21, rs_Y );
969  bl1_dgemv( BLIS1_NO_TRANSPOSE,
970  BLIS1_NO_CONJUGATE,
971  n_ahead,
972  n_behind,
973  buff_m1,
974  Y20, rs_Y, cs_Y,
975  d0, inc_d,
976  buff_1,
977  y21, rs_Y );
978  bl1_dgemv( BLIS1_TRANSPOSE,
979  BLIS1_NO_CONJUGATE,
980  m_behind,
981  n_ahead,
982  buff_m1,
983  A02, rs_A, cs_A,
984  e0, inc_e,
985  buff_1,
986  y21, rs_Y );
987 
988  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
989  bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
990  BLIS1_NO_CONJUGATE,
991  m_ahead,
992  n_ahead,
993  buff_1,
994  A22, rs_A, cs_A,
995  u21p, inc_up,
996  buff_1,
997  y21, rs_Y );
998 
999  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1000  bl1_daxpyv( BLIS1_CONJUGATE,
1001  n_ahead,
1002  &minus_inv_tau11,
1003  y21, rs_Y,
1004  a12p, inc_ap );
1005 
1006  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1007  bl1_dgemv( BLIS1_NO_TRANSPOSE,
1008  BLIS1_CONJUGATE,
1009  m_ahead,
1010  n_ahead,
1011  buff_1,
1012  A22, rs_A, cs_A,
1013  a12p, inc_ap,
1014  buff_0,
1015  w21, inc_w );
1016 
1017  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
1018  // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
1019  bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
1020  BLIS1_CONJUGATE,
1021  n_ahead,
1022  n_behind,
1023  buff_1,
1024  Y20, rs_Y, cs_Y,
1025  a12p, inc_ap,
1026  buff_0,
1027  f0, inc_f );
1028  bl1_dgemv( BLIS1_CONJ_NO_TRANSPOSE,
1029  BLIS1_CONJUGATE,
1030  m_behind,
1031  n_ahead,
1032  buff_1,
1033  A02, rs_A, cs_A,
1034  a12p, inc_ap,
1035  buff_0,
1036  g0, inc_g );
1037 
1038  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
1039  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
1040  bl1_dgemv( BLIS1_NO_TRANSPOSE,
1041  BLIS1_NO_CONJUGATE,
1042  m_ahead,
1043  n_behind,
1044  buff_m1,
1045  A20, rs_A, cs_A,
1046  f0, inc_f,
1047  buff_1,
1048  w21, inc_w );
1049  bl1_dgemv( BLIS1_NO_TRANSPOSE,
1050  BLIS1_NO_CONJUGATE,
1051  m_ahead,
1052  n_behind,
1053  buff_m1,
1054  Z20, rs_Z, cs_Z,
1055  g0, inc_g,
1056  buff_1,
1057  w21, inc_w );
1058 
1059  // FLA_Copy( A22_l, a22l );
1060  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
1061  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
1062  bl1_dcopyv( BLIS1_NO_CONJUGATE,
1063  m_ahead,
1064  A22_l, rs_A,
1065  a22l, inc_al );
1066  bl1_dgemv( BLIS1_NO_TRANSPOSE,
1067  BLIS1_CONJUGATE,
1068  m_ahead,
1069  n_behind,
1070  buff_m1,
1071  A20, rs_A, cs_A,
1072  Y20_t, cs_Y,
1073  buff_1,
1074  a22l, inc_al );
1075  bl1_dgemv( BLIS1_NO_TRANSPOSE,
1076  BLIS1_CONJUGATE,
1077  m_ahead,
1078  n_behind,
1079  buff_m1,
1080  Z20, rs_Z, cs_Z,
1081  A02_l, rs_A,
1082  buff_1,
1083  a22l, inc_al );
1084 
1085  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1086  bl1_daxpyv( BLIS1_CONJUGATE,
1087  n_ahead,
1088  buff_1,
1089  a12t, cs_A,
1090  y21, rs_Y );
1091 
1092  // FLA_Househ2s_UT( FLA_RIGHT,
1093  // a12p_t,
1094  // a12p_b,
1095  // alpha12, psi11_minus_alpha12, sigma11 );
1096  FLA_Househ2s_UT_r_opd( n_ahead - 1,
1097  a12p_t,
1098  a12p_b, inc_ap,
1099  &alpha12,
1100  &psi11_minus_alpha12,
1101  sigma11 );
1102 
1103  // FLA_Copy( a12p, v21 );
1104  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1105  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1106  // FLA_Conjugate( v21_b );
1107  bl1_dcopyv( BLIS1_NO_CONJUGATE,
1108  n_ahead,
1109  a12p, inc_ap,
1110  v21, inc_v );
1111  bl1_dmult4( buff_m1, &alpha12, v21_t, v21_t );
1112  bl1_dinvscalv( BLIS1_NO_CONJUGATE,
1113  n_ahead,
1114  &psi11_minus_alpha12,
1115  v21, inc_v );
1116  bl1_dconjv( n_ahead - 1,
1117  v21_b, inc_v );
1118 
1119  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1120  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1121  bl1_dcopyconj( &alpha12, &minus_conj_alpha12 );
1122  bl1_dneg1( &minus_conj_alpha12 );
1123 
1124  // FLA_Copy( g0, s01 );
1125  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
1126  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
1127  bl1_dcopyv( BLIS1_NO_CONJUGATE,
1128  n_behind,
1129  g0, inc_g,
1130  s01, rs_S );
1131  bl1_daxpyv( BLIS1_CONJUGATE,
1132  n_behind,
1133  &minus_conj_alpha12,
1134  A02_l, rs_A,
1135  s01, rs_S );
1136  bl1_dinvscalv( BLIS1_CONJUGATE,
1137  n_behind,
1138  &psi11_minus_alpha12,
1139  s01, rs_S );
1140 
1141  // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
1142  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1143  *a12t_l = alpha12;
1144  bl1_dcopyv( BLIS1_NO_CONJUGATE,
1145  n_ahead - 1,
1146  v21_b, inc_v,
1147  a12t_r, cs_A );
1148  }
1149 
1150  // FLA_Copy( u21p, u21 );
1151  bl1_dcopyv( BLIS1_NO_CONJUGATE,
1152  m_ahead,
1153  u21p, inc_up,
1154  u21, inc_u );
1155 
1156  if ( n_ahead > 0 )
1157  {
1158  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1159  // FLA_Scal( FLA_MINUS_ONE, beta );
1160  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1161  bl1_ddot( BLIS1_CONJUGATE,
1162  n_ahead,
1163  y21, rs_Y,
1164  v21, inc_v,
1165  &beta );
1166  bl1_dscals( &minus_inv_tau11, &beta );
1167 
1168  // FLA_Copy( w21, z21 );
1169  // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
1170  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1171  // FLA_Axpy( beta, u21, z21 );
1172  bl1_dcopyv( BLIS1_NO_CONJUGATE,
1173  m_ahead,
1174  w21, inc_w,
1175  z21, rs_Z );
1176  bl1_daxpyv( BLIS1_NO_CONJUGATE,
1177  m_ahead,
1178  &minus_conj_alpha12,
1179  a22l, inc_al,
1180  z21, rs_Z );
1181  bl1_dinvscalv( BLIS1_CONJUGATE,
1182  m_ahead,
1183  &psi11_minus_alpha12,
1184  z21, rs_Z );
1185  bl1_daxpyv( BLIS1_NO_CONJUGATE,
1186  m_ahead,
1187  &beta,
1188  u21, inc_u,
1189  z21, rs_Z );
1190 
1191  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1192  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1193  bl1_dinvscalv( BLIS1_NO_CONJUGATE,
1194  n_ahead,
1195  tau11,
1196  y21, rs_Y );
1197  bl1_dinvscalv( BLIS1_NO_CONJUGATE,
1198  m_ahead,
1199  sigma11,
1200  z21, rs_Z );
1201  }
1202  else // if ( n_ahead == 0 )
1203  {
1204  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1205  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1206  bl1_dcopyv( BLIS1_CONJUGATE,
1207  n_behind,
1208  a10t, cs_A,
1209  t01, rs_T );
1210  bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
1211  BLIS1_NO_CONJUGATE,
1212  m_ahead,
1213  n_behind,
1214  buff_1,
1215  A20, rs_A, cs_A,
1216  u21, inc_u,
1217  buff_1,
1218  t01, rs_T );
1219  }
1220 
1221  /*------------------------------------------------------------*/
1222 
1223  }
1224 
1225  // FLA_Obj_free( &w );
1226  // FLA_Obj_free( &al );
1227  // FLA_Obj_free( &ap );
1228  // FLA_Obj_free( &u );
1229  // FLA_Obj_free( &up );
1230  // FLA_Obj_free( &v );
1231  // FLA_Obj_free( &d );
1232  // FLA_Obj_free( &e );
1233  // FLA_Obj_free( &f );
1234  // FLA_Obj_free( &g );
1235  FLA_free( buff_w );
1236  FLA_free( buff_al );
1237  FLA_free( buff_ap );
1238  FLA_free( buff_u );
1239  FLA_free( buff_up );
1240  FLA_free( buff_v );
1241  FLA_free( buff_d );
1242  FLA_free( buff_e );
1243  FLA_free( buff_f );
1244  FLA_free( buff_g );
1245 
1246  return FLA_SUCCESS;
1247 }
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
FLA_Error FLA_Househ2s_UT_r_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *alpha, double *chi_1_minus_alpha, double *tau)
Definition: FLA_Househ2s_UT.c:572
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21
void bl1_dconjv(int m, double *x, int incx)
Definition: bl1_conjv.c:18
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:42
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
void bl1_dinvscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_invscalv.c:26
void bl1_dsetv(int m, double *sigma, double *x, int incx)
Definition: bl1_setv.c:39
void bl1_dsetm(int m, int n, double *sigma, double *a, int a_rs, int a_cs)
Definition: bl1_setm.c:45

References bl1_daxpyv(), bl1_dconjv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dinvscalv(), bl1_dsetm(), bl1_dsetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_Househ2s_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var4().

◆ FLA_Bidiag_UT_u_step_ops_var4()

FLA_Error FLA_Bidiag_UT_u_step_ops_var4 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_Y,
int  rs_Y,
int  cs_Y,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)
163 {
164  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
165  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
166  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
167 
168  float alpha12;
169  float minus_conj_alpha12;
170  float psi11_minus_alpha12;
171  float minus_inv_tau11;
172  float beta;
173  float last_elem;
174  int i;
175 
176  // b_alg = FLA_Obj_length( T );
177  int b_alg = m_TS;
178 
179  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
180  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
181  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
182  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
183  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
184  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
185  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
186  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
187  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
188  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
189  float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
190  float* buff_al = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
191  float* buff_ap = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
192  float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
193  float* buff_up = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
194  float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
195  float* buff_d = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
196  float* buff_e = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
197  float* buff_f = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
198  float* buff_g = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
199  int inc_w = 1;
200  int inc_al = 1;
201  int inc_ap = 1;
202  int inc_u = 1;
203  int inc_up = 1;
204  int inc_v = 1;
205  int inc_d = 1;
206  int inc_e = 1;
207  int inc_f = 1;
208  int inc_g = 1;
209 
210  // FLA_Set( FLA_ZERO, Y );
211  // FLA_Set( FLA_ZERO, Z );
212  bl1_ssetm( n_A,
213  b_alg,
214  buff_0,
215  buff_Y, rs_Y, cs_Y );
216  bl1_ssetm( m_A,
217  b_alg,
218  buff_0,
219  buff_Z, rs_Z, cs_Z );
220 
221  for ( i = 0; i < b_alg; ++i )
222  {
223  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
224  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
225  float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
226  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
227  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
228  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
229  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
230  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
231 
232  float* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
233  float* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
234  float* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
235 
236  float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
237  float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
238  float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
239 
240  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
241  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
242 
243  float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
244  float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
245 
246  float* w21 = buff_w + (i+1)*inc_w;
247 
248  float* a22l = buff_al + (i+1)*inc_al;
249 
250  float* a12p = buff_ap + (i+1)*inc_ap;
251 
252  float* u21 = buff_u + (i+1)*inc_u;
253 
254  float* u21p = buff_up + (i+1)*inc_up;
255 
256  float* v21 = buff_v + (i+1)*inc_v;
257 
258  float* d0 = buff_d + (0 )*inc_d;
259 
260  float* e0 = buff_e + (0 )*inc_e;
261 
262  float* f0 = buff_f + (0 )*inc_f;
263 
264  float* g0 = buff_g + (0 )*inc_g;
265 
266  float* a12p_t = a12p + (0 )*inc_ap;
267  float* a12p_b = a12p + (1 )*inc_ap;
268 
269  float* v21_t = v21 + (0 )*inc_v;
270  float* v21_b = v21 + (1 )*inc_v;
271 
272  float* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
273 
274  float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
275  float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
276 
277  float* A02_l = A02 + (0 )*cs_A + (0 )*rs_A;
278 
279  float* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
280 
281  float* Y20_t = Y20 + (0 )*cs_Y + (0 )*rs_Y;
282 
283  float* ABL = a10t;
284  float* ZBL = z10t;
285 
286  float* a2 = alpha11;
287 
288  int m_ahead = m_A - i - 1;
289  int n_ahead = n_A - i - 1;
290  int m_behind = i;
291  int n_behind = i;
292 
293  /*------------------------------------------------------------*/
294 
295  if ( m_behind > 0 )
296  {
297  // FLA_Copy( a01_b, last_elem );
298  // FLA_Set( FLA_ONE, a01_b );
299  last_elem = *a01_b;
300  *a01_b = *buff_1;
301  }
302 
303  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
304  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
307  m_ahead + 1,
308  n_behind,
309  buff_m1,
310  ABL, rs_A, cs_A,
311  y10t, cs_Y,
312  buff_1,
313  a2, rs_A );
316  m_ahead + 1,
317  n_behind,
318  buff_m1,
319  ZBL, rs_Z, cs_Z,
320  a01, rs_A,
321  buff_1,
322  a2, rs_A );
323 
324  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
325  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
328  n_ahead,
329  n_behind,
330  buff_m1,
331  Y20, rs_Y, cs_Y,
332  a10t, cs_A,
333  buff_1,
334  a12t, cs_A );
337  m_behind,
338  n_ahead,
339  buff_m1,
340  A02, rs_A, cs_A,
341  z10t, cs_Z,
342  buff_1,
343  a12t, cs_A );
344 
345  if ( m_behind > 0 )
346  {
347  // FLA_Copy( last_elem, a01_b );
348  *a01_b = last_elem;
349  }
350 
351  // FLA_Househ2_UT( FLA_LEFT,
352  // alpha11,
353  // a21, tau11 );
354  // FLA_Copy( a21, u21p );
355  FLA_Househ2_UT_l_ops( m_ahead,
356  alpha11,
357  a21, rs_A,
358  tau11 );
360  m_ahead,
361  a21, rs_A,
362  u21p, inc_up );
363 
364  if ( n_ahead > 0 )
365  {
366  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
367  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
368  bl1_sdiv3( buff_m1, tau11, &minus_inv_tau11 );
369 
370  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
371  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
373  n_ahead,
374  a12t, cs_A,
375  a12p, inc_ap );
377  n_ahead,
378  &minus_inv_tau11,
379  a12t, cs_A,
380  a12p, inc_ap );
381 
382  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
383  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
386  m_ahead,
387  n_behind,
388  buff_1,
389  A20, rs_A, cs_A,
390  u21p, inc_up,
391  buff_0,
392  d0, inc_d );
395  m_ahead,
396  n_behind,
397  buff_1,
398  Z20, rs_Z, cs_Z,
399  u21p, inc_up,
400  buff_0,
401  e0, inc_e );
402 
403  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
404  // FLA_Axpy( FLA_ONE, d0, t01 );
406  n_behind,
407  a10t, cs_A,
408  t01, rs_T );
410  n_behind,
411  buff_1,
412  d0, inc_d,
413  t01, rs_T );
414 
415  // FLA_Set( FLA_ZERO, y21 );
416  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
417  // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
418  bl1_ssetv( n_ahead,
419  buff_0,
420  y21, rs_Y );
423  n_ahead,
424  n_behind,
425  buff_m1,
426  Y20, rs_Y, cs_Y,
427  d0, inc_d,
428  buff_1,
429  y21, rs_Y );
432  m_behind,
433  n_ahead,
434  buff_m1,
435  A02, rs_A, cs_A,
436  e0, inc_e,
437  buff_1,
438  y21, rs_Y );
439 
440  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
443  m_ahead,
444  n_ahead,
445  buff_1,
446  A22, rs_A, cs_A,
447  u21p, inc_up,
448  buff_1,
449  y21, rs_Y );
450 
451  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
453  n_ahead,
454  &minus_inv_tau11,
455  y21, rs_Y,
456  a12p, inc_ap );
457 
458  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
461  m_ahead,
462  n_ahead,
463  buff_1,
464  A22, rs_A, cs_A,
465  a12p, inc_ap,
466  buff_0,
467  w21, inc_w );
468 
469  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
470  // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
473  n_ahead,
474  n_behind,
475  buff_1,
476  Y20, rs_Y, cs_Y,
477  a12p, inc_ap,
478  buff_0,
479  f0, inc_f );
482  m_behind,
483  n_ahead,
484  buff_1,
485  A02, rs_A, cs_A,
486  a12p, inc_ap,
487  buff_0,
488  g0, inc_g );
489 
490  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
491  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
494  m_ahead,
495  n_behind,
496  buff_m1,
497  A20, rs_A, cs_A,
498  f0, inc_f,
499  buff_1,
500  w21, inc_w );
503  m_ahead,
504  n_behind,
505  buff_m1,
506  Z20, rs_Z, cs_Z,
507  g0, inc_g,
508  buff_1,
509  w21, inc_w );
510 
511  // FLA_Copy( A22_l, a22l );
512  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
513  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
515  m_ahead,
516  A22_l, rs_A,
517  a22l, inc_al );
520  m_ahead,
521  n_behind,
522  buff_m1,
523  A20, rs_A, cs_A,
524  Y20_t, cs_Y,
525  buff_1,
526  a22l, inc_al );
529  m_ahead,
530  n_behind,
531  buff_m1,
532  Z20, rs_Z, cs_Z,
533  A02_l, rs_A,
534  buff_1,
535  a22l, inc_al );
536 
537  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
539  n_ahead,
540  buff_1,
541  a12t, cs_A,
542  y21, rs_Y );
543 
544  // FLA_Househ2s_UT( FLA_RIGHT,
545  // a12p_t,
546  // a12p_b,
547  // alpha12, psi11_minus_alpha12, sigma11 );
548  FLA_Househ2s_UT_r_ops( n_ahead - 1,
549  a12p_t,
550  a12p_b, inc_ap,
551  &alpha12,
552  &psi11_minus_alpha12,
553  sigma11 );
554 
555  // FLA_Copy( a12p, v21 );
556  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
557  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
558  // FLA_Conjugate( v21_b );
560  n_ahead,
561  a12p, inc_ap,
562  v21, inc_v );
563  bl1_smult4( buff_m1, &alpha12, v21_t, v21_t );
565  n_ahead,
566  &psi11_minus_alpha12,
567  v21, inc_v );
568  bl1_sconjv( n_ahead - 1,
569  v21_b, inc_v );
570 
571  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
572  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
573  bl1_scopyconj( &alpha12, &minus_conj_alpha12 );
574  bl1_sneg1( &minus_conj_alpha12 );
575 
576  // FLA_Copy( g0, s01 );
577  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
578  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
580  n_behind,
581  g0, inc_g,
582  s01, rs_S );
584  n_behind,
585  &minus_conj_alpha12,
586  A02_l, rs_A,
587  s01, rs_S );
589  n_behind,
590  &psi11_minus_alpha12,
591  s01, rs_S );
592 
593  // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
594  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
595  *a12t_l = alpha12;
597  n_ahead - 1,
598  v21_b, inc_v,
599  a12t_r, cs_A );
600  }
601 
602  // FLA_Copy( u21p, u21 );
604  m_ahead,
605  u21p, inc_up,
606  u21, inc_u );
607 
608  if ( n_ahead > 0 )
609  {
610  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
611  // FLA_Scal( FLA_MINUS_ONE, beta );
612  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
614  n_ahead,
615  y21, rs_Y,
616  v21, inc_v,
617  &beta );
618  bl1_sscals( &minus_inv_tau11, &beta );
619 
620  // FLA_Copy( w21, z21 );
621  // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
622  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
623  // FLA_Axpy( beta, u21, z21 );
625  m_ahead,
626  w21, inc_w,
627  z21, rs_Z );
629  m_ahead,
630  &minus_conj_alpha12,
631  a22l, inc_al,
632  z21, rs_Z );
634  m_ahead,
635  &psi11_minus_alpha12,
636  z21, rs_Z );
638  m_ahead,
639  &beta,
640  u21, inc_u,
641  z21, rs_Z );
642 
643  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
644  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
646  n_ahead,
647  tau11,
648  y21, rs_Y );
650  m_ahead,
651  sigma11,
652  z21, rs_Z );
653  }
654  else // if ( n_ahead == 0 )
655  {
656  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
657  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
659  n_behind,
660  a10t, cs_A,
661  t01, rs_T );
664  m_ahead,
665  n_behind,
666  buff_1,
667  A20, rs_A, cs_A,
668  u21, inc_u,
669  buff_1,
670  t01, rs_T );
671  }
672 
673  /*------------------------------------------------------------*/
674 
675  }
676 
677  // FLA_Obj_free( &w );
678  // FLA_Obj_free( &al );
679  // FLA_Obj_free( &ap );
680  // FLA_Obj_free( &u );
681  // FLA_Obj_free( &up );
682  // FLA_Obj_free( &v );
683  // FLA_Obj_free( &d );
684  // FLA_Obj_free( &e );
685  // FLA_Obj_free( &f );
686  // FLA_Obj_free( &g );
687  FLA_free( buff_w );
688  FLA_free( buff_al );
689  FLA_free( buff_ap );
690  FLA_free( buff_u );
691  FLA_free( buff_up );
692  FLA_free( buff_v );
693  FLA_free( buff_d );
694  FLA_free( buff_e );
695  FLA_free( buff_f );
696  FLA_free( buff_g );
697 
698  return FLA_SUCCESS;
699 }
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
FLA_Error FLA_Househ2s_UT_r_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *alpha, float *chi_1_minus_alpha, float *tau)
Definition: FLA_Househ2s_UT.c:555
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_sconjv(int m, float *x, int incx)
Definition: bl1_conjv.c:13
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:35
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
void bl1_sinvscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_invscalv.c:13
void bl1_ssetm(int m, int n, float *sigma, float *a, int a_rs, int a_cs)
Definition: bl1_setm.c:29
void bl1_ssetv(int m, float *sigma, float *x, int incx)
Definition: bl1_setv.c:26

References bl1_saxpyv(), bl1_sconjv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sinvscalv(), bl1_ssetm(), bl1_ssetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_Househ2s_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var4().

◆ FLA_Bidiag_UT_u_step_opt_var4()

FLA_Error FLA_Bidiag_UT_u_step_opt_var4 ( FLA_Obj  A,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  T,
FLA_Obj  S 
)
36 {
37  FLA_Datatype datatype;
38  int m_A, n_A, m_TS;
39  int rs_A, cs_A;
40  int rs_Y, cs_Y;
41  int rs_Z, cs_Z;
42  int rs_T, cs_T;
43  int rs_S, cs_S;
44 
45  datatype = FLA_Obj_datatype( A );
46 
47  m_A = FLA_Obj_length( A );
48  n_A = FLA_Obj_width( A );
49  m_TS = FLA_Obj_length( T );
50 
51  rs_A = FLA_Obj_row_stride( A );
52  cs_A = FLA_Obj_col_stride( A );
53 
54  rs_Y = FLA_Obj_row_stride( Y );
55  cs_Y = FLA_Obj_col_stride( Y );
56 
57  rs_Z = FLA_Obj_row_stride( Z );
58  cs_Z = FLA_Obj_col_stride( Z );
59 
60  rs_T = FLA_Obj_row_stride( T );
61  cs_T = FLA_Obj_col_stride( T );
62 
63  rs_S = FLA_Obj_row_stride( S );
64  cs_S = FLA_Obj_col_stride( S );
65 
66 
67  switch ( datatype )
68  {
69  case FLA_FLOAT:
70  {
71  float* buff_A = FLA_FLOAT_PTR( A );
72  float* buff_Y = FLA_FLOAT_PTR( Y );
73  float* buff_Z = FLA_FLOAT_PTR( Z );
74  float* buff_T = FLA_FLOAT_PTR( T );
75  float* buff_S = FLA_FLOAT_PTR( S );
76 
77  FLA_Bidiag_UT_u_step_ops_var4( m_A,
78  n_A,
79  m_TS,
80  buff_A, rs_A, cs_A,
81  buff_Y, rs_Y, cs_Y,
82  buff_Z, rs_Z, cs_Z,
83  buff_T, rs_T, cs_T,
84  buff_S, rs_S, cs_S );
85 
86  break;
87  }
88 
89  case FLA_DOUBLE:
90  {
91  double* buff_A = FLA_DOUBLE_PTR( A );
92  double* buff_Y = FLA_DOUBLE_PTR( Y );
93  double* buff_Z = FLA_DOUBLE_PTR( Z );
94  double* buff_T = FLA_DOUBLE_PTR( T );
95  double* buff_S = FLA_DOUBLE_PTR( S );
96 
97  FLA_Bidiag_UT_u_step_opd_var4( m_A,
98  n_A,
99  m_TS,
100  buff_A, rs_A, cs_A,
101  buff_Y, rs_Y, cs_Y,
102  buff_Z, rs_Z, cs_Z,
103  buff_T, rs_T, cs_T,
104  buff_S, rs_S, cs_S );
105 
106  break;
107  }
108 
109  case FLA_COMPLEX:
110  {
111  scomplex* buff_A = FLA_COMPLEX_PTR( A );
112  scomplex* buff_Y = FLA_COMPLEX_PTR( Y );
113  scomplex* buff_Z = FLA_COMPLEX_PTR( Z );
114  scomplex* buff_T = FLA_COMPLEX_PTR( T );
115  scomplex* buff_S = FLA_COMPLEX_PTR( S );
116 
117  FLA_Bidiag_UT_u_step_opc_var4( m_A,
118  n_A,
119  m_TS,
120  buff_A, rs_A, cs_A,
121  buff_Y, rs_Y, cs_Y,
122  buff_Z, rs_Z, cs_Z,
123  buff_T, rs_T, cs_T,
124  buff_S, rs_S, cs_S );
125 
126  break;
127  }
128 
129  case FLA_DOUBLE_COMPLEX:
130  {
131  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
132  dcomplex* buff_Y = FLA_DOUBLE_COMPLEX_PTR( Y );
133  dcomplex* buff_Z = FLA_DOUBLE_COMPLEX_PTR( Z );
134  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
135  dcomplex* buff_S = FLA_DOUBLE_COMPLEX_PTR( S );
136 
137  FLA_Bidiag_UT_u_step_opz_var4( m_A,
138  n_A,
139  m_TS,
140  buff_A, rs_A, cs_A,
141  buff_Y, rs_Y, cs_Y,
142  buff_Z, rs_Z, cs_Z,
143  buff_T, rs_T, cs_T,
144  buff_S, rs_S, cs_S );
145 
146  break;
147  }
148  }
149 
150  return FLA_SUCCESS;
151 }
FLA_Error FLA_Bidiag_UT_u_step_ops_var4(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var4.c:155
FLA_Error FLA_Bidiag_UT_u_step_opz_var4(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var4.c:1799
FLA_Error FLA_Bidiag_UT_u_step_opd_var4(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var4.c:703
FLA_Error FLA_Bidiag_UT_u_step_opc_var4(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var4.c:1251
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
Definition: blis_type_defs.h:138

References FLA_Bidiag_UT_u_step_opc_var4(), FLA_Bidiag_UT_u_step_opd_var4(), FLA_Bidiag_UT_u_step_ops_var4(), FLA_Bidiag_UT_u_step_opz_var4(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u_blk_var4(), and FLA_Bidiag_UT_u_opt_var4().

◆ FLA_Bidiag_UT_u_step_opz_var4()

FLA_Error FLA_Bidiag_UT_u_step_opz_var4 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_Y,
int  rs_Y,
int  cs_Y,
dcomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex *  buff_T,
int  rs_T,
int  cs_T,
dcomplex *  buff_S,
int  rs_S,
int  cs_S 
)
1807 {
1808  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
1809  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
1810  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
1811 
1812  dcomplex alpha12;
1813  dcomplex minus_conj_alpha12;
1814  dcomplex psi11_minus_alpha12;
1815  dcomplex minus_inv_tau11;
1816  dcomplex beta;
1817  dcomplex last_elem;
1818  int i;
1819 
1820  // b_alg = FLA_Obj_length( T );
1821  int b_alg = m_TS;
1822 
1823  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
1824  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &al );
1825  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
1826  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1827  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
1828  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
1829  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &d );
1830  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &e );
1831  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
1832  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &g );
1833  dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1834  dcomplex* buff_al = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1835  dcomplex* buff_ap = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1836  dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1837  dcomplex* buff_up = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1838  dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1839  dcomplex* buff_d = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1840  dcomplex* buff_e = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1841  dcomplex* buff_f = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1842  dcomplex* buff_g = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1843  int inc_w = 1;
1844  int inc_al = 1;
1845  int inc_ap = 1;
1846  int inc_u = 1;
1847  int inc_up = 1;
1848  int inc_v = 1;
1849  int inc_d = 1;
1850  int inc_e = 1;
1851  int inc_f = 1;
1852  int inc_g = 1;
1853 
1854  // FLA_Set( FLA_ZERO, Y );
1855  // FLA_Set( FLA_ZERO, Z );
1856  bl1_zsetm( n_A,
1857  b_alg,
1858  buff_0,
1859  buff_Y, rs_Y, cs_Y );
1860  bl1_zsetm( m_A,
1861  b_alg,
1862  buff_0,
1863  buff_Z, rs_Z, cs_Z );
1864 
1865  for ( i = 0; i < b_alg; ++i )
1866  {
1867  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1868  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1869  dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
1870  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1871  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1872  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1873  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1874  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1875 
1876  dcomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1877  dcomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1878  dcomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1879 
1880  dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1881  dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1882  dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1883 
1884  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1885  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1886 
1887  dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1888  dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1889 
1890  dcomplex* w21 = buff_w + (i+1)*inc_w;
1891 
1892  dcomplex* a22l = buff_al + (i+1)*inc_al;
1893 
1894  dcomplex* a12p = buff_ap + (i+1)*inc_ap;
1895 
1896  dcomplex* u21 = buff_u + (i+1)*inc_u;
1897 
1898  dcomplex* u21p = buff_up + (i+1)*inc_up;
1899 
1900  dcomplex* v21 = buff_v + (i+1)*inc_v;
1901 
1902  dcomplex* d0 = buff_d + (0 )*inc_d;
1903 
1904  dcomplex* e0 = buff_e + (0 )*inc_e;
1905 
1906  dcomplex* f0 = buff_f + (0 )*inc_f;
1907 
1908  dcomplex* g0 = buff_g + (0 )*inc_g;
1909 
1910  dcomplex* a12p_t = a12p + (0 )*inc_ap;
1911  dcomplex* a12p_b = a12p + (1 )*inc_ap;
1912 
1913  dcomplex* v21_t = v21 + (0 )*inc_v;
1914  dcomplex* v21_b = v21 + (1 )*inc_v;
1915 
1916  dcomplex* a01_b = a01 + (0 )*cs_A + (i-1)*rs_A;
1917 
1918  dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1919  dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1920 
1921  dcomplex* A02_l = A02 + (0 )*cs_A + (0 )*rs_A;
1922 
1923  dcomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
1924 
1925  dcomplex* Y20_t = Y20 + (0 )*cs_Y + (0 )*rs_Y;
1926 
1927  dcomplex* ABL = a10t;
1928  dcomplex* ZBL = z10t;
1929 
1930  dcomplex* a2 = alpha11;
1931 
1932  int m_ahead = m_A - i - 1;
1933  int n_ahead = n_A - i - 1;
1934  int m_behind = i;
1935  int n_behind = i;
1936 
1937  /*------------------------------------------------------------*/
1938 
1939  if ( m_behind > 0 )
1940  {
1941  // FLA_Copy( a01_b, last_elem );
1942  // FLA_Set( FLA_ONE, a01_b );
1943  last_elem = *a01_b;
1944  *a01_b = *buff_1;
1945  }
1946 
1947  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1948  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a01, FLA_ONE, a2 );
1951  m_ahead + 1,
1952  n_behind,
1953  buff_m1,
1954  ABL, rs_A, cs_A,
1955  y10t, cs_Y,
1956  buff_1,
1957  a2, rs_A );
1960  m_ahead + 1,
1961  n_behind,
1962  buff_m1,
1963  ZBL, rs_Z, cs_Z,
1964  a01, rs_A,
1965  buff_1,
1966  a2, rs_A );
1967 
1968  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1969  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, A02, z10t, FLA_ONE, a12t );
1972  n_ahead,
1973  n_behind,
1974  buff_m1,
1975  Y20, rs_Y, cs_Y,
1976  a10t, cs_A,
1977  buff_1,
1978  a12t, cs_A );
1981  m_behind,
1982  n_ahead,
1983  buff_m1,
1984  A02, rs_A, cs_A,
1985  z10t, cs_Z,
1986  buff_1,
1987  a12t, cs_A );
1988 
1989  if ( m_behind > 0 )
1990  {
1991  // FLA_Copy( last_elem, a01_b );
1992  *a01_b = last_elem;
1993  }
1994 
1995  // FLA_Househ2_UT( FLA_LEFT,
1996  // alpha11,
1997  // a21, tau11 );
1998  // FLA_Copy( a21, u21p );
1999  FLA_Househ2_UT_l_opz( m_ahead,
2000  alpha11,
2001  a21, rs_A,
2002  tau11 );
2004  m_ahead,
2005  a21, rs_A,
2006  u21p, inc_up );
2007 
2008  if ( n_ahead > 0 )
2009  {
2010  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
2011  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
2012  bl1_zdiv3( buff_m1, tau11, &minus_inv_tau11 );
2013 
2014  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
2015  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
2017  n_ahead,
2018  a12t, cs_A,
2019  a12p, inc_ap );
2021  n_ahead,
2022  &minus_inv_tau11,
2023  a12t, cs_A,
2024  a12p, inc_ap );
2025 
2026  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21p, FLA_ZERO, d0 );
2027  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, u21p, FLA_ZERO, e0 );
2030  m_ahead,
2031  n_behind,
2032  buff_1,
2033  A20, rs_A, cs_A,
2034  u21p, inc_up,
2035  buff_0,
2036  d0, inc_d );
2039  m_ahead,
2040  n_behind,
2041  buff_1,
2042  Z20, rs_Z, cs_Z,
2043  u21p, inc_up,
2044  buff_0,
2045  e0, inc_e );
2046 
2047  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
2048  // FLA_Axpy( FLA_ONE, d0, t01 );
2050  n_behind,
2051  a10t, cs_A,
2052  t01, rs_T );
2054  n_behind,
2055  buff_1,
2056  d0, inc_d,
2057  t01, rs_T );
2058 
2059  // FLA_Set( FLA_ZERO, y21 );
2060  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
2061  // FLA_Gemv( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, e0, FLA_ONE, y21 );
2062  bl1_zsetv( n_ahead,
2063  buff_0,
2064  y21, rs_Y );
2067  n_ahead,
2068  n_behind,
2069  buff_m1,
2070  Y20, rs_Y, cs_Y,
2071  d0, inc_d,
2072  buff_1,
2073  y21, rs_Y );
2076  m_behind,
2077  n_ahead,
2078  buff_m1,
2079  A02, rs_A, cs_A,
2080  e0, inc_e,
2081  buff_1,
2082  y21, rs_Y );
2083 
2084  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, u21p, FLA_ONE, y21 );
2087  m_ahead,
2088  n_ahead,
2089  buff_1,
2090  A22, rs_A, cs_A,
2091  u21p, inc_up,
2092  buff_1,
2093  y21, rs_Y );
2094 
2095  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
2097  n_ahead,
2098  &minus_inv_tau11,
2099  y21, rs_Y,
2100  a12p, inc_ap );
2101 
2102  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
2105  m_ahead,
2106  n_ahead,
2107  buff_1,
2108  A22, rs_A, cs_A,
2109  a12p, inc_ap,
2110  buff_0,
2111  w21, inc_w );
2112 
2113  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, Y20, a12p, FLA_ZERO, f0 );
2114  // FLA_Gemvc( FLA_CONJ_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, a12p, FLA_ZERO, g0 );
2117  n_ahead,
2118  n_behind,
2119  buff_1,
2120  Y20, rs_Y, cs_Y,
2121  a12p, inc_ap,
2122  buff_0,
2123  f0, inc_f );
2126  m_behind,
2127  n_ahead,
2128  buff_1,
2129  A02, rs_A, cs_A,
2130  a12p, inc_ap,
2131  buff_0,
2132  g0, inc_g );
2133 
2134  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, w21 );
2135  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, g0, FLA_ONE, w21 );
2138  m_ahead,
2139  n_behind,
2140  buff_m1,
2141  A20, rs_A, cs_A,
2142  f0, inc_f,
2143  buff_1,
2144  w21, inc_w );
2147  m_ahead,
2148  n_behind,
2149  buff_m1,
2150  Z20, rs_Z, cs_Z,
2151  g0, inc_g,
2152  buff_1,
2153  w21, inc_w );
2154 
2155  // FLA_Copy( A22_l, a22l );
2156  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, A20, Y20_t, FLA_ONE, a22l );
2157  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, Z20, A02_l, FLA_ONE, a22l );
2159  m_ahead,
2160  A22_l, rs_A,
2161  a22l, inc_al );
2164  m_ahead,
2165  n_behind,
2166  buff_m1,
2167  A20, rs_A, cs_A,
2168  Y20_t, cs_Y,
2169  buff_1,
2170  a22l, inc_al );
2173  m_ahead,
2174  n_behind,
2175  buff_m1,
2176  Z20, rs_Z, cs_Z,
2177  A02_l, rs_A,
2178  buff_1,
2179  a22l, inc_al );
2180 
2181  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
2183  n_ahead,
2184  buff_1,
2185  a12t, cs_A,
2186  y21, rs_Y );
2187 
2188  // FLA_Househ2s_UT( FLA_RIGHT,
2189  // a12p_t,
2190  // a12p_b,
2191  // alpha12, psi11_minus_alpha12, sigma11 );
2192  FLA_Househ2s_UT_r_opz( n_ahead - 1,
2193  a12p_t,
2194  a12p_b, inc_ap,
2195  &alpha12,
2196  &psi11_minus_alpha12,
2197  sigma11 );
2198 
2199  // FLA_Copy( a12p, v21 );
2200  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
2201  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
2202  // FLA_Conjugate( v21_b );
2204  n_ahead,
2205  a12p, inc_ap,
2206  v21, inc_v );
2207  bl1_zmult4( buff_m1, &alpha12, v21_t, v21_t );
2209  n_ahead,
2210  &psi11_minus_alpha12,
2211  v21, inc_v );
2212  bl1_zconjv( n_ahead - 1,
2213  v21_b, inc_v );
2214 
2215  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
2216  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
2217  bl1_zcopyconj( &alpha12, &minus_conj_alpha12 );
2218  bl1_zneg1( &minus_conj_alpha12 );
2219 
2220  // FLA_Copy( g0, s01 );
2221  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_conj_alpha12, A02_l, s01 );
2222  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, s01 );
2224  n_behind,
2225  g0, inc_g,
2226  s01, rs_S );
2228  n_behind,
2229  &minus_conj_alpha12,
2230  A02_l, rs_A,
2231  s01, rs_S );
2233  n_behind,
2234  &psi11_minus_alpha12,
2235  s01, rs_S );
2236 
2237  // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
2238  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
2239  *a12t_l = alpha12;
2241  n_ahead - 1,
2242  v21_b, inc_v,
2243  a12t_r, cs_A );
2244  }
2245 
2246  // FLA_Copy( u21p, u21 );
2248  m_ahead,
2249  u21p, inc_up,
2250  u21, inc_u );
2251 
2252  if ( n_ahead > 0 )
2253  {
2254  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
2255  // FLA_Scal( FLA_MINUS_ONE, beta );
2256  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
2258  n_ahead,
2259  y21, rs_Y,
2260  v21, inc_v,
2261  &beta );
2262  bl1_zscals( &minus_inv_tau11, &beta );
2263 
2264  // FLA_Copy( w21, z21 );
2265  // FLA_Axpy( minus_conj_alpha12, a22l, z21 );
2266  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
2267  // FLA_Axpy( beta, u21, z21 );
2269  m_ahead,
2270  w21, inc_w,
2271  z21, rs_Z );
2273  m_ahead,
2274  &minus_conj_alpha12,
2275  a22l, inc_al,
2276  z21, rs_Z );
2278  m_ahead,
2279  &psi11_minus_alpha12,
2280  z21, rs_Z );
2282  m_ahead,
2283  &beta,
2284  u21, inc_u,
2285  z21, rs_Z );
2286 
2287  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
2288  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
2290  n_ahead,
2291  tau11,
2292  y21, rs_Y );
2294  m_ahead,
2295  sigma11,
2296  z21, rs_Z );
2297  }
2298  else // if ( n_ahead == 0 )
2299  {
2300  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
2301  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
2303  n_behind,
2304  a10t, cs_A,
2305  t01, rs_T );
2308  m_ahead,
2309  n_behind,
2310  buff_1,
2311  A20, rs_A, cs_A,
2312  u21, inc_u,
2313  buff_1,
2314  t01, rs_T );
2315  }
2316 
2317  /*------------------------------------------------------------*/
2318 
2319  }
2320 
2321  // FLA_Obj_free( &w );
2322  // FLA_Obj_free( &al );
2323  // FLA_Obj_free( &ap );
2324  // FLA_Obj_free( &u );
2325  // FLA_Obj_free( &up );
2326  // FLA_Obj_free( &v );
2327  // FLA_Obj_free( &d );
2328  // FLA_Obj_free( &e );
2329  // FLA_Obj_free( &f );
2330  // FLA_Obj_free( &g );
2331  FLA_free( buff_w );
2332  FLA_free( buff_al );
2333  FLA_free( buff_ap );
2334  FLA_free( buff_u );
2335  FLA_free( buff_up );
2336  FLA_free( buff_v );
2337  FLA_free( buff_d );
2338  FLA_free( buff_e );
2339  FLA_free( buff_f );
2340  FLA_free( buff_g );
2341 
2342  return FLA_SUCCESS;
2343 }
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
FLA_Error FLA_Househ2s_UT_r_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *alpha, dcomplex *chi_1_minus_alpha, dcomplex *tau)
Definition: FLA_Househ2s_UT.c:610
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60
void bl1_zconjv(int m, dcomplex *x, int incx)
Definition: bl1_conjv.c:34
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
bl1_zscals(beta, rho_yz)
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
void bl1_zinvscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_invscalv.c:78
void bl1_zsetv(int m, dcomplex *sigma, dcomplex *x, int incx)
Definition: bl1_setv.c:66
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:78

References bl1_zaxpyv(), bl1_zconjv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zinvscalv(), bl1_zscals(), bl1_zsetm(), bl1_zsetv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_Househ2s_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var4().