libflame  revision_anchor
Functions
FLA_Apply_G_rf_opt_var6.c File Reference

(r)

Functions

FLA_Error FLA_Apply_G_rf_opt_var6 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ops_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opd_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opc_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opz_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 

Function Documentation

◆ FLA_Apply_G_rf_opc_var6()

FLA_Error FLA_Apply_G_rf_opc_var6 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)
679 {
680  float one = bl1_s1();
681  float zero = bl1_s0();
682  float gamma12;
683  float sigma12;
684  float gamma23;
685  float sigma23;
686  scomplex* a1;
687  scomplex* a2;
688  scomplex* a3;
689  scomplex* g12;
690  scomplex* g23;
691  int i, j, g, k;
692  int nG, nG_app;
693  int n_iter;
694  int n_left;
695  int k_minus_1;
696  int n_fuse;
697  int is_ident12, is_ident23;
698 
699  k_minus_1 = k_G - 1;
700  nG = n_A - 1;
701  n_fuse = 2;
702 
703  // Use the simple variant for nG < (k - 1) or k == 1.
704  if ( nG < k_minus_1 || k_G == 1 )
705  {
706  FLA_Apply_G_rf_opc_var1( k_G,
707  m_A,
708  n_A,
709  buff_G, rs_G, cs_G,
710  buff_A, rs_A, cs_A );
711  return FLA_SUCCESS;
712  }
713 
714 
715  // Start-up phase.
716 
717  for ( j = 0; j < k_minus_1; ++j )
718  {
719  nG_app = j + 1;
720  n_iter = nG_app / n_fuse;
721  n_left = nG_app % n_fuse;
722 
723  for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
724  {
725  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
726  g23 = buff_G + (g )*rs_G + (k )*cs_G;
727  a1 = buff_A + (g - 1)*cs_A;
728  a2 = buff_A + (g )*cs_A;
729  a3 = buff_A + (g + 1)*cs_A;
730 
731  gamma12 = g12->real;
732  sigma12 = g12->imag;
733  gamma23 = g23->real;
734  sigma23 = g23->imag;
735 
736  is_ident12 = ( gamma12 == one && sigma12 == zero );
737  is_ident23 = ( gamma23 == one && sigma23 == zero );
738 
739  if ( !is_ident12 && is_ident23 )
740  {
741  // Apply only to columns 1 and 2.
742 
743  MAC_Apply_G_mx2_opc( m_A,
744  &gamma12,
745  &sigma12,
746  a1, rs_A,
747  a2, rs_A );
748  }
749  else if ( is_ident12 && !is_ident23 )
750  {
751  // Apply only to columns 2 and 3.
752 
753  MAC_Apply_G_mx2_opc( m_A,
754  &gamma23,
755  &sigma23,
756  a2, rs_A,
757  a3, rs_A );
758  }
759  else if ( !is_ident12 && !is_ident23 )
760  {
761  // Apply to all three columns.
762 
763  MAC_Apply_G_mx3b_opc( m_A,
764  &gamma12,
765  &sigma12,
766  &gamma23,
767  &sigma23,
768  a1, rs_A,
769  a2, rs_A,
770  a3, rs_A );
771  }
772  }
773  //for ( k = 0; k < n_left; k += 1, g -= 1 )
774  if ( n_left == 1 )
775  {
776  g23 = buff_G + (g )*rs_G + (k )*cs_G;
777  a2 = buff_A + (g )*cs_A;
778  a3 = buff_A + (g + 1)*cs_A;
779 
780  gamma23 = g23->real;
781  sigma23 = g23->imag;
782 
783  is_ident23 = ( gamma23 == one && sigma23 == zero );
784 
785  if ( !is_ident23 )
786  MAC_Apply_G_mx2_opc( m_A,
787  &gamma23,
788  &sigma23,
789  a2, rs_A,
790  a3, rs_A );
791  }
792  }
793 
794  // Pipeline stage
795 
796  for ( j = k_minus_1; j < nG; ++j )
797  {
798  nG_app = k_G;
799  n_iter = nG_app / n_fuse;
800  n_left = nG_app % n_fuse;
801 
802  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
803  {
804  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
805  g23 = buff_G + (g )*rs_G + (k )*cs_G;
806  a1 = buff_A + (g - 1)*cs_A;
807  a2 = buff_A + (g )*cs_A;
808  a3 = buff_A + (g + 1)*cs_A;
809 
810  gamma12 = g12->real;
811  sigma12 = g12->imag;
812  gamma23 = g23->real;
813  sigma23 = g23->imag;
814 
815  is_ident12 = ( gamma12 == one && sigma12 == zero );
816  is_ident23 = ( gamma23 == one && sigma23 == zero );
817 
818  if ( !is_ident12 && is_ident23 )
819  {
820  // Apply only to columns 1 and 2.
821 
822  MAC_Apply_G_mx2_opc( m_A,
823  &gamma12,
824  &sigma12,
825  a1, rs_A,
826  a2, rs_A );
827  }
828  else if ( is_ident12 && !is_ident23 )
829  {
830  // Apply only to columns 2 and 3.
831 
832  MAC_Apply_G_mx2_opc( m_A,
833  &gamma23,
834  &sigma23,
835  a2, rs_A,
836  a3, rs_A );
837  }
838  else if ( !is_ident12 && !is_ident23 )
839  {
840  // Apply to all three columns.
841 
842  MAC_Apply_G_mx3b_opc( m_A,
843  &gamma12,
844  &sigma12,
845  &gamma23,
846  &sigma23,
847  a1, rs_A,
848  a2, rs_A,
849  a3, rs_A );
850  }
851  }
852  //for ( k = 0; k < n_left; k += 1, g -= 1 )
853  if ( n_left == 1 )
854  {
855  g23 = buff_G + (g )*rs_G + (k )*cs_G;
856  a2 = buff_A + (g )*cs_A;
857  a3 = buff_A + (g + 1)*cs_A;
858 
859  gamma23 = g23->real;
860  sigma23 = g23->imag;
861 
862  is_ident23 = ( gamma23 == one && sigma23 == zero );
863 
864  if ( !is_ident23 )
865  MAC_Apply_G_mx2_opc( m_A,
866  &gamma23,
867  &sigma23,
868  a2, rs_A,
869  a3, rs_A );
870  }
871  }
872 
873  // Shutdown stage
874 
875  for ( j = 1; j < k_G; ++j )
876  {
877  nG_app = k_G - j;
878  n_iter = nG_app / n_fuse;
879  n_left = nG_app % n_fuse;
880 
881  for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
882  {
883  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
884  g23 = buff_G + (g )*rs_G + (k )*cs_G;
885  a1 = buff_A + (g - 1)*cs_A;
886  a2 = buff_A + (g )*cs_A;
887  a3 = buff_A + (g + 1)*cs_A;
888 
889  gamma12 = g12->real;
890  sigma12 = g12->imag;
891  gamma23 = g23->real;
892  sigma23 = g23->imag;
893 
894  is_ident12 = ( gamma12 == one && sigma12 == zero );
895  is_ident23 = ( gamma23 == one && sigma23 == zero );
896 
897  if ( !is_ident12 && is_ident23 )
898  {
899  // Apply only to columns 1 and 2.
900 
901  MAC_Apply_G_mx2_opc( m_A,
902  &gamma12,
903  &sigma12,
904  a1, rs_A,
905  a2, rs_A );
906  }
907  else if ( is_ident12 && !is_ident23 )
908  {
909  // Apply only to columns 2 and 3.
910 
911  MAC_Apply_G_mx2_opc( m_A,
912  &gamma23,
913  &sigma23,
914  a2, rs_A,
915  a3, rs_A );
916  }
917  else if ( !is_ident12 && !is_ident23 )
918  {
919  // Apply to all three columns.
920 
921  MAC_Apply_G_mx3b_opc( m_A,
922  &gamma12,
923  &sigma12,
924  &gamma23,
925  &sigma23,
926  a1, rs_A,
927  a2, rs_A,
928  a3, rs_A );
929  }
930  }
931  //for ( k = 0; k < nG_app_left; k += 1, g -= 1 )
932  if ( n_left == 1 )
933  {
934  g23 = buff_G + (g )*rs_G + (k )*cs_G;
935  a2 = buff_A + (g )*cs_A;
936  a3 = buff_A + (g + 1)*cs_A;
937 
938  gamma23 = g23->real;
939  sigma23 = g23->imag;
940 
941  is_ident23 = ( gamma23 == one && sigma23 == zero );
942 
943  if ( !is_ident23 )
944  MAC_Apply_G_mx2_opc( m_A,
945  &gamma23,
946  &sigma23,
947  a2, rs_A,
948  a3, rs_A );
949  }
950  }
951 
952  return FLA_SUCCESS;
953 }
FLA_Error FLA_Apply_G_rf_opc_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var1.c:215
int n_left
Definition: bl1_axmyv2.c:149
int i
Definition: bl1_axmyv2.c:145
float bl1_s0(void)
Definition: bl1_constants.c:111
float bl1_s1(void)
Definition: bl1_constants.c:47
scomplex
Definition: blis_type_defs.h:133
float imag
Definition: blis_type_defs.h:134
float real
Definition: blis_type_defs.h:134

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_opc_var1(), i, scomplex::imag, n_left, and scomplex::real.

Referenced by FLA_Apply_G_rf_opt_var6().

◆ FLA_Apply_G_rf_opd_var6()

FLA_Error FLA_Apply_G_rf_opd_var6 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)
398 {
399  double one = bl1_d1();
400  double zero = bl1_d0();
401  double gamma12;
402  double sigma12;
403  double gamma23;
404  double sigma23;
405  double* a1;
406  double* a2;
407  double* a3;
408  dcomplex* g12;
409  dcomplex* g23;
410  int i, j, g, k;
411  int nG, nG_app;
412  int n_iter;
413  int n_left;
414  int k_minus_1;
415  int n_fuse;
416  int is_ident12, is_ident23;
417 
418  k_minus_1 = k_G - 1;
419  nG = n_A - 1;
420  n_fuse = 2;
421 
422  // Use the simple variant for nG < (k - 1) or k == 1.
423  if ( nG < k_minus_1 || k_G == 1 )
424  {
425  FLA_Apply_G_rf_opd_var1( k_G,
426  m_A,
427  n_A,
428  buff_G, rs_G, cs_G,
429  buff_A, rs_A, cs_A );
430  return FLA_SUCCESS;
431  }
432 
433 
434  // Start-up phase.
435 
436  for ( j = 0; j < k_minus_1; ++j )
437  {
438  nG_app = j + 1;
439  n_iter = nG_app / n_fuse;
440  n_left = nG_app % n_fuse;
441 
442  for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
443  {
444  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
445  g23 = buff_G + (g )*rs_G + (k )*cs_G;
446  a1 = buff_A + (g - 1)*cs_A;
447  a2 = buff_A + (g )*cs_A;
448  a3 = buff_A + (g + 1)*cs_A;
449 
450  gamma12 = g12->real;
451  sigma12 = g12->imag;
452  gamma23 = g23->real;
453  sigma23 = g23->imag;
454 
455  is_ident12 = ( gamma12 == one && sigma12 == zero );
456  is_ident23 = ( gamma23 == one && sigma23 == zero );
457 
458  if ( !is_ident12 && is_ident23 )
459  {
460  // Apply only to columns 1 and 2.
461 
462  MAC_Apply_G_mx2_opd( m_A,
463  &gamma12,
464  &sigma12,
465  a1, rs_A,
466  a2, rs_A );
467  }
468  else if ( is_ident12 && !is_ident23 )
469  {
470  // Apply only to columns 2 and 3.
471 
472  MAC_Apply_G_mx2_opd( m_A,
473  &gamma23,
474  &sigma23,
475  a2, rs_A,
476  a3, rs_A );
477  }
478  else if ( !is_ident12 && !is_ident23 )
479  {
480  // Apply to all three columns.
481 
482  MAC_Apply_G_mx3b_opd( m_A,
483  &gamma12,
484  &sigma12,
485  &gamma23,
486  &sigma23,
487  a1, rs_A,
488  a2, rs_A,
489  a3, rs_A );
490  }
491  }
492  //for ( k = 0; k < n_left; k += 1, g -= 1 )
493  if ( n_left == 1 )
494  {
495  g23 = buff_G + (g )*rs_G + (k )*cs_G;
496  a2 = buff_A + (g )*cs_A;
497  a3 = buff_A + (g + 1)*cs_A;
498 
499  gamma23 = g23->real;
500  sigma23 = g23->imag;
501 
502  is_ident23 = ( gamma23 == one && sigma23 == zero );
503 
504  if ( !is_ident23 )
505  MAC_Apply_G_mx2_opd( m_A,
506  &gamma23,
507  &sigma23,
508  a2, rs_A,
509  a3, rs_A );
510  }
511  }
512 
513  // Pipeline stage
514 
515  for ( j = k_minus_1; j < nG; ++j )
516  {
517  nG_app = k_G;
518  n_iter = nG_app / n_fuse;
519  n_left = nG_app % n_fuse;
520 
521  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
522  {
523  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
524  g23 = buff_G + (g )*rs_G + (k )*cs_G;
525  a1 = buff_A + (g - 1)*cs_A;
526  a2 = buff_A + (g )*cs_A;
527  a3 = buff_A + (g + 1)*cs_A;
528 
529  gamma12 = g12->real;
530  sigma12 = g12->imag;
531  gamma23 = g23->real;
532  sigma23 = g23->imag;
533 
534  is_ident12 = ( gamma12 == one && sigma12 == zero );
535  is_ident23 = ( gamma23 == one && sigma23 == zero );
536 
537  if ( !is_ident12 && is_ident23 )
538  {
539  // Apply only to columns 1 and 2.
540 
541  MAC_Apply_G_mx2_opd( m_A,
542  &gamma12,
543  &sigma12,
544  a1, rs_A,
545  a2, rs_A );
546  }
547  else if ( is_ident12 && !is_ident23 )
548  {
549  // Apply only to columns 2 and 3.
550 
551  MAC_Apply_G_mx2_opd( m_A,
552  &gamma23,
553  &sigma23,
554  a2, rs_A,
555  a3, rs_A );
556  }
557  else if ( !is_ident12 && !is_ident23 )
558  {
559  // Apply to all three columns.
560 
561  MAC_Apply_G_mx3b_opd( m_A,
562  &gamma12,
563  &sigma12,
564  &gamma23,
565  &sigma23,
566  a1, rs_A,
567  a2, rs_A,
568  a3, rs_A );
569  }
570  }
571  //for ( k = 0; k < n_left; k += 1, g -= 1 )
572  if ( n_left == 1 )
573  {
574  g23 = buff_G + (g )*rs_G + (k )*cs_G;
575  a2 = buff_A + (g )*cs_A;
576  a3 = buff_A + (g + 1)*cs_A;
577 
578  gamma23 = g23->real;
579  sigma23 = g23->imag;
580 
581  is_ident23 = ( gamma23 == one && sigma23 == zero );
582 
583  if ( !is_ident23 )
584  MAC_Apply_G_mx2_opd( m_A,
585  &gamma23,
586  &sigma23,
587  a2, rs_A,
588  a3, rs_A );
589  }
590  }
591 
592  // Shutdown stage
593 
594  for ( j = 1; j < k_G; ++j )
595  {
596  nG_app = k_G - j;
597  n_iter = nG_app / n_fuse;
598  n_left = nG_app % n_fuse;
599 
600  for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
601  {
602  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
603  g23 = buff_G + (g )*rs_G + (k )*cs_G;
604  a1 = buff_A + (g - 1)*cs_A;
605  a2 = buff_A + (g )*cs_A;
606  a3 = buff_A + (g + 1)*cs_A;
607 
608  gamma12 = g12->real;
609  sigma12 = g12->imag;
610  gamma23 = g23->real;
611  sigma23 = g23->imag;
612 
613  is_ident12 = ( gamma12 == one && sigma12 == zero );
614  is_ident23 = ( gamma23 == one && sigma23 == zero );
615 
616  if ( !is_ident12 && is_ident23 )
617  {
618  // Apply only to columns 1 and 2.
619 
620  MAC_Apply_G_mx2_opd( m_A,
621  &gamma12,
622  &sigma12,
623  a1, rs_A,
624  a2, rs_A );
625  }
626  else if ( is_ident12 && !is_ident23 )
627  {
628  // Apply only to columns 2 and 3.
629 
630  MAC_Apply_G_mx2_opd( m_A,
631  &gamma23,
632  &sigma23,
633  a2, rs_A,
634  a3, rs_A );
635  }
636  else if ( !is_ident12 && !is_ident23 )
637  {
638  // Apply to all three columns.
639 
640  MAC_Apply_G_mx3b_opd( m_A,
641  &gamma12,
642  &sigma12,
643  &gamma23,
644  &sigma23,
645  a1, rs_A,
646  a2, rs_A,
647  a3, rs_A );
648  }
649  }
650  //for ( k = 0; k < nG_app_left; k += 1, g -= 1 )
651  if ( n_left == 1 )
652  {
653  g23 = buff_G + (g )*rs_G + (k )*cs_G;
654  a2 = buff_A + (g )*cs_A;
655  a3 = buff_A + (g + 1)*cs_A;
656 
657  gamma23 = g23->real;
658  sigma23 = g23->imag;
659 
660  is_ident23 = ( gamma23 == one && sigma23 == zero );
661 
662  if ( !is_ident23 )
663  MAC_Apply_G_mx2_opd( m_A,
664  &gamma23,
665  &sigma23,
666  a2, rs_A,
667  a3, rs_A );
668  }
669  }
670 
671  return FLA_SUCCESS;
672 }
FLA_Error FLA_Apply_G_rf_opd_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var1.c:164
double bl1_d0(void)
Definition: bl1_constants.c:118
double bl1_d1(void)
Definition: bl1_constants.c:54
dcomplex
Definition: blis_type_defs.h:138
double real
Definition: blis_type_defs.h:139
double imag
Definition: blis_type_defs.h:139

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_opd_var1(), i, dcomplex::imag, n_left, and dcomplex::real.

Referenced by FLA_Apply_G_rf_opt_var6().

◆ FLA_Apply_G_rf_ops_var6()

FLA_Error FLA_Apply_G_rf_ops_var6 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)
117 {
118  float one = bl1_s1();
119  float zero = bl1_s0();
120  float gamma12;
121  float sigma12;
122  float gamma23;
123  float sigma23;
124  float* a1;
125  float* a2;
126  float* a3;
127  scomplex* g12;
128  scomplex* g23;
129  int i, j, g, k;
130  int nG, nG_app;
131  int n_iter;
132  int n_left;
133  int k_minus_1;
134  int n_fuse;
135  int is_ident12, is_ident23;
136 
137  k_minus_1 = k_G - 1;
138  nG = n_A - 1;
139  n_fuse = 2;
140 
141  // Use the simple variant for nG < (k - 1) or k == 1.
142  if ( nG < k_minus_1 || k_G == 1 )
143  {
144  FLA_Apply_G_rf_ops_var1( k_G,
145  m_A,
146  n_A,
147  buff_G, rs_G, cs_G,
148  buff_A, rs_A, cs_A );
149  return FLA_SUCCESS;
150  }
151 
152 
153  // Start-up phase.
154 
155  for ( j = 0; j < k_minus_1; ++j )
156  {
157  nG_app = j + 1;
158  n_iter = nG_app / n_fuse;
159  n_left = nG_app % n_fuse;
160 
161  for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
162  {
163  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
164  g23 = buff_G + (g )*rs_G + (k )*cs_G;
165  a1 = buff_A + (g - 1)*cs_A;
166  a2 = buff_A + (g )*cs_A;
167  a3 = buff_A + (g + 1)*cs_A;
168 
169  gamma12 = g12->real;
170  sigma12 = g12->imag;
171  gamma23 = g23->real;
172  sigma23 = g23->imag;
173 
174  is_ident12 = ( gamma12 == one && sigma12 == zero );
175  is_ident23 = ( gamma23 == one && sigma23 == zero );
176 
177  if ( !is_ident12 && is_ident23 )
178  {
179  // Apply only to columns 1 and 2.
180 
181  MAC_Apply_G_mx2_ops( m_A,
182  &gamma12,
183  &sigma12,
184  a1, rs_A,
185  a2, rs_A );
186  }
187  else if ( is_ident12 && !is_ident23 )
188  {
189  // Apply only to columns 2 and 3.
190 
191  MAC_Apply_G_mx2_ops( m_A,
192  &gamma23,
193  &sigma23,
194  a2, rs_A,
195  a3, rs_A );
196  }
197  else if ( !is_ident12 && !is_ident23 )
198  {
199  // Apply to all three columns.
200 
201  MAC_Apply_G_mx3b_ops( m_A,
202  &gamma12,
203  &sigma12,
204  &gamma23,
205  &sigma23,
206  a1, rs_A,
207  a2, rs_A,
208  a3, rs_A );
209  }
210  }
211  //for ( k = 0; k < n_left; k += 1, g -= 1 )
212  if ( n_left == 1 )
213  {
214  g23 = buff_G + (g )*rs_G + (k )*cs_G;
215  a2 = buff_A + (g )*cs_A;
216  a3 = buff_A + (g + 1)*cs_A;
217 
218  gamma23 = g23->real;
219  sigma23 = g23->imag;
220 
221  is_ident23 = ( gamma23 == one && sigma23 == zero );
222 
223  if ( !is_ident23 )
224  MAC_Apply_G_mx2_ops( m_A,
225  &gamma23,
226  &sigma23,
227  a2, rs_A,
228  a3, rs_A );
229  }
230  }
231 
232  // Pipeline stage
233 
234  for ( j = k_minus_1; j < nG; ++j )
235  {
236  nG_app = k_G;
237  n_iter = nG_app / n_fuse;
238  n_left = nG_app % n_fuse;
239 
240  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
241  {
242  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
243  g23 = buff_G + (g )*rs_G + (k )*cs_G;
244  a1 = buff_A + (g - 1)*cs_A;
245  a2 = buff_A + (g )*cs_A;
246  a3 = buff_A + (g + 1)*cs_A;
247 
248  gamma12 = g12->real;
249  sigma12 = g12->imag;
250  gamma23 = g23->real;
251  sigma23 = g23->imag;
252 
253  is_ident12 = ( gamma12 == one && sigma12 == zero );
254  is_ident23 = ( gamma23 == one && sigma23 == zero );
255 
256  if ( !is_ident12 && is_ident23 )
257  {
258  // Apply only to columns 1 and 2.
259 
260  MAC_Apply_G_mx2_ops( m_A,
261  &gamma12,
262  &sigma12,
263  a1, rs_A,
264  a2, rs_A );
265  }
266  else if ( is_ident12 && !is_ident23 )
267  {
268  // Apply only to columns 2 and 3.
269 
270  MAC_Apply_G_mx2_ops( m_A,
271  &gamma23,
272  &sigma23,
273  a2, rs_A,
274  a3, rs_A );
275  }
276  else if ( !is_ident12 && !is_ident23 )
277  {
278  // Apply to all three columns.
279 
280  MAC_Apply_G_mx3b_ops( m_A,
281  &gamma12,
282  &sigma12,
283  &gamma23,
284  &sigma23,
285  a1, rs_A,
286  a2, rs_A,
287  a3, rs_A );
288  }
289  }
290  //for ( k = 0; k < n_left; k += 1, g -= 1 )
291  if ( n_left == 1 )
292  {
293  g23 = buff_G + (g )*rs_G + (k )*cs_G;
294  a2 = buff_A + (g )*cs_A;
295  a3 = buff_A + (g + 1)*cs_A;
296 
297  gamma23 = g23->real;
298  sigma23 = g23->imag;
299 
300  is_ident23 = ( gamma23 == one && sigma23 == zero );
301 
302  if ( !is_ident23 )
303  MAC_Apply_G_mx2_ops( m_A,
304  &gamma23,
305  &sigma23,
306  a2, rs_A,
307  a3, rs_A );
308  }
309  }
310 
311  // Shutdown stage
312 
313  for ( j = 1; j < k_G; ++j )
314  {
315  nG_app = k_G - j;
316  n_iter = nG_app / n_fuse;
317  n_left = nG_app % n_fuse;
318 
319  for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
320  {
321  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
322  g23 = buff_G + (g )*rs_G + (k )*cs_G;
323  a1 = buff_A + (g - 1)*cs_A;
324  a2 = buff_A + (g )*cs_A;
325  a3 = buff_A + (g + 1)*cs_A;
326 
327  gamma12 = g12->real;
328  sigma12 = g12->imag;
329  gamma23 = g23->real;
330  sigma23 = g23->imag;
331 
332  is_ident12 = ( gamma12 == one && sigma12 == zero );
333  is_ident23 = ( gamma23 == one && sigma23 == zero );
334 
335  if ( !is_ident12 && is_ident23 )
336  {
337  // Apply only to columns 1 and 2.
338 
339  MAC_Apply_G_mx2_ops( m_A,
340  &gamma12,
341  &sigma12,
342  a1, rs_A,
343  a2, rs_A );
344  }
345  else if ( is_ident12 && !is_ident23 )
346  {
347  // Apply only to columns 2 and 3.
348 
349  MAC_Apply_G_mx2_ops( m_A,
350  &gamma23,
351  &sigma23,
352  a2, rs_A,
353  a3, rs_A );
354  }
355  else if ( !is_ident12 && !is_ident23 )
356  {
357  // Apply to all three columns.
358 
359  MAC_Apply_G_mx3b_ops( m_A,
360  &gamma12,
361  &sigma12,
362  &gamma23,
363  &sigma23,
364  a1, rs_A,
365  a2, rs_A,
366  a3, rs_A );
367  }
368  }
369  //for ( k = 0; k < nG_app_left; k += 1, g -= 1 )
370  if ( n_left == 1 )
371  {
372  g23 = buff_G + (g )*rs_G + (k )*cs_G;
373  a2 = buff_A + (g )*cs_A;
374  a3 = buff_A + (g + 1)*cs_A;
375 
376  gamma23 = g23->real;
377  sigma23 = g23->imag;
378 
379  is_ident23 = ( gamma23 == one && sigma23 == zero );
380 
381  if ( !is_ident23 )
382  MAC_Apply_G_mx2_ops( m_A,
383  &gamma23,
384  &sigma23,
385  a2, rs_A,
386  a3, rs_A );
387  }
388  }
389 
390  return FLA_SUCCESS;
391 }
FLA_Error FLA_Apply_G_rf_ops_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var1.c:113

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_ops_var1(), i, scomplex::imag, n_left, and scomplex::real.

Referenced by FLA_Apply_G_rf_opt_var6().

◆ FLA_Apply_G_rf_opt_var6()

FLA_Error FLA_Apply_G_rf_opt_var6 ( FLA_Obj  G,
FLA_Obj  A 
)
32 {
33  FLA_Datatype datatype;
34  int k_G, m_A, n_A;
35  int rs_G, cs_G;
36  int rs_A, cs_A;
37 
38  datatype = FLA_Obj_datatype( A );
39 
40  k_G = FLA_Obj_width( G );
41  m_A = FLA_Obj_length( A );
42  n_A = FLA_Obj_width( A );
43 
44  rs_G = FLA_Obj_row_stride( G );
45  cs_G = FLA_Obj_col_stride( G );
46 
47  rs_A = FLA_Obj_row_stride( A );
48  cs_A = FLA_Obj_col_stride( A );
49 
50  switch ( datatype )
51  {
52  case FLA_FLOAT:
53  {
54  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
55  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56 
57  FLA_Apply_G_rf_ops_var6( k_G,
58  m_A,
59  n_A,
60  buff_G, rs_G, cs_G,
61  buff_A, rs_A, cs_A );
62 
63  break;
64  }
65 
66  case FLA_DOUBLE:
67  {
68  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
69  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70 
71  FLA_Apply_G_rf_opd_var6( k_G,
72  m_A,
73  n_A,
74  buff_G, rs_G, cs_G,
75  buff_A, rs_A, cs_A );
76 
77  break;
78  }
79 
80  case FLA_COMPLEX:
81  {
82  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
83  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
84 
85  FLA_Apply_G_rf_opc_var6( k_G,
86  m_A,
87  n_A,
88  buff_G, rs_G, cs_G,
89  buff_A, rs_A, cs_A );
90 
91  break;
92  }
93 
94  case FLA_DOUBLE_COMPLEX:
95  {
96  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
97  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
98 
99  FLA_Apply_G_rf_opz_var6( k_G,
100  m_A,
101  n_A,
102  buff_G, rs_G, cs_G,
103  buff_A, rs_A, cs_A );
104 
105  break;
106  }
107  }
108 
109  return FLA_SUCCESS;
110 }
FLA_Error FLA_Apply_G_rf_opc_var6(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var6.c:674
FLA_Error FLA_Apply_G_rf_opd_var6(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var6.c:393
FLA_Error FLA_Apply_G_rf_ops_var6(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var6.c:112
FLA_Error FLA_Apply_G_rf_opz_var6(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var6.c:955
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49

References FLA_Apply_G_rf_opc_var6(), FLA_Apply_G_rf_opd_var6(), FLA_Apply_G_rf_ops_var6(), FLA_Apply_G_rf_opz_var6(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

◆ FLA_Apply_G_rf_opz_var6()

FLA_Error FLA_Apply_G_rf_opz_var6 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A 
)
960 {
961  double one = bl1_d1();
962  double zero = bl1_d0();
963  double gamma12;
964  double sigma12;
965  double gamma23;
966  double sigma23;
967  dcomplex* a1;
968  dcomplex* a2;
969  dcomplex* a3;
970  dcomplex* g12;
971  dcomplex* g23;
972  int i, j, g, k;
973  int nG, nG_app;
974  int n_iter;
975  int n_left;
976  int k_minus_1;
977  int n_fuse;
978  int is_ident12, is_ident23;
979 
980  k_minus_1 = k_G - 1;
981  nG = n_A - 1;
982  n_fuse = 2;
983 
984  // Use the simple variant for nG < (k - 1) or k == 1.
985  if ( nG < k_minus_1 || k_G == 1 )
986  {
987  FLA_Apply_G_rf_opz_var1( k_G,
988  m_A,
989  n_A,
990  buff_G, rs_G, cs_G,
991  buff_A, rs_A, cs_A );
992  return FLA_SUCCESS;
993  }
994 
995 
996  // Start-up phase.
997 
998  for ( j = 0; j < k_minus_1; ++j )
999  {
1000  nG_app = j + 1;
1001  n_iter = nG_app / n_fuse;
1002  n_left = nG_app % n_fuse;
1003 
1004  for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
1005  {
1006  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1007  g23 = buff_G + (g )*rs_G + (k )*cs_G;
1008  a1 = buff_A + (g - 1)*cs_A;
1009  a2 = buff_A + (g )*cs_A;
1010  a3 = buff_A + (g + 1)*cs_A;
1011 
1012  gamma12 = g12->real;
1013  sigma12 = g12->imag;
1014  gamma23 = g23->real;
1015  sigma23 = g23->imag;
1016 
1017  is_ident12 = ( gamma12 == one && sigma12 == zero );
1018  is_ident23 = ( gamma23 == one && sigma23 == zero );
1019 
1020  if ( !is_ident12 && is_ident23 )
1021  {
1022  // Apply only to columns 1 and 2.
1023 
1024  MAC_Apply_G_mx2_opz( m_A,
1025  &gamma12,
1026  &sigma12,
1027  a1, rs_A,
1028  a2, rs_A );
1029  }
1030  else if ( is_ident12 && !is_ident23 )
1031  {
1032  // Apply only to columns 2 and 3.
1033 
1034  MAC_Apply_G_mx2_opz( m_A,
1035  &gamma23,
1036  &sigma23,
1037  a2, rs_A,
1038  a3, rs_A );
1039  }
1040  else if ( !is_ident12 && !is_ident23 )
1041  {
1042  // Apply to all three columns.
1043 
1044  MAC_Apply_G_mx3b_opz( m_A,
1045  &gamma12,
1046  &sigma12,
1047  &gamma23,
1048  &sigma23,
1049  a1, rs_A,
1050  a2, rs_A,
1051  a3, rs_A );
1052  }
1053  }
1054  //for ( k = 0; k < n_left; k += 1, g -= 1 )
1055  if ( n_left == 1 )
1056  {
1057  g23 = buff_G + (g )*rs_G + (k )*cs_G;
1058  a2 = buff_A + (g )*cs_A;
1059  a3 = buff_A + (g + 1)*cs_A;
1060 
1061  gamma23 = g23->real;
1062  sigma23 = g23->imag;
1063 
1064  is_ident23 = ( gamma23 == one && sigma23 == zero );
1065 
1066  if ( !is_ident23 )
1067  MAC_Apply_G_mx2_opz( m_A,
1068  &gamma23,
1069  &sigma23,
1070  a2, rs_A,
1071  a3, rs_A );
1072  }
1073  }
1074 
1075  // Pipeline stage
1076 
1077  for ( j = k_minus_1; j < nG; ++j )
1078  {
1079  nG_app = k_G;
1080  n_iter = nG_app / n_fuse;
1081  n_left = nG_app % n_fuse;
1082 
1083  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
1084  {
1085  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1086  g23 = buff_G + (g )*rs_G + (k )*cs_G;
1087  a1 = buff_A + (g - 1)*cs_A;
1088  a2 = buff_A + (g )*cs_A;
1089  a3 = buff_A + (g + 1)*cs_A;
1090 
1091  gamma12 = g12->real;
1092  sigma12 = g12->imag;
1093  gamma23 = g23->real;
1094  sigma23 = g23->imag;
1095 
1096  is_ident12 = ( gamma12 == one && sigma12 == zero );
1097  is_ident23 = ( gamma23 == one && sigma23 == zero );
1098 
1099  if ( !is_ident12 && is_ident23 )
1100  {
1101  // Apply only to columns 1 and 2.
1102 
1103  MAC_Apply_G_mx2_opz( m_A,
1104  &gamma12,
1105  &sigma12,
1106  a1, rs_A,
1107  a2, rs_A );
1108  }
1109  else if ( is_ident12 && !is_ident23 )
1110  {
1111  // Apply only to columns 2 and 3.
1112 
1113  MAC_Apply_G_mx2_opz( m_A,
1114  &gamma23,
1115  &sigma23,
1116  a2, rs_A,
1117  a3, rs_A );
1118  }
1119  else if ( !is_ident12 && !is_ident23 )
1120  {
1121  // Apply to all three columns.
1122 
1123  MAC_Apply_G_mx3b_opz( m_A,
1124  &gamma12,
1125  &sigma12,
1126  &gamma23,
1127  &sigma23,
1128  a1, rs_A,
1129  a2, rs_A,
1130  a3, rs_A );
1131  }
1132  }
1133  //for ( k = 0; k < n_left; k += 1, g -= 1 )
1134  if ( n_left == 1 )
1135  {
1136  g23 = buff_G + (g )*rs_G + (k )*cs_G;
1137  a2 = buff_A + (g )*cs_A;
1138  a3 = buff_A + (g + 1)*cs_A;
1139 
1140  gamma23 = g23->real;
1141  sigma23 = g23->imag;
1142 
1143  is_ident23 = ( gamma23 == one && sigma23 == zero );
1144 
1145  if ( !is_ident23 )
1146  MAC_Apply_G_mx2_opz( m_A,
1147  &gamma23,
1148  &sigma23,
1149  a2, rs_A,
1150  a3, rs_A );
1151  }
1152  }
1153 
1154  // Shutdown stage
1155 
1156  for ( j = 1; j < k_G; ++j )
1157  {
1158  nG_app = k_G - j;
1159  n_iter = nG_app / n_fuse;
1160  n_left = nG_app % n_fuse;
1161 
1162  for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
1163  {
1164  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1165  g23 = buff_G + (g )*rs_G + (k )*cs_G;
1166  a1 = buff_A + (g - 1)*cs_A;
1167  a2 = buff_A + (g )*cs_A;
1168  a3 = buff_A + (g + 1)*cs_A;
1169 
1170  gamma12 = g12->real;
1171  sigma12 = g12->imag;
1172  gamma23 = g23->real;
1173  sigma23 = g23->imag;
1174 
1175  is_ident12 = ( gamma12 == one && sigma12 == zero );
1176  is_ident23 = ( gamma23 == one && sigma23 == zero );
1177 
1178  if ( !is_ident12 && is_ident23 )
1179  {
1180  // Apply only to columns 1 and 2.
1181 
1182  MAC_Apply_G_mx2_opz( m_A,
1183  &gamma12,
1184  &sigma12,
1185  a1, rs_A,
1186  a2, rs_A );
1187  }
1188  else if ( is_ident12 && !is_ident23 )
1189  {
1190  // Apply only to columns 2 and 3.
1191 
1192  MAC_Apply_G_mx2_opz( m_A,
1193  &gamma23,
1194  &sigma23,
1195  a2, rs_A,
1196  a3, rs_A );
1197  }
1198  else if ( !is_ident12 && !is_ident23 )
1199  {
1200  // Apply to all three columns.
1201 
1202  MAC_Apply_G_mx3b_opz( m_A,
1203  &gamma12,
1204  &sigma12,
1205  &gamma23,
1206  &sigma23,
1207  a1, rs_A,
1208  a2, rs_A,
1209  a3, rs_A );
1210  }
1211  }
1212  //for ( k = 0; k < nG_app_left; k += 1, g -= 1 )
1213  if ( n_left == 1 )
1214  {
1215  g23 = buff_G + (g )*rs_G + (k )*cs_G;
1216  a2 = buff_A + (g )*cs_A;
1217  a3 = buff_A + (g + 1)*cs_A;
1218 
1219  gamma23 = g23->real;
1220  sigma23 = g23->imag;
1221 
1222  is_ident23 = ( gamma23 == one && sigma23 == zero );
1223 
1224  if ( !is_ident23 )
1225  MAC_Apply_G_mx2_opz( m_A,
1226  &gamma23,
1227  &sigma23,
1228  a2, rs_A,
1229  a3, rs_A );
1230  }
1231  }
1232 
1233  return FLA_SUCCESS;
1234 }
FLA_Error FLA_Apply_G_rf_opz_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var1.c:267

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_opz_var1(), i, dcomplex::imag, n_left, and dcomplex::real.

Referenced by FLA_Apply_G_rf_opt_var6().