libflame  revision_anchor
Functions
FLA_Axpy_external_gpu.c File Reference

(r)

Functions

FLA_Error FLA_Axpy_external_gpu (FLA_Obj alpha, FLA_Obj A, void *A_gpu, FLA_Obj B, void *B_gpu)
 

Function Documentation

◆ FLA_Axpy_external_gpu()

/*
 * FLA_Axpy_external_gpu
 *
 * Issues one cuBLAS axpy call (y := alpha * x + y) per column of B, adding
 * alpha times the matching column of the A buffer into the B buffer.
 *
 * Parameters:
 *   alpha  - object holding the scalar; read through the FLA_*_PTR macro
 *            that corresponds to A's datatype.
 *   A      - object describing the source operand. Only its datatype,
 *            its length and its zero-dimension status are queried here.
 *   A_gpu  - buffer handed to cuBLAS as the x operand (presumably the
 *            device-resident copy of A; confirm against the caller).
 *   B      - object describing the destination operand. Its length and
 *            width determine the per-call element count and the number of
 *            calls.
 *   B_gpu  - buffer handed to cuBLAS as the y operand (presumably the
 *            device-resident copy of B; confirm against the caller).
 *
 * Returns:
 *   FLA_SUCCESS in every path visible here. This includes the early exit
 *   when A has a zero dimension and the case where A's datatype matches
 *   none of the four handled types (no cuBLAS call is made then).
 *
 * Notes:
 *   - Column j of each buffer is addressed at offset j * length with unit
 *     stride inside the column, i.e. the buffers are treated as packed
 *     column-major arrays whose leading dimension equals the object length.
 *   - The element count per call and the column count both come from B;
 *     only the column offset into A_gpu comes from A.
 *   - cuBLAS status is not inspected after the calls.
 */
FLA_Error FLA_Axpy_external_gpu( FLA_Obj alpha, FLA_Obj A, void* A_gpu, FLA_Obj B, void* B_gpu )
{
  FLA_Datatype dt;
  int          n_rows, n_cols;
  int          col_stride_A;
  int          col_stride_B;
  int          col;

  /* Argument validation runs only under full error checking. */
  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
  {
    FLA_Axpy_check( alpha, A, B );
  }

  /* Nothing to accumulate when A is empty. */
  if ( FLA_Obj_has_zero_dim( A ) )
  {
    return FLA_SUCCESS;
  }

  dt = FLA_Obj_datatype( A );

  /* Distance, in elements, between consecutive columns of the A buffer. */
  col_stride_A = FLA_Obj_length( A );

  /* B supplies the per-column element count and the number of columns. */
  n_rows       = FLA_Obj_length( B );
  n_cols       = FLA_Obj_width( B );
  col_stride_B = FLA_Obj_length( B );

  switch ( dt )
  {
    case FLA_FLOAT:
    {
      float* alpha_p = ( float* ) FLA_FLOAT_PTR( alpha );
      float* x_base  = ( float* ) A_gpu;
      float* y_base  = ( float* ) B_gpu;

      for ( col = 0; col < n_cols; col++ )
      {
        cublasSaxpy( n_rows,
                     *alpha_p,
                     x_base + col * col_stride_A, 1,
                     y_base + col * col_stride_B, 1 );
      }

      break;
    }

    case FLA_DOUBLE:
    {
      double* alpha_p = ( double* ) FLA_DOUBLE_PTR( alpha );
      double* x_base  = ( double* ) A_gpu;
      double* y_base  = ( double* ) B_gpu;

      for ( col = 0; col < n_cols; col++ )
      {
        cublasDaxpy( n_rows,
                     *alpha_p,
                     x_base + col * col_stride_A, 1,
                     y_base + col * col_stride_B, 1 );
      }

      break;
    }

    case FLA_COMPLEX:
    {
      cuComplex* alpha_p = ( cuComplex* ) FLA_COMPLEX_PTR( alpha );
      cuComplex* x_base  = ( cuComplex* ) A_gpu;
      cuComplex* y_base  = ( cuComplex* ) B_gpu;

      for ( col = 0; col < n_cols; col++ )
      {
        cublasCaxpy( n_rows,
                     *alpha_p,
                     x_base + col * col_stride_A, 1,
                     y_base + col * col_stride_B, 1 );
      }

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      cuDoubleComplex* alpha_p = ( cuDoubleComplex* ) FLA_DOUBLE_COMPLEX_PTR( alpha );
      cuDoubleComplex* x_base  = ( cuDoubleComplex* ) A_gpu;
      cuDoubleComplex* y_base  = ( cuDoubleComplex* ) B_gpu;

      for ( col = 0; col < n_cols; col++ )
      {
        cublasZaxpy( n_rows,
                     *alpha_p,
                     x_base + col * col_stride_A, 1,
                     y_base + col * col_stride_B, 1 );
      }

      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Axpy_check(FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition: FLA_Axpy_check.c:13
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Bool FLA_Obj_has_zero_dim(FLA_Obj A)
Definition: FLA_Query.c:400
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
unsigned int FLA_Check_error_level(void)
Definition: FLA_Check.c:18
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49
int i
Definition: bl1_axmyv2.c:145

References FLA_Axpy_check(), FLA_Check_error_level(), FLA_Obj_datatype(), FLA_Obj_has_zero_dim(), FLA_Obj_length(), FLA_Obj_width(), and i.

Referenced by FLASH_Queue_exec_task_gpu().