11 void CUDADeviceKernels::load(CUDADevice *device)
13 CUmodule cuModule = device->cuModule;
16 CUDADeviceKernel &
kernel = kernels_[i];
23 const std::string function_name = std::string(
"kernel_gpu_") +
25 cuda_device_assert(device,
26 cuModuleGetFunction(&
kernel.function, cuModule, function_name.c_str()));
29 cuda_device_assert(device, cuFuncSetCacheConfig(
kernel.function, CU_FUNC_CACHE_PREFER_L1));
33 cuOccupancyMaxPotentialBlockSize(
37 LOG(ERROR) <<
"Unable to load kernel " << function_name;
46 return kernels_[(int)
kernel];
51 return kernels_[(int)
kernel].
function !=
nullptr;
#define CCL_NAMESPACE_END
CCL_NAMESPACE_BEGIN const char * device_kernel_as_string(DeviceKernel kernel)
SyclQueue void void size_t num_bytes SyclQueue void const char void *memory_device_pointer KernelContext int kernel
@ DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL