/* Fragment of the HIP backend initialisation (presumably the body of
 * device_hip_init(); its signature and several statements are not visible in
 * this excerpt). The code path is selected at compile time through WITH_HIP
 * and WITH_HIP_DYNLOAD. */
20 #if !defined(WITH_HIP)
22 #elif defined(WITH_HIP_DYNLOAD)
/* Static flag that starts out false. NOTE(review): it presumably records the
 * outcome of the checks below; the assignments are not visible here. */
24 static bool result =
false;
/* Initialise the HIPEW wrapper for HIP and keep its status code for the
 * branches below. */
30 int hipew_result = hipewInit(HIPEW_INIT_HIP);
31 if (hipew_result == HIPEW_SUCCESS) {
32 VLOG_INFO <<
"HIPEW initialization succeeded";
/* After a successful HIPEW init, look for precompiled kernels first and for a
 * HIPCC compiler path second; if neither is available the log below states
 * that HIP cannot be used. */
33 if (HIPDevice::have_precompiled_kernels()) {
37 else if (hipewCompilerPath() !=
NULL) {
38 VLOG_INFO <<
"Found HIPCC " << hipewCompilerPath();
42 VLOG_INFO <<
"Neither precompiled kernels nor HIPCC was found,"
43 <<
" unable to use HIP";
/* Failure diagnostics (presumably reached when HIPEW init did not succeed).
 * Two status codes are tested explicitly (atexit() handler setup, driver too
 * old); the last warning reports a failure to open the HIP dynamic library. */
47 if (hipew_result == HIPEW_ERROR_ATEXIT_FAILED) {
48 VLOG_WARNING <<
"HIPEW initialization failed: Error setting up atexit() handler";
50 else if (hipew_result == HIPEW_ERROR_OLD_DRIVER) {
52 <<
"HIPEW initialization failed: Driver version too old, requires AMD Radeon Pro "
53 "21.Q4 driver or newer";
56 VLOG_WARNING <<
"HIPEW initialization failed: Error opening HIP dynamic library";
/* Fragment of the HIP device factory (presumably device_hip_create(); the
 * signature is not visible in this excerpt). It returns a newly allocated
 * HIPDevice constructed from info, stats and profiler. */
69 return new HIPDevice(info, stats, profiler);
/* Fatal log for a request to create a HIP device without compiled-in support.
 * NOTE(review): the preprocessor guard separating this path from the return
 * above is not visible here. */
75 LOG(FATAL) <<
"Request to create HIP device without compiled-in support. Should never happen.";
/* HIP initialisation helper returning a hipError_t. The visible part is an
 * __except handler: it prints a "driver crashed" message to stderr and returns
 * hipErrorNoDevice, so the caller sees "no device" instead of a crash.
 * NOTE(review): the guarded initialisation call and any platform guard are not
 * visible in this excerpt. */
82 static hipError_t device_hip_safe_init()
/* EXCEPTION_EXECUTE_HANDLER makes the handler block below run for any
 * exception raised in the guarded section. */
88 __except (EXCEPTION_EXECUTE_HANDLER) {
91 fprintf(stderr,
"Cycles HIP: driver crashed, continuing without HIP.\n");
94 return hipErrorNoDevice;
/* Fragment of the HIP device enumeration (presumably device_hip_info(), which
 * fills `devices`; the signature and several statements are not visible in
 * this excerpt). */
/* Initialise HIP through device_hip_safe_init(). On failure the error string
 * is printed to stderr unless the code is hipErrorNoDevice. */
104 hipError_t
result = device_hip_safe_init();
105 if (
result != hipSuccess) {
106 if (
result != hipErrorNoDevice)
107 fprintf(stderr,
"HIP hipInit: %s\n", hipewErrorString(
result));
/* Report a failed device-count query (the query call itself is not visible in
 * this excerpt). */
113 if (
result != hipSuccess) {
114 fprintf(stderr,
"HIP hipGetDeviceCount: %s\n", hipewErrorString(
result));
/* Visit every device index from 0 up to, but not including, count. */
120 for (
int num = 0; num <
count; num++) {
/* Fetch the device name into `name` (256 is passed as the length argument) and
 * report a failure on stderr. */
123 result = hipDeviceGetName(name, 256, num);
124 if (
result != hipSuccess) {
125 fprintf(stderr,
"HIP :hipDeviceGetName: %s\n", hipewErrorString(
result));
/* NOTE(review): the body of this branch is not visible; presumably devices
 * that hipSupportsDevice() rejects are skipped. */
129 if (!hipSupportsDevice(num)) {
/* Peer access is queried only for a device index different from the current
 * one; the answer is written to can_access. */
145 if (num != peer_num) {
147 hipDeviceCanAccessPeer(&can_access, num, peer_num);
/* Read the PCI domain, bus and device identifiers into pci_location[0..2]. The
 * three values are then passed on as unsigned int arguments (the receiving
 * call is not visible here; presumably it formats the device id). */
152 int pci_location[3] = {0, 0, 0};
153 hipDeviceGetAttribute(&pci_location[0], hipDeviceAttributePciDomainID, num);
154 hipDeviceGetAttribute(&pci_location[1], hipDeviceAttributePciBusId, num);
155 hipDeviceGetAttribute(&pci_location[2], hipDeviceAttributePciDeviceId, num);
158 (
unsigned int)pci_location[0],
159 (
unsigned int)pci_location[1],
160 (
unsigned int)pci_location[2]);
/* timeout_attr receives the kernel execution timeout attribute. A device with
 * a non-zero timeout_attr and a zero preempt_attr is logged as a display
 * device and collected in display_devices.
 * NOTE(review): preempt_attr is initialised to 0 and no query that fills it is
 * visible in this excerpt. */
165 int timeout_attr = 0, preempt_attr = 0;
166 hipDeviceGetAttribute(&timeout_attr, hipDeviceAttributeKernelExecTimeout, num);
168 if (timeout_attr && !preempt_attr) {
169 VLOG_INFO <<
"Device is recognized as display.";
172 display_devices.push_back(info);
175 VLOG_INFO <<
"Device has compute preemption or is not used for display.";
178 VLOG_INFO <<
"Added device \"" << name <<
"\" with id \"" << info.
id <<
"\".";
/* Append the collected display devices after the entries already stored in
 * `devices`. */
181 if (!display_devices.empty())
182 devices.insert(
devices.end(), display_devices.begin(), display_devices.end());
/* Fragment of the HIP capabilities report (presumably
 * device_hip_capabilities(); the signature and several statements are not
 * visible in this excerpt). Failures are returned as message strings; on
 * success a text listing is accumulated in `capabilities`. */
/* Initialise HIP through device_hip_safe_init(). hipErrorNoDevice yields the
 * "No HIP device found" message, any other failure an error string built with
 * hipewErrorString(). */
191 hipError_t
result = device_hip_safe_init();
192 if (
result != hipSuccess) {
193 if (
result != hipErrorNoDevice) {
194 return string(
"Error initializing HIP: ") + hipewErrorString(
result);
196 return "No HIP device found\n";
/* Return an error string when the preceding query failed (the query call
 * itself is not visible in this excerpt). */
201 if (
result != hipSuccess) {
202 return string(
"Error getting devices: ") + hipewErrorString(
result);
/* Build the report device by device, for indices 0 up to count - 1. */
205 string capabilities =
"";
206 for (
int num = 0; num <
count; num++) {
/* NOTE(review): the body of this branch is not visible; presumably a device
 * whose name cannot be read is skipped. */
208 if (hipDeviceGetName(name, 256, num) != hipSuccess) {
/* One tab-indented line with the device name. */
211 capabilities += string(
"\t") + name +
"\n";
/* GET_ATTR(attr) queries hipDeviceAttribute<attr> for device `num` and, only
 * when the query succeeds, appends a line with the attribute name and its
 * integer value to `capabilities`. */
213 # define GET_ATTR(attr) \
215 if (hipDeviceGetAttribute(&value, hipDeviceAttribute##attr, num) == hipSuccess) { \
216 capabilities += string_printf("\t\thipDeviceAttribute" #attr "\t\t\t%d\n", value); \
223 GET_ATTR(MaxThreadsPerBlock);
224 GET_ATTR(MaxBlockDimX);
225 GET_ATTR(MaxBlockDimY);
226 GET_ATTR(MaxBlockDimZ);
227 GET_ATTR(MaxGridDimX);
228 GET_ATTR(MaxGridDimY);
229 GET_ATTR(MaxGridDimZ);
230 GET_ATTR(MaxSharedMemoryPerBlock);
231 GET_ATTR(TotalConstantMemory);
234 GET_ATTR(MaxRegistersPerBlock);
236 GET_ATTR(TextureAlignment);
237 GET_ATTR(MultiprocessorCount);
238 GET_ATTR(KernelExecTimeout);
239 GET_ATTR(Integrated);
240 GET_ATTR(CanMapHostMemory);
241 GET_ATTR(ComputeMode);
242 GET_ATTR(MaxTexture1DWidth);
243 GET_ATTR(MaxTexture2DWidth);
244 GET_ATTR(MaxTexture2DHeight);
245 GET_ATTR(MaxTexture3DWidth);
246 GET_ATTR(MaxTexture3DHeight);
247 GET_ATTR(MaxTexture3DDepth);
248 GET_ATTR(ConcurrentKernels);
249 GET_ATTR(EccEnabled);
250 GET_ATTR(MemoryClockRate);
251 GET_ATTR(MemoryBusWidth);
252 GET_ATTR(L2CacheSize);
253 GET_ATTR(MaxThreadsPerMultiProcessor);
254 GET_ATTR(ComputeCapabilityMajor);
255 GET_ATTR(ComputeCapabilityMinor);
256 GET_ATTR(MaxSharedMemoryPerMultiprocessor);
257 GET_ATTR(ManagedMemory);
258 GET_ATTR(IsMultiGpuBoard);
/* Blank line appended after the attribute lines. */
260 capabilities +=
"\n";
DenoiserTypeMask denoisers
#define CCL_NAMESPACE_END
void device_hip_info(vector< DeviceInfo > &devices)
Device * device_hip_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
string device_hip_capabilities()
CCL_NAMESPACE_BEGIN bool device_hip_init()
SyclQueue void void size_t num_bytes void
Vector< CPUDevice > devices
List of all CPUDevices. For every hardware thread, an instance of CPUDevice is created.
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)