forked from OSchip/llvm-project
[Libomptarget] Add checks for CUDA subarchitecture using new info
This patch extends the `is_valid_binary` routine to also check if the binary's architecture string matches the one parsed from the runtime. This should allow us to only use the binary whose compute capability matches, allowing us to support basic multi-architecture binaries for CUDA. Depends on D127432. Reviewed By: jdoerfert. Differential Revision: https://reviews.llvm.org/D127505
This commit is contained in:
parent
fbcb1ee7f3
commit
e01ce4e88a
|
@ -1519,6 +1519,43 @@ int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
|
|||
return elf_check_machine(Image, /* EM_CUDA */ 190);
|
||||
}
|
||||
|
||||
/// Check whether \p image is usable by this plugin, additionally taking the
/// subarchitecture recorded in \p info into account.
///
/// The image must first pass __tgt_rtl_is_valid_binary. If no image
/// information is available (\p info is null) or it does not name an
/// architecture, the image is assumed to be compatible. Otherwise the
/// architecture string is compared against "sm_<major><minor>" built from the
/// compute capability of every device reported by cuDeviceGetCount; the image
/// is rejected as soon as one device does not match or a CUDA query fails.
///
/// \returns non-zero (true) if the image is considered compatible, zero
/// (false) otherwise.
int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *image,
                                       __tgt_image_info *info) {
  if (!__tgt_rtl_is_valid_binary(image))
    return false;

  // No image information, or a subarchitecture was not specified. Assume it is
  // compatible. The null check on `info` guards the dereference below.
  if (!info || !info->Arch)
    return true;

  int32_t NumberOfDevices = 0;
  if (cuDeviceGetCount(&NumberOfDevices) != CUDA_SUCCESS)
    return false;

  for (int32_t DeviceId = 0; DeviceId < NumberOfDevices; ++DeviceId) {
    CUdevice Device;
    if (cuDeviceGet(&Device, DeviceId) != CUDA_SUCCESS)
      return false;

    int32_t Major = 0;
    int32_t Minor = 0;
    if (cuDeviceGetAttribute(&Major,
                             CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
                             Device) != CUDA_SUCCESS)
      return false;
    if (cuDeviceGetAttribute(&Minor,
                             CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
                             Device) != CUDA_SUCCESS)
      return false;

    // Every device has to match: a single mismatching device rejects the image.
    std::string ArchStr = "sm_" + std::to_string(Major) + std::to_string(Minor);
    if (ArchStr != info->Arch)
      return false;
  }

  DP("Image has compatible compute capability: %s\n", info->Arch);
  return true;
}
|
||||
|
||||
/// Return the device count reported by DeviceRTL.
int32_t __tgt_rtl_number_of_devices() {
  const int32_t DeviceCount = DeviceRTL.getNumOfDevices();
  return DeviceCount;
}
|
||||
|
||||
int64_t __tgt_rtl_init_requires(int64_t RequiresFlags) {
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
VERS1.0 {
|
||||
global:
|
||||
__tgt_rtl_is_valid_binary;
|
||||
__tgt_rtl_is_valid_binary_info;
|
||||
__tgt_rtl_is_data_exchangable;
|
||||
__tgt_rtl_number_of_devices;
|
||||
__tgt_rtl_init_requires;
|
||||
|
|
Loading…
Reference in New Issue