[Libomptarget] Add checks for CUDA subarchitecture using new info

This patch adds `__tgt_rtl_is_valid_binary_info`, which extends the
`is_valid_binary` check to also verify that the binary's architecture
string matches the compute capability reported by the CUDA driver for the
available devices. This should allow us to use only the binary whose
compute capability matches, allowing us to support basic multi-architecture
binaries for CUDA.

Depends on D127432

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D127505
This commit is contained in:
Joseph Huber 2022-06-10 09:37:21 -04:00
parent fbcb1ee7f3
commit e01ce4e88a
2 changed files with 38 additions and 0 deletions

View File

@ -1519,6 +1519,43 @@ int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
return elf_check_machine(Image, /* EM_CUDA */ 190);
}
/// Decide whether \p image is usable by this plugin, additionally taking the
/// subarchitecture recorded in \p info into account.
///
/// The image must first pass __tgt_rtl_is_valid_binary. If \p info carries no
/// architecture string the image is accepted as-is. Otherwise the compute
/// capability of every device reported by cuDeviceGetCount is rendered as
/// "sm_<major><minor>" and compared against \p info->Arch; the first device
/// that differs, or any failing driver query, rejects the image. When the
/// driver reports zero devices the loop body never runs and the image is
/// accepted.
///
/// NOTE(review): \p info is dereferenced without a null check; this presumes
/// callers always pass a valid pointer -- confirm against the call sites.
///
/// \returns true (non-zero) if the image is considered compatible, false (0)
/// otherwise.
int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *image,
                                       __tgt_image_info *info) {
  // An image that fails the basic validity check is rejected before its
  // architecture is even looked at.
  if (!__tgt_rtl_is_valid_binary(image))
    return false;

  // Without a recorded subarchitecture there is nothing to compare against,
  // so the image is assumed to be compatible.
  if (info->Arch == nullptr)
    return true;

  int32_t DeviceCount = 0;
  if (cuDeviceGetCount(&DeviceCount) != CUDA_SUCCESS)
    return false;

  for (int32_t Ordinal = 0; Ordinal < DeviceCount; ++Ordinal) {
    CUdevice Handle;
    if (cuDeviceGet(&Handle, Ordinal) != CUDA_SUCCESS)
      return false;

    // Query major first, then minor; the short-circuit keeps the minor query
    // from being issued once the major query has failed.
    int32_t CapMajor, CapMinor;
    const bool QueriedCapability =
        cuDeviceGetAttribute(&CapMajor,
                             CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
                             Handle) == CUDA_SUCCESS &&
        cuDeviceGetAttribute(&CapMinor,
                             CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
                             Handle) == CUDA_SUCCESS;
    if (!QueriedCapability)
      return false;

    // Build the "sm_XY" name for this device and require an exact match.
    std::string DeviceArch = "sm_";
    DeviceArch += std::to_string(CapMajor);
    DeviceArch += std::to_string(CapMinor);
    if (DeviceArch.compare(info->Arch) != 0)
      return false;
  }

  DP("Image has compatible compute capability: %s\n", info->Arch);
  return true;
}
/// Report the device count of this plugin by forwarding to
/// DeviceRTL.getNumOfDevices().
int32_t __tgt_rtl_number_of_devices() {
  return DeviceRTL.getNumOfDevices();
}
int64_t __tgt_rtl_init_requires(int64_t RequiresFlags) {

View File

@ -1,6 +1,7 @@
VERS1.0 {
global:
__tgt_rtl_is_valid_binary;
__tgt_rtl_is_valid_binary_info;
__tgt_rtl_is_data_exchangable;
__tgt_rtl_number_of_devices;
__tgt_rtl_init_requires;