forked from OSchip/llvm-project
[HIP] Fix amdgcn builtin for long type
Currently some amdgcn builtins are defined with long int type, which causes invalid IR on Windows since long int is 32 bit on Windows whereas these builtins have 64 bit arguments. long long int type cannot be used since it is 128 bit in OpenCL. This patch uses 64 bit int type instead of long int to define 64 bit int arguments or return for amdgcn builtins. Reviewed by: Artem Belevich Differential Revision: https://reviews.llvm.org/D103563
This commit is contained in:
parent
986bef9782
commit
e42def62d8
|
@ -9,6 +9,11 @@
|
|||
// This file defines the AMDGPU-specific builtin function database. Users of
|
||||
// this file must define the BUILTIN macro to make use of this information.
|
||||
//
|
||||
// Note: (unsigned) long int type should be avoided in builtin definitions
|
||||
// since it has different size on Linux (64 bit) and Windows (32 bit).
|
||||
// (unsigned) long long int type should also be avoided, which is 64 bit for
|
||||
// C/C++/HIP but is 128 bit for OpenCL. Use `W` as width modifier in builtin
|
||||
// definitions since it is fixed for 64 bit.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// The format of this database matches clang/Basic/Builtins.def.
|
||||
|
@ -44,14 +49,14 @@ BUILTIN(__builtin_amdgcn_grid_size_z, "Ui", "nc")
|
|||
BUILTIN(__builtin_amdgcn_mbcnt_hi, "UiUiUi", "nc")
|
||||
BUILTIN(__builtin_amdgcn_mbcnt_lo, "UiUiUi", "nc")
|
||||
|
||||
TARGET_BUILTIN(__builtin_amdgcn_s_memtime, "LUi", "n", "s-memtime-inst")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_s_memtime, "WUi", "n", "s-memtime-inst")
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Instruction builtins.
|
||||
//===----------------------------------------------------------------------===//
|
||||
BUILTIN(__builtin_amdgcn_s_getreg, "UiIi", "n")
|
||||
BUILTIN(__builtin_amdgcn_s_setreg, "vIiUi", "n")
|
||||
BUILTIN(__builtin_amdgcn_s_getpc, "LUi", "n")
|
||||
BUILTIN(__builtin_amdgcn_s_getpc, "WUi", "n")
|
||||
BUILTIN(__builtin_amdgcn_s_waitcnt, "vIi", "n")
|
||||
BUILTIN(__builtin_amdgcn_s_sendmsg, "vIiUi", "n")
|
||||
BUILTIN(__builtin_amdgcn_s_sendmsghalt, "vIiUi", "n")
|
||||
|
@ -111,12 +116,12 @@ BUILTIN(__builtin_amdgcn_cubema, "ffff", "nc")
|
|||
BUILTIN(__builtin_amdgcn_s_sleep, "vIi", "n")
|
||||
BUILTIN(__builtin_amdgcn_s_incperflevel, "vIi", "n")
|
||||
BUILTIN(__builtin_amdgcn_s_decperflevel, "vIi", "n")
|
||||
BUILTIN(__builtin_amdgcn_uicmp, "LUiUiUiIi", "nc")
|
||||
BUILTIN(__builtin_amdgcn_uicmpl, "LUiLUiLUiIi", "nc")
|
||||
BUILTIN(__builtin_amdgcn_sicmp, "LUiiiIi", "nc")
|
||||
BUILTIN(__builtin_amdgcn_sicmpl, "LUiLiLiIi", "nc")
|
||||
BUILTIN(__builtin_amdgcn_fcmp, "LUiddIi", "nc")
|
||||
BUILTIN(__builtin_amdgcn_fcmpf, "LUiffIi", "nc")
|
||||
BUILTIN(__builtin_amdgcn_uicmp, "WUiUiUiIi", "nc")
|
||||
BUILTIN(__builtin_amdgcn_uicmpl, "WUiWUiWUiIi", "nc")
|
||||
BUILTIN(__builtin_amdgcn_sicmp, "WUiiiIi", "nc")
|
||||
BUILTIN(__builtin_amdgcn_sicmpl, "WUiWiWiIi", "nc")
|
||||
BUILTIN(__builtin_amdgcn_fcmp, "WUiddIi", "nc")
|
||||
BUILTIN(__builtin_amdgcn_fcmpf, "WUiffIi", "nc")
|
||||
BUILTIN(__builtin_amdgcn_ds_swizzle, "iiIi", "nc")
|
||||
BUILTIN(__builtin_amdgcn_ds_permute, "iii", "nc")
|
||||
BUILTIN(__builtin_amdgcn_ds_bpermute, "iii", "nc")
|
||||
|
@ -142,9 +147,9 @@ BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc")
|
|||
BUILTIN(__builtin_amdgcn_msad_u8, "UiUiUiUi", "nc")
|
||||
BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc")
|
||||
BUILTIN(__builtin_amdgcn_sad_u16, "UiUiUiUi", "nc")
|
||||
BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "LUiLUiUiLUi", "nc")
|
||||
BUILTIN(__builtin_amdgcn_mqsad_pk_u16_u8, "LUiLUiUiLUi", "nc")
|
||||
BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiLUiUiV4Ui", "nc")
|
||||
BUILTIN(__builtin_amdgcn_qsad_pk_u16_u8, "WUiWUiUiWUi", "nc")
|
||||
BUILTIN(__builtin_amdgcn_mqsad_pk_u16_u8, "WUiWUiUiWUi", "nc")
|
||||
BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiWUiUiV4Ui", "nc")
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// CI+ only builtins.
|
||||
|
@ -179,7 +184,7 @@ TARGET_BUILTIN(__builtin_amdgcn_frexp_manth, "hh", "nc", "16-bit-insts")
|
|||
TARGET_BUILTIN(__builtin_amdgcn_frexp_exph, "sh", "nc", "16-bit-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_fracth, "hh", "nc", "16-bit-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_classh, "bhi", "nc", "16-bit-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_s_memrealtime, "LUi", "n", "s-memrealtime")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_s_memrealtime, "WUi", "n", "s-memrealtime")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_mov_dpp, "iiIiIiIiIb", "nc", "dpp")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_update_dpp, "iiiIiIiIiIb", "nc", "dpp")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_s_dcache_wb, "v", "n", "gfx8-insts")
|
||||
|
@ -213,7 +218,7 @@ TARGET_BUILTIN(__builtin_amdgcn_mov_dpp8, "UiUiIUi", "nc", "gfx10-insts")
|
|||
//===----------------------------------------------------------------------===//
|
||||
// Special builtins.
|
||||
//===----------------------------------------------------------------------===//
|
||||
BUILTIN(__builtin_amdgcn_read_exec, "LUi", "nc")
|
||||
BUILTIN(__builtin_amdgcn_read_exec, "WUi", "nc")
|
||||
BUILTIN(__builtin_amdgcn_read_exec_lo, "Ui", "nc")
|
||||
BUILTIN(__builtin_amdgcn_read_exec_hi, "Ui", "nc")
|
||||
|
||||
|
|
|
@ -1,4 +1,11 @@
|
|||
// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s
|
||||
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 \
|
||||
// RUN: -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device -emit-llvm %s \
|
||||
// RUN: -o - | FileCheck %s
|
||||
|
||||
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 \
|
||||
// RUN: -aux-triple x86_64-pc-windows-msvc -fcuda-is-device -emit-llvm %s \
|
||||
// RUN: -o - | FileCheck %s
|
||||
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
// CHECK-LABEL: @_Z16use_dispatch_ptrPi(
|
||||
|
@ -22,3 +29,32 @@ void test_ds_fmax(float src) {
|
|||
__global__ void endpgm() {
|
||||
__builtin_amdgcn_endpgm();
|
||||
}
|
||||
|
||||
// Check the 64 bit argument is correctly passed to the intrinsic without truncation or assertion.
|
||||
|
||||
// CHECK-LABEL: @_Z14test_uicmp_i64
|
||||
// CHECK: store i64* %out, i64** %out.addr.ascast
|
||||
// CHECK-NEXT: store i64 %a, i64* %a.addr.ascast
|
||||
// CHECK-NEXT: store i64 %b, i64* %b.addr.ascast
|
||||
// CHECK-NEXT: %[[V0:.*]] = load i64, i64* %a.addr.ascast
|
||||
// CHECK-NEXT: %[[V1:.*]] = load i64, i64* %b.addr.ascast
|
||||
// CHECK-NEXT: %[[V2:.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 %0, i64 %1, i32 35)
|
||||
// CHECK-NEXT: %[[V3:.*]] = load i64*, i64** %out.addr.ascast
|
||||
// CHECK-NEXT: store i64 %[[V2]], i64* %[[V3]]
|
||||
// CHECK-NEXT: ret void
|
||||
__global__ void test_uicmp_i64(unsigned long long *out, unsigned long long a, unsigned long long b)
|
||||
{
|
||||
*out = __builtin_amdgcn_uicmpl(a, b, 30+5);
|
||||
}
|
||||
|
||||
// Check the 64 bit return value is correctly returned without truncation or assertion.
|
||||
|
||||
// CHECK-LABEL: @_Z14test_s_memtime
|
||||
// CHECK: %[[V1:.*]] = call i64 @llvm.amdgcn.s.memtime()
|
||||
// CHECK-NEXT: %[[PTR:.*]] = load i64*, i64** %out.addr.ascast
|
||||
// CHECK-NEXT: store i64 %[[V1]], i64* %[[PTR]]
|
||||
// CHECK-NEXT: ret void
|
||||
__global__ void test_s_memtime(unsigned long long* out)
|
||||
{
|
||||
*out = __builtin_amdgcn_s_memtime();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue