forked from OSchip/llvm-project
[NVVM] Update intrinsic defintions to include the `nocallback` attribute
This patch adds the `nocallback` attribute to the NVVM intrinsics that did not use the `DefaultAttrsIntrinsic` method that includes it already. The `nocallback` attribute states that the intrinsic function cannot enter back into the caller's translation-unit. This allows as to determine that a function calling a `nocallback` function can have the `norecurse` attribute. This should be safe for all the NVVM intrinsics because they do not call other functions within the translation unit. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D125937
This commit is contained in:
parent
c35ca3a1c7
commit
dbffa4073c
|
@ -1235,34 +1235,34 @@ let TargetPrefix = "nvvm" in {
|
|||
DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
|
||||
|
||||
def int_nvvm_ff2bf16x2_rn : GCCBuiltin<"__nvvm_ff2bf16x2_rn">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
|
||||
def int_nvvm_ff2bf16x2_rn_relu : GCCBuiltin<"__nvvm_ff2bf16x2_rn_relu">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
|
||||
def int_nvvm_ff2bf16x2_rz : GCCBuiltin<"__nvvm_ff2bf16x2_rz">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
|
||||
def int_nvvm_ff2bf16x2_rz_relu : GCCBuiltin<"__nvvm_ff2bf16x2_rz_relu">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
|
||||
def int_nvvm_ff2f16x2_rn : GCCBuiltin<"__nvvm_ff2f16x2_rn">,
|
||||
Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
|
||||
def int_nvvm_ff2f16x2_rn_relu : GCCBuiltin<"__nvvm_ff2f16x2_rn_relu">,
|
||||
Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
|
||||
def int_nvvm_ff2f16x2_rz : GCCBuiltin<"__nvvm_ff2f16x2_rz">,
|
||||
Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
|
||||
def int_nvvm_ff2f16x2_rz_relu : GCCBuiltin<"__nvvm_ff2f16x2_rz_relu">,
|
||||
Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
|
||||
|
||||
def int_nvvm_f2bf16_rn : GCCBuiltin<"__nvvm_f2bf16_rn">,
|
||||
Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
|
||||
def int_nvvm_f2bf16_rn_relu : GCCBuiltin<"__nvvm_f2bf16_rn_relu">,
|
||||
Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
|
||||
def int_nvvm_f2bf16_rz : GCCBuiltin<"__nvvm_f2bf16_rz">,
|
||||
Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
|
||||
def int_nvvm_f2bf16_rz_relu : GCCBuiltin<"__nvvm_f2bf16_rz_relu">,
|
||||
Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
|
||||
|
||||
def int_nvvm_f2tf32_rna : GCCBuiltin<"__nvvm_f2tf32_rna">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
|
||||
|
||||
//
|
||||
// Bitcast
|
||||
|
@ -1287,20 +1287,20 @@ let TargetPrefix = "nvvm" in {
|
|||
// Atomics not available as llvm intrinsics.
|
||||
def int_nvvm_atomic_load_inc_32 : Intrinsic<[llvm_i32_ty],
|
||||
[LLVMAnyPointerType<llvm_i32_ty>, llvm_i32_ty],
|
||||
[IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
|
||||
[IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>]>;
|
||||
def int_nvvm_atomic_load_dec_32 : Intrinsic<[llvm_i32_ty],
|
||||
[LLVMAnyPointerType<llvm_i32_ty>, llvm_i32_ty],
|
||||
[IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
|
||||
[IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>]>;
|
||||
|
||||
class SCOPED_ATOMIC2_impl<LLVMType elty>
|
||||
: Intrinsic<[elty],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>, LLVMMatchType<0>],
|
||||
[IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
|
||||
[IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>]>;
|
||||
class SCOPED_ATOMIC3_impl<LLVMType elty>
|
||||
: Intrinsic<[elty],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>, LLVMMatchType<0>,
|
||||
LLVMMatchType<0>],
|
||||
[IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
|
||||
[IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>]>;
|
||||
|
||||
multiclass PTXAtomicWithScope2<LLVMType elty> {
|
||||
def _cta : SCOPED_ATOMIC2_impl<elty>;
|
||||
|
@ -1330,80 +1330,80 @@ let TargetPrefix = "nvvm" in {
|
|||
// The builtin for "bar.sync 0" is called __syncthreads. Unlike most of the
|
||||
// intrinsics in this file, this one is a user-facing API.
|
||||
def int_nvvm_barrier0 : GCCBuiltin<"__syncthreads">,
|
||||
Intrinsic<[], [], [IntrConvergent]>;
|
||||
Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
|
||||
// Synchronize all threads in the CTA at barrier 'n'.
|
||||
def int_nvvm_barrier_n : GCCBuiltin<"__nvvm_bar_n">,
|
||||
Intrinsic<[], [llvm_i32_ty], [IntrConvergent]>;
|
||||
Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
|
||||
// Synchronize 'm', a multiple of warp size, (arg 2) threads in
|
||||
// the CTA at barrier 'n' (arg 1).
|
||||
def int_nvvm_barrier : GCCBuiltin<"__nvvm_bar">,
|
||||
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrConvergent]>;
|
||||
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
|
||||
def int_nvvm_barrier0_popc : GCCBuiltin<"__nvvm_bar0_popc">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent]>;
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
|
||||
def int_nvvm_barrier0_and : GCCBuiltin<"__nvvm_bar0_and">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent]>;
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
|
||||
def int_nvvm_barrier0_or : GCCBuiltin<"__nvvm_bar0_or">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent]>;
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>;
|
||||
|
||||
def int_nvvm_bar_sync :
|
||||
Intrinsic<[], [llvm_i32_ty], [IntrConvergent]>,
|
||||
Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>,
|
||||
GCCBuiltin<"__nvvm_bar_sync">;
|
||||
def int_nvvm_bar_warp_sync :
|
||||
Intrinsic<[], [llvm_i32_ty], [IntrConvergent]>,
|
||||
Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>,
|
||||
GCCBuiltin<"__nvvm_bar_warp_sync">;
|
||||
|
||||
// barrier.sync id[, cnt]
|
||||
def int_nvvm_barrier_sync :
|
||||
Intrinsic<[], [llvm_i32_ty], [IntrConvergent]>,
|
||||
Intrinsic<[], [llvm_i32_ty], [IntrConvergent, IntrNoCallback]>,
|
||||
GCCBuiltin<"__nvvm_barrier_sync">;
|
||||
def int_nvvm_barrier_sync_cnt :
|
||||
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrConvergent]>,
|
||||
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoCallback]>,
|
||||
GCCBuiltin<"__nvvm_barrier_sync_cnt">;
|
||||
|
||||
// Membar
|
||||
def int_nvvm_membar_cta : GCCBuiltin<"__nvvm_membar_cta">,
|
||||
Intrinsic<[], [], []>;
|
||||
Intrinsic<[], [], [IntrNoCallback]>;
|
||||
def int_nvvm_membar_gl : GCCBuiltin<"__nvvm_membar_gl">,
|
||||
Intrinsic<[], [], []>;
|
||||
Intrinsic<[], [], [IntrNoCallback]>;
|
||||
def int_nvvm_membar_sys : GCCBuiltin<"__nvvm_membar_sys">,
|
||||
Intrinsic<[], [], []>;
|
||||
Intrinsic<[], [], [IntrNoCallback]>;
|
||||
|
||||
// Async Copy
|
||||
def int_nvvm_cp_async_mbarrier_arrive :
|
||||
GCCBuiltin<"__nvvm_cp_async_mbarrier_arrive">,
|
||||
Intrinsic<[],[llvm_i64ptr_ty],[IntrConvergent]>;
|
||||
Intrinsic<[],[llvm_i64ptr_ty],[IntrConvergent, IntrNoCallback]>;
|
||||
def int_nvvm_cp_async_mbarrier_arrive_shared :
|
||||
GCCBuiltin<"__nvvm_cp_async_mbarrier_arrive_shared">,
|
||||
Intrinsic<[],[llvm_shared_i64ptr_ty],[IntrConvergent]>;
|
||||
Intrinsic<[],[llvm_shared_i64ptr_ty],[IntrConvergent, IntrNoCallback]>;
|
||||
def int_nvvm_cp_async_mbarrier_arrive_noinc :
|
||||
GCCBuiltin<"__nvvm_cp_async_mbarrier_arrive_noinc">,
|
||||
Intrinsic<[],[llvm_i64ptr_ty],[IntrConvergent]>;
|
||||
Intrinsic<[],[llvm_i64ptr_ty],[IntrConvergent, IntrNoCallback]>;
|
||||
def int_nvvm_cp_async_mbarrier_arrive_noinc_shared :
|
||||
GCCBuiltin<"__nvvm_cp_async_mbarrier_arrive_noinc_shared">,
|
||||
Intrinsic<[],[llvm_shared_i64ptr_ty],[IntrConvergent]>;
|
||||
Intrinsic<[],[llvm_shared_i64ptr_ty],[IntrConvergent, IntrNoCallback]>;
|
||||
|
||||
def int_nvvm_cp_async_ca_shared_global_4 :
|
||||
GCCBuiltin<"__nvvm_cp_async_ca_shared_global_4">,
|
||||
Intrinsic<[],[llvm_shared_i8ptr_ty, llvm_global_i8ptr_ty],
|
||||
[IntrArgMemOnly, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
|
||||
[IntrArgMemOnly, IntrNoCallback, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
|
||||
WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>],
|
||||
"llvm.nvvm.cp.async.ca.shared.global.4">;
|
||||
def int_nvvm_cp_async_ca_shared_global_8 :
|
||||
GCCBuiltin<"__nvvm_cp_async_ca_shared_global_8">,
|
||||
Intrinsic<[],[llvm_shared_i8ptr_ty, llvm_global_i8ptr_ty],
|
||||
[IntrArgMemOnly, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
|
||||
[IntrArgMemOnly, IntrNoCallback, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
|
||||
WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>],
|
||||
"llvm.nvvm.cp.async.ca.shared.global.8">;
|
||||
def int_nvvm_cp_async_ca_shared_global_16 :
|
||||
GCCBuiltin<"__nvvm_cp_async_ca_shared_global_16">,
|
||||
Intrinsic<[],[llvm_shared_i8ptr_ty, llvm_global_i8ptr_ty],
|
||||
[IntrArgMemOnly, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
|
||||
[IntrArgMemOnly, IntrNoCallback, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
|
||||
WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>],
|
||||
"llvm.nvvm.cp.async.ca.shared.global.16">;
|
||||
def int_nvvm_cp_async_cg_shared_global_16 :
|
||||
GCCBuiltin<"__nvvm_cp_async_cg_shared_global_16">,
|
||||
Intrinsic<[],[llvm_shared_i8ptr_ty, llvm_global_i8ptr_ty],
|
||||
[IntrArgMemOnly, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
|
||||
[IntrArgMemOnly, IntrNoCallback, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
|
||||
WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>],
|
||||
"llvm.nvvm.cp.async.cg.shared.global.16">;
|
||||
|
||||
|
@ -1421,85 +1421,87 @@ def int_nvvm_cp_async_wait_all :
|
|||
|
||||
// mbarrier
|
||||
def int_nvvm_mbarrier_init : GCCBuiltin<"__nvvm_mbarrier_init">,
|
||||
Intrinsic<[],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent]>;
|
||||
Intrinsic<[],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
|
||||
def int_nvvm_mbarrier_init_shared :
|
||||
GCCBuiltin<"__nvvm_mbarrier_init_shared">,
|
||||
Intrinsic<[],[llvm_shared_i64ptr_ty, llvm_i32_ty],[IntrConvergent]>;
|
||||
Intrinsic<[],[llvm_shared_i64ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
|
||||
|
||||
def int_nvvm_mbarrier_inval : GCCBuiltin<"__nvvm_mbarrier_inval">,
|
||||
Intrinsic<[],[llvm_i64ptr_ty],
|
||||
[IntrConvergent, IntrWriteMem, IntrArgMemOnly,
|
||||
[IntrConvergent, IntrWriteMem, IntrArgMemOnly, IntrNoCallback,
|
||||
WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
|
||||
def int_nvvm_mbarrier_inval_shared :
|
||||
GCCBuiltin<"__nvvm_mbarrier_inval_shared">,
|
||||
Intrinsic<[],[llvm_shared_i64ptr_ty],
|
||||
[IntrConvergent, IntrWriteMem, IntrArgMemOnly,
|
||||
[IntrConvergent, IntrWriteMem, IntrArgMemOnly, IntrNoCallback,
|
||||
WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
|
||||
|
||||
def int_nvvm_mbarrier_arrive : GCCBuiltin<"__nvvm_mbarrier_arrive">,
|
||||
Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty],[IntrConvergent]>;
|
||||
Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty],[IntrConvergent, IntrNoCallback]>;
|
||||
def int_nvvm_mbarrier_arrive_shared :
|
||||
GCCBuiltin<"__nvvm_mbarrier_arrive_shared">,
|
||||
Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty],[IntrConvergent]>;
|
||||
Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty],[IntrConvergent, IntrNoCallback]>;
|
||||
def int_nvvm_mbarrier_arrive_noComplete :
|
||||
GCCBuiltin<"__nvvm_mbarrier_arrive_noComplete">,
|
||||
Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent]>;
|
||||
Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
|
||||
def int_nvvm_mbarrier_arrive_noComplete_shared :
|
||||
GCCBuiltin<"__nvvm_mbarrier_arrive_noComplete_shared">,
|
||||
Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty, llvm_i32_ty],[IntrConvergent]>;
|
||||
Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty,
|
||||
llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
|
||||
|
||||
def int_nvvm_mbarrier_arrive_drop :
|
||||
GCCBuiltin<"__nvvm_mbarrier_arrive_drop">,
|
||||
Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty],[IntrConvergent]>;
|
||||
Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty],[IntrConvergent, IntrNoCallback]>;
|
||||
def int_nvvm_mbarrier_arrive_drop_shared :
|
||||
GCCBuiltin<"__nvvm_mbarrier_arrive_drop_shared">,
|
||||
Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty],[IntrConvergent]>;
|
||||
Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty],[IntrConvergent, IntrNoCallback]>;
|
||||
def int_nvvm_mbarrier_arrive_drop_noComplete :
|
||||
GCCBuiltin<"__nvvm_mbarrier_arrive_drop_noComplete">,
|
||||
Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent]>;
|
||||
Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
|
||||
def int_nvvm_mbarrier_arrive_drop_noComplete_shared :
|
||||
GCCBuiltin<"__nvvm_mbarrier_arrive_drop_noComplete_shared">,
|
||||
Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty, llvm_i32_ty],[IntrConvergent]>;
|
||||
Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty,
|
||||
llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
|
||||
|
||||
def int_nvvm_mbarrier_test_wait :
|
||||
GCCBuiltin<"__nvvm_mbarrier_test_wait">,
|
||||
Intrinsic<[llvm_i1_ty],[llvm_i64ptr_ty, llvm_i64_ty],[IntrConvergent]>;
|
||||
Intrinsic<[llvm_i1_ty],[llvm_i64ptr_ty, llvm_i64_ty],[IntrConvergent, IntrNoCallback]>;
|
||||
def int_nvvm_mbarrier_test_wait_shared :
|
||||
GCCBuiltin<"__nvvm_mbarrier_test_wait_shared">,
|
||||
Intrinsic<[llvm_i1_ty],[llvm_shared_i64ptr_ty, llvm_i64_ty],[IntrConvergent]>;
|
||||
Intrinsic<[llvm_i1_ty],[llvm_shared_i64ptr_ty, llvm_i64_ty],[IntrConvergent, IntrNoCallback]>;
|
||||
|
||||
def int_nvvm_mbarrier_pending_count :
|
||||
GCCBuiltin<"__nvvm_mbarrier_pending_count">,
|
||||
Intrinsic<[llvm_i32_ty],[llvm_i64_ty],[IntrNoMem, IntrConvergent]>;
|
||||
Intrinsic<[llvm_i32_ty],[llvm_i64_ty],[IntrNoMem, IntrConvergent, IntrNoCallback]>;
|
||||
|
||||
// Generated within nvvm. Use for ldu on sm_20 or later. Second arg is the
|
||||
// pointer's alignment.
|
||||
def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
|
||||
[IntrReadMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>],
|
||||
[IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
|
||||
"llvm.nvvm.ldu.global.i">;
|
||||
def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
|
||||
[IntrReadMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>],
|
||||
[IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
|
||||
"llvm.nvvm.ldu.global.f">;
|
||||
def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
|
||||
[IntrReadMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>],
|
||||
[IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
|
||||
"llvm.nvvm.ldu.global.p">;
|
||||
|
||||
// Generated within nvvm. Use for ldg on sm_35 or later. Second arg is the
|
||||
// pointer's alignment.
|
||||
def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
|
||||
[IntrReadMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>],
|
||||
[IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
|
||||
"llvm.nvvm.ldg.global.i">;
|
||||
def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
|
||||
[IntrReadMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>],
|
||||
[IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
|
||||
"llvm.nvvm.ldg.global.f">;
|
||||
def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty],
|
||||
[LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
|
||||
[IntrReadMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>],
|
||||
[IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
|
||||
"llvm.nvvm.ldg.global.p">;
|
||||
|
||||
// Use for generic pointers
|
||||
|
@ -1540,7 +1542,7 @@ def int_nvvm_ptr_gen_to_constant: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
|
|||
// This is for params that are passed to kernel functions by pointer by-val.
|
||||
def int_nvvm_ptr_gen_to_param: Intrinsic<[llvm_anyptr_ty],
|
||||
[llvm_anyptr_ty],
|
||||
[IntrNoMem, IntrSpeculatable],
|
||||
[IntrNoMem, IntrSpeculatable, IntrNoCallback],
|
||||
"llvm.nvvm.ptr.gen.to.param">;
|
||||
|
||||
// Move intrinsics, used in nvvm internally
|
||||
|
@ -4353,13 +4355,13 @@ multiclass PTXReadSRegIntrinsic_v4i32<string regname> {
|
|||
// FIXME: Enable this once v4i32 support is enabled in back-end.
|
||||
// def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem, IntrSpeculatable]>;
|
||||
|
||||
def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
|
||||
def _x : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_x">;
|
||||
def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
|
||||
def _y : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_y">;
|
||||
def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
|
||||
def _z : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_z">;
|
||||
def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
|
||||
def _w : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_w">;
|
||||
}
|
||||
|
||||
|
@ -4373,10 +4375,10 @@ class PTXReadSRegIntrinsic_r64<string name>
|
|||
// Intrinsics to read registers with non-constant values. E.g. the values that
|
||||
// do change over the kernel lifetime. Such reads should not be CSE'd.
|
||||
class PTXReadNCSRegIntrinsic_r32<string name>
|
||||
: Intrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>,
|
||||
: Intrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly, IntrNoCallback]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
|
||||
class PTXReadNCSRegIntrinsic_r64<string name>
|
||||
: Intrinsic<[llvm_i64_ty], [], [IntrInaccessibleMemOnly]>,
|
||||
: Intrinsic<[llvm_i64_ty], [], [IntrInaccessibleMemOnly, IntrNoCallback]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
|
||||
|
||||
defm int_nvvm_read_ptx_sreg_tid : PTXReadSRegIntrinsic_v4i32<"tid">;
|
||||
|
@ -4426,12 +4428,14 @@ foreach sync = [false, true] in {
|
|||
if i.withGccBuiltin then {
|
||||
def i.Name : GCCBuiltin<i.Builtin>,
|
||||
Intrinsic<i.RetTy, i.ArgsTy,
|
||||
[IntrInaccessibleMemOnly, IntrConvergent],
|
||||
[IntrInaccessibleMemOnly, IntrConvergent,
|
||||
IntrNoCallback],
|
||||
i.IntrName>;
|
||||
}
|
||||
if i.withoutGccBuiltin then {
|
||||
def i.Name : Intrinsic<i.RetTy, i.ArgsTy,
|
||||
[IntrInaccessibleMemOnly, IntrConvergent], i.IntrName>;
|
||||
[IntrInaccessibleMemOnly, IntrConvergent,
|
||||
IntrNoCallback], i.IntrName>;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -4446,22 +4450,22 @@ foreach sync = [false, true] in {
|
|||
// vote.all pred
|
||||
def int_nvvm_vote_all :
|
||||
Intrinsic<[llvm_i1_ty], [llvm_i1_ty],
|
||||
[IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.all">,
|
||||
[IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.all">,
|
||||
GCCBuiltin<"__nvvm_vote_all">;
|
||||
// vote.any pred
|
||||
def int_nvvm_vote_any :
|
||||
Intrinsic<[llvm_i1_ty], [llvm_i1_ty],
|
||||
[IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.any">,
|
||||
[IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.any">,
|
||||
GCCBuiltin<"__nvvm_vote_any">;
|
||||
// vote.uni pred
|
||||
def int_nvvm_vote_uni :
|
||||
Intrinsic<[llvm_i1_ty], [llvm_i1_ty],
|
||||
[IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.uni">,
|
||||
[IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.uni">,
|
||||
GCCBuiltin<"__nvvm_vote_uni">;
|
||||
// vote.ballot pred
|
||||
def int_nvvm_vote_ballot :
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i1_ty],
|
||||
[IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.ballot">,
|
||||
[IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.ballot">,
|
||||
GCCBuiltin<"__nvvm_vote_ballot">;
|
||||
|
||||
//
|
||||
|
@ -4471,22 +4475,22 @@ def int_nvvm_vote_ballot :
|
|||
// vote.sync.all mask, pred
|
||||
def int_nvvm_vote_all_sync :
|
||||
Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty],
|
||||
[IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.all.sync">,
|
||||
[IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.all.sync">,
|
||||
GCCBuiltin<"__nvvm_vote_all_sync">;
|
||||
// vote.sync.any mask, pred
|
||||
def int_nvvm_vote_any_sync :
|
||||
Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty],
|
||||
[IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.any.sync">,
|
||||
[IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.any.sync">,
|
||||
GCCBuiltin<"__nvvm_vote_any_sync">;
|
||||
// vote.sync.uni mask, pred
|
||||
def int_nvvm_vote_uni_sync :
|
||||
Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty],
|
||||
[IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.uni.sync">,
|
||||
[IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.uni.sync">,
|
||||
GCCBuiltin<"__nvvm_vote_uni_sync">;
|
||||
// vote.sync.ballot mask, pred
|
||||
def int_nvvm_vote_ballot_sync :
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i1_ty],
|
||||
[IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.vote.ballot.sync">,
|
||||
[IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.vote.ballot.sync">,
|
||||
GCCBuiltin<"__nvvm_vote_ballot_sync">;
|
||||
|
||||
//
|
||||
|
@ -4495,12 +4499,12 @@ def int_nvvm_vote_ballot_sync :
|
|||
// match.any.sync.b32 mask, value
|
||||
def int_nvvm_match_any_sync_i32 :
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.match.any.sync.i32">,
|
||||
[IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.match.any.sync.i32">,
|
||||
GCCBuiltin<"__nvvm_match_any_sync_i32">;
|
||||
// match.any.sync.b64 mask, value
|
||||
def int_nvvm_match_any_sync_i64 :
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
|
||||
[IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.match.any.sync.i64">,
|
||||
[IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.match.any.sync.i64">,
|
||||
GCCBuiltin<"__nvvm_match_any_sync_i64">;
|
||||
|
||||
// match.all instruction have two variants -- one returns a single value, another
|
||||
|
@ -4510,11 +4514,11 @@ def int_nvvm_match_any_sync_i64 :
|
|||
// match.all.sync.b32p mask, value
|
||||
def int_nvvm_match_all_sync_i32p :
|
||||
Intrinsic<[llvm_i32_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.match.all.sync.i32p">;
|
||||
[IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.match.all.sync.i32p">;
|
||||
// match.all.sync.b64p mask, value
|
||||
def int_nvvm_match_all_sync_i64p :
|
||||
Intrinsic<[llvm_i32_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i64_ty],
|
||||
[IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.match.all.sync.i64p">;
|
||||
[IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback], "llvm.nvvm.match.all.sync.i64p">;
|
||||
|
||||
//
|
||||
// REDUX.SYNC
|
||||
|
@ -4522,42 +4526,42 @@ def int_nvvm_match_all_sync_i64p :
|
|||
// redux.sync.min.u32 dst, src, membermask;
|
||||
def int_nvvm_redux_sync_umin : GCCBuiltin<"__nvvm_redux_sync_umin">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrConvergent, IntrInaccessibleMemOnly]>;
|
||||
[IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
|
||||
|
||||
// redux.sync.max.u32 dst, src, membermask;
|
||||
def int_nvvm_redux_sync_umax : GCCBuiltin<"__nvvm_redux_sync_umax">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrConvergent, IntrInaccessibleMemOnly]>;
|
||||
[IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
|
||||
|
||||
// redux.sync.add.s32 dst, src, membermask;
|
||||
def int_nvvm_redux_sync_add : GCCBuiltin<"__nvvm_redux_sync_add">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrConvergent, IntrInaccessibleMemOnly]>;
|
||||
[IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
|
||||
|
||||
// redux.sync.min.s32 dst, src, membermask;
|
||||
def int_nvvm_redux_sync_min : GCCBuiltin<"__nvvm_redux_sync_min">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrConvergent, IntrInaccessibleMemOnly]>;
|
||||
[IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
|
||||
|
||||
// redux.sync.max.s32 dst, src, membermask;
|
||||
def int_nvvm_redux_sync_max : GCCBuiltin<"__nvvm_redux_sync_max">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrConvergent, IntrInaccessibleMemOnly]>;
|
||||
[IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
|
||||
|
||||
// redux.sync.and.b32 dst, src, membermask;
|
||||
def int_nvvm_redux_sync_and : GCCBuiltin<"__nvvm_redux_sync_and">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrConvergent, IntrInaccessibleMemOnly]>;
|
||||
[IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
|
||||
|
||||
// redux.sync.xor.b32 dst, src, membermask;
|
||||
def int_nvvm_redux_sync_xor : GCCBuiltin<"__nvvm_redux_sync_xor">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrConvergent, IntrInaccessibleMemOnly]>;
|
||||
[IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
|
||||
|
||||
// redux.sync.or.b32 dst, src, membermask;
|
||||
def int_nvvm_redux_sync_or : GCCBuiltin<"__nvvm_redux_sync_or">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrConvergent, IntrInaccessibleMemOnly]>;
|
||||
[IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
|
||||
|
||||
//
|
||||
// WMMA instructions
|
||||
|
@ -4566,7 +4570,7 @@ def int_nvvm_redux_sync_or : GCCBuiltin<"__nvvm_redux_sync_or">,
|
|||
class NVVM_WMMA_LD<WMMA_REGS Frag, string Layout, int WithStride>
|
||||
: Intrinsic<Frag.regs,
|
||||
!if(WithStride, [llvm_anyptr_ty, llvm_i32_ty], [llvm_anyptr_ty]),
|
||||
[IntrReadMem, IntrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>],
|
||||
[IntrReadMem, IntrArgMemOnly, IntrNoCallback, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>],
|
||||
WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.intr>;
|
||||
|
||||
// WMMA.STORE.D
|
||||
|
@ -4576,7 +4580,7 @@ class NVVM_WMMA_ST<WMMA_REGS Frag, string Layout, int WithStride>
|
|||
[llvm_anyptr_ty],
|
||||
Frag.regs,
|
||||
!if(WithStride, [llvm_i32_ty], [])),
|
||||
[IntrWriteMem, IntrArgMemOnly, WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>],
|
||||
[IntrWriteMem, IntrArgMemOnly, IntrNoCallback, WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>],
|
||||
WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.intr>;
|
||||
|
||||
// Create all load/store variants
|
||||
|
@ -4599,7 +4603,7 @@ class NVVM_WMMA_MMA<string ALayout, string BLayout, int Satfinite, string rnd, s
|
|||
WMMA_REGS C, WMMA_REGS D>
|
||||
: Intrinsic<D.regs,
|
||||
!listconcat(A.regs, B.regs, C.regs),
|
||||
[IntrNoMem],
|
||||
[IntrNoMem, IntrNoCallback],
|
||||
WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, A, B, C, D>.llvm>;
|
||||
|
||||
foreach layout_a = ["row", "col"] in {
|
||||
|
@ -4626,7 +4630,7 @@ class NVVM_MMA<string ALayout, string BLayout, int Satfinite, string b1op,
|
|||
WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D>
|
||||
: Intrinsic<D.regs,
|
||||
!listconcat(A.regs, B.regs, C.regs),
|
||||
[IntrNoMem],
|
||||
[IntrNoMem, IntrNoCallback],
|
||||
MMA_NAME<ALayout, BLayout, Satfinite, b1op, A, B, C, D>.llvm>;
|
||||
|
||||
foreach layout_a = ["row", "col"] in {
|
||||
|
@ -4647,7 +4651,7 @@ foreach layout_a = ["row", "col"] in {
|
|||
// LDMATRIX
|
||||
class NVVM_LDMATRIX<WMMA_REGS Frag, int Transposed>
|
||||
: Intrinsic<Frag.regs, [llvm_anyptr_ty],
|
||||
[IntrReadMem, IntrArgMemOnly, ReadOnly<ArgIndex<0>>,
|
||||
[IntrReadMem, IntrArgMemOnly, IntrNoCallback, ReadOnly<ArgIndex<0>>,
|
||||
NoCapture<ArgIndex<0>>],
|
||||
LDMATRIX_NAME<Frag, Transposed>.intr>;
|
||||
|
||||
|
|
|
@ -246,7 +246,7 @@ define void @pos_multiple() {
|
|||
!13 = !{i32 7, !"openmp-device", i32 50}
|
||||
;.
|
||||
; CHECK: attributes #[[ATTR0:[0-9]+]] = { "llvm.assume"="ompx_aligned_barrier" }
|
||||
; CHECK: attributes #[[ATTR1:[0-9]+]] = { convergent nounwind }
|
||||
; CHECK: attributes #[[ATTR1:[0-9]+]] = { convergent nocallback nounwind }
|
||||
; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nounwind willreturn }
|
||||
;.
|
||||
; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
|
||||
|
|
|
@ -145,12 +145,12 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
|
|||
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
|
||||
; CHECK-NEXT: [[X:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 4) #[[ATTR7:[0-9]+]]
|
||||
; CHECK-NEXT: [[X:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]]
|
||||
; CHECK-NEXT: call void @unknown_no_openmp()
|
||||
; CHECK-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[X]] to i32*
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[X_ON_STACK]] to i8*
|
||||
; CHECK-NEXT: call void @use.internalized(i8* nofree [[TMP0]]) #[[ATTR8:[0-9]+]]
|
||||
; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[X]], i64 4) #[[ATTR9:[0-9]+]]
|
||||
; CHECK-NEXT: call void @use.internalized(i8* nofree [[TMP0]]) #[[ATTR7:[0-9]+]]
|
||||
; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[X]], i64 4) #[[ATTR8:[0-9]+]]
|
||||
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
|
@ -164,7 +164,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
|
|||
; CHECK: master1:
|
||||
; CHECK-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([16 x i8], [16 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*) to [4 x i32]*
|
||||
; CHECK-NEXT: [[A0:%.*]] = bitcast [4 x i32]* [[X_ON_STACK]] to i8*
|
||||
; CHECK-NEXT: call void @use.internalized(i8* nofree [[A0]]) #[[ATTR8]]
|
||||
; CHECK-NEXT: call void @use.internalized(i8* nofree [[A0]]) #[[ATTR7]]
|
||||
; CHECK-NEXT: br label [[NEXT:%.*]]
|
||||
; CHECK: next:
|
||||
; CHECK-NEXT: call void @unknown_no_openmp()
|
||||
|
@ -172,7 +172,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
|
|||
; CHECK: master2:
|
||||
; CHECK-NEXT: [[Y_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @y_shared, i32 0, i32 0) to i8*) to [4 x i32]*
|
||||
; CHECK-NEXT: [[B1:%.*]] = bitcast [4 x i32]* [[Y_ON_STACK]] to i8*
|
||||
; CHECK-NEXT: call void @use.internalized(i8* nofree [[B1]]) #[[ATTR8]]
|
||||
; CHECK-NEXT: call void @use.internalized(i8* nofree [[B1]]) #[[ATTR7]]
|
||||
; CHECK-NEXT: br label [[EXIT]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
|
||||
|
@ -186,11 +186,11 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
|
|||
; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[C]], -1
|
||||
; CHECK-NEXT: br i1 [[C0]], label [[MASTER3:%.*]], label [[EXIT:%.*]]
|
||||
; CHECK: master3:
|
||||
; CHECK-NEXT: [[Z:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 24) #[[ATTR7]], !dbg [[DBG10:![0-9]+]]
|
||||
; CHECK-NEXT: [[Z:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 24) #[[ATTR6]], !dbg [[DBG10:![0-9]+]]
|
||||
; CHECK-NEXT: [[Z_ON_STACK:%.*]] = bitcast i8* [[Z]] to [6 x i32]*
|
||||
; CHECK-NEXT: [[C1:%.*]] = bitcast [6 x i32]* [[Z_ON_STACK]] to i8*
|
||||
; CHECK-NEXT: call void @use.internalized(i8* nofree [[C1]]) #[[ATTR8]]
|
||||
; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[Z]], i64 24) #[[ATTR9]]
|
||||
; CHECK-NEXT: call void @use.internalized(i8* nofree [[C1]]) #[[ATTR7]]
|
||||
; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[Z]], i64 24) #[[ATTR8]]
|
||||
; CHECK-NEXT: br label [[EXIT]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true)
|
||||
|
@ -223,12 +223,11 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
|
|||
; CHECK: attributes #[[ATTR1]] = { nofree nounwind writeonly }
|
||||
; CHECK: attributes #[[ATTR2]] = { nosync nounwind readonly allocsize(0) }
|
||||
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nosync nounwind }
|
||||
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nounwind readnone speculatable }
|
||||
; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn }
|
||||
; CHECK: attributes #[[ATTR6:[0-9]+]] = { "llvm.assume"="omp_no_openmp" }
|
||||
; CHECK: attributes #[[ATTR7]] = { nounwind readonly }
|
||||
; CHECK: attributes #[[ATTR8]] = { nounwind writeonly }
|
||||
; CHECK: attributes #[[ATTR9]] = { nounwind }
|
||||
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn }
|
||||
; CHECK: attributes #[[ATTR5:[0-9]+]] = { "llvm.assume"="omp_no_openmp" }
|
||||
; CHECK: attributes #[[ATTR6]] = { nounwind readonly }
|
||||
; CHECK: attributes #[[ATTR7]] = { nounwind writeonly }
|
||||
; CHECK: attributes #[[ATTR8]] = { nounwind }
|
||||
;.
|
||||
; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
|
||||
; CHECK: [[META1:![0-9]+]] = !DIFile(filename: "replace_globalization.c", directory: "/tmp/replace_globalization.c")
|
||||
|
|
Loading…
Reference in New Issue