forked from OSchip/llvm-project
[NVPTX] Add MoveParam instruction for TargetExternalSymbol operand
TargetExternalSymbol is considered to be an immediate and not a register, so machine verifier emits an error: *** Bad machine code: Expected a register operand. *** - function: static_offset - basic block: %bb.0 bb (0x560e9b306028) - instruction: %3:int64regs = MoveParamI64 &static_offset_param_1 - operand 1: &static_offset_param_1 The patch adds variants of this instruction with an immediate operand for byval arguments on 64-bit and 32-bit targets. Differential Revision: https://reviews.llvm.org/D113006
This commit is contained in:
parent
3bc586b9aa
commit
0e70785538
|
@ -2247,8 +2247,18 @@ class MoveParamInst<NVPTXRegClass regclass, string asmstr> :
|
|||
!strconcat("mov", asmstr, " \t$dst, $src;"),
|
||||
[(set regclass:$dst, (MoveParam regclass:$src))]>;
|
||||
|
||||
class MoveParamSymbolInst<NVPTXRegClass regclass, Operand srcty,
|
||||
string asmstr> :
|
||||
NVPTXInst<(outs regclass:$dst), (ins srcty:$src),
|
||||
!strconcat("mov", asmstr, " \t$dst, $src;"),
|
||||
[(set regclass:$dst, (MoveParam texternalsym:$src))]>;
|
||||
|
||||
def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">;
|
||||
def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">;
|
||||
|
||||
def MoveParamSymbolI64 : MoveParamSymbolInst<Int64Regs, i64imm, ".b64">;
|
||||
def MoveParamSymbolI32 : MoveParamSymbolInst<Int32Regs, i32imm, ".b32">;
|
||||
|
||||
def MoveParamI16 :
|
||||
NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
|
||||
"cvt.u16.u32 \t$dst, $src;",
|
||||
|
|
|
@ -1,16 +1,21 @@
|
|||
; RUN: llc < %s -mcpu=sm_20 | FileCheck %s
|
||||
|
||||
target triple = "nvptx64-nvidia-cuda"
|
||||
; RUN: llc < %s -mtriple nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK64
|
||||
; RUN: llc < %s -mtriple nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK32
|
||||
|
||||
%struct.ham = type { [4 x i32] }
|
||||
|
||||
; // Verify that load with static offset into parameter is done directly.
|
||||
; CHECK-LABEL: .visible .entry static_offset
|
||||
; CHECK-NOT: .local
|
||||
; CHECK: ld.param.u64 [[result_addr:%rd[0-9]+]], [{{.*}}_param_0]
|
||||
; CHECK: mov.b64 %[[param_addr:rd[0-9]+]], {{.*}}_param_1
|
||||
; CHECK: mov.u64 %[[param_addr1:rd[0-9]+]], %[[param_addr]]
|
||||
; CHECK: cvta.to.global.u64 [[result_addr_g:%rd[0-9]+]], [[result_addr]]
|
||||
; CHECK64: ld.param.u64 [[result_addr:%rd[0-9]+]], [{{.*}}_param_0]
|
||||
; CHECK64: mov.b64 %[[param_addr:rd[0-9]+]], {{.*}}_param_1
|
||||
; CHECK64: mov.u64 %[[param_addr1:rd[0-9]+]], %[[param_addr]]
|
||||
; CHECK64: cvta.to.global.u64 [[result_addr_g:%rd[0-9]+]], [[result_addr]]
|
||||
;
|
||||
; CHECK32: ld.param.u32 [[result_addr:%r[0-9]+]], [{{.*}}_param_0]
|
||||
; CHECK32: mov.b32 %[[param_addr:r[0-9]+]], {{.*}}_param_1
|
||||
; CHECK32: mov.u32 %[[param_addr1:r[0-9]+]], %[[param_addr]]
|
||||
; CHECK32: cvta.to.global.u32 [[result_addr_g:%r[0-9]+]], [[result_addr]]
|
||||
;
|
||||
; CHECK: ld.param.u32 [[value:%r[0-9]+]], [%[[param_addr1]]+12];
|
||||
; CHECK st.global.u32 [[[result_addr_g]]], [[value]];
|
||||
; Function Attrs: nofree norecurse nounwind willreturn mustprogress
|
||||
|
@ -32,11 +37,18 @@ bb6: ; preds = %bb3, %bb
|
|||
; // Verify that load with dynamic offset into parameter is also done directly.
|
||||
; CHECK-LABEL: .visible .entry dynamic_offset
|
||||
; CHECK-NOT: .local
|
||||
; CHECK: ld.param.u64 [[result_addr:%rd[0-9]+]], [{{.*}}_param_0]
|
||||
; CHECK: mov.b64 %[[param_addr:rd[0-9]+]], {{.*}}_param_1
|
||||
; CHECK: mov.u64 %[[param_addr1:rd[0-9]+]], %[[param_addr]]
|
||||
; CHECK: cvta.to.global.u64 [[result_addr_g:%rd[0-9]+]], [[result_addr]]
|
||||
; CHECK: add.s64 %[[param_w_offset:rd[0-9]+]], %[[param_addr1]],
|
||||
; CHECK64: ld.param.u64 [[result_addr:%rd[0-9]+]], [{{.*}}_param_0]
|
||||
; CHECK64: mov.b64 %[[param_addr:rd[0-9]+]], {{.*}}_param_1
|
||||
; CHECK64: mov.u64 %[[param_addr1:rd[0-9]+]], %[[param_addr]]
|
||||
; CHECK64: cvta.to.global.u64 [[result_addr_g:%rd[0-9]+]], [[result_addr]]
|
||||
; CHECK64: add.s64 %[[param_w_offset:rd[0-9]+]], %[[param_addr1]],
|
||||
;
|
||||
; CHECK32: ld.param.u32 [[result_addr:%r[0-9]+]], [{{.*}}_param_0]
|
||||
; CHECK32: mov.b32 %[[param_addr:r[0-9]+]], {{.*}}_param_1
|
||||
; CHECK32: mov.u32 %[[param_addr1:r[0-9]+]], %[[param_addr]]
|
||||
; CHECK32: cvta.to.global.u32 [[result_addr_g:%r[0-9]+]], [[result_addr]]
|
||||
; CHECK32: add.s32 %[[param_w_offset:r[0-9]+]], %[[param_addr1]],
|
||||
;
|
||||
; CHECK: ld.param.u32 [[value:%r[0-9]+]], [%[[param_w_offset]]];
|
||||
; CHECK st.global.u32 [[[result_addr_g]]], [[value]];
|
||||
|
||||
|
@ -53,11 +65,17 @@ bb:
|
|||
; Same as above, but with a bitcast present in the chain
|
||||
; CHECK-LABEL:.visible .entry gep_bitcast
|
||||
; CHECK-NOT: .local
|
||||
; CHECK-DAG: ld.param.u64 [[out:%rd[0-9]+]], [gep_bitcast_param_0]
|
||||
; CHECK-DAG: mov.b64 {{%rd[0-9]+}}, gep_bitcast_param_1
|
||||
; CHECK64-DAG: ld.param.u64 [[out:%rd[0-9]+]], [gep_bitcast_param_0]
|
||||
; CHECK64-DAG: mov.b64 {{%rd[0-9]+}}, gep_bitcast_param_1
|
||||
;
|
||||
; CHECK32-DAG: ld.param.u32 [[out:%r[0-9]+]], [gep_bitcast_param_0]
|
||||
; CHECK32-DAG: mov.b32 {{%r[0-9]+}}, gep_bitcast_param_1
|
||||
;
|
||||
; CHECK-DAG: ld.param.u32 {{%r[0-9]+}}, [gep_bitcast_param_2]
|
||||
; CHECK: ld.param.u8 [[value:%rs[0-9]+]], [{{%rd[0-9]+}}]
|
||||
; CHECK: st.global.u8 [{{%rd[0-9]+}}], [[value]];
|
||||
; CHECK64: ld.param.u8 [[value:%rs[0-9]+]], [{{%rd[0-9]+}}]
|
||||
; CHECK64: st.global.u8 [{{%rd[0-9]+}}], [[value]];
|
||||
; CHECK32: ld.param.u8 [[value:%rs[0-9]+]], [{{%r[0-9]+}}]
|
||||
; CHECK32: st.global.u8 [{{%r[0-9]+}}], [[value]];
|
||||
;
|
||||
; Function Attrs: nofree norecurse nounwind willreturn mustprogress
|
||||
define dso_local void @gep_bitcast(i8* nocapture %out, %struct.ham* nocapture readonly byval(%struct.ham) align 4 %in, i32 %n) local_unnamed_addr #0 {
|
||||
|
@ -73,11 +91,17 @@ bb:
|
|||
; Same as above, but with an ASC(101) present in the chain
|
||||
; CHECK-LABEL:.visible .entry gep_bitcast_asc
|
||||
; CHECK-NOT: .local
|
||||
; CHECK-DAG: ld.param.u64 [[out:%rd[0-9]+]], [gep_bitcast_asc_param_0]
|
||||
; CHECK-DAG: mov.b64 {{%rd[0-9]+}}, gep_bitcast_asc_param_1
|
||||
; CHECK64-DAG: ld.param.u64 [[out:%rd[0-9]+]], [gep_bitcast_asc_param_0]
|
||||
; CHECK64-DAG: mov.b64 {{%rd[0-9]+}}, gep_bitcast_asc_param_1
|
||||
;
|
||||
; CHECK32-DAG: ld.param.u32 [[out:%r[0-9]+]], [gep_bitcast_asc_param_0]
|
||||
; CHECK32-DAG: mov.b32 {{%r[0-9]+}}, gep_bitcast_asc_param_1
|
||||
;
|
||||
; CHECK-DAG: ld.param.u32 {{%r[0-9]+}}, [gep_bitcast_asc_param_2]
|
||||
; CHECK: ld.param.u8 [[value:%rs[0-9]+]], [{{%rd[0-9]+}}]
|
||||
; CHECK: st.global.u8 [{{%rd[0-9]+}}], [[value]];
|
||||
; CHECK64: ld.param.u8 [[value:%rs[0-9]+]], [{{%rd[0-9]+}}]
|
||||
; CHECK64: st.global.u8 [{{%rd[0-9]+}}], [[value]];
|
||||
; CHECK32: ld.param.u8 [[value:%rs[0-9]+]], [{{%r[0-9]+}}]
|
||||
; CHECK32: st.global.u8 [{{%r[0-9]+}}], [[value]];
|
||||
;
|
||||
; Function Attrs: nofree norecurse nounwind willreturn mustprogress
|
||||
define dso_local void @gep_bitcast_asc(i8* nocapture %out, %struct.ham* nocapture readonly byval(%struct.ham) align 4 %in, i32 %n) local_unnamed_addr #0 {
|
||||
|
@ -95,8 +119,10 @@ bb:
|
|||
; Verify that if the pointer escapes, then we do fall back onto using a temp copy.
|
||||
; CHECK-LABEL: .visible .entry pointer_escapes
|
||||
; CHECK: .local .align 8 .b8 __local_depot{{.*}}
|
||||
; CHECK: ld.param.u64 [[result_addr:%rd[0-9]+]], [{{.*}}_param_0]
|
||||
; CHECK: add.u64 %[[copy_addr:rd[0-9]+]], %SPL, 0;
|
||||
; CHECK64: ld.param.u64 [[result_addr:%rd[0-9]+]], [{{.*}}_param_0]
|
||||
; CHECK64: add.u64 %[[copy_addr:rd[0-9]+]], %SPL, 0;
|
||||
; CHECK32: ld.param.u32 [[result_addr:%r[0-9]+]], [{{.*}}_param_0]
|
||||
; CHECK32: add.u32 %[[copy_addr:r[0-9]+]], %SPL, 0;
|
||||
; CHECK-DAG: ld.param.u32 %{{.*}}, [pointer_escapes_param_1+12];
|
||||
; CHECK-DAG: ld.param.u32 %{{.*}}, [pointer_escapes_param_1+8];
|
||||
; CHECK-DAG: ld.param.u32 %{{.*}}, [pointer_escapes_param_1+4];
|
||||
|
@ -105,8 +131,10 @@ bb:
|
|||
; CHECK-DAG: st.local.u32 [%[[copy_addr]]+8],
|
||||
; CHECK-DAG: st.local.u32 [%[[copy_addr]]+4],
|
||||
; CHECK-DAG: st.local.u32 [%[[copy_addr]]],
|
||||
; CHECK: cvta.to.global.u64 [[result_addr_g:%rd[0-9]+]], [[result_addr]]
|
||||
; CHECK: add.s64 %[[copy_w_offset:rd[0-9]+]], %[[copy_addr]],
|
||||
; CHECK64: cvta.to.global.u64 [[result_addr_g:%rd[0-9]+]], [[result_addr]]
|
||||
; CHECK64: add.s64 %[[copy_w_offset:rd[0-9]+]], %[[copy_addr]],
|
||||
; CHECK32: cvta.to.global.u32 [[result_addr_g:%r[0-9]+]], [[result_addr]]
|
||||
; CHECK32: add.s32 %[[copy_w_offset:r[0-9]+]], %[[copy_addr]],
|
||||
; CHECK: ld.local.u32 [[value:%r[0-9]+]], [%[[copy_w_offset]]];
|
||||
; CHECK st.global.u32 [[[result_addr_g]]], [[value]];
|
||||
|
||||
|
|
Loading…
Reference in New Issue