GlobalISel: Partially fix handling of byval arguments

This was essentially ignoring byval, treating such arguments as plain
pointer arguments that needed to be loaded from. The lowering should
instead copy the frame index value to the virtual register, not insert
a load from the frame index to produce the pointer value.

For AMDGPU, this was producing a load from the byval argument's fixed
stack slot to obtain the pointer used for the byval argument. I do not
understand how AArch64 managed to work before, since it appears to have
been similarly broken.

We could also change the ValueHandler API to avoid the extra copy from
the frame index, since getStackAddress currently returns a new register.
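As a purely illustrative sketch (this signature does not exist; the
current hook returns a fresh Register), the handler could materialize
the stack address directly into the argument's existing virtual
register:

    // Hypothetical ValueHandler hook: build the incoming stack address
    // straight into DstReg so the byval path needs no extra COPY.
    virtual void getStackAddress(uint64_t MemSize, int64_t Offset,
                                 Register DstReg,
                                 MachinePointerInfo &MPO) = 0;

    // The byval case below would then reduce to:
    //   Handler.getStackAddress(Flags.getByValSize(), VA.getLocMemOffset(),
    //                           Args[i].Regs[0], MPO);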

I believe there is still an issue with outgoing byval arguments. These
should have a copy inserted in case the callee decides to overwrite
the memory.
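A rough sketch of that missing outgoing-side handling, not part of this
commit: it assumes MachineIRBuilder::buildMemCpy (G_MEMCPY) is available
here and glosses over where the source MachinePointerInfo would come
from:

    // Sketch only: give the callee its own copy of outgoing byval memory.
    if (VA.isMemLoc() && Flags.isByVal() &&
        !Handler.isIncomingArgumentHandler()) {
      uint64_t Size = Flags.getByValSize();
      MachinePointerInfo DstMPO;
      Register DstPtr =
          Handler.getStackAddress(Size, VA.getLocMemOffset(), DstMPO);

      MachineFunction &MF = MIRBuilder.getMF();
      MachinePointerInfo SrcMPO; // Source IR value is not tracked here;
                                 // conservatively unknown.
      Align ByValAlign = Flags.getNonZeroByValAlign();
      auto *DstMMO = MF.getMachineMemOperand(
          DstMPO, MachineMemOperand::MOStore, Size, ByValAlign);
      auto *SrcMMO = MF.getMachineMemOperand(
          SrcMPO, MachineMemOperand::MOLoad, Size, ByValAlign);
      // Size type is illustrative; a target would use its pointer-index type.
      auto SizeConst = MIRBuilder.buildConstant(LLT::scalar(64), Size);
      MIRBuilder.buildMemCpy(DstPtr, Args[i].Regs[0], SizeConst, *DstMMO,
                             *SrcMMO);
      continue;
    }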
Matt Arsenault 2021-03-05 17:28:32 -05:00
parent d44a3dad99
commit 34471c3060
5 changed files with 105 additions and 96 deletions

View File

@@ -627,7 +627,9 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
       Register ArgReg = Args[i].Regs[Part];
       // There should be Regs.size() ArgLocs per argument.
       VA = ArgLocs[j + Part];
-      if (VA.isMemLoc()) {
+      const ISD::ArgFlagsTy Flags = Args[i].Flags[Part];
+      if (VA.isMemLoc() && !Flags.isByVal()) {
         // Individual pieces may have been spilled to the stack and others
         // passed in registers.
@@ -643,7 +645,22 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
         continue;
       }
-      assert(VA.isRegLoc() && "custom loc should have been handled already");
+      if (VA.isMemLoc() && Flags.isByVal()) {
+        // FIXME: We should be inserting a memcpy from the source pointer to the
+        // result for outgoing byval parameters.
+        if (!Handler.isIncomingArgumentHandler())
+          continue;
+        MachinePointerInfo MPO;
+        Register StackAddr = Handler.getStackAddress(Flags.getByValSize(),
+                                                     VA.getLocMemOffset(), MPO);
+        assert(Args[i].Regs.size() == 1 &&
+               "didn't expect split byval pointer");
+        MIRBuilder.buildCopy(Args[i].Regs[0], StackAddr);
+        continue;
+      }
+      assert(!VA.needsCustom() && "custom loc should have been handled already");
       if (i == 0 && ThisReturnReg.isValid() &&
           Handler.isIncomingArgumentHandler() &&

View File

@@ -309,7 +309,7 @@ define void @test_byval(i8* byval(i8) %ptr) {
 ; DARWIN-LABEL: name: test_byval
 ; DARWIN: bb.1 (%ir-block.0):
 ; DARWIN: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-; DARWIN: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.0, align 16)
+; DARWIN: [[COPY:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX]](p0)
 ; DARWIN: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
 ; DARWIN: BL @simple_fn, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp
 ; DARWIN: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
@@ -317,7 +317,7 @@ define void @test_byval(i8* byval(i8) %ptr) {
 ; WINDOWS-LABEL: name: test_byval
 ; WINDOWS: bb.1 (%ir-block.0):
 ; WINDOWS: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-; WINDOWS: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.0, align 16)
+; WINDOWS: [[COPY:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX]](p0)
 ; WINDOWS: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
 ; WINDOWS: BL @simple_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
 ; WINDOWS: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp

View File

@@ -49,12 +49,8 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
 ; GCN: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C5]](s32)
 ; GCN: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
 ; GCN: $vgpr0 = COPY [[FRAME_INDEX1]](p5)
-; GCN: [[COPY21:%[0-9]+]]:_(p5) = COPY $sp_reg
-; GCN: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-; GCN: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C6]](s32)
-; GCN: G_STORE [[FRAME_INDEX]](p5), [[PTR_ADD2]](p5) :: (store 4 into stack, align 16, addrspace 5)
-; GCN: [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>)
+; GCN: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
 ; GCN: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
 ; GCN: $sgpr6_sgpr7 = COPY [[COPY12]](p4)
 ; GCN: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
@@ -65,9 +61,9 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
 ; GCN: $vgpr31 = COPY [[OR1]](s32)
 ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
 ; GCN: ADJCALLSTACKDOWN 0, 8, implicit-def $scc
-; GCN: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C2]](s32)
+; GCN: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C2]](s32)
 ; GCN: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p5) :: (dereferenceable load 1 from %ir.out.gep02, addrspace 5)
-; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (dereferenceable load 4 from %ir.out.gep1, addrspace 5)
+; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (dereferenceable load 4 from %ir.out.gep1, addrspace 5)
 ; GCN: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store 1 into `i8 addrspace(1)* undef`, addrspace 1)
 ; GCN: G_STORE [[LOAD1]](s32), [[COPY10]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
 ; GCN: S_ENDPGM 0

View File

@@ -3912,12 +3912,8 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0
 ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32)
 ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-; CHECK: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
-; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C6]](s32)
-; CHECK: G_STORE [[FRAME_INDEX]](p5), [[PTR_ADD2]](p5) :: (store 4 into stack, align 16, addrspace 5)
-; CHECK: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
+; CHECK: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
 ; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
 ; CHECK: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
 ; CHECK: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)

View File

@@ -1727,19 +1727,19 @@ define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8,
 ; CHECK: bb.1 (%ir-block.0):
 ; CHECK: liveins: $sgpr30_sgpr31
 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-; CHECK: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.0, align 16, addrspace 5)
-; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
+; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p5) :: (dereferenceable load 1 from %ir.arg0, align 4, addrspace 5)
+; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load 1 from %ir.arg0, align 4, addrspace 5)
 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[LOAD]], [[C]](s32)
-; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load 4 from %ir.arg0 + 4, addrspace 5)
-; CHECK: G_STORE [[LOAD1]](s8), [[DEF]](p1) :: (store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)
+; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load 4 from %ir.arg0 + 4, addrspace 5)
+; CHECK: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)
 ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64)
-; CHECK: G_STORE [[LOAD2]](s32), [[PTR_ADD1]](p1) :: (store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)
-; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
-; CHECK: S_SETPC_B64_return [[COPY1]]
+; CHECK: G_STORE [[LOAD1]](s32), [[PTR_ADD1]](p1) :: (store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)
+; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+; CHECK: S_SETPC_B64_return [[COPY2]]
 %arg0.load = load { i8, i32 }, { i8, i32 } addrspace(5)* %arg0
 store { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef
 ret void
@@ -1750,30 +1750,30 @@ define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval({
 ; CHECK: bb.1 (%ir-block.0):
 ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-; CHECK: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 16, addrspace 5)
+; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
 ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-; CHECK: [[LOAD1:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, align 8, addrspace 5)
-; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+; CHECK: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5)
+; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
+; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
 ; CHECK: [[DEF1:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
-; CHECK: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p5) :: (volatile dereferenceable load 1 from %ir.arg0, align 4, addrspace 5)
+; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (volatile dereferenceable load 1 from %ir.arg0, align 4, addrspace 5)
 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[LOAD]], [[C]](s32)
-; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (volatile dereferenceable load 4 from %ir.arg0 + 4, addrspace 5)
-; CHECK: [[LOAD4:%[0-9]+]]:_(s8) = G_LOAD [[LOAD1]](p5) :: (volatile dereferenceable load 1 from %ir.arg1, align 4, addrspace 5)
-; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[LOAD1]], [[C]](s32)
-; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (volatile dereferenceable load 4 from %ir.arg1 + 4, addrspace 5)
-; CHECK: G_STORE [[LOAD2]](s8), [[DEF]](p1) :: (volatile store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)
+; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (volatile dereferenceable load 4 from %ir.arg0 + 4, addrspace 5)
+; CHECK: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p5) :: (volatile dereferenceable load 1 from %ir.arg1, align 4, addrspace 5)
+; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY1]], [[C]](s32)
+; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (volatile dereferenceable load 4 from %ir.arg1 + 4, addrspace 5)
+; CHECK: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)
 ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64)
-; CHECK: G_STORE [[LOAD3]](s32), [[PTR_ADD2]](p1) :: (volatile store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)
-; CHECK: G_STORE [[LOAD4]](s8), [[DEF]](p1) :: (volatile store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)
+; CHECK: G_STORE [[LOAD1]](s32), [[PTR_ADD2]](p1) :: (volatile store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)
+; CHECK: G_STORE [[LOAD2]](s8), [[DEF]](p1) :: (volatile store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)
 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64)
-; CHECK: G_STORE [[LOAD5]](s32), [[PTR_ADD3]](p1) :: (volatile store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)
-; CHECK: G_STORE [[COPY]](s32), [[DEF1]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3)
-; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-; CHECK: S_SETPC_B64_return [[COPY2]]
+; CHECK: G_STORE [[LOAD3]](s32), [[PTR_ADD3]](p1) :: (volatile store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)
+; CHECK: G_STORE [[COPY2]](s32), [[DEF1]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3)
+; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+; CHECK: S_SETPC_B64_return [[COPY4]]
 %arg0.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg0
 %arg1.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg1
 store volatile { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef
@@ -1787,18 +1787,18 @@ define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval(i32) %arg0, i
 ; CHECK: bb.1 (%ir-block.0):
 ; CHECK: liveins: $sgpr30_sgpr31
 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-; CHECK: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 16, addrspace 5)
+; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
 ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-; CHECK: [[LOAD1:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, align 8, addrspace 5)
-; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+; CHECK: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5)
+; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-; CHECK: [[COPY1:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p5) :: (dereferenceable load 4 from %ir.arg0, addrspace 5)
-; CHECK: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[LOAD1]](p5) :: (dereferenceable load 8 from %ir.arg1, addrspace 5)
-; CHECK: G_STORE [[LOAD2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
-; CHECK: G_STORE [[LOAD3]](s64), [[COPY1]](p1) :: (store 8 into `i64 addrspace(1)* undef`, addrspace 1)
-; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
-; CHECK: S_SETPC_B64_return [[COPY2]]
+; CHECK: [[COPY3:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load 4 from %ir.arg0, addrspace 5)
+; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p5) :: (dereferenceable load 8 from %ir.arg1, addrspace 5)
+; CHECK: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+; CHECK: G_STORE [[LOAD1]](s64), [[COPY3]](p1) :: (store 8 into `i64 addrspace(1)* undef`, addrspace 1)
+; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+; CHECK: S_SETPC_B64_return [[COPY4]]
 %arg0.load = load i32, i32 addrspace(5)* %arg0
 %arg1.load = load i64, i64 addrspace(5)* %arg1
 store i32 %arg0.load, i32 addrspace(1)* undef
@@ -1818,18 +1818,18 @@ define void @void_func_byval_i8_align32_i16_align64(i8 addrspace(5)* byval(i8) %
 ; CHECK: bb.1 (%ir-block.0):
 ; CHECK: liveins: $sgpr30_sgpr31
 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-; CHECK: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 16, addrspace 5)
+; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
 ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-; CHECK: [[LOAD1:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, align 16, addrspace 5)
-; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+; CHECK: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5)
+; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
-; CHECK: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C]](p1)
-; CHECK: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p5) :: (dereferenceable load 1 from %ir.arg0, addrspace 5)
-; CHECK: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[LOAD1]](p5) :: (dereferenceable load 2 from %ir.arg1, addrspace 5)
-; CHECK: G_STORE [[LOAD2]](s8), [[C]](p1) :: (store 1 into `i8 addrspace(1)* null`, addrspace 1)
-; CHECK: G_STORE [[LOAD3]](s16), [[COPY1]](p1) :: (store 2 into `i16 addrspace(1)* null`, addrspace 1)
-; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
-; CHECK: S_SETPC_B64_return [[COPY2]]
+; CHECK: [[COPY3:%[0-9]+]]:_(p1) = COPY [[C]](p1)
+; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load 1 from %ir.arg0, addrspace 5)
+; CHECK: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load 2 from %ir.arg1, addrspace 5)
+; CHECK: G_STORE [[LOAD]](s8), [[C]](p1) :: (store 1 into `i8 addrspace(1)* null`, addrspace 1)
+; CHECK: G_STORE [[LOAD1]](s16), [[COPY3]](p1) :: (store 2 into `i16 addrspace(1)* null`, addrspace 1)
+; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+; CHECK: S_SETPC_B64_return [[COPY4]]
 %arg0.load = load i8, i8 addrspace(5)* %arg0
 %arg1.load = load i16, i16 addrspace(5)* %arg1
 store i8 %arg0.load, i8 addrspace(1)* null
@@ -1850,30 +1850,30 @@ define void @byval_a3i32_align128_byval_i16_align64([3 x i32] addrspace(5)* byva
 ; CHECK: bb.1 (%ir-block.0):
 ; CHECK: liveins: $sgpr30_sgpr31
 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-; CHECK: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 16, addrspace 5)
+; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
 ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-; CHECK: [[LOAD1:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, align 16, addrspace 5)
-; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+; CHECK: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5)
+; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
-; CHECK: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C]](p1)
-; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p5) :: (dereferenceable load 4 from %ir.arg0, addrspace 5)
+; CHECK: [[COPY3:%[0-9]+]]:_(p1) = COPY [[C]](p1)
+; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load 4 from %ir.arg0, addrspace 5)
 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[LOAD]], [[C1]](s32)
-; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load 4 from %ir.arg0 + 4, addrspace 5)
+; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load 4 from %ir.arg0 + 4, addrspace 5)
 ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[LOAD]], [[C2]](s32)
-; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (dereferenceable load 4 from %ir.arg0 + 8, addrspace 5)
-; CHECK: [[LOAD5:%[0-9]+]]:_(s16) = G_LOAD [[LOAD1]](p5) :: (dereferenceable load 2 from %ir.arg1, addrspace 5)
-; CHECK: G_STORE [[LOAD2]](s32), [[C]](p1) :: (store 4 into `[3 x i32] addrspace(1)* null`, addrspace 1)
+; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (dereferenceable load 4 from %ir.arg0 + 8, addrspace 5)
+; CHECK: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load 2 from %ir.arg1, addrspace 5)
+; CHECK: G_STORE [[LOAD]](s32), [[C]](p1) :: (store 4 into `[3 x i32] addrspace(1)* null`, addrspace 1)
 ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[C]], [[C3]](s64)
-; CHECK: G_STORE [[LOAD3]](s32), [[PTR_ADD2]](p1) :: (store 4 into `[3 x i32] addrspace(1)* null` + 4, addrspace 1)
+; CHECK: G_STORE [[LOAD1]](s32), [[PTR_ADD2]](p1) :: (store 4 into `[3 x i32] addrspace(1)* null` + 4, addrspace 1)
 ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[C]], [[C4]](s64)
-; CHECK: G_STORE [[LOAD4]](s32), [[PTR_ADD3]](p1) :: (store 4 into `[3 x i32] addrspace(1)* null` + 8, addrspace 1)
-; CHECK: G_STORE [[LOAD5]](s16), [[COPY1]](p1) :: (store 2 into `i16 addrspace(1)* null`, addrspace 1)
-; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
-; CHECK: S_SETPC_B64_return [[COPY2]]
+; CHECK: G_STORE [[LOAD2]](s32), [[PTR_ADD3]](p1) :: (store 4 into `[3 x i32] addrspace(1)* null` + 8, addrspace 1)
+; CHECK: G_STORE [[LOAD3]](s16), [[COPY3]](p1) :: (store 2 into `i16 addrspace(1)* null`, addrspace 1)
+; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+; CHECK: S_SETPC_B64_return [[COPY4]]
 %arg0.load = load [3 x i32], [3 x i32] addrspace(5)* %arg0
 %arg1.load = load i16, i16 addrspace(5)* %arg1
 store [3 x i32] %arg0.load, [3 x i32] addrspace(1)* null
@@ -1929,15 +1929,15 @@ define void @void_func_v32i32_i32_byval_i8(<32 x i32> %arg0, i32 %arg1, i8 addrs
 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
 ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 16, addrspace 5)
 ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-; CHECK: [[LOAD1:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, align 8, addrspace 5)
-; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+; CHECK: [[COPY32:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5)
+; CHECK: [[COPY33:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
-; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[C]](p1)
+; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[C]](p1)
 ; CHECK: G_STORE [[LOAD]](s32), [[C]](p1) :: (store 4 into `i32 addrspace(1)* null`, addrspace 1)
-; CHECK: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[LOAD1]](p5) :: (dereferenceable load 1 from %ir.arg2, addrspace 5)
-; CHECK: G_STORE [[LOAD2]](s8), [[COPY33]](p1) :: (store 1 into `i8 addrspace(1)* null`, addrspace 1)
-; CHECK: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]]
-; CHECK: S_SETPC_B64_return [[COPY34]]
+; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[COPY32]](p5) :: (dereferenceable load 1 from %ir.arg2, addrspace 5)
+; CHECK: G_STORE [[LOAD1]](s8), [[COPY34]](p1) :: (store 1 into `i8 addrspace(1)* null`, addrspace 1)
+; CHECK: [[COPY35:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY33]]
+; CHECK: S_SETPC_B64_return [[COPY35]]
 store i32 %arg1, i32 addrspace(1)* null
 %arg2.load = load i8, i8 addrspace(5)* %arg2
 store i8 %arg2.load, i8 addrspace(1)* null
@@ -1990,17 +1990,17 @@ define void @void_func_v32i32_byval_i8_i32(<32 x i32> %arg0, i8 addrspace(5)* by
 ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
 ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-; CHECK: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 16, addrspace 5)
+; CHECK: [[COPY32:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
 ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5)
-; CHECK: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5)
+; CHECK: [[COPY33:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
-; CHECK: [[COPY33:%[0-9]+]]:_(p1) = COPY [[C]](p1)
-; CHECK: G_STORE [[LOAD1]](s32), [[C]](p1) :: (store 4 into `i32 addrspace(1)* null`, addrspace 1)
-; CHECK: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p5) :: (dereferenceable load 1 from %ir.arg1, addrspace 5)
-; CHECK: G_STORE [[LOAD2]](s8), [[COPY33]](p1) :: (store 1 into `i8 addrspace(1)* null`, addrspace 1)
-; CHECK: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]]
-; CHECK: S_SETPC_B64_return [[COPY34]]
+; CHECK: [[COPY34:%[0-9]+]]:_(p1) = COPY [[C]](p1)
+; CHECK: G_STORE [[LOAD]](s32), [[C]](p1) :: (store 4 into `i32 addrspace(1)* null`, addrspace 1)
+; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[COPY32]](p5) :: (dereferenceable load 1 from %ir.arg1, addrspace 5)
+; CHECK: G_STORE [[LOAD1]](s8), [[COPY34]](p1) :: (store 1 into `i8 addrspace(1)* null`, addrspace 1)
+; CHECK: [[COPY35:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY33]]
+; CHECK: S_SETPC_B64_return [[COPY35]]
 store i32 %arg2, i32 addrspace(1)* null
 %arg1.load = load i8, i8 addrspace(5)* %arg1
 store i8 %arg1.load, i8 addrspace(1)* null