GlobalISel: Preserve source value information for outgoing byval args

Pass through the original argument IR value in order to preserve the
aliasing information in the memcpy memory operands.
Matt Arsenault 2021-03-14 10:26:31 -04:00
parent 61f834cc09
commit b9a0384983
6 changed files with 88 additions and 18 deletions
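
The gist of the change, as a minimal sketch rather than the patch itself (the helper name below is made up for illustration): when the original IR value behind a byval argument is known, the source MachinePointerInfo for the stack copy can name that value instead of only its address space, which is what lets alias analysis and the MIR printer see through the G_MEMCPY's load operand.

#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Value.h"

// Hypothetical helper: prefer a MachinePointerInfo that records the original
// IR value; fall back to an address-space-only pointer info when the
// underlying value is unknown.
static llvm::MachinePointerInfo
byValSourcePtrInfo(const llvm::Value *OrigValue, unsigned AddrSpace) {
  if (OrigValue)
    return llvm::MachinePointerInfo(OrigValue);
  return llvm::MachinePointerInfo(AddrSpace);
}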


@@ -23,6 +23,7 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include <cstdint>
@@ -38,7 +39,6 @@ class MachineIRBuilder;
struct MachinePointerInfo;
class MachineRegisterInfo;
class TargetLowering;
class Value;
class CallLowering {
const TargetLowering *TLI;
@@ -65,10 +65,17 @@ public:
// if the argument was an incoming arg.
SmallVector<Register, 2> OrigRegs;
/// Optionally track the original IR value for the argument. This may not be
/// meaningful in all contexts. This should only be used to forward the
/// aliasing information into the MachinePointerInfo for memory arguments.
const Value *OrigValue = nullptr;
ArgInfo(ArrayRef<Register> Regs, Type *Ty,
ArrayRef<ISD::ArgFlagsTy> Flags = ArrayRef<ISD::ArgFlagsTy>(),
bool IsFixed = true)
: BaseArgInfo(Ty, Flags, IsFixed), Regs(Regs.begin(), Regs.end()) {
bool IsFixed = true, const Value *OrigValue = nullptr)
: BaseArgInfo(Ty, Flags, IsFixed), Regs(Regs.begin(), Regs.end()),
OrigValue(OrigValue) {
if (!Regs.empty() && Flags.empty())
this->Flags.push_back(ISD::ArgFlagsTy());
// FIXME: We should have just one way of saying "no register".
@@ -77,6 +84,11 @@ public:
"only void types should have no register");
}
ArgInfo(ArrayRef<Register> Regs, const Value &OrigValue,
ArrayRef<ISD::ArgFlagsTy> Flags = ArrayRef<ISD::ArgFlagsTy>(),
bool IsFixed = true)
: ArgInfo(Regs, OrigValue.getType(), Flags, IsFixed, &OrigValue) {}
ArgInfo() : BaseArgInfo() {}
};
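
A hedged usage sketch of the new convenience constructor (the wrapper below is hypothetical, not part of the patch): handing over the IR argument records both its type and OrigValue, whereas passing Arg.getType() alone would drop the value.

#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/IR/Argument.h"

// Hypothetical wrapper for illustration only.
static llvm::CallLowering::ArgInfo
makeArgInfo(llvm::ArrayRef<llvm::Register> VRegs, const llvm::Argument &Arg) {
  // Equivalent to ArgInfo(VRegs, Arg.getType(), ..., &Arg), but keeps the
  // original IR value around for the memory-argument path.
  return llvm::CallLowering::ArgInfo(VRegs, Arg);
}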


@@ -112,7 +112,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
unsigned i = 0;
unsigned NumFixedArgs = CB.getFunctionType()->getNumParams();
for (auto &Arg : CB.args()) {
ArgInfo OrigArg{ArgRegs[i], Arg->getType(), getAttributesForArgIdx(CB, i),
ArgInfo OrigArg{ArgRegs[i], *Arg.get(), getAttributesForArgIdx(CB, i),
i < NumFixedArgs};
setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CB);
@@ -204,7 +204,8 @@ void CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
// No splitting to do, but we want to replace the original type (e.g. [1 x
// double] -> double).
SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
OrigArg.Flags[0], OrigArg.IsFixed);
OrigArg.Flags[0], OrigArg.IsFixed,
OrigArg.OrigValue);
return;
}
@@ -667,18 +668,19 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
Register StackAddr =
Handler.getStackAddress(MemSize, Offset, DstMPO, Flags);
const LLT PtrTy = MRI.getType(StackAddr);
// FIXME: We do not have access to the original IR value here to
// preserve the aliasing information.
MachinePointerInfo SrcMPO(PtrTy.getAddressSpace());
MachinePointerInfo SrcMPO(Args[i].OrigValue);
if (!Args[i].OrigValue) {
// We still need to accurately track the stack address space if we
// don't know the underlying value.
const LLT PtrTy = MRI.getType(StackAddr);
SrcMPO = MachinePointerInfo(PtrTy.getAddressSpace());
}
Align DstAlign = std::max(Flags.getNonZeroByValAlign(),
inferAlignFromPtrInfo(MF, DstMPO));
// TODO: Theoretically the source value could have a higher alignment,
// but we don't have that here
Align SrcAlign = Flags.getNonZeroByValAlign();
Align SrcAlign = std::max(Flags.getNonZeroByValAlign(),
inferAlignFromPtrInfo(MF, SrcMPO));
Handler.copyArgumentMemory(Args[i], StackAddr, Args[i].Regs[0],
DstMPO, DstAlign, SrcMPO, SrcAlign,
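
For reference, the load-side memory operand for that copy can be built roughly like this once SrcMPO and SrcAlign are known (a sketch with a hypothetical helper name; getMachineMemOperand is the standard API for creating memory operands):

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/Support/Alignment.h"

// Hypothetical helper: with an IR value recorded in SrcMPO, the resulting
// operand prints as "load ... from %ir.<name>" and carries the aliasing
// information; otherwise only the address space is known.
static llvm::MachineMemOperand *
makeByValLoadMMO(llvm::MachineFunction &MF, llvm::MachinePointerInfo SrcMPO,
                 uint64_t MemSize, llvm::Align SrcAlign) {
  return MF.getMachineMemOperand(SrcMPO,
                                 llvm::MachineMemOperand::MOLoad |
                                     llvm::MachineMemOperand::MODereferenceable,
                                 MemSize, SrcAlign);
}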


@@ -552,6 +552,11 @@ Align llvm::inferAlignFromPtrInfo(MachineFunction &MF,
MPO.Offset);
}
if (const Value *V = MPO.V.dyn_cast<const Value *>()) {
const Module *M = MF.getFunction().getParent();
return V->getPointerAlignment(M->getDataLayout());
}
return Align(1);
}
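
On the alignment side, the effect of the SrcAlign change can be summarized by a small sketch (hypothetical helper name; it assumes Value::getPointerAlignment as used in the hunk above): the copy may use the larger of the byval alignment and whatever alignment the source IR value itself guarantees.

#include <algorithm>

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Alignment.h"

// Hypothetical helper: an incoming byval source such as an 'align 256'
// pointer argument can be more aligned than the callee's minimum byval
// alignment, and the memory operand may record the larger value.
static llvm::Align byValSourceAlign(const llvm::Value &Src,
                                    const llvm::DataLayout &DL,
                                    llvm::Align MinByValAlign) {
  return std::max(MinByValAlign, Src.getPointerAlignment(DL));
}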


@@ -462,7 +462,7 @@ bool AArch64CallLowering::lowerFormalArguments(
if (DL.getTypeStoreSize(Arg.getType()).isZero())
continue;
ArgInfo OrigArg{VRegs[i], Arg.getType()};
ArgInfo OrigArg{VRegs[i], Arg};
setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);
splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv());


@@ -656,7 +656,7 @@ bool AMDGPUCallLowering::lowerFormalArguments(
}
}
ArgInfo OrigArg(VRegs[Idx], Arg.getType());
ArgInfo OrigArg(VRegs[Idx], Arg);
const unsigned OrigArgIdx = Idx + AttributeList::FirstArgIndex;
setArgFlags(OrigArg, OrigArgIdx, DL, F);


@@ -3916,7 +3916,7 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0
; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C6]](s32)
; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK: G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store 8 into stack, align 4, addrspace 5), (dereferenceable load 8, align 4, addrspace 5)
; CHECK: G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store 8 into stack, align 4, addrspace 5), (dereferenceable load 8 from %ir.val, align 4, addrspace 5)
; CHECK: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -3971,11 +3971,11 @@ define void @call_byval_3ai32_byval_i8_align32([3 x i32] addrspace(5)* %incoming
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C1]](s32)
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C2]](s32), 0 :: (dereferenceable store 12 into stack, align 4, addrspace 5), (dereferenceable load 12, align 4, addrspace 5)
; CHECK: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C2]](s32), 0 :: (dereferenceable store 12 into stack, align 4, addrspace 5), (dereferenceable load 12 from %ir.incoming0, align 4, addrspace 5)
; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C3]](s32)
; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK: G_MEMCPY [[PTR_ADD1]](p5), [[COPY9]](p5), [[C4]](s32), 0 :: (dereferenceable store 1 into stack + 32, align 32, addrspace 5), (dereferenceable load 1, align 32, addrspace 5)
; CHECK: G_MEMCPY [[PTR_ADD1]](p5), [[COPY9]](p5), [[C4]](s32), 0 :: (dereferenceable store 1 into stack + 32, align 32, addrspace 5), (dereferenceable load 1 from %ir.incoming1, align 32, addrspace 5)
; CHECK: $vgpr0 = COPY [[C]](s32)
; CHECK: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
@@ -3995,6 +3995,57 @@ define void @call_byval_3ai32_byval_i8_align32([3 x i32] addrspace(5)* %incoming
ret void
}
declare void @void_func_byval_a4i64_align4([4 x i64] addrspace(5)* byval([4 x i64]) align 4 %arg0) #0
; Make sure the memory operand reflects the higher alignment of the incoming
; value rather than the minimum implied by the outgoing byval alignment.
define void @call_byval_a4i64_align4_higher_source_align([4 x i64] addrspace(5)* align 256 %incoming_high_align) #0 {
; CHECK-LABEL: name: call_byval_a4i64_align4_higher_source_align
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK: [[COPY8:%[0-9]+]]:_(p5) = COPY $vgpr0
; CHECK: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_func_byval_a4i64_align4
; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]]
; CHECK: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]]
; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK: [[COPY18:%[0-9]+]]:_(p5) = COPY $sgpr32
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY18]], [[C]](s32)
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; CHECK: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C1]](s32), 0 :: (dereferenceable store 32 into stack, align 4, addrspace 5), (dereferenceable load 32 from %ir.incoming_high_align, align 256, addrspace 5)
; CHECK: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
; CHECK: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
; CHECK: $sgpr8_sgpr9 = COPY [[COPY12]](p4)
; CHECK: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
; CHECK: $sgpr12 = COPY [[COPY14]](s32)
; CHECK: $sgpr13 = COPY [[COPY15]](s32)
; CHECK: $sgpr14 = COPY [[COPY16]](s32)
; CHECK: $vgpr31 = COPY [[COPY17]](s32)
; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @void_func_byval_a4i64_align4, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK: ADJCALLSTACKDOWN 0, 32, implicit-def $scc
; CHECK: [[COPY20:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]]
; CHECK: S_SETPC_B64_return [[COPY20]]
call void @void_func_byval_a4i64_align4([4 x i64] addrspace(5)* byval([4 x i64]) align 4 %incoming_high_align)
ret void
}
define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 {
; CHECK-LABEL: name: test_call_external_void_func_v2i8
; CHECK: bb.1 (%ir-block.0):