forked from OSchip/llvm-project
GlobalISel: Preserve source value information for outgoing byval args
Pass through the original argument IR value in order to preserve the aliasing information in the memcpy memory operands.
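Roughly, the intended usage looks like the sketch below (illustrative only, pieced together from the diff in this commit; names such as VRegs, Idx, SplitArgs, Args and Flags mirror the surrounding call-lowering code rather than introducing anything new):

// Sketch: a target's argument lowering can now hand the IR value itself to
// ArgInfo, instead of only its type, so the byval copy later knows which
// IR object it is reading from.
for (const Argument &Arg : F.args()) {
  ArgInfo OrigArg(VRegs[Idx], Arg); // records ArgInfo::OrigValue = &Arg
  setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F);
  splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv());
  ++Idx;
}

// When the outgoing byval memcpy is emitted, the source memory operand is
// built from that value (giving "%ir.<name>" in the MMO), which also lets a
// larger known source alignment be used:
MachinePointerInfo SrcMPO(Args[i].OrigValue);
Align SrcAlign =
    std::max(Flags.getNonZeroByValAlign(), inferAlignFromPtrInfo(MF, SrcMPO));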
commit b9a0384983 (parent 61f834cc09)
@@ -23,6 +23,7 @@
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MachineValueType.h"
 #include <cstdint>
@@ -38,7 +39,6 @@ class MachineIRBuilder;
 struct MachinePointerInfo;
 class MachineRegisterInfo;
 class TargetLowering;
-class Value;
 
 class CallLowering {
   const TargetLowering *TLI;
@@ -65,10 +65,17 @@ public:
     // if the argument was an incoming arg.
     SmallVector<Register, 2> OrigRegs;
 
+    /// Optionally track the original IR value for the argument. This may not be
+    /// meaningful in all contexts. This should only be used on for forwarding
+    /// through to use for aliasing information in MachinePointerInfo for memory
+    /// arguments.
+    const Value *OrigValue = nullptr;
+
     ArgInfo(ArrayRef<Register> Regs, Type *Ty,
             ArrayRef<ISD::ArgFlagsTy> Flags = ArrayRef<ISD::ArgFlagsTy>(),
-            bool IsFixed = true)
-      : BaseArgInfo(Ty, Flags, IsFixed), Regs(Regs.begin(), Regs.end()) {
+            bool IsFixed = true, const Value *OrigValue = nullptr)
+      : BaseArgInfo(Ty, Flags, IsFixed), Regs(Regs.begin(), Regs.end()),
+        OrigValue(OrigValue) {
       if (!Regs.empty() && Flags.empty())
         this->Flags.push_back(ISD::ArgFlagsTy());
       // FIXME: We should have just one way of saying "no register".
@@ -77,6 +84,11 @@ public:
              "only void types should have no register");
     }
 
+    ArgInfo(ArrayRef<Register> Regs, const Value &OrigValue,
+            ArrayRef<ISD::ArgFlagsTy> Flags = ArrayRef<ISD::ArgFlagsTy>(),
+            bool IsFixed = true)
+      : ArgInfo(Regs, OrigValue.getType(), Flags, IsFixed, &OrigValue) {}
+
     ArgInfo() : BaseArgInfo() {}
   };
 
@@ -112,7 +112,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
   unsigned i = 0;
   unsigned NumFixedArgs = CB.getFunctionType()->getNumParams();
   for (auto &Arg : CB.args()) {
-    ArgInfo OrigArg{ArgRegs[i], Arg->getType(), getAttributesForArgIdx(CB, i),
+    ArgInfo OrigArg{ArgRegs[i], *Arg.get(), getAttributesForArgIdx(CB, i),
                     i < NumFixedArgs};
     setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CB);
 
@@ -204,7 +204,8 @@ void CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
     // No splitting to do, but we want to replace the original type (e.g. [1 x
     // double] -> double).
     SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
-                           OrigArg.Flags[0], OrigArg.IsFixed);
+                           OrigArg.Flags[0], OrigArg.IsFixed,
+                           OrigArg.OrigValue);
     return;
   }
 
@@ -667,18 +668,19 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
       Register StackAddr =
           Handler.getStackAddress(MemSize, Offset, DstMPO, Flags);
 
-      const LLT PtrTy = MRI.getType(StackAddr);
-
-      // FIXME: We do not have access to the original IR value here to
-      // preserve the aliasing information.
-      MachinePointerInfo SrcMPO(PtrTy.getAddressSpace());
+      MachinePointerInfo SrcMPO(Args[i].OrigValue);
+      if (!Args[i].OrigValue) {
+        // We still need to accurately track the stack address space if we
+        // don't know the underlying value.
+        const LLT PtrTy = MRI.getType(StackAddr);
+        SrcMPO = MachinePointerInfo(PtrTy.getAddressSpace());
+      }
 
       Align DstAlign = std::max(Flags.getNonZeroByValAlign(),
                                 inferAlignFromPtrInfo(MF, DstMPO));
 
-      // TODO: Theoretically the source value could have a higher alignment,
-      // but we don't have that here
-      Align SrcAlign = Flags.getNonZeroByValAlign();
+      Align SrcAlign = std::max(Flags.getNonZeroByValAlign(),
+                                inferAlignFromPtrInfo(MF, SrcMPO));
 
       Handler.copyArgumentMemory(Args[i], StackAddr, Args[i].Regs[0],
                                  DstMPO, DstAlign, SrcMPO, SrcAlign,
@@ -552,6 +552,11 @@ Align llvm::inferAlignFromPtrInfo(MachineFunction &MF,
                            MPO.Offset);
   }
 
+  if (const Value *V = MPO.V.dyn_cast<const Value *>()) {
+    const Module *M = MF.getFunction().getParent();
+    return V->getPointerAlignment(M->getDataLayout());
+  }
+
   return Align(1);
 }
 
@@ -462,7 +462,7 @@ bool AArch64CallLowering::lowerFormalArguments(
     if (DL.getTypeStoreSize(Arg.getType()).isZero())
      continue;
 
-    ArgInfo OrigArg{VRegs[i], Arg.getType()};
+    ArgInfo OrigArg{VRegs[i], Arg};
     setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);
 
     splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv());
@@ -656,7 +656,7 @@ bool AMDGPUCallLowering::lowerFormalArguments(
       }
     }
 
-    ArgInfo OrigArg(VRegs[Idx], Arg.getType());
+    ArgInfo OrigArg(VRegs[Idx], Arg);
     const unsigned OrigArgIdx = Idx + AttributeList::FirstArgIndex;
     setArgFlags(OrigArg, OrigArgIdx, DL, F);
 
@@ -3916,7 +3916,7 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0
 ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C6]](s32)
 ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-; CHECK: G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store 8 into stack, align 4, addrspace 5), (dereferenceable load 8, align 4, addrspace 5)
+; CHECK: G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store 8 into stack, align 4, addrspace 5), (dereferenceable load 8 from %ir.val, align 4, addrspace 5)
 ; CHECK: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
 ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
 ; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -3971,11 +3971,11 @@ define void @call_byval_3ai32_byval_i8_align32([3 x i32] addrspace(5)* %incoming
 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C1]](s32)
 ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-; CHECK: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C2]](s32), 0 :: (dereferenceable store 12 into stack, align 4, addrspace 5), (dereferenceable load 12, align 4, addrspace 5)
+; CHECK: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C2]](s32), 0 :: (dereferenceable store 12 into stack, align 4, addrspace 5), (dereferenceable load 12 from %ir.incoming0, align 4, addrspace 5)
 ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C3]](s32)
 ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-; CHECK: G_MEMCPY [[PTR_ADD1]](p5), [[COPY9]](p5), [[C4]](s32), 0 :: (dereferenceable store 1 into stack + 32, align 32, addrspace 5), (dereferenceable load 1, align 32, addrspace 5)
+; CHECK: G_MEMCPY [[PTR_ADD1]](p5), [[COPY9]](p5), [[C4]](s32), 0 :: (dereferenceable store 1 into stack + 32, align 32, addrspace 5), (dereferenceable load 1 from %ir.incoming1, align 32, addrspace 5)
 ; CHECK: $vgpr0 = COPY [[C]](s32)
 ; CHECK: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
 ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
@@ -3995,6 +3995,57 @@ define void @call_byval_3ai32_byval_i8_align32([3 x i32] addrspace(5)* %incoming
   ret void
 }
 
+declare void @void_func_byval_a4i64_align4([4 x i64] addrspace(5)* byval([4 x i64]) align 4 %arg0) #0
+
+; Make sure we are aware of the higher alignment of the incoming value
+; than implied by the outgoing byval alignment in the memory operand.
+define void @call_byval_a4i64_align4_higher_source_align([4 x i64] addrspace(5)* align 256 %incoming_high_align) #0 {
+; CHECK-LABEL: name: call_byval_a4i64_align4_higher_source_align
+; CHECK: bb.1 (%ir-block.0):
+; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+; CHECK: [[COPY8:%[0-9]+]]:_(p5) = COPY $vgpr0
+; CHECK: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc
+; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_func_byval_a4i64_align4
+; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+; CHECK: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+; CHECK: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+; CHECK: [[COPY18:%[0-9]+]]:_(p5) = COPY $sgpr32
+; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY18]], [[C]](s32)
+; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+; CHECK: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C1]](s32), 0 :: (dereferenceable store 32 into stack, align 4, addrspace 5), (dereferenceable load 32 from %ir.incoming_high_align, align 256, addrspace 5)
+; CHECK: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
+; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+; CHECK: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+; CHECK: $sgpr8_sgpr9 = COPY [[COPY12]](p4)
+; CHECK: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+; CHECK: $sgpr12 = COPY [[COPY14]](s32)
+; CHECK: $sgpr13 = COPY [[COPY15]](s32)
+; CHECK: $sgpr14 = COPY [[COPY16]](s32)
+; CHECK: $vgpr31 = COPY [[COPY17]](s32)
+; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @void_func_byval_a4i64_align4, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+; CHECK: ADJCALLSTACKDOWN 0, 32, implicit-def $scc
+; CHECK: [[COPY20:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]]
+; CHECK: S_SETPC_B64_return [[COPY20]]
+  call void @void_func_byval_a4i64_align4([4 x i64] addrspace(5)* byval([4 x i64]) align 4 %incoming_high_align)
+  ret void
+}
+
 define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 {
 ; CHECK-LABEL: name: test_call_external_void_func_v2i8
 ; CHECK: bb.1 (%ir-block.0):