Reland [X86] Codegen for preallocated

See https://reviews.llvm.org/D74651 for the preallocated IR constructs
and LangRef changes.

In X86TargetLowering::LowerCall(), if a call is preallocated, record
each argument's offset from the stack pointer and the total stack
adjustment. Associate the call Value with an integer index. Store the
info in X86MachineFunctionInfo with the integer index as the key.

This adds two new target independent ISDOpcodes and two new target
dependent Opcodes corresponding to @llvm.call.preallocated.{setup,arg}.

The setup ISelDAG node takes in a chain and outputs a chain and a
SrcValue of the preallocated call Value. It is lowered to a target
dependent node with the SrcValue replaced with the integer index key by
looking in X86MachineFunctionInfo. In
X86TargetLowering::EmitInstrWithCustomInserter() this is lowered to an
%esp adjustment, the exact amount determined by looking in
X86MachineFunctionInfo with the integer index key.

The arg ISelDAG node takes in a chain, a SrcValue of the preallocated
call Value, and the arg index int constant. It produces a chain and the
pointer fo the arg. It is lowered to a target dependent node with the
SrcValue replaced with the integer index key by looking in
X86MachineFunctionInfo. In
X86TargetLowering::EmitInstrWithCustomInserter() this is lowered to a
lea of the stack pointer plus an offset determined by looking in
X86MachineFunctionInfo with the integer index key.

Force any function containing a preallocated call to use the frame
pointer.

Does not yet handle a setup without a call, or a conditional call.
Does not yet handle musttail. That requires a LangRef change first.

Tried to look at all references to inalloca and see if they apply to
preallocated. I've made preallocated versions of tests testing inalloca
whenever possible and when they make sense (e.g. not alloca related,
inalloca edge cases).

Aside from the tests added here, I checked that this codegen produces
correct code for something like

```
struct A {
        A();
        A(A&&);
        ~A();
};

void bar() {
        foo(foo(foo(foo(foo(A(), 4), 5), 6), 7), 8);
}
```

by replacing the inalloca version of the .ll file with the appropriate
preallocated code. Running the executable produces the same results as
using the current inalloca implementation.

Reverted due to unexpectedly passing tests, added REQUIRES: asserts for reland.

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77689
This commit is contained in:
Arthur Eubanks 2020-03-16 12:32:36 -07:00
parent bfb2783726
commit 8a88755610
46 changed files with 696 additions and 49 deletions

View File

@ -906,6 +906,13 @@ enum NodeType {
VAEND,
VASTART,
// PREALLOCATED_SETUP - This has 2 operands: an input chain and a SRCVALUE
// with the preallocated call Value.
PREALLOCATED_SETUP,
// PREALLOCATED_ARG - This has 3 operands: an input chain, a SRCVALUE
// with the preallocated call Value, and a constant int.
PREALLOCATED_ARG,
/// SRCVALUE - This is a node type that holds a Value* that is used to
/// make reference to a value in the LLVM IR.
SRCVALUE,

View File

@ -35,6 +35,7 @@ namespace ISD {
unsigned IsReturned : 1; ///< Always returned
unsigned IsSplit : 1;
unsigned IsInAlloca : 1; ///< Passed with inalloca
unsigned IsPreallocated : 1; ///< ByVal without the copy
unsigned IsSplitEnd : 1; ///< Last part of a split
unsigned IsSwiftSelf : 1; ///< Swift self parameter
unsigned IsSwiftError : 1; ///< Swift error parameter
@ -56,9 +57,9 @@ namespace ISD {
public:
ArgFlagsTy()
: IsZExt(0), IsSExt(0), IsInReg(0), IsSRet(0), IsByVal(0), IsNest(0),
IsReturned(0), IsSplit(0), IsInAlloca(0), IsSplitEnd(0),
IsSwiftSelf(0), IsSwiftError(0), IsCFGuardTarget(0), IsHva(0),
IsHvaStart(0), IsSecArgPass(0), ByValAlign(0), OrigAlign(0),
IsReturned(0), IsSplit(0), IsInAlloca(0), IsPreallocated(0),
IsSplitEnd(0), IsSwiftSelf(0), IsSwiftError(0), IsCFGuardTarget(0),
IsHva(0), IsHvaStart(0), IsSecArgPass(0), ByValAlign(0), OrigAlign(0),
IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0),
IsCopyElisionCandidate(0), IsPointer(0), ByValSize(0),
PointerAddrSpace(0) {
@ -83,6 +84,9 @@ namespace ISD {
bool isInAlloca() const { return IsInAlloca; }
void setInAlloca() { IsInAlloca = 1; }
bool isPreallocated() const { return IsPreallocated; }
void setPreallocated() { IsPreallocated = 1; }
bool isSwiftSelf() const { return IsSwiftSelf; }
void setSwiftSelf() { IsSwiftSelf = 1; }

View File

@ -273,17 +273,20 @@ public:
bool IsNest : 1;
bool IsByVal : 1;
bool IsInAlloca : 1;
bool IsPreallocated : 1;
bool IsReturned : 1;
bool IsSwiftSelf : 1;
bool IsSwiftError : 1;
bool IsCFGuardTarget : 1;
MaybeAlign Alignment = None;
Type *ByValType = nullptr;
Type *PreallocatedType = nullptr;
ArgListEntry()
: IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
IsNest(false), IsByVal(false), IsInAlloca(false), IsReturned(false),
IsSwiftSelf(false), IsSwiftError(false), IsCFGuardTarget(false) {}
IsNest(false), IsByVal(false), IsInAlloca(false),
IsPreallocated(false), IsReturned(false), IsSwiftSelf(false),
IsSwiftError(false), IsCFGuardTarget(false) {}
void setAttributes(const CallBase *Call, unsigned ArgIdx);
};
@ -3608,6 +3611,7 @@ public:
bool IsReturnValueUsed : 1;
bool IsConvergent : 1;
bool IsPatchPoint : 1;
bool IsPreallocated : 1;
// IsTailCall should be modified by implementations of
// TargetLowering::LowerCall that perform tail call conversions.
@ -3631,7 +3635,7 @@ public:
CallLoweringInfo(SelectionDAG &DAG)
: RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false),
DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false),
IsPatchPoint(false), DAG(DAG) {}
IsPatchPoint(false), IsPreallocated(false), DAG(DAG) {}
CallLoweringInfo &setDebugLoc(const SDLoc &dl) {
DL = dl;
@ -3737,6 +3741,11 @@ public:
return *this;
}
CallLoweringInfo &setIsPreallocated(bool Value = true) {
IsPreallocated = Value;
return *this;
}
CallLoweringInfo &setIsPostTypeLegalization(bool Value=true) {
IsPostTypeLegalization = Value;
return *this;

View File

@ -110,6 +110,9 @@ public:
/// Return true if this argument has the inalloca attribute.
bool hasInAllocaAttr() const;
/// Return true if this argument has the preallocated attribute.
bool hasPreallocatedAttr() const;
/// Return true if this argument has the zext attribute.
bool hasZExtAttr() const;

View File

@ -623,6 +623,9 @@ public:
/// Return the byval type for the specified function parameter.
Type *getParamByValType(unsigned ArgNo) const;
/// Return the preallocated type for the specified function parameter.
Type *getParamPreallocatedType(unsigned ArgNo) const;
/// Get the stack alignment.
MaybeAlign getStackAlignment(unsigned Index) const;

View File

@ -1604,6 +1604,12 @@ public:
return Ty ? Ty : getArgOperand(ArgNo)->getType()->getPointerElementType();
}
/// Extract the preallocated type for a call or parameter.
Type *getParamPreallocatedType(unsigned ArgNo) const {
Type *Ty = Attrs.getParamPreallocatedType(ArgNo);
return Ty ? Ty : getArgOperand(ArgNo)->getType()->getPointerElementType();
}
/// Extract the number of dereferenceable bytes for a call or
/// parameter (0=unknown).
uint64_t getDereferenceableBytes(unsigned i) const {

View File

@ -127,6 +127,12 @@ HANDLE_TARGET_OPCODE(PATCHPOINT)
/// additionally expand this pseudo after register allocation.
HANDLE_TARGET_OPCODE(LOAD_STACK_GUARD)
/// These are used to support call sites that must have the stack adjusted
/// before the call (e.g. to initialize an argument passed by value).
/// See llvm.call.preallocated.{setup,arg} in the LangRef for more details.
HANDLE_TARGET_OPCODE(PREALLOCATED_SETUP)
HANDLE_TARGET_OPCODE(PREALLOCATED_ARG)
/// Call instruction with associated vm state for deoptimization and list
/// of live pointers for relocation by the garbage collector. It is
/// intended to support garbage collection with fully precise relocating

View File

@ -1173,6 +1173,18 @@ def LOAD_STACK_GUARD : StandardPseudoInstruction {
let hasSideEffects = 0;
bit isPseudo = 1;
}
def PREALLOCATED_SETUP : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins i32imm:$a);
let usesCustomInserter = 1;
let hasSideEffects = 1;
}
def PREALLOCATED_ARG : StandardPseudoInstruction {
let OutOperandList = (outs ptr_rc:$loc);
let InOperandList = (ins i32imm:$a, i32imm:$b);
let usesCustomInserter = 1;
let hasSideEffects = 1;
}
def LOCAL_ESCAPE : StandardPseudoInstruction {
// This instruction is really just a label. It has to be part of the chain so
// that it doesn't get dropped from the DAG, but it produces nothing and has

View File

@ -41,6 +41,11 @@ class CCIf<string predicate, CCAction A> : CCPredicateAction<A> {
class CCIfByVal<CCAction A> : CCIf<"ArgFlags.isByVal()", A> {
}
/// CCIfPreallocated - If the current argument has Preallocated parameter attribute,
/// apply Action A.
class CCIfPreallocated<CCAction A> : CCIf<"ArgFlags.isPreallocated()", A> {
}
/// CCIfSwiftSelf - If the current argument has swiftself parameter attribute,
/// apply Action A.
class CCIfSwiftSelf<CCAction A> : CCIf<"ArgFlags.isSwiftSelf()", A> {

View File

@ -96,10 +96,12 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
Flags.setSwiftError();
if (Attrs.hasAttribute(OpIdx, Attribute::ByVal))
Flags.setByVal();
if (Attrs.hasAttribute(OpIdx, Attribute::Preallocated))
Flags.setPreallocated();
if (Attrs.hasAttribute(OpIdx, Attribute::InAlloca))
Flags.setInAlloca();
if (Flags.isByVal() || Flags.isInAlloca()) {
if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType();
auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType();

View File

@ -1214,7 +1214,16 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
// the various CC lowering callbacks.
Flags.setByVal();
}
if (Arg.IsByVal || Arg.IsInAlloca) {
if (Arg.IsPreallocated) {
Flags.setPreallocated();
// Set the byval flag for CCAssignFn callbacks that don't know about
// preallocated. This way we can know how many bytes we should've
// allocated and how many bytes a callee cleanup function will pop. If we
// port preallocated to more targets, we'll have to add custom
// preallocated handling in the various CC lowering callbacks.
Flags.setByVal();
}
if (Arg.IsByVal || Arg.IsInAlloca || Arg.IsPreallocated) {
PointerType *Ty = cast<PointerType>(Arg.Ty);
Type *ElementTy = Ty->getElementType();
unsigned FrameSize =

View File

@ -1873,9 +1873,6 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
}
SDValue SelectionDAG::getSrcValue(const Value *V) {
assert((!V || V->getType()->isPointerTy()) &&
"SrcValue is not a pointer?");
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), None);
ID.AddPointer(V);

View File

@ -5606,6 +5606,23 @@ void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I,
LowerCallTo(I, Callee, I.isTailCall());
}
/// Given a @llvm.call.preallocated.setup, return the corresponding
/// preallocated call.
static const CallBase *FindPreallocatedCall(const Value *PreallocatedSetup) {
assert(cast<CallBase>(PreallocatedSetup)
->getCalledFunction()
->getIntrinsicID() == Intrinsic::call_preallocated_setup &&
"expected call_preallocated_setup Value");
for (auto *U : PreallocatedSetup->users()) {
auto *UseCall = cast<CallBase>(U);
const Function *Fn = UseCall->getCalledFunction();
if (!Fn || Fn->getIntrinsicID() != Intrinsic::call_preallocated_arg) {
return UseCall;
}
}
llvm_unreachable("expected corresponding call to preallocated setup/arg");
}
/// Lower the call to the specified intrinsic function.
void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
unsigned Intrinsic) {
@ -5798,6 +5815,30 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
updateDAGForMaybeTailCall(MC);
return;
}
case Intrinsic::call_preallocated_setup: {
const CallBase *PreallocatedCall = FindPreallocatedCall(&I);
SDValue SrcValue = DAG.getSrcValue(PreallocatedCall);
SDValue Res = DAG.getNode(ISD::PREALLOCATED_SETUP, sdl, MVT::Other,
getRoot(), SrcValue);
setValue(&I, Res);
DAG.setRoot(Res);
return;
}
case Intrinsic::call_preallocated_arg: {
const CallBase *PreallocatedCall = FindPreallocatedCall(I.getOperand(0));
SDValue SrcValue = DAG.getSrcValue(PreallocatedCall);
SDValue Ops[3];
Ops[0] = getRoot();
Ops[1] = SrcValue;
Ops[2] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(1)), sdl,
MVT::i32); // arg index
SDValue Res = DAG.getNode(
ISD::PREALLOCATED_ARG, sdl,
DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Ops);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return;
}
case Intrinsic::dbg_addr:
case Intrinsic::dbg_declare: {
const auto &DI = cast<DbgVariableIntrinsic>(I);
@ -7116,7 +7157,9 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
.setChain(getRoot())
.setCallee(RetTy, FTy, Callee, std::move(Args), CB)
.setTailCall(isTailCall)
.setConvergent(CB.isConvergent());
.setConvergent(CB.isConvergent())
.setIsPreallocated(
CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
if (Result.first.getNode()) {
@ -7642,9 +7685,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
// CFGuardTarget bundles are lowered in LowerCallTo.
assert(!I.hasOperandBundlesOtherThan({LLVMContext::OB_deopt,
LLVMContext::OB_funclet,
LLVMContext::OB_cfguardtarget}) &&
assert(!I.hasOperandBundlesOtherThan(
{LLVMContext::OB_deopt, LLVMContext::OB_funclet,
LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated}) &&
"Cannot lower calls with arbitrary operand bundles!");
SDValue Callee = getValue(I.getCalledOperand());
@ -8605,7 +8648,9 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
.setChain(getRoot())
.setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args))
.setDiscardResult(Call->use_empty())
.setIsPatchPoint(IsPatchPoint);
.setIsPatchPoint(IsPatchPoint)
.setIsPreallocated(
Call->countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
}
/// Add a stack map intrinsic call's live variable operands to a stackmap
@ -9125,6 +9170,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Flags.setCFGuardTarget();
if (Args[i].IsByVal)
Flags.setByVal();
if (Args[i].IsPreallocated) {
Flags.setPreallocated();
// Set the byval flag for CCAssignFn callbacks that don't know about
// preallocated. This way we can know how many bytes we should've
// allocated and how many bytes a callee cleanup function will pop. If
// we port preallocated to more targets, we'll have to add custom
// preallocated handling in the various CC lowering callbacks.
Flags.setByVal();
}
if (Args[i].IsInAlloca) {
Flags.setInAlloca();
// Set the byval flag for CCAssignFn callbacks that don't know about
@ -9134,7 +9188,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// in the various CC lowering callbacks.
Flags.setByVal();
}
if (Args[i].IsByVal || Args[i].IsInAlloca) {
if (Args[i].IsByVal || Args[i].IsInAlloca || Args[i].IsPreallocated) {
PointerType *Ty = cast<PointerType>(Args[i].Ty);
Type *ElementTy = Ty->getElementType();
@ -9448,7 +9502,7 @@ findArgumentCopyElisionCandidates(const DataLayout &DL,
// initializes the alloca. Don't elide copies from the same argument twice.
const Value *Val = SI->getValueOperand()->stripPointerCasts();
const auto *Arg = dyn_cast<Argument>(Val);
if (!Arg || Arg->hasInAllocaAttr() || Arg->hasByValAttr() ||
if (!Arg || Arg->hasPassPointeeByValueAttr() ||
Arg->getType()->isEmptyTy() ||
DL.getTypeStoreSize(Arg->getType()) !=
DL.getTypeAllocSize(AI->getAllocatedType()) ||
@ -9634,12 +9688,21 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// in the various CC lowering callbacks.
Flags.setByVal();
}
if (Arg.hasAttribute(Attribute::Preallocated)) {
Flags.setPreallocated();
// Set the byval flag for CCAssignFn callbacks that don't know about
// preallocated. This way we can know how many bytes we should've
// allocated and how many bytes a callee cleanup function will pop. If
// we port preallocated to more targets, we'll have to add custom
// preallocated handling in the various CC lowering callbacks.
Flags.setByVal();
}
if (F.getCallingConv() == CallingConv::X86_INTR) {
// IA Interrupt passes frame (1st parameter) by value in the stack.
if (ArgNo == 0)
Flags.setByVal();
}
if (Flags.isByVal() || Flags.isInAlloca()) {
if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
Type *ElementTy = Arg.getParamByValType();
// For ByVal, size and alignment should be passed from FE. BE will

View File

@ -393,6 +393,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::GC_TRANSITION_END: return "gc_transition.end";
case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset";
case ISD::FREEZE: return "freeze";
case ISD::PREALLOCATED_SETUP:
return "call_setup";
case ISD::PREALLOCATED_ARG:
return "call_alloc";
// Bit manipulation
case ISD::ABS: return "abs";

View File

@ -110,14 +110,18 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
Alignment = Call->getParamAlign(ArgIdx);
ByValType = nullptr;
if (Call->paramHasAttr(ArgIdx, Attribute::ByVal))
if (IsByVal)
ByValType = Call->getParamByValType(ArgIdx);
PreallocatedType = nullptr;
if (IsPreallocated)
PreallocatedType = Call->getParamPreallocatedType(ArgIdx);
}
/// Generate a libcall taking the given operands as arguments and returning a

View File

@ -1437,6 +1437,10 @@ Type *AttributeList::getParamByValType(unsigned Index) const {
return getAttributes(Index+FirstArgIndex).getByValType();
}
Type *AttributeList::getParamPreallocatedType(unsigned Index) const {
return getAttributes(Index + FirstArgIndex).getPreallocatedType();
}
MaybeAlign AttributeList::getStackAlignment(unsigned Index) const {
return getAttributes(Index).getStackAlignment();
}

View File

@ -114,6 +114,12 @@ bool Argument::hasInAllocaAttr() const {
return hasAttribute(Attribute::InAlloca);
}
bool Argument::hasPreallocatedAttr() const {
if (!getType()->isPointerTy())
return false;
return hasAttribute(Attribute::Preallocated);
}
bool Argument::hasPassPointeeByValueAttr() const {
if (!getType()->isPointerTy()) return false;
AttributeList Attrs = getParent()->getAttributes();

View File

@ -789,8 +789,9 @@ def CC_X86_32_Vector_Darwin : CallingConv<[
/// CC_X86_32_Common - In all X86-32 calling conventions, extra integers and FP
/// values are spilled on the stack.
def CC_X86_32_Common : CallingConv<[
// Handles byval parameters.
// Handles byval/preallocated parameters.
CCIfByVal<CCPassByVal<4, 4>>,
CCIfPreallocated<CCPassByVal<4, 4>>,
// The first 3 float or double arguments, if marked 'inreg' and if the call
// is not a vararg call and if SSE2 is available, are passed in SSE registers.

View File

@ -3245,7 +3245,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
return false;
for (auto Flag : CLI.OutFlags)
if (Flag.isSwiftError())
if (Flag.isSwiftError() || Flag.isPreallocated())
return false;
SmallVector<MVT, 16> OutVTs;

View File

@ -57,7 +57,8 @@ X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
return !MF.getFrameInfo().hasVarSizedObjects() &&
!MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
!MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
!MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
}
/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
@ -67,6 +68,7 @@ bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
bool
X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
return hasReservedCallFrame(MF) ||
MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
(hasFP(MF) && !TRI->needsStackRealignment(MF)) ||
TRI->hasBasePointer(MF);
}
@ -90,10 +92,10 @@ X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
TRI->needsStackRealignment(MF) ||
MFI.hasVarSizedObjects() ||
TRI->needsStackRealignment(MF) || MFI.hasVarSizedObjects() ||
MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
MFI.hasStackMap() || MFI.hasPatchPoint() ||
MFI.hasCopyImplyingStackAdjustment());

View File

@ -5552,6 +5552,39 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
CurDAG->RemoveDeadNode(Node);
return;
}
case ISD::PREALLOCATED_SETUP: {
auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
auto CallId = MFI->getPreallocatedIdForCallSite(
cast<SrcValueSDNode>(Node->getOperand(1))->getValue());
SDValue Chain = Node->getOperand(0);
SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32);
MachineSDNode *New = CurDAG->getMachineNode(
TargetOpcode::PREALLOCATED_SETUP, dl, MVT::Other, CallIdValue, Chain);
ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Chain
CurDAG->RemoveDeadNode(Node);
return;
}
case ISD::PREALLOCATED_ARG: {
auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
auto CallId = MFI->getPreallocatedIdForCallSite(
cast<SrcValueSDNode>(Node->getOperand(1))->getValue());
SDValue Chain = Node->getOperand(0);
SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32);
SDValue ArgIndex = Node->getOperand(2);
SDValue Ops[3];
Ops[0] = CallIdValue;
Ops[1] = ArgIndex;
Ops[2] = Chain;
MachineSDNode *New = CurDAG->getMachineNode(
TargetOpcode::PREALLOCATED_ARG, dl,
CurDAG->getVTList(TLI->getPointerTy(CurDAG->getDataLayout()),
MVT::Other),
Ops);
ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Arg pointer
ReplaceUses(SDValue(Node, 1), SDValue(New, 1)); // Chain
CurDAG->RemoveDeadNode(Node);
return;
}
}
SelectCode(Node);

View File

@ -3945,6 +3945,21 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (ArgLocs.back().getLocMemOffset() != 0)
report_fatal_error("any parameter with the inalloca attribute must be "
"the only memory argument");
} else if (CLI.IsPreallocated) {
assert(ArgLocs.back().isMemLoc() &&
"cannot use preallocated attribute on a register "
"parameter");
SmallVector<size_t, 4> PreallocatedOffsets;
for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
}
}
auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
NumBytesToPush = 0;
}
if (!IsSibcall && !IsMustTail)
@ -3972,9 +3987,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
++I, ++OutIndex) {
assert(OutIndex < Outs.size() && "Invalid Out index");
// Skip inalloca arguments, they have already been written.
// Skip inalloca/preallocated arguments, they have already been written.
ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
if (Flags.isInAlloca())
if (Flags.isInAlloca() || Flags.isPreallocated())
continue;
CCValAssign &VA = ArgLocs[I];
@ -4161,8 +4176,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
assert(VA.isMemLoc());
SDValue Arg = OutVals[OutsIndex];
ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
// Skip inalloca arguments. They don't require any work.
if (Flags.isInAlloca())
// Skip inalloca/preallocated arguments. They don't require any work.
if (Flags.isInAlloca() || Flags.isPreallocated())
continue;
// Create frame index.
int32_t Offset = VA.getLocMemOffset()+FPDiff;
@ -33076,6 +33091,36 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
BB->addLiveIn(BasePtr);
return BB;
}
case TargetOpcode::PREALLOCATED_SETUP: {
assert(Subtarget.is32Bit() && "preallocated only used in 32-bit");
auto MFI = MF->getInfo<X86MachineFunctionInfo>();
MFI->setHasPreallocatedCall(true);
int64_t PreallocatedId = MI.getOperand(0).getImm();
size_t StackAdjustment = MFI->getPreallocatedStackSize(PreallocatedId);
assert(StackAdjustment != 0 && "0 stack adjustment");
LLVM_DEBUG(dbgs() << "PREALLOCATED_SETUP stack adjustment "
<< StackAdjustment << "\n");
BuildMI(*BB, MI, DL, TII->get(X86::SUB32ri), X86::ESP)
.addReg(X86::ESP)
.addImm(StackAdjustment);
MI.eraseFromParent();
return BB;
}
case TargetOpcode::PREALLOCATED_ARG: {
assert(Subtarget.is32Bit() && "preallocated calls only used in 32-bit");
int64_t PreallocatedId = MI.getOperand(1).getImm();
int64_t ArgIdx = MI.getOperand(2).getImm();
auto MFI = MF->getInfo<X86MachineFunctionInfo>();
size_t ArgOffset = MFI->getPreallocatedArgOffsets(PreallocatedId)[ArgIdx];
LLVM_DEBUG(dbgs() << "PREALLOCATED_ARG arg index " << ArgIdx
<< ", arg offset " << ArgOffset << "\n");
// stack pointer + offset
addRegOffset(
BuildMI(*BB, MI, DL, TII->get(X86::LEA32r), MI.getOperand(0).getReg()),
X86::ESP, false, ArgOffset);
MI.eraseFromParent();
return BB;
}
}
}

View File

@ -13,6 +13,8 @@
#ifndef LLVM_LIB_TARGET_X86_X86MACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_X86_X86MACHINEFUNCTIONINFO_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
@ -103,6 +105,13 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// True if this function has WIN_ALLOCA instructions.
bool HasWinAlloca = false;
/// True if this function has any preallocated calls.
bool HasPreallocatedCall = false;
ValueMap<const Value *, size_t> PreallocatedIds;
SmallVector<size_t, 0> PreallocatedStackSizes;
SmallVector<SmallVector<size_t, 4>, 0> PreallocatedArgOffsets;
private:
/// ForwardedMustTailRegParms - A list of virtual and physical registers
/// that must be forwarded to every musttail call.
@ -184,6 +193,36 @@ public:
bool hasWinAlloca() const { return HasWinAlloca; }
void setHasWinAlloca(bool v) { HasWinAlloca = v; }
bool hasPreallocatedCall() const { return HasPreallocatedCall; }
void setHasPreallocatedCall(bool v) { HasPreallocatedCall = v; }
size_t getPreallocatedIdForCallSite(const Value *CS) {
auto Insert = PreallocatedIds.insert({CS, PreallocatedIds.size()});
if (Insert.second) {
PreallocatedStackSizes.push_back(0);
PreallocatedArgOffsets.emplace_back();
}
return Insert.first->second;
}
void setPreallocatedStackSize(size_t Id, size_t StackSize) {
PreallocatedStackSizes[Id] = StackSize;
}
size_t getPreallocatedStackSize(const size_t Id) {
assert(PreallocatedStackSizes[Id] != 0 && "stack size not set");
return PreallocatedStackSizes[Id];
}
void setPreallocatedArgOffsets(size_t Id, ArrayRef<size_t> AO) {
PreallocatedArgOffsets[Id].assign(AO.begin(), AO.end());
}
const ArrayRef<size_t> getPreallocatedArgOffsets(const size_t Id) {
assert(!PreallocatedArgOffsets[Id].empty() && "arg offsets not set");
return PreallocatedArgOffsets[Id];
}
};
} // End llvm namespace

View File

@ -627,6 +627,10 @@ static bool CantUseSP(const MachineFrameInfo &MFI) {
}
bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
if (X86FI->hasPreallocatedCall())
return true;
const MachineFrameInfo &MFI = MF.getFrameInfo();
if (!EnableBasePointer)

View File

@ -1016,8 +1016,8 @@ static bool shouldBeMustTail(const CallInst &CI, const Function &F) {
// CI should not has any ABI-impacting function attributes.
static const Attribute::AttrKind ABIAttrs[] = {
Attribute::StructRet, Attribute::ByVal, Attribute::InAlloca,
Attribute::InReg, Attribute::Returned, Attribute::SwiftSelf,
Attribute::SwiftError};
Attribute::Preallocated, Attribute::InReg, Attribute::Returned,
Attribute::SwiftSelf, Attribute::SwiftError};
AttributeList Attrs = CI.getAttributes();
for (auto AK : ABIAttrs)
if (Attrs.hasParamAttribute(0, AK))

View File

@ -1363,7 +1363,8 @@ bool Attributor::isValidFunctionSignatureRewrite(
AttributeList FnAttributeList = Fn->getAttributes();
if (FnAttributeList.hasAttrSomewhere(Attribute::Nest) ||
FnAttributeList.hasAttrSomewhere(Attribute::StructRet) ||
FnAttributeList.hasAttrSomewhere(Attribute::InAlloca)) {
FnAttributeList.hasAttrSomewhere(Attribute::InAlloca) ||
FnAttributeList.hasAttrSomewhere(Attribute::Preallocated)) {
LLVM_DEBUG(
dbgs() << "[Attributor] Cannot rewrite due to complex attribute\n");
return false;

View File

@ -4455,7 +4455,8 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
AAValueSimplifyImpl::initialize(A);
if (!getAnchorScope() || getAnchorScope()->isDeclaration())
indicatePessimisticFixpoint();
if (hasAttr({Attribute::InAlloca, Attribute::StructRet, Attribute::Nest},
if (hasAttr({Attribute::InAlloca, Attribute::Preallocated,
Attribute::StructRet, Attribute::Nest},
/* IgnoreSubsumingPositions */ true))
indicatePessimisticFixpoint();
@ -5695,7 +5696,7 @@ struct AAMemoryBehaviorArgument : AAMemoryBehaviorFloating {
// TODO: From readattrs.ll: "inalloca parameters are always
// considered written"
if (hasAttr({Attribute::InAlloca})) {
if (hasAttr({Attribute::InAlloca, Attribute::Preallocated})) {
removeKnownBits(NO_WRITES);
removeAssumedBits(NO_WRITES);
}

View File

@ -483,9 +483,10 @@ DeadArgumentEliminationPass::SurveyUses(const Value *V,
// We consider arguments of non-internal functions to be intrinsically alive as
// well as arguments to functions which have their "address taken".
void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
// Functions with inalloca parameters are expecting args in a particular
// register and memory layout.
if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca)) {
// Functions with inalloca/preallocated parameters are expecting args in a
// particular register and memory layout.
if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) {
MarkLive(F);
return;
}

View File

@ -447,7 +447,7 @@ determinePointerReadAttrs(Argument *A,
SmallPtrSet<Use *, 32> Visited;
// inalloca arguments are always clobbered by the call.
if (A->hasInAllocaAttr())
if (A->hasInAllocaAttr() || A->hasPreallocatedAttr())
return Attribute::None;
bool IsRead = false;

View File

@ -2333,6 +2333,7 @@ OptimizeFunctions(Module &M,
// wouldn't be safe in the presence of inalloca.
// FIXME: We should also hoist alloca affected by this to the entry
// block if possible.
// FIXME: handle preallocated
if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
!F->hasAddressTaken()) {
RemoveAttribute(F, Attribute::InAlloca);

View File

@ -4737,6 +4737,7 @@ bool InstCombiner::transformConstExprCastCall(CallBase &Call) {
//
// Similarly, avoid folding away bitcasts of byval calls.
if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated) ||
Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
return false;

View File

@ -246,6 +246,20 @@ entry:
; CHECK: calll _addrof_i32
; CHECK: retl
define void @avoid_preallocated(i32* preallocated(i32) %x) {
entry:
%x.p.p = alloca i32*
store i32* %x, i32** %x.p.p
call void @addrof_i32(i32* %x)
ret void
}
; CHECK-LABEL: _avoid_preallocated:
; CHECK: leal {{[0-9]+}}(%esp), %[[reg:[^ ]*]]
; CHECK: pushl %[[reg]]
; CHECK: calll _addrof_i32
; CHECK: retl
; Don't elide the copy when the alloca is escaped with a store.
define void @escape_with_store(i32 %x) {
%x1 = alloca i32

View File

@ -22,6 +22,8 @@
; Each member pointer creates a thunk. The ones with inalloca are required to
; tail calls by the ABI, even at O0.
; TODO: add tests for preallocated/musttail once supported
%struct.B = type { i32 (...)** }
%struct.A = type { i32 }

View File

@ -1,6 +1,8 @@
; RUN: llc -verify-machineinstrs -mtriple=i686-- < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=i686-- -O0 < %s | FileCheck %s
; TODO: add tests for preallocated/musttail once supported
; CHECK-LABEL: t1:
; CHECK: jmp {{_?}}t1_callee
define x86_thiscallcc void @t1(i8* %this) {

View File

@ -0,0 +1,23 @@
; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
; REQUIRES: asserts
; XFAIL: *
declare token @llvm.call.preallocated.setup(i32)
declare i8* @llvm.call.preallocated.arg(token, i32)
%Foo = type { i32, i32 }
declare void @init(%Foo*)
declare void @foo_p(%Foo* preallocated(%Foo))
define void @no_call() {
; CHECK-LABEL: _no_call:
%t = call token @llvm.call.preallocated.setup(i32 1)
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
%b = bitcast i8* %a to %Foo*
call void @init(%Foo* %b)
ret void
}

View File

@ -0,0 +1,18 @@
; RUN: llc %s -mtriple=x86_64-windows-msvc -o /dev/null 2>&1
; REQUIRES: asserts
; XFAIL: *
declare token @llvm.call.preallocated.setup(i32)
declare i8* @llvm.call.preallocated.arg(token, i32)
%Foo = type { i32, i32 }
declare x86_thiscallcc void @f(i32, %Foo* preallocated(%Foo))
define void @g() {
%t = call token @llvm.call.preallocated.setup(i32 1)
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
%b = bitcast i8* %a to %Foo*
call void @f(i32 0, %Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
ret void
}

View File

@ -0,0 +1,187 @@
; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
declare token @llvm.call.preallocated.setup(i32)
declare i8* @llvm.call.preallocated.arg(token, i32)
%Foo = type { i32, i32 }
declare void @init(%Foo*)
declare void @foo_p(%Foo* preallocated(%Foo))
define void @one_preallocated() {
; CHECK-LABEL: _one_preallocated:
%t = call token @llvm.call.preallocated.setup(i32 1)
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
%b = bitcast i8* %a to %Foo*
; CHECK: subl $8, %esp
; CHECK: calll _foo_p
call void @foo_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
ret void
}
define void @one_preallocated_two_blocks() {
; CHECK-LABEL: _one_preallocated_two_blocks:
%t = call token @llvm.call.preallocated.setup(i32 1)
br label %second
second:
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
%b = bitcast i8* %a to %Foo*
; CHECK: subl $8, %esp
; CHECK: calll _foo_p
call void @foo_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
ret void
}
define void @preallocated_with_store() {
; CHECK-LABEL: _preallocated_with_store:
; CHECK: subl $8, %esp
%t = call token @llvm.call.preallocated.setup(i32 1)
; CHECK: leal (%esp), [[REGISTER:%[a-z]+]]
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
%b = bitcast i8* %a to %Foo*
%p0 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
%p1 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
store i32 13, i32* %p0
store i32 42, i32* %p1
; CHECK-DAG: movl $13, ([[REGISTER]])
; CHECK-DAG: movl $42, 4([[REGISTER]])
; CHECK-NOT: subl {{\$[0-9]+}}, %esp
; CHECK-NOT: pushl
; CHECK: calll _foo_p
call void @foo_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
ret void
}
define void @preallocated_with_init() {
; CHECK-LABEL: _preallocated_with_init:
; CHECK: subl $8, %esp
%t = call token @llvm.call.preallocated.setup(i32 1)
; CHECK: leal (%esp), [[REGISTER:%[a-z]+]]
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
%b = bitcast i8* %a to %Foo*
; CHECK: pushl [[REGISTER]]
; CHECK: calll _init
call void @init(%Foo* %b)
; CHECK-NOT: subl {{\$[0-9]+}}, %esp
; CHECK-NOT: pushl
; CHECK: calll _foo_p
call void @foo_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
ret void
}
declare void @foo_p_p(%Foo* preallocated(%Foo), %Foo* preallocated(%Foo))
define void @two_preallocated() {
; CHECK-LABEL: _two_preallocated:
%t = call token @llvm.call.preallocated.setup(i32 2)
%a1 = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
%b1 = bitcast i8* %a1 to %Foo*
%a2 = call i8* @llvm.call.preallocated.arg(token %t, i32 1) preallocated(%Foo)
%b2 = bitcast i8* %a2 to %Foo*
; CHECK: subl $16, %esp
; CHECK: calll _foo_p_p
call void @foo_p_p(%Foo* preallocated(%Foo) %b1, %Foo* preallocated(%Foo) %b2) ["preallocated"(token %t)]
ret void
}
declare void @foo_p_int(%Foo* preallocated(%Foo), i32)
define void @one_preallocated_one_normal() {
; CHECK-LABEL: _one_preallocated_one_normal:
; CHECK: subl $12, %esp
%t = call token @llvm.call.preallocated.setup(i32 1)
; CHECK: leal (%esp), [[REGISTER:%[a-z]+]]
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
%b = bitcast i8* %a to %Foo*
; CHECK: pushl [[REGISTER]]
; CHECK: calll _init
call void @init(%Foo* %b)
; CHECK-NOT: subl {{\$[0-9]+}}, %esp
; CHECK-NOT: pushl
; CHECK: movl $2, 8(%esp)
; CHECK: calll _foo_p_int
call void @foo_p_int(%Foo* preallocated(%Foo) %b, i32 2) ["preallocated"(token %t)]
ret void
}
declare void @foo_ret_p(%Foo* sret, %Foo* preallocated(%Foo))
define void @nested_with_init() {
; CHECK-LABEL: _nested_with_init:
%tmp = alloca %Foo
%t1 = call token @llvm.call.preallocated.setup(i32 1)
; CHECK: subl $12, %esp
%a1 = call i8* @llvm.call.preallocated.arg(token %t1, i32 0) preallocated(%Foo)
%b1 = bitcast i8* %a1 to %Foo*
; CHECK: leal 4(%esp), [[REGISTER1:%[a-z]+]]
%t2 = call token @llvm.call.preallocated.setup(i32 1)
; CHECK: subl $12, %esp
%a2 = call i8* @llvm.call.preallocated.arg(token %t2, i32 0) preallocated(%Foo)
; CHECK: leal 4(%esp), [[REGISTER2:%[a-z]+]]
%b2 = bitcast i8* %a2 to %Foo*
call void @init(%Foo* %b2)
; CHECK: pushl [[REGISTER2]]
; CHECK: calll _init
call void @foo_ret_p(%Foo* %b1, %Foo* preallocated(%Foo) %b2) ["preallocated"(token %t2)]
; CHECK-NOT: subl {{\$[0-9]+}}, %esp
; CHECK-NOT: pushl
; CHECK: calll _foo_ret_p
call void @foo_ret_p(%Foo* %tmp, %Foo* preallocated(%Foo) %b1) ["preallocated"(token %t1)]
; CHECK-NOT: subl {{\$[0-9]+}}, %esp
; CHECK-NOT: pushl
; CHECK: calll _foo_ret_p
ret void
}
declare void @foo_inreg_p(i32 inreg, %Foo* preallocated(%Foo))
define void @inreg() {
; CHECK-LABEL: _inreg:
%t = call token @llvm.call.preallocated.setup(i32 1)
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
%b = bitcast i8* %a to %Foo*
; CHECK: subl $8, %esp
; CHECK: movl $9, %eax
; CHECK: calll _foo_inreg_p
call void @foo_inreg_p(i32 9, %Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
ret void
}
declare x86_thiscallcc void @foo_thiscall_p(i8*, %Foo* preallocated(%Foo))
define void @thiscall() {
; CHECK-LABEL: _thiscall:
%t = call token @llvm.call.preallocated.setup(i32 1)
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
%b = bitcast i8* %a to %Foo*
; CHECK: subl $8, %esp
; CHECK: xorl %ecx, %ecx
; CHECK: calll _foo_thiscall_p
call x86_thiscallcc void @foo_thiscall_p(i8* null, %Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
ret void
}
declare x86_stdcallcc void @foo_stdcall_p(%Foo* preallocated(%Foo))
declare x86_stdcallcc void @i(i32)
define void @stdcall() {
; CHECK-LABEL: _stdcall:
%t = call token @llvm.call.preallocated.setup(i32 1)
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
%b = bitcast i8* %a to %Foo*
; CHECK: subl $8, %esp
; CHECK: calll _foo_stdcall_p@8
call x86_stdcallcc void @foo_stdcall_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
; CHECK-NOT: %esp
; CHECK: pushl
; CHECK: calll _i@4
call x86_stdcallcc void @i(i32 0)
ret void
}

View File

@ -1,5 +1,8 @@
; RUN: llc < %s -enable-shrink-wrap=true | FileCheck %s
; TODO: add preallocated versions of tests
; we don't yet support conditionally called preallocated calls after the setup
; chkstk cannot come before the usual prologue, since it adjusts ESP.
; If chkstk is used in the prologue, we also have to be careful about preserving
; EAX if it is used.

View File

@ -9,6 +9,21 @@ target triple = "i386-pc-windows-msvc19.0.24215"
declare x86_stdcallcc void @tail_std(i32)
declare void @capture(i32*)
define x86_thiscallcc void @preallocated(i32* %this, i32* preallocated(i32) %args) {
entry:
%val = load i32, i32* %args
store i32 0, i32* %args
tail call x86_stdcallcc void @tail_std(i32 %val)
ret void
}
; CHECK-LABEL: _preallocated: # @preallocated
; CHECK: movl 4(%esp), %[[reg:[^ ]*]]
; CHECK: movl $0, 4(%esp)
; CHECK: pushl %[[reg]]
; CHECK: calll _tail_std@4
; CHECK: retl $4
define x86_thiscallcc void @inalloca(i32* %this, i32* inalloca %args) {
entry:
%val = load i32, i32* %args

View File

@ -6,6 +6,8 @@
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
declare void @f(i32)
declare token @llvm.call.preallocated.setup(i32)
declare i8* @llvm.call.preallocated.arg(token, i32)
; Test1: Replace argument with constant
define internal void @test1(i32 %a) {
@ -280,6 +282,24 @@ define i32* @complicated_args_inalloca() {
ret i32* %call
}
define internal i32* @test_preallocated(i32* preallocated(i32) %a) {
; CHECK-LABEL: define {{[^@]+}}@test_preallocated
; CHECK-SAME: (i32* noalias nofree returned writeonly preallocated(i32) align 536870912 "no-capture-maybe-returned" [[A:%.*]])
; CHECK-NEXT: ret i32* [[A]]
;
ret i32* %a
}
define i32* @complicated_args_preallocated() {
; CHECK-LABEL: define {{[^@]+}}@complicated_args_preallocated()
; CHECK-NEXT: [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 1)
; CHECK-NEXT: [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree writeonly preallocated(i32) align 536870912 null)
; CHECK-NEXT: ret i32* [[CALL]]
;
%c = call token @llvm.call.preallocated.setup(i32 1)
%call = call i32* @test_preallocated(i32* preallocated(i32) null) ["preallocated"(token %c)]
ret i32* %call
}
define internal void @test_sret(%struct.X* sret %a, %struct.X** %b) {
;
; CHECK-LABEL: define {{[^@]+}}@test_sret

View File

@ -1,5 +1,8 @@
; RUN: opt < %s -deadargelim -S | FileCheck %s
declare token @llvm.call.preallocated.setup(i32)
declare i8* @llvm.call.preallocated.arg(token, i32)
%Ty = type <{ i32, i32 }>
; Check if the pass doesn't modify anything that doesn't need changing. We feed
@ -44,4 +47,22 @@ define i32 @caller2() {
ret i32 %v
}
; We can't remove 'this' here, as that would put argmem in ecx instead of
; memory.
define internal x86_thiscallcc i32 @unused_this_preallocated(i32* %this, i32* preallocated(i32) %argmem) {
%v = load i32, i32* %argmem
ret i32 %v
}
; CHECK-LABEL: define internal x86_thiscallcc i32 @unused_this_preallocated(i32* %this, i32* preallocated(i32) %argmem)
define i32 @caller3() {
%t = alloca i32
%c = call token @llvm.call.preallocated.setup(i32 1)
%M = call i8* @llvm.call.preallocated.arg(token %c, i32 0) preallocated(i32)
%m = bitcast i8* %M to i32*
store i32 42, i32* %m
%v = call x86_thiscallcc i32 @unused_this_preallocated(i32* %t, i32* preallocated(i32) %m) ["preallocated"(token %c)]
ret i32 %v
}
; CHECK: attributes #0 = { nounwind }

View File

@ -58,6 +58,16 @@ define void @test9_2(%struct.x* inalloca %a) nounwind {
ret void
}
; Test for preallocated handling.
define void @test9_3(%struct.x* preallocated(%struct.x) %a) nounwind {
; CHECK-LABEL: @test9_3(
; CHECK-NEXT: ret void
;
%tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
store i32 1, i32* %tmp2, align 4
ret void
}
; DSE should delete the dead trampoline.
declare void @test11f()
define void @test11() {

View File

@ -169,6 +169,16 @@ define void @test9_2(%struct.x* inalloca %a) nounwind {
ret void
}
; Test for preallocated handling.
define void @test9_3(%struct.x* preallocated(%struct.x) %a) nounwind {
; CHECK-LABEL: @test9_3(
; CHECK-NEXT: ret void
;
%tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
store i32 1, i32* %tmp2, align 4
ret void
}
; va_arg has fuzzy dependence, the store shouldn't be zapped.
define double @test10(i8* %X) {
; CHECK-LABEL: @test10(

View File

@ -56,6 +56,12 @@ define void @test7_1(i32* inalloca %a) {
ret void
}
; CHECK: define void @test7_2(i32* nocapture preallocated(i32) %a)
; preallocated parameters are always considered written
define void @test7_2(i32* preallocated(i32) %a) {
ret void
}
; CHECK: define i32* @test8_1(i32* readnone returned %p)
define i32* @test8_1(i32* %p) {
entry:

View File

@ -1,5 +1,8 @@
; RUN: opt < %s -globalopt -S | FileCheck %s
declare token @llvm.call.preallocated.setup(i32)
declare i8* @llvm.call.preallocated.arg(token, i32)
define internal i32 @f(i32* %m) {
; CHECK-LABEL: define internal fastcc i32 @f
%v = load i32, i32* %m
@ -32,6 +35,13 @@ define internal i32 @inalloca(i32* inalloca %p) {
ret i32 %rv
}
define internal i32 @preallocated(i32* preallocated(i32) %p) {
; TODO: handle preallocated:
; CHECK-NOT-LABEL: define internal fastcc i32 @preallocated(i32* %p)
%rv = load i32, i32* %p
ret i32 %rv
}
define void @call_things() {
%m = alloca i32
call i32 @f(i32* %m)
@ -40,6 +50,11 @@ define void @call_things() {
call i32 @j(i32* %m)
%args = alloca inalloca i32
call i32 @inalloca(i32* inalloca %args)
; TODO: handle preallocated
;%c = call token @llvm.call.preallocated.setup(i32 1)
;%N = call i8* @llvm.call.preallocated.arg(token %c, i32 0) preallocated(i32)
;%n = bitcast i8* %N to i32*
;call i32 @preallocated(i32* preallocated(i32) %n) ["preallocated"(token %c)]
ret void
}

View File

@ -0,0 +1,28 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-p:32:32"
target triple = "i686-pc-win32"
declare token @llvm.call.preallocated.setup(i32)
declare i8* @llvm.call.preallocated.arg(token, i32)
declare void @takes_i32(i32)
declare void @takes_i32_preallocated(i32* preallocated(i32))
define void @f() {
; CHECK-LABEL: define void @f()
%t = call token @llvm.call.preallocated.setup(i32 1)
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(i32)
%arg = bitcast i8* %a to i32*
call void bitcast (void (i32)* @takes_i32 to void (i32*)*)(i32* preallocated(i32) %arg) ["preallocated"(token %t)]
; CHECK: call void bitcast{{.*}}@takes_i32
ret void
}
define void @g() {
; CHECK-LABEL: define void @g()
call void bitcast (void (i32*)* @takes_i32_preallocated to void (i32)*)(i32 0)
; CHECK: call void bitcast{{.*}}@takes_i32_preallocated
ret void
}