forked from OSchip/llvm-project
Reland [X86] Codegen for preallocated
See https://reviews.llvm.org/D74651 for the preallocated IR constructs and LangRef changes. In X86TargetLowering::LowerCall(), if a call is preallocated, record each argument's offset from the stack pointer and the total stack adjustment. Associate the call Value with an integer index. Store the info in X86MachineFunctionInfo with the integer index as the key. This adds two new target independent ISDOpcodes and two new target dependent Opcodes corresponding to @llvm.call.preallocated.{setup,arg}. The setup ISelDAG node takes in a chain and outputs a chain and a SrcValue of the preallocated call Value. It is lowered to a target dependent node with the SrcValue replaced with the integer index key by looking in X86MachineFunctionInfo. In X86TargetLowering::EmitInstrWithCustomInserter() this is lowered to an %esp adjustment, the exact amount determined by looking in X86MachineFunctionInfo with the integer index key. The arg ISelDAG node takes in a chain, a SrcValue of the preallocated call Value, and the arg index int constant. It produces a chain and the pointer to the arg. It is lowered to a target dependent node with the SrcValue replaced with the integer index key by looking in X86MachineFunctionInfo. In X86TargetLowering::EmitInstrWithCustomInserter() this is lowered to a lea of the stack pointer plus an offset determined by looking in X86MachineFunctionInfo with the integer index key. Force any function containing a preallocated call to use the frame pointer. Does not yet handle a setup without a call, or a conditional call. Does not yet handle musttail. That requires a LangRef change first. Tried to look at all references to inalloca and see if they apply to preallocated. I've made preallocated versions of tests testing inalloca whenever possible and when they make sense (e.g. not alloca related, inalloca edge cases).
Aside from the tests added here, I checked that this codegen produces correct code for something like ``` struct A { A(); A(A&&); ~A(); }; void bar() { foo(foo(foo(foo(foo(A(), 4), 5), 6), 7), 8); } ``` by replacing the inalloca version of the .ll file with the appropriate preallocated code. Running the executable produces the same results as using the current inalloca implementation. Reverted due to unexpectedly passing tests, added REQUIRES: asserts for reland. Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D77689
This commit is contained in:
parent
bfb2783726
commit
8a88755610
|
@ -906,6 +906,13 @@ enum NodeType {
|
|||
VAEND,
|
||||
VASTART,
|
||||
|
||||
// PREALLOCATED_SETUP - This has 2 operands: an input chain and a SRCVALUE
|
||||
// with the preallocated call Value.
|
||||
PREALLOCATED_SETUP,
|
||||
// PREALLOCATED_ARG - This has 3 operands: an input chain, a SRCVALUE
|
||||
// with the preallocated call Value, and a constant int.
|
||||
PREALLOCATED_ARG,
|
||||
|
||||
/// SRCVALUE - This is a node type that holds a Value* that is used to
|
||||
/// make reference to a value in the LLVM IR.
|
||||
SRCVALUE,
|
||||
|
|
|
@ -35,6 +35,7 @@ namespace ISD {
|
|||
unsigned IsReturned : 1; ///< Always returned
|
||||
unsigned IsSplit : 1;
|
||||
unsigned IsInAlloca : 1; ///< Passed with inalloca
|
||||
unsigned IsPreallocated : 1; ///< ByVal without the copy
|
||||
unsigned IsSplitEnd : 1; ///< Last part of a split
|
||||
unsigned IsSwiftSelf : 1; ///< Swift self parameter
|
||||
unsigned IsSwiftError : 1; ///< Swift error parameter
|
||||
|
@ -56,9 +57,9 @@ namespace ISD {
|
|||
public:
|
||||
ArgFlagsTy()
|
||||
: IsZExt(0), IsSExt(0), IsInReg(0), IsSRet(0), IsByVal(0), IsNest(0),
|
||||
IsReturned(0), IsSplit(0), IsInAlloca(0), IsSplitEnd(0),
|
||||
IsSwiftSelf(0), IsSwiftError(0), IsCFGuardTarget(0), IsHva(0),
|
||||
IsHvaStart(0), IsSecArgPass(0), ByValAlign(0), OrigAlign(0),
|
||||
IsReturned(0), IsSplit(0), IsInAlloca(0), IsPreallocated(0),
|
||||
IsSplitEnd(0), IsSwiftSelf(0), IsSwiftError(0), IsCFGuardTarget(0),
|
||||
IsHva(0), IsHvaStart(0), IsSecArgPass(0), ByValAlign(0), OrigAlign(0),
|
||||
IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0),
|
||||
IsCopyElisionCandidate(0), IsPointer(0), ByValSize(0),
|
||||
PointerAddrSpace(0) {
|
||||
|
@ -83,6 +84,9 @@ namespace ISD {
|
|||
bool isInAlloca() const { return IsInAlloca; }
|
||||
void setInAlloca() { IsInAlloca = 1; }
|
||||
|
||||
bool isPreallocated() const { return IsPreallocated; }
|
||||
void setPreallocated() { IsPreallocated = 1; }
|
||||
|
||||
bool isSwiftSelf() const { return IsSwiftSelf; }
|
||||
void setSwiftSelf() { IsSwiftSelf = 1; }
|
||||
|
||||
|
|
|
@ -273,17 +273,20 @@ public:
|
|||
bool IsNest : 1;
|
||||
bool IsByVal : 1;
|
||||
bool IsInAlloca : 1;
|
||||
bool IsPreallocated : 1;
|
||||
bool IsReturned : 1;
|
||||
bool IsSwiftSelf : 1;
|
||||
bool IsSwiftError : 1;
|
||||
bool IsCFGuardTarget : 1;
|
||||
MaybeAlign Alignment = None;
|
||||
Type *ByValType = nullptr;
|
||||
Type *PreallocatedType = nullptr;
|
||||
|
||||
ArgListEntry()
|
||||
: IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
|
||||
IsNest(false), IsByVal(false), IsInAlloca(false), IsReturned(false),
|
||||
IsSwiftSelf(false), IsSwiftError(false), IsCFGuardTarget(false) {}
|
||||
IsNest(false), IsByVal(false), IsInAlloca(false),
|
||||
IsPreallocated(false), IsReturned(false), IsSwiftSelf(false),
|
||||
IsSwiftError(false), IsCFGuardTarget(false) {}
|
||||
|
||||
void setAttributes(const CallBase *Call, unsigned ArgIdx);
|
||||
};
|
||||
|
@ -3608,6 +3611,7 @@ public:
|
|||
bool IsReturnValueUsed : 1;
|
||||
bool IsConvergent : 1;
|
||||
bool IsPatchPoint : 1;
|
||||
bool IsPreallocated : 1;
|
||||
|
||||
// IsTailCall should be modified by implementations of
|
||||
// TargetLowering::LowerCall that perform tail call conversions.
|
||||
|
@ -3631,7 +3635,7 @@ public:
|
|||
CallLoweringInfo(SelectionDAG &DAG)
|
||||
: RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false),
|
||||
DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false),
|
||||
IsPatchPoint(false), DAG(DAG) {}
|
||||
IsPatchPoint(false), IsPreallocated(false), DAG(DAG) {}
|
||||
|
||||
CallLoweringInfo &setDebugLoc(const SDLoc &dl) {
|
||||
DL = dl;
|
||||
|
@ -3737,6 +3741,11 @@ public:
|
|||
return *this;
|
||||
}
|
||||
|
||||
CallLoweringInfo &setIsPreallocated(bool Value = true) {
|
||||
IsPreallocated = Value;
|
||||
return *this;
|
||||
}
|
||||
|
||||
CallLoweringInfo &setIsPostTypeLegalization(bool Value=true) {
|
||||
IsPostTypeLegalization = Value;
|
||||
return *this;
|
||||
|
|
|
@ -110,6 +110,9 @@ public:
|
|||
/// Return true if this argument has the inalloca attribute.
|
||||
bool hasInAllocaAttr() const;
|
||||
|
||||
/// Return true if this argument has the preallocated attribute.
|
||||
bool hasPreallocatedAttr() const;
|
||||
|
||||
/// Return true if this argument has the zext attribute.
|
||||
bool hasZExtAttr() const;
|
||||
|
||||
|
|
|
@ -623,6 +623,9 @@ public:
|
|||
/// Return the byval type for the specified function parameter.
|
||||
Type *getParamByValType(unsigned ArgNo) const;
|
||||
|
||||
/// Return the preallocated type for the specified function parameter.
|
||||
Type *getParamPreallocatedType(unsigned ArgNo) const;
|
||||
|
||||
/// Get the stack alignment.
|
||||
MaybeAlign getStackAlignment(unsigned Index) const;
|
||||
|
||||
|
|
|
@ -1604,6 +1604,12 @@ public:
|
|||
return Ty ? Ty : getArgOperand(ArgNo)->getType()->getPointerElementType();
|
||||
}
|
||||
|
||||
/// Extract the preallocated type for a call or parameter.
|
||||
Type *getParamPreallocatedType(unsigned ArgNo) const {
|
||||
Type *Ty = Attrs.getParamPreallocatedType(ArgNo);
|
||||
return Ty ? Ty : getArgOperand(ArgNo)->getType()->getPointerElementType();
|
||||
}
|
||||
|
||||
/// Extract the number of dereferenceable bytes for a call or
|
||||
/// parameter (0=unknown).
|
||||
uint64_t getDereferenceableBytes(unsigned i) const {
|
||||
|
|
|
@ -127,6 +127,12 @@ HANDLE_TARGET_OPCODE(PATCHPOINT)
|
|||
/// additionally expand this pseudo after register allocation.
|
||||
HANDLE_TARGET_OPCODE(LOAD_STACK_GUARD)
|
||||
|
||||
/// These are used to support call sites that must have the stack adjusted
|
||||
/// before the call (e.g. to initialize an argument passed by value).
|
||||
/// See llvm.call.preallocated.{setup,arg} in the LangRef for more details.
|
||||
HANDLE_TARGET_OPCODE(PREALLOCATED_SETUP)
|
||||
HANDLE_TARGET_OPCODE(PREALLOCATED_ARG)
|
||||
|
||||
/// Call instruction with associated vm state for deoptimization and list
|
||||
/// of live pointers for relocation by the garbage collector. It is
|
||||
/// intended to support garbage collection with fully precise relocating
|
||||
|
|
|
@ -1173,6 +1173,18 @@ def LOAD_STACK_GUARD : StandardPseudoInstruction {
|
|||
let hasSideEffects = 0;
|
||||
bit isPseudo = 1;
|
||||
}
|
||||
def PREALLOCATED_SETUP : StandardPseudoInstruction {
|
||||
let OutOperandList = (outs);
|
||||
let InOperandList = (ins i32imm:$a);
|
||||
let usesCustomInserter = 1;
|
||||
let hasSideEffects = 1;
|
||||
}
|
||||
def PREALLOCATED_ARG : StandardPseudoInstruction {
|
||||
let OutOperandList = (outs ptr_rc:$loc);
|
||||
let InOperandList = (ins i32imm:$a, i32imm:$b);
|
||||
let usesCustomInserter = 1;
|
||||
let hasSideEffects = 1;
|
||||
}
|
||||
def LOCAL_ESCAPE : StandardPseudoInstruction {
|
||||
// This instruction is really just a label. It has to be part of the chain so
|
||||
// that it doesn't get dropped from the DAG, but it produces nothing and has
|
||||
|
|
|
@ -41,6 +41,11 @@ class CCIf<string predicate, CCAction A> : CCPredicateAction<A> {
|
|||
class CCIfByVal<CCAction A> : CCIf<"ArgFlags.isByVal()", A> {
|
||||
}
|
||||
|
||||
/// CCIfPreallocated - If the current argument has Preallocated parameter attribute,
|
||||
/// apply Action A.
|
||||
class CCIfPreallocated<CCAction A> : CCIf<"ArgFlags.isPreallocated()", A> {
|
||||
}
|
||||
|
||||
/// CCIfSwiftSelf - If the current argument has swiftself parameter attribute,
|
||||
/// apply Action A.
|
||||
class CCIfSwiftSelf<CCAction A> : CCIf<"ArgFlags.isSwiftSelf()", A> {
|
||||
|
|
|
@ -96,10 +96,12 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
|
|||
Flags.setSwiftError();
|
||||
if (Attrs.hasAttribute(OpIdx, Attribute::ByVal))
|
||||
Flags.setByVal();
|
||||
if (Attrs.hasAttribute(OpIdx, Attribute::Preallocated))
|
||||
Flags.setPreallocated();
|
||||
if (Attrs.hasAttribute(OpIdx, Attribute::InAlloca))
|
||||
Flags.setInAlloca();
|
||||
|
||||
if (Flags.isByVal() || Flags.isInAlloca()) {
|
||||
if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
|
||||
Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType();
|
||||
|
||||
auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType();
|
||||
|
|
|
@ -1214,7 +1214,16 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
|
|||
// the various CC lowering callbacks.
|
||||
Flags.setByVal();
|
||||
}
|
||||
if (Arg.IsByVal || Arg.IsInAlloca) {
|
||||
if (Arg.IsPreallocated) {
|
||||
Flags.setPreallocated();
|
||||
// Set the byval flag for CCAssignFn callbacks that don't know about
|
||||
// preallocated. This way we can know how many bytes we should've
|
||||
// allocated and how many bytes a callee cleanup function will pop. If we
|
||||
// port preallocated to more targets, we'll have to add custom
|
||||
// preallocated handling in the various CC lowering callbacks.
|
||||
Flags.setByVal();
|
||||
}
|
||||
if (Arg.IsByVal || Arg.IsInAlloca || Arg.IsPreallocated) {
|
||||
PointerType *Ty = cast<PointerType>(Arg.Ty);
|
||||
Type *ElementTy = Ty->getElementType();
|
||||
unsigned FrameSize =
|
||||
|
|
|
@ -1873,9 +1873,6 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
|
|||
}
|
||||
|
||||
SDValue SelectionDAG::getSrcValue(const Value *V) {
|
||||
assert((!V || V->getType()->isPointerTy()) &&
|
||||
"SrcValue is not a pointer?");
|
||||
|
||||
FoldingSetNodeID ID;
|
||||
AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), None);
|
||||
ID.AddPointer(V);
|
||||
|
|
|
@ -5606,6 +5606,23 @@ void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I,
|
|||
LowerCallTo(I, Callee, I.isTailCall());
|
||||
}
|
||||
|
||||
/// Given a @llvm.call.preallocated.setup, return the corresponding
|
||||
/// preallocated call.
|
||||
static const CallBase *FindPreallocatedCall(const Value *PreallocatedSetup) {
|
||||
assert(cast<CallBase>(PreallocatedSetup)
|
||||
->getCalledFunction()
|
||||
->getIntrinsicID() == Intrinsic::call_preallocated_setup &&
|
||||
"expected call_preallocated_setup Value");
|
||||
for (auto *U : PreallocatedSetup->users()) {
|
||||
auto *UseCall = cast<CallBase>(U);
|
||||
const Function *Fn = UseCall->getCalledFunction();
|
||||
if (!Fn || Fn->getIntrinsicID() != Intrinsic::call_preallocated_arg) {
|
||||
return UseCall;
|
||||
}
|
||||
}
|
||||
llvm_unreachable("expected corresponding call to preallocated setup/arg");
|
||||
}
|
||||
|
||||
/// Lower the call to the specified intrinsic function.
|
||||
void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
|
||||
unsigned Intrinsic) {
|
||||
|
@ -5798,6 +5815,30 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
|
|||
updateDAGForMaybeTailCall(MC);
|
||||
return;
|
||||
}
|
||||
case Intrinsic::call_preallocated_setup: {
|
||||
const CallBase *PreallocatedCall = FindPreallocatedCall(&I);
|
||||
SDValue SrcValue = DAG.getSrcValue(PreallocatedCall);
|
||||
SDValue Res = DAG.getNode(ISD::PREALLOCATED_SETUP, sdl, MVT::Other,
|
||||
getRoot(), SrcValue);
|
||||
setValue(&I, Res);
|
||||
DAG.setRoot(Res);
|
||||
return;
|
||||
}
|
||||
case Intrinsic::call_preallocated_arg: {
|
||||
const CallBase *PreallocatedCall = FindPreallocatedCall(I.getOperand(0));
|
||||
SDValue SrcValue = DAG.getSrcValue(PreallocatedCall);
|
||||
SDValue Ops[3];
|
||||
Ops[0] = getRoot();
|
||||
Ops[1] = SrcValue;
|
||||
Ops[2] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(1)), sdl,
|
||||
MVT::i32); // arg index
|
||||
SDValue Res = DAG.getNode(
|
||||
ISD::PREALLOCATED_ARG, sdl,
|
||||
DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Ops);
|
||||
setValue(&I, Res);
|
||||
DAG.setRoot(Res.getValue(1));
|
||||
return;
|
||||
}
|
||||
case Intrinsic::dbg_addr:
|
||||
case Intrinsic::dbg_declare: {
|
||||
const auto &DI = cast<DbgVariableIntrinsic>(I);
|
||||
|
@ -7116,7 +7157,9 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
|
|||
.setChain(getRoot())
|
||||
.setCallee(RetTy, FTy, Callee, std::move(Args), CB)
|
||||
.setTailCall(isTailCall)
|
||||
.setConvergent(CB.isConvergent());
|
||||
.setConvergent(CB.isConvergent())
|
||||
.setIsPreallocated(
|
||||
CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
|
||||
std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
|
||||
|
||||
if (Result.first.getNode()) {
|
||||
|
@ -7642,9 +7685,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
|
|||
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
|
||||
// have to do anything here to lower funclet bundles.
|
||||
// CFGuardTarget bundles are lowered in LowerCallTo.
|
||||
assert(!I.hasOperandBundlesOtherThan({LLVMContext::OB_deopt,
|
||||
LLVMContext::OB_funclet,
|
||||
LLVMContext::OB_cfguardtarget}) &&
|
||||
assert(!I.hasOperandBundlesOtherThan(
|
||||
{LLVMContext::OB_deopt, LLVMContext::OB_funclet,
|
||||
LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated}) &&
|
||||
"Cannot lower calls with arbitrary operand bundles!");
|
||||
|
||||
SDValue Callee = getValue(I.getCalledOperand());
|
||||
|
@ -8605,7 +8648,9 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
|
|||
.setChain(getRoot())
|
||||
.setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args))
|
||||
.setDiscardResult(Call->use_empty())
|
||||
.setIsPatchPoint(IsPatchPoint);
|
||||
.setIsPatchPoint(IsPatchPoint)
|
||||
.setIsPreallocated(
|
||||
Call->countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
|
||||
}
|
||||
|
||||
/// Add a stack map intrinsic call's live variable operands to a stackmap
|
||||
|
@ -9125,6 +9170,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
|
|||
Flags.setCFGuardTarget();
|
||||
if (Args[i].IsByVal)
|
||||
Flags.setByVal();
|
||||
if (Args[i].IsPreallocated) {
|
||||
Flags.setPreallocated();
|
||||
// Set the byval flag for CCAssignFn callbacks that don't know about
|
||||
// preallocated. This way we can know how many bytes we should've
|
||||
// allocated and how many bytes a callee cleanup function will pop. If
|
||||
// we port preallocated to more targets, we'll have to add custom
|
||||
// preallocated handling in the various CC lowering callbacks.
|
||||
Flags.setByVal();
|
||||
}
|
||||
if (Args[i].IsInAlloca) {
|
||||
Flags.setInAlloca();
|
||||
// Set the byval flag for CCAssignFn callbacks that don't know about
|
||||
|
@ -9134,7 +9188,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
|
|||
// in the various CC lowering callbacks.
|
||||
Flags.setByVal();
|
||||
}
|
||||
if (Args[i].IsByVal || Args[i].IsInAlloca) {
|
||||
if (Args[i].IsByVal || Args[i].IsInAlloca || Args[i].IsPreallocated) {
|
||||
PointerType *Ty = cast<PointerType>(Args[i].Ty);
|
||||
Type *ElementTy = Ty->getElementType();
|
||||
|
||||
|
@ -9448,7 +9502,7 @@ findArgumentCopyElisionCandidates(const DataLayout &DL,
|
|||
// initializes the alloca. Don't elide copies from the same argument twice.
|
||||
const Value *Val = SI->getValueOperand()->stripPointerCasts();
|
||||
const auto *Arg = dyn_cast<Argument>(Val);
|
||||
if (!Arg || Arg->hasInAllocaAttr() || Arg->hasByValAttr() ||
|
||||
if (!Arg || Arg->hasPassPointeeByValueAttr() ||
|
||||
Arg->getType()->isEmptyTy() ||
|
||||
DL.getTypeStoreSize(Arg->getType()) !=
|
||||
DL.getTypeAllocSize(AI->getAllocatedType()) ||
|
||||
|
@ -9634,12 +9688,21 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
|
|||
// in the various CC lowering callbacks.
|
||||
Flags.setByVal();
|
||||
}
|
||||
if (Arg.hasAttribute(Attribute::Preallocated)) {
|
||||
Flags.setPreallocated();
|
||||
// Set the byval flag for CCAssignFn callbacks that don't know about
|
||||
// preallocated. This way we can know how many bytes we should've
|
||||
// allocated and how many bytes a callee cleanup function will pop. If
|
||||
// we port preallocated to more targets, we'll have to add custom
|
||||
// preallocated handling in the various CC lowering callbacks.
|
||||
Flags.setByVal();
|
||||
}
|
||||
if (F.getCallingConv() == CallingConv::X86_INTR) {
|
||||
// IA Interrupt passes frame (1st parameter) by value in the stack.
|
||||
if (ArgNo == 0)
|
||||
Flags.setByVal();
|
||||
}
|
||||
if (Flags.isByVal() || Flags.isInAlloca()) {
|
||||
if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
|
||||
Type *ElementTy = Arg.getParamByValType();
|
||||
|
||||
// For ByVal, size and alignment should be passed from FE. BE will
|
||||
|
|
|
@ -393,6 +393,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
|
|||
case ISD::GC_TRANSITION_END: return "gc_transition.end";
|
||||
case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset";
|
||||
case ISD::FREEZE: return "freeze";
|
||||
case ISD::PREALLOCATED_SETUP:
|
||||
return "call_setup";
|
||||
case ISD::PREALLOCATED_ARG:
|
||||
return "call_alloc";
|
||||
|
||||
// Bit manipulation
|
||||
case ISD::ABS: return "abs";
|
||||
|
|
|
@ -110,14 +110,18 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
|
|||
IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
|
||||
IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
|
||||
IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
|
||||
IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
|
||||
IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
|
||||
IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
|
||||
IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
|
||||
IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
|
||||
Alignment = Call->getParamAlign(ArgIdx);
|
||||
ByValType = nullptr;
|
||||
if (Call->paramHasAttr(ArgIdx, Attribute::ByVal))
|
||||
if (IsByVal)
|
||||
ByValType = Call->getParamByValType(ArgIdx);
|
||||
PreallocatedType = nullptr;
|
||||
if (IsPreallocated)
|
||||
PreallocatedType = Call->getParamPreallocatedType(ArgIdx);
|
||||
}
|
||||
|
||||
/// Generate a libcall taking the given operands as arguments and returning a
|
||||
|
|
|
@ -1437,6 +1437,10 @@ Type *AttributeList::getParamByValType(unsigned Index) const {
|
|||
return getAttributes(Index+FirstArgIndex).getByValType();
|
||||
}
|
||||
|
||||
Type *AttributeList::getParamPreallocatedType(unsigned Index) const {
|
||||
return getAttributes(Index + FirstArgIndex).getPreallocatedType();
|
||||
}
|
||||
|
||||
MaybeAlign AttributeList::getStackAlignment(unsigned Index) const {
|
||||
return getAttributes(Index).getStackAlignment();
|
||||
}
|
||||
|
|
|
@ -114,6 +114,12 @@ bool Argument::hasInAllocaAttr() const {
|
|||
return hasAttribute(Attribute::InAlloca);
|
||||
}
|
||||
|
||||
bool Argument::hasPreallocatedAttr() const {
|
||||
if (!getType()->isPointerTy())
|
||||
return false;
|
||||
return hasAttribute(Attribute::Preallocated);
|
||||
}
|
||||
|
||||
bool Argument::hasPassPointeeByValueAttr() const {
|
||||
if (!getType()->isPointerTy()) return false;
|
||||
AttributeList Attrs = getParent()->getAttributes();
|
||||
|
|
|
@ -789,8 +789,9 @@ def CC_X86_32_Vector_Darwin : CallingConv<[
|
|||
/// CC_X86_32_Common - In all X86-32 calling conventions, extra integers and FP
|
||||
/// values are spilled on the stack.
|
||||
def CC_X86_32_Common : CallingConv<[
|
||||
// Handles byval parameters.
|
||||
// Handles byval/preallocated parameters.
|
||||
CCIfByVal<CCPassByVal<4, 4>>,
|
||||
CCIfPreallocated<CCPassByVal<4, 4>>,
|
||||
|
||||
// The first 3 float or double arguments, if marked 'inreg' and if the call
|
||||
// is not a vararg call and if SSE2 is available, are passed in SSE registers.
|
||||
|
|
|
@ -3245,7 +3245,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
|
|||
return false;
|
||||
|
||||
for (auto Flag : CLI.OutFlags)
|
||||
if (Flag.isSwiftError())
|
||||
if (Flag.isSwiftError() || Flag.isPreallocated())
|
||||
return false;
|
||||
|
||||
SmallVector<MVT, 16> OutVTs;
|
||||
|
|
|
@ -57,7 +57,8 @@ X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
|
|||
|
||||
bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
|
||||
return !MF.getFrameInfo().hasVarSizedObjects() &&
|
||||
!MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
|
||||
!MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
|
||||
!MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
|
||||
}
|
||||
|
||||
/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
|
||||
|
@ -67,6 +68,7 @@ bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
|
|||
bool
|
||||
X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
|
||||
return hasReservedCallFrame(MF) ||
|
||||
MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
|
||||
(hasFP(MF) && !TRI->needsStackRealignment(MF)) ||
|
||||
TRI->hasBasePointer(MF);
|
||||
}
|
||||
|
@ -90,10 +92,10 @@ X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
|
|||
bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
|
||||
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
|
||||
TRI->needsStackRealignment(MF) ||
|
||||
MFI.hasVarSizedObjects() ||
|
||||
TRI->needsStackRealignment(MF) || MFI.hasVarSizedObjects() ||
|
||||
MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
|
||||
MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
|
||||
MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
|
||||
MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
|
||||
MFI.hasStackMap() || MFI.hasPatchPoint() ||
|
||||
MFI.hasCopyImplyingStackAdjustment());
|
||||
|
|
|
@ -5552,6 +5552,39 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
|
|||
CurDAG->RemoveDeadNode(Node);
|
||||
return;
|
||||
}
|
||||
case ISD::PREALLOCATED_SETUP: {
|
||||
auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
|
||||
auto CallId = MFI->getPreallocatedIdForCallSite(
|
||||
cast<SrcValueSDNode>(Node->getOperand(1))->getValue());
|
||||
SDValue Chain = Node->getOperand(0);
|
||||
SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32);
|
||||
MachineSDNode *New = CurDAG->getMachineNode(
|
||||
TargetOpcode::PREALLOCATED_SETUP, dl, MVT::Other, CallIdValue, Chain);
|
||||
ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Chain
|
||||
CurDAG->RemoveDeadNode(Node);
|
||||
return;
|
||||
}
|
||||
case ISD::PREALLOCATED_ARG: {
|
||||
auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
|
||||
auto CallId = MFI->getPreallocatedIdForCallSite(
|
||||
cast<SrcValueSDNode>(Node->getOperand(1))->getValue());
|
||||
SDValue Chain = Node->getOperand(0);
|
||||
SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32);
|
||||
SDValue ArgIndex = Node->getOperand(2);
|
||||
SDValue Ops[3];
|
||||
Ops[0] = CallIdValue;
|
||||
Ops[1] = ArgIndex;
|
||||
Ops[2] = Chain;
|
||||
MachineSDNode *New = CurDAG->getMachineNode(
|
||||
TargetOpcode::PREALLOCATED_ARG, dl,
|
||||
CurDAG->getVTList(TLI->getPointerTy(CurDAG->getDataLayout()),
|
||||
MVT::Other),
|
||||
Ops);
|
||||
ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Arg pointer
|
||||
ReplaceUses(SDValue(Node, 1), SDValue(New, 1)); // Chain
|
||||
CurDAG->RemoveDeadNode(Node);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
SelectCode(Node);
|
||||
|
|
|
@ -3945,6 +3945,21 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
|||
if (ArgLocs.back().getLocMemOffset() != 0)
|
||||
report_fatal_error("any parameter with the inalloca attribute must be "
|
||||
"the only memory argument");
|
||||
} else if (CLI.IsPreallocated) {
|
||||
assert(ArgLocs.back().isMemLoc() &&
|
||||
"cannot use preallocated attribute on a register "
|
||||
"parameter");
|
||||
SmallVector<size_t, 4> PreallocatedOffsets;
|
||||
for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
|
||||
if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
|
||||
PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
|
||||
}
|
||||
}
|
||||
auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
|
||||
size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
|
||||
MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
|
||||
MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
|
||||
NumBytesToPush = 0;
|
||||
}
|
||||
|
||||
if (!IsSibcall && !IsMustTail)
|
||||
|
@ -3972,9 +3987,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
|||
for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
|
||||
++I, ++OutIndex) {
|
||||
assert(OutIndex < Outs.size() && "Invalid Out index");
|
||||
// Skip inalloca arguments, they have already been written.
|
||||
// Skip inalloca/preallocated arguments, they have already been written.
|
||||
ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
|
||||
if (Flags.isInAlloca())
|
||||
if (Flags.isInAlloca() || Flags.isPreallocated())
|
||||
continue;
|
||||
|
||||
CCValAssign &VA = ArgLocs[I];
|
||||
|
@ -4161,8 +4176,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
|||
assert(VA.isMemLoc());
|
||||
SDValue Arg = OutVals[OutsIndex];
|
||||
ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
|
||||
// Skip inalloca arguments. They don't require any work.
|
||||
if (Flags.isInAlloca())
|
||||
// Skip inalloca/preallocated arguments. They don't require any work.
|
||||
if (Flags.isInAlloca() || Flags.isPreallocated())
|
||||
continue;
|
||||
// Create frame index.
|
||||
int32_t Offset = VA.getLocMemOffset()+FPDiff;
|
||||
|
@ -33076,6 +33091,36 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
|
|||
BB->addLiveIn(BasePtr);
|
||||
return BB;
|
||||
}
|
||||
case TargetOpcode::PREALLOCATED_SETUP: {
|
||||
assert(Subtarget.is32Bit() && "preallocated only used in 32-bit");
|
||||
auto MFI = MF->getInfo<X86MachineFunctionInfo>();
|
||||
MFI->setHasPreallocatedCall(true);
|
||||
int64_t PreallocatedId = MI.getOperand(0).getImm();
|
||||
size_t StackAdjustment = MFI->getPreallocatedStackSize(PreallocatedId);
|
||||
assert(StackAdjustment != 0 && "0 stack adjustment");
|
||||
LLVM_DEBUG(dbgs() << "PREALLOCATED_SETUP stack adjustment "
|
||||
<< StackAdjustment << "\n");
|
||||
BuildMI(*BB, MI, DL, TII->get(X86::SUB32ri), X86::ESP)
|
||||
.addReg(X86::ESP)
|
||||
.addImm(StackAdjustment);
|
||||
MI.eraseFromParent();
|
||||
return BB;
|
||||
}
|
||||
case TargetOpcode::PREALLOCATED_ARG: {
|
||||
assert(Subtarget.is32Bit() && "preallocated calls only used in 32-bit");
|
||||
int64_t PreallocatedId = MI.getOperand(1).getImm();
|
||||
int64_t ArgIdx = MI.getOperand(2).getImm();
|
||||
auto MFI = MF->getInfo<X86MachineFunctionInfo>();
|
||||
size_t ArgOffset = MFI->getPreallocatedArgOffsets(PreallocatedId)[ArgIdx];
|
||||
LLVM_DEBUG(dbgs() << "PREALLOCATED_ARG arg index " << ArgIdx
|
||||
<< ", arg offset " << ArgOffset << "\n");
|
||||
// stack pointer + offset
|
||||
addRegOffset(
|
||||
BuildMI(*BB, MI, DL, TII->get(X86::LEA32r), MI.getOperand(0).getReg()),
|
||||
X86::ESP, false, ArgOffset);
|
||||
MI.eraseFromParent();
|
||||
return BB;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -13,6 +13,8 @@
|
|||
#ifndef LLVM_LIB_TARGET_X86_X86MACHINEFUNCTIONINFO_H
|
||||
#define LLVM_LIB_TARGET_X86_X86MACHINEFUNCTIONINFO_H
|
||||
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/CodeGen/CallingConvLower.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
|
||||
|
@ -103,6 +105,13 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
|
|||
/// True if this function has WIN_ALLOCA instructions.
|
||||
bool HasWinAlloca = false;
|
||||
|
||||
/// True if this function has any preallocated calls.
|
||||
bool HasPreallocatedCall = false;
|
||||
|
||||
ValueMap<const Value *, size_t> PreallocatedIds;
|
||||
SmallVector<size_t, 0> PreallocatedStackSizes;
|
||||
SmallVector<SmallVector<size_t, 4>, 0> PreallocatedArgOffsets;
|
||||
|
||||
private:
|
||||
/// ForwardedMustTailRegParms - A list of virtual and physical registers
|
||||
/// that must be forwarded to every musttail call.
|
||||
|
@ -184,6 +193,36 @@ public:
|
|||
|
||||
bool hasWinAlloca() const { return HasWinAlloca; }
|
||||
void setHasWinAlloca(bool v) { HasWinAlloca = v; }
|
||||
|
||||
bool hasPreallocatedCall() const { return HasPreallocatedCall; }
|
||||
void setHasPreallocatedCall(bool v) { HasPreallocatedCall = v; }
|
||||
|
||||
size_t getPreallocatedIdForCallSite(const Value *CS) {
|
||||
auto Insert = PreallocatedIds.insert({CS, PreallocatedIds.size()});
|
||||
if (Insert.second) {
|
||||
PreallocatedStackSizes.push_back(0);
|
||||
PreallocatedArgOffsets.emplace_back();
|
||||
}
|
||||
return Insert.first->second;
|
||||
}
|
||||
|
||||
void setPreallocatedStackSize(size_t Id, size_t StackSize) {
|
||||
PreallocatedStackSizes[Id] = StackSize;
|
||||
}
|
||||
|
||||
size_t getPreallocatedStackSize(const size_t Id) {
|
||||
assert(PreallocatedStackSizes[Id] != 0 && "stack size not set");
|
||||
return PreallocatedStackSizes[Id];
|
||||
}
|
||||
|
||||
void setPreallocatedArgOffsets(size_t Id, ArrayRef<size_t> AO) {
|
||||
PreallocatedArgOffsets[Id].assign(AO.begin(), AO.end());
|
||||
}
|
||||
|
||||
/// Returns a view of the arg offsets stored for the preallocated call with
/// id \p Id.
///
/// Asserts that the list is non-empty, i.e. that offsets have been stored
/// since the id was created. The returned ArrayRef refers to storage held
/// in PreallocatedArgOffsets rather than to a copy.
const ArrayRef<size_t> getPreallocatedArgOffsets(const size_t Id) {
  const auto &Offsets = PreallocatedArgOffsets[Id];
  assert(!Offsets.empty() && "arg offsets not set");
  return Offsets;
}
|
||||
};
|
||||
|
||||
} // End llvm namespace
|
||||
|
|
|
@ -627,6 +627,10 @@ static bool CantUseSP(const MachineFrameInfo &MFI) {
|
|||
}
|
||||
|
||||
bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
|
||||
const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
|
||||
if (X86FI->hasPreallocatedCall())
|
||||
return true;
|
||||
|
||||
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
|
||||
if (!EnableBasePointer)
|
||||
|
|
|
@ -1016,8 +1016,8 @@ static bool shouldBeMustTail(const CallInst &CI, const Function &F) {
|
|||
// CI should not have any ABI-impacting function attributes.
|
||||
static const Attribute::AttrKind ABIAttrs[] = {
|
||||
Attribute::StructRet, Attribute::ByVal, Attribute::InAlloca,
|
||||
Attribute::InReg, Attribute::Returned, Attribute::SwiftSelf,
|
||||
Attribute::SwiftError};
|
||||
Attribute::Preallocated, Attribute::InReg, Attribute::Returned,
|
||||
Attribute::SwiftSelf, Attribute::SwiftError};
|
||||
AttributeList Attrs = CI.getAttributes();
|
||||
for (auto AK : ABIAttrs)
|
||||
if (Attrs.hasParamAttribute(0, AK))
|
||||
|
|
|
@ -1363,7 +1363,8 @@ bool Attributor::isValidFunctionSignatureRewrite(
|
|||
AttributeList FnAttributeList = Fn->getAttributes();
|
||||
if (FnAttributeList.hasAttrSomewhere(Attribute::Nest) ||
|
||||
FnAttributeList.hasAttrSomewhere(Attribute::StructRet) ||
|
||||
FnAttributeList.hasAttrSomewhere(Attribute::InAlloca)) {
|
||||
FnAttributeList.hasAttrSomewhere(Attribute::InAlloca) ||
|
||||
FnAttributeList.hasAttrSomewhere(Attribute::Preallocated)) {
|
||||
LLVM_DEBUG(
|
||||
dbgs() << "[Attributor] Cannot rewrite due to complex attribute\n");
|
||||
return false;
|
||||
|
|
|
@ -4455,7 +4455,8 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
|
|||
AAValueSimplifyImpl::initialize(A);
|
||||
if (!getAnchorScope() || getAnchorScope()->isDeclaration())
|
||||
indicatePessimisticFixpoint();
|
||||
if (hasAttr({Attribute::InAlloca, Attribute::StructRet, Attribute::Nest},
|
||||
if (hasAttr({Attribute::InAlloca, Attribute::Preallocated,
|
||||
Attribute::StructRet, Attribute::Nest},
|
||||
/* IgnoreSubsumingPositions */ true))
|
||||
indicatePessimisticFixpoint();
|
||||
|
||||
|
@ -5695,7 +5696,7 @@ struct AAMemoryBehaviorArgument : AAMemoryBehaviorFloating {
|
|||
|
||||
// TODO: From readattrs.ll: "inalloca parameters are always
|
||||
// considered written"
|
||||
if (hasAttr({Attribute::InAlloca})) {
|
||||
if (hasAttr({Attribute::InAlloca, Attribute::Preallocated})) {
|
||||
removeKnownBits(NO_WRITES);
|
||||
removeAssumedBits(NO_WRITES);
|
||||
}
|
||||
|
|
|
@ -483,9 +483,10 @@ DeadArgumentEliminationPass::SurveyUses(const Value *V,
|
|||
// We consider arguments of non-internal functions to be intrinsically alive as
|
||||
// well as arguments to functions which have their "address taken".
|
||||
void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
|
||||
// Functions with inalloca parameters are expecting args in a particular
|
||||
// register and memory layout.
|
||||
if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca)) {
|
||||
// Functions with inalloca/preallocated parameters are expecting args in a
|
||||
// particular register and memory layout.
|
||||
if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
|
||||
F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) {
|
||||
MarkLive(F);
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -447,7 +447,7 @@ determinePointerReadAttrs(Argument *A,
|
|||
SmallPtrSet<Use *, 32> Visited;
|
||||
|
||||
// inalloca arguments are always clobbered by the call.
|
||||
if (A->hasInAllocaAttr())
|
||||
if (A->hasInAllocaAttr() || A->hasPreallocatedAttr())
|
||||
return Attribute::None;
|
||||
|
||||
bool IsRead = false;
|
||||
|
|
|
@ -2333,6 +2333,7 @@ OptimizeFunctions(Module &M,
|
|||
// wouldn't be safe in the presence of inalloca.
|
||||
// FIXME: We should also hoist alloca affected by this to the entry
|
||||
// block if possible.
|
||||
// FIXME: handle preallocated
|
||||
if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
|
||||
!F->hasAddressTaken()) {
|
||||
RemoveAttribute(F, Attribute::InAlloca);
|
||||
|
|
|
@ -4737,6 +4737,7 @@ bool InstCombiner::transformConstExprCastCall(CallBase &Call) {
|
|||
//
|
||||
// Similarly, avoid folding away bitcasts of byval calls.
|
||||
if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
|
||||
Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated) ||
|
||||
Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
|
||||
return false;
|
||||
|
||||
|
|
|
@ -246,6 +246,20 @@ entry:
|
|||
; CHECK: calll _addrof_i32
|
||||
; CHECK: retl
|
||||
|
||||
define void @avoid_preallocated(i32* preallocated(i32) %x) {
|
||||
entry:
|
||||
%x.p.p = alloca i32*
|
||||
store i32* %x, i32** %x.p.p
|
||||
call void @addrof_i32(i32* %x)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: _avoid_preallocated:
|
||||
; CHECK: leal {{[0-9]+}}(%esp), %[[reg:[^ ]*]]
|
||||
; CHECK: pushl %[[reg]]
|
||||
; CHECK: calll _addrof_i32
|
||||
; CHECK: retl
|
||||
|
||||
; Don't elide the copy when the alloca is escaped with a store.
|
||||
define void @escape_with_store(i32 %x) {
|
||||
%x1 = alloca i32
|
||||
|
|
|
@ -22,6 +22,8 @@
|
|||
; Each member pointer creates a thunk. The ones with inalloca are required to
|
||||
; be tail calls by the ABI, even at O0.
|
||||
|
||||
; TODO: add tests for preallocated/musttail once supported
|
||||
|
||||
%struct.B = type { i32 (...)** }
|
||||
%struct.A = type { i32 }
|
||||
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
; RUN: llc -verify-machineinstrs -mtriple=i686-- < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=i686-- -O0 < %s | FileCheck %s
|
||||
|
||||
; TODO: add tests for preallocated/musttail once supported
|
||||
|
||||
; CHECK-LABEL: t1:
|
||||
; CHECK: jmp {{_?}}t1_callee
|
||||
define x86_thiscallcc void @t1(i8* %this) {
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
|
||||
; REQUIRES: asserts
|
||||
; XFAIL: *
|
||||
|
||||
declare token @llvm.call.preallocated.setup(i32)
|
||||
declare i8* @llvm.call.preallocated.arg(token, i32)
|
||||
|
||||
%Foo = type { i32, i32 }
|
||||
|
||||
declare void @init(%Foo*)
|
||||
|
||||
|
||||
|
||||
declare void @foo_p(%Foo* preallocated(%Foo))
|
||||
|
||||
define void @no_call() {
|
||||
; CHECK-LABEL: _no_call:
|
||||
%t = call token @llvm.call.preallocated.setup(i32 1)
|
||||
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
|
||||
%b = bitcast i8* %a to %Foo*
|
||||
call void @init(%Foo* %b)
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,18 @@
|
|||
; RUN: llc %s -mtriple=x86_64-windows-msvc -o /dev/null 2>&1
|
||||
; REQUIRES: asserts
|
||||
; XFAIL: *
|
||||
|
||||
declare token @llvm.call.preallocated.setup(i32)
|
||||
declare i8* @llvm.call.preallocated.arg(token, i32)
|
||||
|
||||
%Foo = type { i32, i32 }
|
||||
|
||||
declare x86_thiscallcc void @f(i32, %Foo* preallocated(%Foo))
|
||||
|
||||
define void @g() {
|
||||
%t = call token @llvm.call.preallocated.setup(i32 1)
|
||||
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
|
||||
%b = bitcast i8* %a to %Foo*
|
||||
call void @f(i32 0, %Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
|
||||
ret void
|
||||
}
|
|
@ -0,0 +1,187 @@
|
|||
; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
|
||||
|
||||
declare token @llvm.call.preallocated.setup(i32)
|
||||
declare i8* @llvm.call.preallocated.arg(token, i32)
|
||||
|
||||
%Foo = type { i32, i32 }
|
||||
|
||||
declare void @init(%Foo*)
|
||||
|
||||
|
||||
|
||||
declare void @foo_p(%Foo* preallocated(%Foo))
|
||||
|
||||
define void @one_preallocated() {
|
||||
; CHECK-LABEL: _one_preallocated:
|
||||
%t = call token @llvm.call.preallocated.setup(i32 1)
|
||||
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
|
||||
%b = bitcast i8* %a to %Foo*
|
||||
; CHECK: subl $8, %esp
|
||||
; CHECK: calll _foo_p
|
||||
call void @foo_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @one_preallocated_two_blocks() {
|
||||
; CHECK-LABEL: _one_preallocated_two_blocks:
|
||||
%t = call token @llvm.call.preallocated.setup(i32 1)
|
||||
br label %second
|
||||
second:
|
||||
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
|
||||
%b = bitcast i8* %a to %Foo*
|
||||
; CHECK: subl $8, %esp
|
||||
; CHECK: calll _foo_p
|
||||
call void @foo_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @preallocated_with_store() {
|
||||
; CHECK-LABEL: _preallocated_with_store:
|
||||
; CHECK: subl $8, %esp
|
||||
%t = call token @llvm.call.preallocated.setup(i32 1)
|
||||
; CHECK: leal (%esp), [[REGISTER:%[a-z]+]]
|
||||
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
|
||||
%b = bitcast i8* %a to %Foo*
|
||||
%p0 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
|
||||
%p1 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
|
||||
store i32 13, i32* %p0
|
||||
store i32 42, i32* %p1
|
||||
; CHECK-DAG: movl $13, ([[REGISTER]])
|
||||
; CHECK-DAG: movl $42, 4([[REGISTER]])
|
||||
; CHECK-NOT: subl {{\$[0-9]+}}, %esp
|
||||
; CHECK-NOT: pushl
|
||||
; CHECK: calll _foo_p
|
||||
call void @foo_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @preallocated_with_init() {
|
||||
; CHECK-LABEL: _preallocated_with_init:
|
||||
; CHECK: subl $8, %esp
|
||||
%t = call token @llvm.call.preallocated.setup(i32 1)
|
||||
; CHECK: leal (%esp), [[REGISTER:%[a-z]+]]
|
||||
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
|
||||
%b = bitcast i8* %a to %Foo*
|
||||
; CHECK: pushl [[REGISTER]]
|
||||
; CHECK: calll _init
|
||||
call void @init(%Foo* %b)
|
||||
; CHECK-NOT: subl {{\$[0-9]+}}, %esp
|
||||
; CHECK-NOT: pushl
|
||||
; CHECK: calll _foo_p
|
||||
call void @foo_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @foo_p_p(%Foo* preallocated(%Foo), %Foo* preallocated(%Foo))
|
||||
|
||||
define void @two_preallocated() {
|
||||
; CHECK-LABEL: _two_preallocated:
|
||||
%t = call token @llvm.call.preallocated.setup(i32 2)
|
||||
%a1 = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
|
||||
%b1 = bitcast i8* %a1 to %Foo*
|
||||
%a2 = call i8* @llvm.call.preallocated.arg(token %t, i32 1) preallocated(%Foo)
|
||||
%b2 = bitcast i8* %a2 to %Foo*
|
||||
; CHECK: subl $16, %esp
|
||||
; CHECK: calll _foo_p_p
|
||||
call void @foo_p_p(%Foo* preallocated(%Foo) %b1, %Foo* preallocated(%Foo) %b2) ["preallocated"(token %t)]
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @foo_p_int(%Foo* preallocated(%Foo), i32)
|
||||
|
||||
define void @one_preallocated_one_normal() {
|
||||
; CHECK-LABEL: _one_preallocated_one_normal:
|
||||
; CHECK: subl $12, %esp
|
||||
%t = call token @llvm.call.preallocated.setup(i32 1)
|
||||
; CHECK: leal (%esp), [[REGISTER:%[a-z]+]]
|
||||
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
|
||||
%b = bitcast i8* %a to %Foo*
|
||||
; CHECK: pushl [[REGISTER]]
|
||||
; CHECK: calll _init
|
||||
call void @init(%Foo* %b)
|
||||
; CHECK-NOT: subl {{\$[0-9]+}}, %esp
|
||||
; CHECK-NOT: pushl
|
||||
; CHECK: movl $2, 8(%esp)
|
||||
; CHECK: calll _foo_p_int
|
||||
call void @foo_p_int(%Foo* preallocated(%Foo) %b, i32 2) ["preallocated"(token %t)]
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @foo_ret_p(%Foo* sret, %Foo* preallocated(%Foo))
|
||||
|
||||
define void @nested_with_init() {
|
||||
; CHECK-LABEL: _nested_with_init:
|
||||
%tmp = alloca %Foo
|
||||
|
||||
%t1 = call token @llvm.call.preallocated.setup(i32 1)
|
||||
; CHECK: subl $12, %esp
|
||||
%a1 = call i8* @llvm.call.preallocated.arg(token %t1, i32 0) preallocated(%Foo)
|
||||
%b1 = bitcast i8* %a1 to %Foo*
|
||||
; CHECK: leal 4(%esp), [[REGISTER1:%[a-z]+]]
|
||||
|
||||
%t2 = call token @llvm.call.preallocated.setup(i32 1)
|
||||
; CHECK: subl $12, %esp
|
||||
%a2 = call i8* @llvm.call.preallocated.arg(token %t2, i32 0) preallocated(%Foo)
|
||||
; CHECK: leal 4(%esp), [[REGISTER2:%[a-z]+]]
|
||||
%b2 = bitcast i8* %a2 to %Foo*
|
||||
|
||||
call void @init(%Foo* %b2)
|
||||
; CHECK: pushl [[REGISTER2]]
|
||||
; CHECK: calll _init
|
||||
|
||||
call void @foo_ret_p(%Foo* %b1, %Foo* preallocated(%Foo) %b2) ["preallocated"(token %t2)]
|
||||
; CHECK-NOT: subl {{\$[0-9]+}}, %esp
|
||||
; CHECK-NOT: pushl
|
||||
; CHECK: calll _foo_ret_p
|
||||
call void @foo_ret_p(%Foo* %tmp, %Foo* preallocated(%Foo) %b1) ["preallocated"(token %t1)]
|
||||
; CHECK-NOT: subl {{\$[0-9]+}}, %esp
|
||||
; CHECK-NOT: pushl
|
||||
; CHECK: calll _foo_ret_p
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @foo_inreg_p(i32 inreg, %Foo* preallocated(%Foo))
|
||||
|
||||
define void @inreg() {
|
||||
; CHECK-LABEL: _inreg:
|
||||
%t = call token @llvm.call.preallocated.setup(i32 1)
|
||||
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
|
||||
%b = bitcast i8* %a to %Foo*
|
||||
; CHECK: subl $8, %esp
|
||||
; CHECK: movl $9, %eax
|
||||
; CHECK: calll _foo_inreg_p
|
||||
call void @foo_inreg_p(i32 9, %Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
|
||||
ret void
|
||||
}
|
||||
|
||||
declare x86_thiscallcc void @foo_thiscall_p(i8*, %Foo* preallocated(%Foo))
|
||||
|
||||
define void @thiscall() {
|
||||
; CHECK-LABEL: _thiscall:
|
||||
%t = call token @llvm.call.preallocated.setup(i32 1)
|
||||
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
|
||||
%b = bitcast i8* %a to %Foo*
|
||||
; CHECK: subl $8, %esp
|
||||
; CHECK: xorl %ecx, %ecx
|
||||
; CHECK: calll _foo_thiscall_p
|
||||
call x86_thiscallcc void @foo_thiscall_p(i8* null, %Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
|
||||
ret void
|
||||
}
|
||||
|
||||
declare x86_stdcallcc void @foo_stdcall_p(%Foo* preallocated(%Foo))
|
||||
declare x86_stdcallcc void @i(i32)
|
||||
|
||||
define void @stdcall() {
|
||||
; CHECK-LABEL: _stdcall:
|
||||
%t = call token @llvm.call.preallocated.setup(i32 1)
|
||||
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
|
||||
%b = bitcast i8* %a to %Foo*
|
||||
; CHECK: subl $8, %esp
|
||||
; CHECK: calll _foo_stdcall_p@8
|
||||
call x86_stdcallcc void @foo_stdcall_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
|
||||
; CHECK-NOT: %esp
|
||||
; CHECK: pushl
|
||||
; CHECK: calll _i@4
|
||||
call x86_stdcallcc void @i(i32 0)
|
||||
ret void
|
||||
}
|
|
@ -1,5 +1,8 @@
|
|||
; RUN: llc < %s -enable-shrink-wrap=true | FileCheck %s
|
||||
|
||||
; TODO: add preallocated versions of tests
|
||||
; we don't yet support conditionally called preallocated calls after the setup
|
||||
|
||||
; chkstk cannot come before the usual prologue, since it adjusts ESP.
|
||||
; If chkstk is used in the prologue, we also have to be careful about preserving
|
||||
; EAX if it is used.
|
||||
|
|
|
@ -9,6 +9,21 @@ target triple = "i386-pc-windows-msvc19.0.24215"
|
|||
declare x86_stdcallcc void @tail_std(i32)
|
||||
declare void @capture(i32*)
|
||||
|
||||
define x86_thiscallcc void @preallocated(i32* %this, i32* preallocated(i32) %args) {
|
||||
entry:
|
||||
%val = load i32, i32* %args
|
||||
store i32 0, i32* %args
|
||||
tail call x86_stdcallcc void @tail_std(i32 %val)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: _preallocated: # @preallocated
|
||||
; CHECK: movl 4(%esp), %[[reg:[^ ]*]]
|
||||
; CHECK: movl $0, 4(%esp)
|
||||
; CHECK: pushl %[[reg]]
|
||||
; CHECK: calll _tail_std@4
|
||||
; CHECK: retl $4
|
||||
|
||||
define x86_thiscallcc void @inalloca(i32* %this, i32* inalloca %args) {
|
||||
entry:
|
||||
%val = load i32, i32* %args
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
|
||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
declare void @f(i32)
|
||||
declare token @llvm.call.preallocated.setup(i32)
|
||||
declare i8* @llvm.call.preallocated.arg(token, i32)
|
||||
|
||||
; Test1: Replace argument with constant
|
||||
define internal void @test1(i32 %a) {
|
||||
|
@ -280,6 +282,24 @@ define i32* @complicated_args_inalloca() {
|
|||
ret i32* %call
|
||||
}
|
||||
|
||||
define internal i32* @test_preallocated(i32* preallocated(i32) %a) {
|
||||
; CHECK-LABEL: define {{[^@]+}}@test_preallocated
|
||||
; CHECK-SAME: (i32* noalias nofree returned writeonly preallocated(i32) align 536870912 "no-capture-maybe-returned" [[A:%.*]])
|
||||
; CHECK-NEXT: ret i32* [[A]]
|
||||
;
|
||||
ret i32* %a
|
||||
}
|
||||
define i32* @complicated_args_preallocated() {
|
||||
; CHECK-LABEL: define {{[^@]+}}@complicated_args_preallocated()
|
||||
; CHECK-NEXT: [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 1)
|
||||
; CHECK-NEXT: [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree writeonly preallocated(i32) align 536870912 null)
|
||||
; CHECK-NEXT: ret i32* [[CALL]]
|
||||
;
|
||||
%c = call token @llvm.call.preallocated.setup(i32 1)
|
||||
%call = call i32* @test_preallocated(i32* preallocated(i32) null) ["preallocated"(token %c)]
|
||||
ret i32* %call
|
||||
}
|
||||
|
||||
define internal void @test_sret(%struct.X* sret %a, %struct.X** %b) {
|
||||
;
|
||||
; CHECK-LABEL: define {{[^@]+}}@test_sret
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
; RUN: opt < %s -deadargelim -S | FileCheck %s
|
||||
|
||||
declare token @llvm.call.preallocated.setup(i32)
|
||||
declare i8* @llvm.call.preallocated.arg(token, i32)
|
||||
|
||||
%Ty = type <{ i32, i32 }>
|
||||
|
||||
; Check if the pass doesn't modify anything that doesn't need changing. We feed
|
||||
|
@ -44,4 +47,22 @@ define i32 @caller2() {
|
|||
ret i32 %v
|
||||
}
|
||||
|
||||
; We can't remove 'this' here, as that would put argmem in ecx instead of
|
||||
; memory.
|
||||
define internal x86_thiscallcc i32 @unused_this_preallocated(i32* %this, i32* preallocated(i32) %argmem) {
|
||||
%v = load i32, i32* %argmem
|
||||
ret i32 %v
|
||||
}
|
||||
; CHECK-LABEL: define internal x86_thiscallcc i32 @unused_this_preallocated(i32* %this, i32* preallocated(i32) %argmem)
|
||||
|
||||
define i32 @caller3() {
|
||||
%t = alloca i32
|
||||
%c = call token @llvm.call.preallocated.setup(i32 1)
|
||||
%M = call i8* @llvm.call.preallocated.arg(token %c, i32 0) preallocated(i32)
|
||||
%m = bitcast i8* %M to i32*
|
||||
store i32 42, i32* %m
|
||||
%v = call x86_thiscallcc i32 @unused_this_preallocated(i32* %t, i32* preallocated(i32) %m) ["preallocated"(token %c)]
|
||||
ret i32 %v
|
||||
}
|
||||
|
||||
; CHECK: attributes #0 = { nounwind }
|
||||
|
|
|
@ -58,6 +58,16 @@ define void @test9_2(%struct.x* inalloca %a) nounwind {
|
|||
ret void
|
||||
}
|
||||
|
||||
; Test for preallocated handling.
|
||||
define void @test9_3(%struct.x* preallocated(%struct.x) %a) nounwind {
|
||||
; CHECK-LABEL: @test9_3(
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
|
||||
store i32 1, i32* %tmp2, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; DSE should delete the dead trampoline.
|
||||
declare void @test11f()
|
||||
define void @test11() {
|
||||
|
|
|
@ -169,6 +169,16 @@ define void @test9_2(%struct.x* inalloca %a) nounwind {
|
|||
ret void
|
||||
}
|
||||
|
||||
; Test for preallocated handling.
|
||||
define void @test9_3(%struct.x* preallocated(%struct.x) %a) nounwind {
|
||||
; CHECK-LABEL: @test9_3(
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
|
||||
store i32 1, i32* %tmp2, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; va_arg has fuzzy dependence, the store shouldn't be zapped.
|
||||
define double @test10(i8* %X) {
|
||||
; CHECK-LABEL: @test10(
|
||||
|
|
|
@ -56,6 +56,12 @@ define void @test7_1(i32* inalloca %a) {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define void @test7_2(i32* nocapture preallocated(i32) %a)
|
||||
; preallocated parameters are always considered written
|
||||
define void @test7_2(i32* preallocated(i32) %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define i32* @test8_1(i32* readnone returned %p)
|
||||
define i32* @test8_1(i32* %p) {
|
||||
entry:
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
; RUN: opt < %s -globalopt -S | FileCheck %s
|
||||
|
||||
declare token @llvm.call.preallocated.setup(i32)
|
||||
declare i8* @llvm.call.preallocated.arg(token, i32)
|
||||
|
||||
define internal i32 @f(i32* %m) {
|
||||
; CHECK-LABEL: define internal fastcc i32 @f
|
||||
%v = load i32, i32* %m
|
||||
|
@ -32,6 +35,13 @@ define internal i32 @inalloca(i32* inalloca %p) {
|
|||
ret i32 %rv
|
||||
}
|
||||
|
||||
define internal i32 @preallocated(i32* preallocated(i32) %p) {
|
||||
; TODO: handle preallocated:
|
||||
; CHECK-NOT-LABEL: define internal fastcc i32 @preallocated(i32* %p)
|
||||
%rv = load i32, i32* %p
|
||||
ret i32 %rv
|
||||
}
|
||||
|
||||
define void @call_things() {
|
||||
%m = alloca i32
|
||||
call i32 @f(i32* %m)
|
||||
|
@ -40,6 +50,11 @@ define void @call_things() {
|
|||
call i32 @j(i32* %m)
|
||||
%args = alloca inalloca i32
|
||||
call i32 @inalloca(i32* inalloca %args)
|
||||
; TODO: handle preallocated
|
||||
;%c = call token @llvm.call.preallocated.setup(i32 1)
|
||||
;%N = call i8* @llvm.call.preallocated.arg(token %c, i32 0) preallocated(i32)
|
||||
;%n = bitcast i8* %N to i32*
|
||||
;call i32 @preallocated(i32* preallocated(i32) %n) ["preallocated"(token %c)]
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
; RUN: opt < %s -instcombine -S | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:32:32"
|
||||
target triple = "i686-pc-win32"
|
||||
|
||||
|
||||
declare token @llvm.call.preallocated.setup(i32)
|
||||
declare i8* @llvm.call.preallocated.arg(token, i32)
|
||||
|
||||
declare void @takes_i32(i32)
|
||||
declare void @takes_i32_preallocated(i32* preallocated(i32))
|
||||
|
||||
define void @f() {
|
||||
; CHECK-LABEL: define void @f()
|
||||
%t = call token @llvm.call.preallocated.setup(i32 1)
|
||||
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(i32)
|
||||
%arg = bitcast i8* %a to i32*
|
||||
call void bitcast (void (i32)* @takes_i32 to void (i32*)*)(i32* preallocated(i32) %arg) ["preallocated"(token %t)]
|
||||
; CHECK: call void bitcast{{.*}}@takes_i32
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @g() {
|
||||
; CHECK-LABEL: define void @g()
|
||||
call void bitcast (void (i32*)* @takes_i32_preallocated to void (i32)*)(i32 0)
|
||||
; CHECK: call void bitcast{{.*}}@takes_i32_preallocated
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue