forked from OSchip/llvm-project
Elide argument copies during instruction selection
Summary: Avoids tons of prologue boilerplate when arguments are passed in memory and left in memory. This can happen in a debug build or in a release build when an argument alloca is escaped. This will dramatically affect the code size of x86 debug builds, because X86 fast isel doesn't handle arguments passed in memory at all. It only handles the x86_64 case of up to 6 basic register parameters. This is implemented by analyzing the entry block before ISel to identify copy elision candidates. A copy elision candidate is an argument that is used to fully initialize an alloca before any other possibly escaping uses of that alloca. If an argument is a copy elision candidate, we set a flag on the InputArg. If the target generates loads from a fixed stack object that matches the size and alignment requirements of the alloca, the SelectionDAG builder will delete the stack object created for the alloca and replace it with the fixed stack object. The load is left behind to satisfy any remaining uses of the argument value. The store is now dead and is therefore elided. The fixed stack object is also marked as mutable, as it may now be modified by the user, and it would be invalid to rematerialize the initial load from it. Supersedes D28388 Fixes PR26328 Reviewers: chandlerc, MatzeB, qcolombet, inglorion, hans Subscribers: igorb, llvm-commits Differential Revision: https://reviews.llvm.org/D29668 llvm-svn: 296683
This commit is contained in:
parent
b7278af54b
commit
f7c0980c10
|
@ -559,8 +559,7 @@ public:
|
|||
return Objects[ObjectIdx+NumFixedObjects].isAliased;
|
||||
}
|
||||
|
||||
/// isImmutableObjectIndex - Returns true if the specified index corresponds
|
||||
/// to an immutable object.
|
||||
/// Returns true if the specified index corresponds to an immutable object.
|
||||
bool isImmutableObjectIndex(int ObjectIdx) const {
|
||||
// Tail calling functions can clobber their function arguments.
|
||||
if (HasTailCall)
|
||||
|
@ -570,6 +569,13 @@ public:
|
|||
return Objects[ObjectIdx+NumFixedObjects].isImmutable;
|
||||
}
|
||||
|
||||
/// Marks the immutability of an object.
|
||||
void setIsImmutableObjectIndex(int ObjectIdx, bool Immutable) {
|
||||
assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
|
||||
"Invalid Object Idx!");
|
||||
Objects[ObjectIdx+NumFixedObjects].isImmutable = Immutable;
|
||||
}
|
||||
|
||||
/// Returns true if the specified index corresponds to a spill slot.
|
||||
bool isSpillSlotObjectIndex(int ObjectIdx) const {
|
||||
assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
|
||||
|
|
|
@ -54,6 +54,7 @@ public:
|
|||
const TargetInstrInfo *TII;
|
||||
const TargetLowering *TLI;
|
||||
bool FastISelFailed;
|
||||
SmallPtrSet<const Instruction *, 4> ElidedArgCopyInstrs;
|
||||
|
||||
static char ID;
|
||||
|
||||
|
|
|
@ -45,6 +45,7 @@ namespace ISD {
|
|||
unsigned OrigAlign : 5; ///< Log 2 of original alignment
|
||||
unsigned IsInConsecutiveRegsLast : 1;
|
||||
unsigned IsInConsecutiveRegs : 1;
|
||||
unsigned IsCopyElisionCandidate : 1; ///< Argument copy elision candidate
|
||||
|
||||
unsigned ByValSize; ///< Byval struct size
|
||||
|
||||
|
@ -54,7 +55,8 @@ namespace ISD {
|
|||
IsReturned(0), IsSplit(0), IsInAlloca(0), IsSplitEnd(0),
|
||||
IsSwiftSelf(0), IsSwiftError(0), IsHva(0), IsHvaStart(0),
|
||||
IsSecArgPass(0), ByValAlign(0), OrigAlign(0),
|
||||
IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0), ByValSize(0) {
|
||||
IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0),
|
||||
IsCopyElisionCandidate(0), ByValSize(0) {
|
||||
static_assert(sizeof(*this) == 2 * sizeof(unsigned), "flags are too big");
|
||||
}
|
||||
|
||||
|
@ -109,6 +111,9 @@ namespace ISD {
|
|||
bool isSplitEnd() const { return IsSplitEnd; }
|
||||
void setSplitEnd() { IsSplitEnd = 1; }
|
||||
|
||||
/// Returns true if the IsCopyElisionCandidate flag bit is set.
bool isCopyElisionCandidate() const {
  return IsCopyElisionCandidate != 0;
}
|
||||
/// Sets the IsCopyElisionCandidate flag bit. There is no way to clear it
/// through this accessor.
void setCopyElisionCandidate() {
  IsCopyElisionCandidate = 1;
}
|
||||
|
||||
unsigned getByValAlign() const { return (1U << ByValAlign) / 2; }
|
||||
void setByValAlign(unsigned A) {
|
||||
ByValAlign = Log2_32(A) + 1;
|
||||
|
|
|
@ -89,7 +89,7 @@ public:
|
|||
assert(!MInsn && "Already initialized?");
|
||||
|
||||
assert((!E || E->isValid()) && "Expected valid expression");
|
||||
assert(~FI && "Expected valid index");
|
||||
assert(FI != INT_MAX && "Expected valid index");
|
||||
|
||||
FrameIndexExprs.push_back({FI, E});
|
||||
}
|
||||
|
|
|
@ -8028,6 +8028,173 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Maps an argument to the alloca it is copied into and the store instruction
/// that performs that copy.
using ArgCopyElisionMapTy =
    DenseMap<const Argument *,
             std::pair<const AllocaInst *, const StoreInst *>>;
|
||||
|
||||
/// Scan the entry block of the function in FuncInfo for arguments that look
|
||||
/// like copies into a local alloca. Record any copied arguments in
|
||||
/// ArgCopyElisionCandidates.
|
||||
static void
|
||||
findArgumentCopyElisionCandidates(const DataLayout &DL,
|
||||
FunctionLoweringInfo *FuncInfo,
|
||||
ArgCopyElisionMapTy &ArgCopyElisionCandidates) {
|
||||
// Record the state of every static alloca used in the entry block. Argument
|
||||
// allocas are all used in the entry block, so we need approximately as many
|
||||
// entries as we have arguments.
|
||||
enum StaticAllocaInfo { Unknown, Clobbered, Elidable };
|
||||
SmallDenseMap<const AllocaInst *, StaticAllocaInfo, 8> StaticAllocas;
|
||||
unsigned NumArgs = FuncInfo->Fn->getArgumentList().size();
|
||||
StaticAllocas.reserve(NumArgs * 2);
|
||||
|
||||
auto GetInfoIfStaticAlloca = [&](const Value *V) -> StaticAllocaInfo * {
|
||||
if (!V)
|
||||
return nullptr;
|
||||
V = V->stripPointerCasts();
|
||||
const auto *AI = dyn_cast<AllocaInst>(V);
|
||||
if (!AI || !AI->isStaticAlloca() || !FuncInfo->StaticAllocaMap.count(AI))
|
||||
return nullptr;
|
||||
auto Iter = StaticAllocas.insert({AI, Unknown});
|
||||
return &Iter.first->second;
|
||||
};
|
||||
|
||||
// Look for stores of arguments to static allocas. Look through bitcasts and
|
||||
// GEPs to handle type coercions, as long as the alloca is fully initialized
|
||||
// by the store. Any non-store use of an alloca escapes it and any subsequent
|
||||
// unanalyzed store might write it.
|
||||
// FIXME: Handle structs initialized with multiple stores.
|
||||
for (const Instruction &I : FuncInfo->Fn->getEntryBlock()) {
|
||||
// Look for stores, and handle non-store uses conservatively.
|
||||
const auto *SI = dyn_cast<StoreInst>(&I);
|
||||
if (!SI) {
|
||||
// We will look through cast uses, so ignore them completely.
|
||||
if (I.isCast())
|
||||
continue;
|
||||
// Ignore debug info intrinsics, they don't escape or store to allocas.
|
||||
if (isa<DbgInfoIntrinsic>(I))
|
||||
continue;
|
||||
// This is an unknown instruction. Assume it escapes or writes to all
|
||||
// static alloca operands.
|
||||
for (const Use &U : I.operands()) {
|
||||
if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(U))
|
||||
*Info = StaticAllocaInfo::Clobbered;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// If the stored value is a static alloca, mark it as escaped.
|
||||
if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(SI->getValueOperand()))
|
||||
*Info = StaticAllocaInfo::Clobbered;
|
||||
|
||||
// Check if the destination is a static alloca.
|
||||
const Value *Dst = SI->getPointerOperand()->stripPointerCasts();
|
||||
StaticAllocaInfo *Info = GetInfoIfStaticAlloca(Dst);
|
||||
if (!Info)
|
||||
continue;
|
||||
const AllocaInst *AI = cast<AllocaInst>(Dst);
|
||||
|
||||
// Skip allocas that have been initialized or clobbered.
|
||||
if (*Info != StaticAllocaInfo::Unknown)
|
||||
continue;
|
||||
|
||||
// Check if the stored value is an argument, and that this store fully
|
||||
// initializes the alloca. Don't elide copies from the same argument twice.
|
||||
const Value *Val = SI->getValueOperand()->stripPointerCasts();
|
||||
const auto *Arg = dyn_cast<Argument>(Val);
|
||||
if (!Arg || Arg->hasInAllocaAttr() || Arg->hasByValAttr() ||
|
||||
Arg->getType()->isEmptyTy() ||
|
||||
DL.getTypeStoreSize(Arg->getType()) !=
|
||||
DL.getTypeAllocSize(AI->getAllocatedType()) ||
|
||||
ArgCopyElisionCandidates.count(Arg)) {
|
||||
*Info = StaticAllocaInfo::Clobbered;
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI << '\n');
|
||||
|
||||
// Mark this alloca and store for argument copy elision.
|
||||
*Info = StaticAllocaInfo::Elidable;
|
||||
ArgCopyElisionCandidates.insert({Arg, {AI, SI}});
|
||||
|
||||
// Stop scanning if we've seen all arguments. This will happen early in -O0
|
||||
// builds, which is useful, because -O0 builds have large entry blocks and
|
||||
// many allocas.
|
||||
if (ArgCopyElisionCandidates.size() == NumArgs)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/// Try to elide argument copies from memory into a local alloca. Succeeds if
|
||||
/// ArgVal is a load from a suitable fixed stack object.
|
||||
static void tryToElideArgumentCopy(
|
||||
FunctionLoweringInfo *FuncInfo, SmallVectorImpl<SDValue> &Chains,
|
||||
DenseMap<int, int> &ArgCopyElisionFrameIndexMap,
|
||||
SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs,
|
||||
ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg,
|
||||
SDValue ArgVal, bool &ArgHasUses) {
|
||||
// Check if this is a load from a fixed stack object.
|
||||
auto *LNode = dyn_cast<LoadSDNode>(ArgVal);
|
||||
if (!LNode)
|
||||
return;
|
||||
auto *FINode = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode());
|
||||
if (!FINode)
|
||||
return;
|
||||
|
||||
// Check that the fixed stack object is the right size and alignment.
|
||||
// Look at the alignment that the user wrote on the alloca instead of looking
|
||||
// at the stack object.
|
||||
auto ArgCopyIter = ArgCopyElisionCandidates.find(&Arg);
|
||||
assert(ArgCopyIter != ArgCopyElisionCandidates.end());
|
||||
const AllocaInst *AI = ArgCopyIter->second.first;
|
||||
int FixedIndex = FINode->getIndex();
|
||||
int &AllocaIndex = FuncInfo->StaticAllocaMap[AI];
|
||||
int OldIndex = AllocaIndex;
|
||||
MachineFrameInfo &MFI = FuncInfo->MF->getFrameInfo();
|
||||
if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) {
|
||||
DEBUG(dbgs() << " argument copy elision failed due to bad fixed stack "
|
||||
"object size\n");
|
||||
return;
|
||||
}
|
||||
unsigned RequiredAlignment = AI->getAlignment();
|
||||
if (!RequiredAlignment) {
|
||||
RequiredAlignment = FuncInfo->MF->getDataLayout().getABITypeAlignment(
|
||||
AI->getAllocatedType());
|
||||
}
|
||||
if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) {
|
||||
DEBUG(dbgs() << " argument copy elision failed: alignment of alloca "
|
||||
"greater than stack argument alignment ("
|
||||
<< RequiredAlignment << " vs "
|
||||
<< MFI.getObjectAlignment(FixedIndex) << ")\n");
|
||||
return;
|
||||
}
|
||||
|
||||
// Perform the elision. Delete the old stack object and replace its only use
|
||||
// in the variable info map. Mark the stack object as mutable.
|
||||
DEBUG({
|
||||
dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n'
|
||||
<< " Replacing frame index " << OldIndex << " with " << FixedIndex
|
||||
<< '\n';
|
||||
});
|
||||
MFI.RemoveStackObject(OldIndex);
|
||||
MFI.setIsImmutableObjectIndex(FixedIndex, false);
|
||||
AllocaIndex = FixedIndex;
|
||||
ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex});
|
||||
Chains.push_back(ArgVal.getValue(1));
|
||||
|
||||
// Avoid emitting code for the store implementing the copy.
|
||||
const StoreInst *SI = ArgCopyIter->second.second;
|
||||
ElidedArgCopyInstrs.insert(SI);
|
||||
|
||||
// Check for uses of the argument again so that we can avoid exporting ArgVal
|
||||
// if it is't used by anything other than the store.
|
||||
for (const Value *U : Arg.users()) {
|
||||
if (U != SI) {
|
||||
ArgHasUses = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void SelectionDAGISel::LowerArguments(const Function &F) {
|
||||
SelectionDAG &DAG = SDB->DAG;
|
||||
SDLoc dl = SDB->getCurSDLoc();
|
||||
|
@ -8050,6 +8217,12 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
|
|||
Ins.push_back(RetArg);
|
||||
}
|
||||
|
||||
// Look for stores of arguments to static allocas. Mark such arguments with a
|
||||
// flag to ask the target to give us the memory location of that argument if
|
||||
// available.
|
||||
ArgCopyElisionMapTy ArgCopyElisionCandidates;
|
||||
findArgumentCopyElisionCandidates(DL, FuncInfo, ArgCopyElisionCandidates);
|
||||
|
||||
// Set up the incoming argument description vector.
|
||||
unsigned Idx = 0;
|
||||
for (const Argument &Arg : F.args()) {
|
||||
|
@ -8127,6 +8300,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
|
|||
if (NeedsRegBlock)
|
||||
Flags.setInConsecutiveRegs();
|
||||
Flags.setOrigAlign(OriginalAlignment);
|
||||
if (ArgCopyElisionCandidates.count(&Arg))
|
||||
Flags.setCopyElisionCandidate();
|
||||
|
||||
MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
|
||||
unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT);
|
||||
|
@ -8199,19 +8374,33 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
|
|||
++i;
|
||||
}
|
||||
|
||||
SmallVector<SDValue, 4> Chains;
|
||||
DenseMap<int, int> ArgCopyElisionFrameIndexMap;
|
||||
for (const Argument &Arg : F.args()) {
|
||||
++Idx;
|
||||
SmallVector<SDValue, 4> ArgValues;
|
||||
SmallVector<EVT, 4> ValueVTs;
|
||||
ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
|
||||
unsigned NumValues = ValueVTs.size();
|
||||
if (NumValues == 0)
|
||||
continue;
|
||||
|
||||
bool ArgHasUses = !Arg.use_empty();
|
||||
|
||||
// Elide the copying store if the target loaded this argument from a
|
||||
// suitable fixed stack object.
|
||||
if (Ins[i].Flags.isCopyElisionCandidate()) {
|
||||
tryToElideArgumentCopy(FuncInfo, Chains, ArgCopyElisionFrameIndexMap,
|
||||
ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg,
|
||||
InVals[i], ArgHasUses);
|
||||
}
|
||||
|
||||
// If this argument is unused then remember its value. It is used to generate
|
||||
// debugging information.
|
||||
bool isSwiftErrorArg =
|
||||
TLI->supportSwiftError() &&
|
||||
F.getAttributes().hasAttribute(Idx, Attribute::SwiftError);
|
||||
if (Arg.use_empty() && NumValues && !isSwiftErrorArg) {
|
||||
if (!ArgHasUses && !isSwiftErrorArg) {
|
||||
SDB->setUnusedArgValue(&Arg, InVals[i]);
|
||||
|
||||
// Also remember any frame index for use in FastISel.
|
||||
|
@ -8228,16 +8417,15 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
|
|||
// Even an apparent 'unused' swifterror argument needs to be returned. So
|
||||
// we do generate a copy for it that can be used on return from the
|
||||
// function.
|
||||
if (!Arg.use_empty() || isSwiftErrorArg) {
|
||||
if (ArgHasUses || isSwiftErrorArg) {
|
||||
Optional<ISD::NodeType> AssertOp;
|
||||
if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
|
||||
AssertOp = ISD::AssertSext;
|
||||
else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
|
||||
AssertOp = ISD::AssertZext;
|
||||
|
||||
ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
|
||||
NumParts, PartVT, VT,
|
||||
nullptr, AssertOp));
|
||||
ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
|
||||
PartVT, VT, nullptr, AssertOp));
|
||||
}
|
||||
|
||||
i += NumParts;
|
||||
|
@ -8291,8 +8479,26 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
|
|||
}
|
||||
}
|
||||
|
||||
if (!Chains.empty()) {
|
||||
Chains.push_back(NewRoot);
|
||||
NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
|
||||
}
|
||||
|
||||
DAG.setRoot(NewRoot);
|
||||
|
||||
assert(i == InVals.size() && "Argument register count mismatch!");
|
||||
|
||||
// If any argument copy elisions occurred and we have debug info, update the
|
||||
// stale frame indices used in the dbg.declare variable info table.
|
||||
MachineFunction::VariableDbgInfoMapTy &DbgDeclareInfo = MF->getVariableDbgInfo();
|
||||
if (!DbgDeclareInfo.empty() && !ArgCopyElisionFrameIndexMap.empty()) {
|
||||
for (MachineFunction::VariableDbgInfo &VI : DbgDeclareInfo) {
|
||||
auto I = ArgCopyElisionFrameIndexMap.find(VI.Slot);
|
||||
if (I != ArgCopyElisionFrameIndexMap.end())
|
||||
VI.Slot = I->second;
|
||||
}
|
||||
}
|
||||
|
||||
// Finally, if the target has anything special to do, allow it to do so.
|
||||
EmitFunctionEntryCode();
|
||||
}
|
||||
|
|
|
@ -713,8 +713,10 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
|
|||
bool &HadTailCall) {
|
||||
// Lower the instructions. If a call is emitted as a tail call, cease emitting
|
||||
// nodes for this block.
|
||||
for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I)
|
||||
SDB->visit(*I);
|
||||
for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) {
|
||||
if (!ElidedArgCopyInstrs.count(&*I))
|
||||
SDB->visit(*I);
|
||||
}
|
||||
|
||||
// Make sure the root of the DAG is up-to-date.
|
||||
CurDAG->setRoot(SDB->getControlRoot());
|
||||
|
@ -1564,7 +1566,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
|
|||
const Instruction *Inst = &*std::prev(BI);
|
||||
|
||||
// If we no longer require this instruction, skip it.
|
||||
if (isFoldedOrDeadInstruction(Inst, FuncInfo)) {
|
||||
if (isFoldedOrDeadInstruction(Inst, FuncInfo) ||
|
||||
ElidedArgCopyInstrs.count(Inst)) {
|
||||
--NumFastIselRemaining;
|
||||
continue;
|
||||
}
|
||||
|
@ -1694,6 +1697,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
|
|||
|
||||
FinishBasicBlock();
|
||||
FuncInfo->PHINodesToUpdate.clear();
|
||||
ElidedArgCopyInstrs.clear();
|
||||
}
|
||||
|
||||
propagateSwiftErrorVRegs(FuncInfo);
|
||||
|
|
|
@ -2691,6 +2691,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
|
|||
CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
|
||||
bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
|
||||
EVT ValVT;
|
||||
MVT PtrVT = getPointerTy(DAG.getDataLayout());
|
||||
|
||||
// If value is passed by pointer we have address passed instead of the value
|
||||
// itself. No need to extend if the mask value and location share the same
|
||||
|
@ -2729,30 +2730,71 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
|
|||
if (CallConv == CallingConv::X86_INTR) {
|
||||
MFI.setObjectOffset(FI, Offset);
|
||||
}
|
||||
return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
|
||||
} else {
|
||||
int FI = MFI.CreateFixedObject(ValVT.getSizeInBits()/8,
|
||||
VA.getLocMemOffset(), isImmutable);
|
||||
|
||||
// Set SExt or ZExt flag.
|
||||
if (VA.getLocInfo() == CCValAssign::ZExt) {
|
||||
MFI.setObjectZExt(FI, true);
|
||||
} else if (VA.getLocInfo() == CCValAssign::SExt) {
|
||||
MFI.setObjectSExt(FI, true);
|
||||
}
|
||||
|
||||
// Adjust SP offset of interrupt parameter.
|
||||
if (CallConv == CallingConv::X86_INTR) {
|
||||
MFI.setObjectOffset(FI, Offset);
|
||||
}
|
||||
|
||||
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
|
||||
SDValue Val = DAG.getLoad(
|
||||
ValVT, dl, Chain, FIN,
|
||||
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
|
||||
return ExtendedInMem ?
|
||||
DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) : Val;
|
||||
return DAG.getFrameIndex(FI, PtrVT);
|
||||
}
|
||||
|
||||
// This is an argument in memory. We might be able to perform copy elision.
|
||||
if (Flags.isCopyElisionCandidate()) {
|
||||
EVT ArgVT = Ins[i].ArgVT;
|
||||
SDValue PartAddr;
|
||||
if (Ins[i].PartOffset == 0) {
|
||||
// If this is a one-part value or the first part of a multi-part value,
|
||||
// create a stack object for the entire argument value type and return a
|
||||
// load from our portion of it. This assumes that if the first part of an
|
||||
// argument is in memory, the rest will also be in memory.
|
||||
int FI = MFI.CreateFixedObject(ArgVT.getSizeInBits() / 8,
|
||||
VA.getLocMemOffset(), /*Immutable=*/false);
|
||||
PartAddr = DAG.getFrameIndex(FI, PtrVT);
|
||||
return DAG.getLoad(
|
||||
ValVT, dl, Chain, PartAddr,
|
||||
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
|
||||
} else {
|
||||
// This is not the first piece of an argument in memory. See if there is
|
||||
// already a fixed stack object including this offset. If so, assume it
|
||||
// was created by the PartOffset == 0 branch above and create a load from
|
||||
// the appropriate offset into it.
|
||||
int64_t PartBegin = VA.getLocMemOffset();
|
||||
int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
|
||||
int FI = MFI.getObjectIndexBegin();
|
||||
for (; MFI.isFixedObjectIndex(FI); ++FI) {
|
||||
int64_t ObjBegin = MFI.getObjectOffset(FI);
|
||||
int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
|
||||
if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
|
||||
break;
|
||||
}
|
||||
if (MFI.isFixedObjectIndex(FI)) {
|
||||
SDValue Addr =
|
||||
DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
|
||||
DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
|
||||
return DAG.getLoad(
|
||||
ValVT, dl, Chain, Addr,
|
||||
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
|
||||
Ins[i].PartOffset));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
|
||||
VA.getLocMemOffset(), isImmutable);
|
||||
|
||||
// Set SExt or ZExt flag.
|
||||
if (VA.getLocInfo() == CCValAssign::ZExt) {
|
||||
MFI.setObjectZExt(FI, true);
|
||||
} else if (VA.getLocInfo() == CCValAssign::SExt) {
|
||||
MFI.setObjectSExt(FI, true);
|
||||
}
|
||||
|
||||
// Adjust SP offset of interrupt parameter.
|
||||
if (CallConv == CallingConv::X86_INTR) {
|
||||
MFI.setObjectOffset(FI, Offset);
|
||||
}
|
||||
|
||||
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
|
||||
SDValue Val = DAG.getLoad(
|
||||
ValVT, dl, Chain, FIN,
|
||||
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
|
||||
return ExtendedInMem ? DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val)
|
||||
: Val;
|
||||
}
|
||||
|
||||
// FIXME: Get this from tablegen.
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
; rdar://13625505
|
||||
; Here we have 9 fixed integer arguments; the 9th argument is on the stack, and the
|
||||
; varargs start right after at 8-byte alignment.
|
||||
define void @fn9(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9, ...) nounwind noinline ssp {
|
||||
define void @fn9(i32* %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9, ...) nounwind noinline ssp {
|
||||
; CHECK-LABEL: fn9:
|
||||
; 9th fixed argument
|
||||
; CHECK: ldr {{w[0-9]+}}, [sp, #64]
|
||||
|
@ -30,7 +30,6 @@ define void @fn9(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7,
|
|||
%a10 = alloca i32, align 4
|
||||
%a11 = alloca i32, align 4
|
||||
%a12 = alloca i32, align 4
|
||||
store i32 %a1, i32* %1, align 4
|
||||
store i32 %a2, i32* %2, align 4
|
||||
store i32 %a3, i32* %3, align 4
|
||||
store i32 %a4, i32* %4, align 4
|
||||
|
@ -39,6 +38,7 @@ define void @fn9(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7,
|
|||
store i32 %a7, i32* %7, align 4
|
||||
store i32 %a8, i32* %8, align 4
|
||||
store i32 %a9, i32* %9, align 4
|
||||
store i32 %a9, i32* %a1
|
||||
%10 = bitcast i8** %args to i8*
|
||||
call void @llvm.va_start(i8* %10)
|
||||
%11 = va_arg i8** %args, i32
|
||||
|
@ -93,7 +93,7 @@ define i32 @main() nounwind ssp {
|
|||
%10 = load i32, i32* %a10, align 4
|
||||
%11 = load i32, i32* %a11, align 4
|
||||
%12 = load i32, i32* %a12, align 4
|
||||
call void (i32, i32, i32, i32, i32, i32, i32, i32, i32, ...) @fn9(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12)
|
||||
call void (i32*, i32, i32, i32, i32, i32, i32, i32, i32, ...) @fn9(i32* %a1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12)
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,61 @@
|
|||
; RUN: llc -mtriple=armv7-linux < %s | FileCheck %s
|
||||
|
||||
declare arm_aapcscc void @addrof_i32(i32*)
|
||||
declare arm_aapcscc void @addrof_i64(i64*)
|
||||
|
||||
define arm_aapcscc void @simple(i32, i32, i32, i32, i32 %x) {
|
||||
entry:
|
||||
%x.addr = alloca i32
|
||||
store i32 %x, i32* %x.addr
|
||||
call void @addrof_i32(i32* %x.addr)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: simple:
|
||||
; CHECK: push {r11, lr}
|
||||
; CHECK: add r0, sp, #8
|
||||
; CHECK: bl addrof_i32
|
||||
; CHECK: pop {r11, pc}
|
||||
|
||||
|
||||
; We need to load %x before calling addrof_i32 now because it could mutate %x in
|
||||
; place.
|
||||
|
||||
define arm_aapcscc i32 @use_arg(i32, i32, i32, i32, i32 %x) {
|
||||
entry:
|
||||
%x.addr = alloca i32
|
||||
store i32 %x, i32* %x.addr
|
||||
call void @addrof_i32(i32* %x.addr)
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
; CHECK-LABEL: use_arg:
|
||||
; CHECK: push {[[csr:[^ ]*]], lr}
|
||||
; CHECK: ldr [[csr]], [sp, #8]
|
||||
; CHECK: add r0, sp, #8
|
||||
; CHECK: bl addrof_i32
|
||||
; CHECK: mov r0, [[csr]]
|
||||
; CHECK: pop {[[csr]], pc}
|
||||
|
||||
|
||||
define arm_aapcscc i64 @split_i64(i32, i32, i32, i32, i64 %x) {
|
||||
entry:
|
||||
%x.addr = alloca i64, align 4
|
||||
store i64 %x, i64* %x.addr, align 4
|
||||
call void @addrof_i64(i64* %x.addr)
|
||||
ret i64 %x
|
||||
}
|
||||
|
||||
; CHECK-LABEL: split_i64:
|
||||
; CHECK: push {r4, r5, r11, lr}
|
||||
; CHECK: sub sp, sp, #8
|
||||
; CHECK: ldr r4, [sp, #28]
|
||||
; CHECK: ldr r5, [sp, #24]
|
||||
; CHECK: mov r0, sp
|
||||
; CHECK: str r4, [sp, #4]
|
||||
; CHECK: str r5, [sp]
|
||||
; CHECK: bl addrof_i64
|
||||
; CHECK: mov r0, r5
|
||||
; CHECK: mov r1, r4
|
||||
; CHECK: add sp, sp, #8
|
||||
; CHECK: pop {r4, r5, r11, pc}
|
|
@ -236,8 +236,8 @@ entry:
|
|||
ret i32 %tmp
|
||||
|
||||
; CHECK-LABEL: va9:
|
||||
; CHECK: addiu $sp, $sp, -32
|
||||
; CHECK: lw $2, 52($sp)
|
||||
; CHECK: addiu $sp, $sp, -24
|
||||
; CHECK: lw $2, 44($sp)
|
||||
}
|
||||
|
||||
; double
|
||||
|
|
|
@ -8,9 +8,10 @@ target triple = "i386-apple-darwin10.0.0"
|
|||
@.str = internal constant [4 x i8] c"%p\0A\00" ; <[4 x i8]*> [#uses=1]
|
||||
@llvm.used = appending global [1 x i8*] [i8* bitcast (i8* (%struct.S*, i32, %struct.S*)* @_Z4test1SiS_ to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
|
||||
|
||||
; Verify that %esi gets spilled before the call.
|
||||
; Verify that %s1 gets spilled before the call.
|
||||
; CHECK: Z4test1SiS
|
||||
; CHECK: movl %esi,{{.*}}(%ebp)
|
||||
; CHECK: leal 8(%ebp), %[[reg:[^ ]*]]
|
||||
; CHECK: movl %[[reg]],{{.*}}(%ebp) ## 4-byte Spill
|
||||
; CHECK: calll __Z6throwsv
|
||||
|
||||
define i8* @_Z4test1SiS_(%struct.S* byval %s1, i32 %n, %struct.S* byval %s2) ssp personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
|
||||
|
|
|
@ -0,0 +1,280 @@
|
|||
; RUN: llc -mtriple=i686-windows < %s | FileCheck %s
|
||||
|
||||
declare void @addrof_i32(i32*)
|
||||
declare void @addrof_i64(i64*)
|
||||
declare void @addrof_i128(i128*)
|
||||
declare void @addrof_i32_x3(i32*, i32*, i32*)
|
||||
|
||||
define void @simple(i32 %x) {
|
||||
entry:
|
||||
%x.addr = alloca i32
|
||||
store i32 %x, i32* %x.addr
|
||||
call void @addrof_i32(i32* %x.addr)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: _simple:
|
||||
; CHECK: leal 4(%esp), %[[reg:[^ ]*]]
|
||||
; CHECK: pushl %[[reg]]
|
||||
; CHECK: calll _addrof_i32
|
||||
; CHECK: retl
|
||||
|
||||
|
||||
; We need to load %x before calling addrof_i32 now because it could mutate %x in
|
||||
; place.
|
||||
|
||||
define i32 @use_arg(i32 %x) {
|
||||
entry:
|
||||
%x.addr = alloca i32
|
||||
store i32 %x, i32* %x.addr
|
||||
call void @addrof_i32(i32* %x.addr)
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
; CHECK-LABEL: _use_arg:
|
||||
; CHECK: pushl %[[csr:[^ ]*]]
|
||||
; CHECK-DAG: movl 8(%esp), %[[csr]]
|
||||
; CHECK-DAG: leal 8(%esp), %[[reg:[^ ]*]]
|
||||
; CHECK: pushl %[[reg]]
|
||||
; CHECK: calll _addrof_i32
|
||||
; CHECK: movl %[[csr]], %eax
|
||||
; CHECK: popl %[[csr]]
|
||||
; CHECK: retl
|
||||
|
||||
|
||||
define i64 @split_i64(i64 %x) {
|
||||
entry:
|
||||
%x.addr = alloca i64, align 4
|
||||
store i64 %x, i64* %x.addr, align 4
|
||||
call void @addrof_i64(i64* %x.addr)
|
||||
ret i64 %x
|
||||
}
|
||||
|
||||
; CHECK-LABEL: _split_i64:
|
||||
; CHECK: pushl %ebp
|
||||
; CHECK: movl %esp, %ebp
|
||||
; CHECK: pushl %[[csr2:[^ ]*]]
|
||||
; CHECK: pushl %[[csr1:[^ ]*]]
|
||||
; CHECK: andl $-8, %esp
|
||||
; CHECK-DAG: movl 8(%ebp), %[[csr1]]
|
||||
; CHECK-DAG: movl 12(%ebp), %[[csr2]]
|
||||
; CHECK-DAG: leal 8(%ebp), %[[reg:[^ ]*]]
|
||||
; CHECK: pushl %[[reg]]
|
||||
; CHECK: calll _addrof_i64
|
||||
; CHECK-DAG: movl %[[csr1]], %eax
|
||||
; CHECK-DAG: movl %[[csr2]], %edx
|
||||
; CHECK: leal -8(%ebp), %esp
|
||||
; CHECK: popl %[[csr1]]
|
||||
; CHECK: popl %[[csr2]]
|
||||
; CHECK: popl %ebp
|
||||
; CHECK: retl
|
||||
|
||||
|
||||
; We can't copy elide when an i64 is split between registers and memory in a
|
||||
; fastcc function.
|
||||
|
||||
define fastcc i64 @fastcc_split_i64(i64* %p, i64 %x) {
|
||||
entry:
|
||||
%x.addr = alloca i64, align 4
|
||||
store i64 %x, i64* %x.addr, align 4
|
||||
call void @addrof_i64(i64* %x.addr)
|
||||
ret i64 %x
|
||||
}
|
||||
|
||||
; CHECK-LABEL: _fastcc_split_i64:
|
||||
; CHECK: pushl %ebp
|
||||
; CHECK: movl %esp, %ebp
|
||||
; CHECK-DAG: movl %edx, %[[r1:[^ ]*]]
|
||||
; CHECK-DAG: movl 8(%ebp), %[[r2:[^ ]*]]
|
||||
; CHECK-DAG: movl %[[r2]], 4(%esp)
|
||||
; CHECK-DAG: movl %[[r1]], (%esp)
|
||||
; CHECK: movl %esp, %[[reg:[^ ]*]]
|
||||
; CHECK: pushl %[[reg]]
|
||||
; CHECK: calll _addrof_i64
|
||||
; CHECK: popl %ebp
|
||||
; CHECK: retl
|
||||
|
||||
|
||||
; We can't copy elide when it would reduce the user requested alignment.
|
||||
|
||||
define void @high_alignment(i32 %x) {
|
||||
entry:
|
||||
%x.p = alloca i32, align 128
|
||||
store i32 %x, i32* %x.p
|
||||
call void @addrof_i32(i32* %x.p)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: _high_alignment:
|
||||
; CHECK: andl $-128, %esp
|
||||
; CHECK: movl 8(%ebp), %[[reg:[^ ]*]]
|
||||
; CHECK: movl %[[reg]], (%esp)
|
||||
; CHECK: movl %esp, %[[reg:[^ ]*]]
|
||||
; CHECK: pushl %[[reg]]
|
||||
; CHECK: calll _addrof_i32
|
||||
; CHECK: retl
|
||||
|
||||
|
||||
; We can't copy elide when it would reduce the ABI required alignment.
|
||||
; FIXME: We should lower the ABI alignment of i64 on Windows, since MSVC
|
||||
; doesn't guarantee it.
|
||||
|
||||
define void @abi_alignment(i64 %x) {
|
||||
entry:
|
||||
%x.p = alloca i64
|
||||
store i64 %x, i64* %x.p
|
||||
call void @addrof_i64(i64* %x.p)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: _abi_alignment:
|
||||
; CHECK: andl $-8, %esp
|
||||
; CHECK: movl 8(%ebp), %[[reg:[^ ]*]]
|
||||
; CHECK: movl %[[reg]], (%esp)
|
||||
; CHECK: movl %esp, %[[reg:[^ ]*]]
|
||||
; CHECK: pushl %[[reg]]
|
||||
; CHECK: calll _addrof_i64
|
||||
; CHECK: retl
|
||||
|
||||
|
||||
; The code we generate for this is unimportant. This is mostly a crash test.
|
||||
|
||||
define void @split_i128(i128* %sret, i128 %x) {
|
||||
entry:
|
||||
%x.addr = alloca i128
|
||||
store i128 %x, i128* %x.addr
|
||||
call void @addrof_i128(i128* %x.addr)
|
||||
store i128 %x, i128* %sret
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: _split_i128:
|
||||
; CHECK: pushl %ebp
|
||||
; CHECK: calll _addrof_i128
|
||||
; CHECK: retl
|
||||
|
||||
|
||||
; Check that we load all of x, y, and z before the call.
|
||||
|
||||
define i32 @three_args(i32 %x, i32 %y, i32 %z) {
|
||||
entry:
|
||||
%z.addr = alloca i32, align 4
|
||||
%y.addr = alloca i32, align 4
|
||||
%x.addr = alloca i32, align 4
|
||||
store i32 %z, i32* %z.addr, align 4
|
||||
store i32 %y, i32* %y.addr, align 4
|
||||
store i32 %x, i32* %x.addr, align 4
|
||||
call void @addrof_i32_x3(i32* %x.addr, i32* %y.addr, i32* %z.addr)
|
||||
%s1 = add i32 %x, %y
|
||||
%sum = add i32 %s1, %z
|
||||
ret i32 %sum
|
||||
}
|
||||
|
||||
; CHECK-LABEL: _three_args:
|
||||
; CHECK: pushl %[[csr:[^ ]*]]
|
||||
; CHECK-DAG: movl {{[0-9]+}}(%esp), %[[csr]]
|
||||
; CHECK-DAG: addl {{[0-9]+}}(%esp), %[[csr]]
|
||||
; CHECK-DAG: addl {{[0-9]+}}(%esp), %[[csr]]
|
||||
; CHECK-DAG: leal 8(%esp), %[[x:[^ ]*]]
|
||||
; CHECK-DAG: leal 12(%esp), %[[y:[^ ]*]]
|
||||
; CHECK-DAG: leal 16(%esp), %[[z:[^ ]*]]
|
||||
; CHECK: pushl %[[z]]
|
||||
; CHECK: pushl %[[y]]
|
||||
; CHECK: pushl %[[x]]
|
||||
; CHECK: calll _addrof_i32_x3
|
||||
; CHECK: movl %[[csr]], %eax
|
||||
; CHECK: popl %[[csr]]
|
||||
; CHECK: retl
|
||||
|
||||
|
||||
define void @two_args_same_alloca(i32 %x, i32 %y) {
|
||||
entry:
|
||||
%x.addr = alloca i32
|
||||
store i32 %x, i32* %x.addr
|
||||
store i32 %y, i32* %x.addr
|
||||
call void @addrof_i32(i32* %x.addr)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: _two_args_same_alloca:
|
||||
; CHECK: movl 8(%esp), {{.*}}
|
||||
; CHECK: movl {{.*}}, 4(%esp)
|
||||
; CHECK: leal 4(%esp), %[[reg:[^ ]*]]
|
||||
; CHECK: pushl %[[reg]]
|
||||
; CHECK: calll _addrof_i32
|
||||
; CHECK: retl
|
||||
|
||||
|
||||
define void @avoid_byval(i32* byval %x) {
|
||||
entry:
|
||||
%x.p.p = alloca i32*
|
||||
store i32* %x, i32** %x.p.p
|
||||
call void @addrof_i32(i32* %x)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: _avoid_byval:
|
||||
; CHECK: leal {{[0-9]+}}(%esp), %[[reg:[^ ]*]]
|
||||
; CHECK: pushl %[[reg]]
|
||||
; CHECK: calll _addrof_i32
|
||||
; CHECK: retl
|
||||
|
||||
|
||||
define void @avoid_inalloca(i32* inalloca %x) {
|
||||
entry:
|
||||
%x.p.p = alloca i32*
|
||||
store i32* %x, i32** %x.p.p
|
||||
call void @addrof_i32(i32* %x)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: _avoid_inalloca:
|
||||
; CHECK: leal {{[0-9]+}}(%esp), %[[reg:[^ ]*]]
|
||||
; CHECK: pushl %[[reg]]
|
||||
; CHECK: calll _addrof_i32
|
||||
; CHECK: retl
|
||||
|
||||
|
||||
; Don't elide the copy when the alloca is escaped with a store.
|
||||
|
||||
define void @escape_with_store(i32 %x) {
|
||||
%x1 = alloca i32
|
||||
%x2 = alloca i32*
|
||||
store i32* %x1, i32** %x2
|
||||
%x3 = load i32*, i32** %x2
|
||||
store i32 0, i32* %x3
|
||||
store i32 %x, i32* %x1
|
||||
call void @addrof_i32(i32* %x1)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: _escape_with_store:
|
||||
; CHECK-DAG: movl {{.*}}(%esp), %[[reg:[^ ]*]]
|
||||
; CHECK-DAG: movl $0, [[offs:[0-9]*]](%esp)
|
||||
; CHECK: movl %[[reg]], [[offs]](%esp)
|
||||
; CHECK: calll _addrof_i32
|
||||
|
||||
|
||||
; This test case exposed issues with the use of TokenFactor.
|
||||
|
||||
define void @sret_and_elide(i32* sret %sret, i32 %v) {
|
||||
%v.p = alloca i32
|
||||
store i32 %v, i32* %v.p
|
||||
call void @addrof_i32(i32* %v.p)
|
||||
store i32 %v, i32* %sret
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: _sret_and_elide:
|
||||
; CHECK: pushl
|
||||
; CHECK: pushl
|
||||
; CHECK: movl 12(%esp), %[[sret:[^ ]*]]
|
||||
; CHECK: movl 16(%esp), %[[v:[^ ]*]]
|
||||
; CHECK: leal 16(%esp), %[[reg:[^ ]*]]
|
||||
; CHECK: pushl %[[reg]]
|
||||
; CHECK: calll _addrof_i32
|
||||
; CHECK: movl %[[v]], (%[[sret]])
|
||||
; CHECK: movl %[[sret]], %eax
|
||||
; CHECK: popl
|
||||
; CHECK: popl
|
||||
; CHECK: retl
|
|
@ -1,31 +1,27 @@
|
|||
; RUN: llc < %s -mtriple=i386-apple-darwin9 -O0 -optimize-regalloc -regalloc=basic -no-integrated-as | FileCheck %s
|
||||
; rdar://6992609
|
||||
|
||||
; CHECK: movl %ecx, 4([[ESP:%e..]])
|
||||
; CHECK: movl 4([[ESP]]), [[EDX:%e..]]
|
||||
; CHECK: movl [[EDX]], 4([[ESP]])
|
||||
target triple = "i386-apple-darwin9.0"
|
||||
@llvm.used = appending global [1 x i8*] [i8* bitcast (i64 (i64)* @_OSSwapInt64 to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
|
||||
|
||||
; Test input: passes an i64 through an inline asm that byte-swaps %eax and
; %edx and then exchanges the two registers. The constraint string "=A,0"
; ties the single input to the output operand.
; NOTE(review): this view interleaves two bodies -- the longer alloca/load/store
; sequence and the short form that feeds %_data straight into the asm --
; presumably the removed and added sides of the diff.
define i64 @_OSSwapInt64(i64 %_data) nounwind {
|
||||
entry:
|
||||
%retval = alloca i64 ; <i64*> [#uses=2]
|
||||
%_data.addr = alloca i64 ; <i64*> [#uses=4]
|
||||
store i64 %_data, i64* %_data.addr
|
||||
%tmp = load i64, i64* %_data.addr ; <i64> [#uses=1]
|
||||
%0 = call i64 asm "bswap %eax\0A\09bswap %edx\0A\09xchgl %eax, %edx", "=A,0,~{dirflag},~{fpsr},~{flags}"(i64 %tmp) nounwind ; <i64> [#uses=1]
|
||||
store i64 %0, i64* %_data.addr
|
||||
%tmp1 = load i64, i64* %_data.addr ; <i64> [#uses=1]
|
||||
store i64 %tmp1, i64* %retval
|
||||
%1 = load i64, i64* %retval ; <i64> [#uses=1]
|
||||
ret i64 %1
|
||||
; The asm text must match the earlier call: '%' is not an escape character in
; LLVM inline-asm strings, so the register is spelled with a single '%'.
%0 = call i64 asm "bswap %eax\0A\09bswap %edx\0A\09xchgl %eax, %edx", "=A,0,~{dirflag},~{fpsr},~{flags}"(i64 %_data) nounwind
|
||||
ret i64 %0
|
||||
}
|
||||
|
||||
; CHECK-LABEL: __OSSwapInt64:
|
||||
; CHECK-DAG: movl 8(%esp), %edx
|
||||
; CHECK-DAG: movl 4(%esp), %eax
|
||||
; CHECK: ## InlineAsm Start
|
||||
; CHECK: ## InlineAsm End
|
||||
; Everything is set up in EAX:EDX, return immediately.
|
||||
; CHECK-NEXT: retl
|
||||
|
||||
; The tied operands are not necessarily in the same order as the defs.
|
||||
; PR13742
|
||||
; Test input: an inline asm with two register outputs ("=r,=r") and two tied
; inputs whose constraints are "1,0": %x is tied to output 1 and %y to
; output 0, so the tied operands are in the opposite order from the defs.
; The function returns element 0 of the asm result.
; NOTE(review): the three body lines appear twice in this view, presumably the
; removed and added sides of a whitespace-only diff.
define i64 @swapped(i64 %x, i64 %y) nounwind {
|
||||
entry:
|
||||
%x0 = call { i64, i64 } asm "foo", "=r,=r,1,0,~{dirflag},~{fpsr},~{flags}"(i64 %x, i64 %y) nounwind
|
||||
%x1 = extractvalue { i64, i64 } %x0, 0
|
||||
ret i64 %x1
|
||||
%x0 = call { i64, i64 } asm "foo", "=r,=r,1,0,~{dirflag},~{fpsr},~{flags}"(i64 %x, i64 %y) nounwind
|
||||
%x1 = extractvalue { i64, i64 } %x0, 0
|
||||
ret i64 %x1
|
||||
}
|
||||
|
|
|
@ -30,14 +30,6 @@ define <16 x float> @makefloat(float %f1, float %f2, float %f3, float %f4, float
|
|||
; CHECK-NEXT: vmovss %xmm5, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm6, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm7, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm15, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm14, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm13, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm12, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm11, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm10, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm9, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm8, (%rsp)
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
|
||||
|
@ -46,14 +38,14 @@ define <16 x float> @makefloat(float %f1, float %f2, float %f3, float %f4, float
|
|||
; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm7 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm9 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm10 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm11 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm12 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm13 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm14 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm15 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm16 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm17 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm18 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm19 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm20 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm21 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm22 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm23 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm1, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm2, {{[0-9]+}}(%rsp)
|
||||
|
@ -62,14 +54,14 @@ define <16 x float> @makefloat(float %f1, float %f2, float %f3, float %f4, float
|
|||
; CHECK-NEXT: vmovss %xmm5, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm6, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm7, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm8, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm9, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm10, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm11, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm12, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm13, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm14, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm15, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm16, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm17, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm18, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm19, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm20, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm21, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm22, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss %xmm23, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
|
||||
|
@ -104,11 +96,19 @@ define <16 x float> @makefloat(float %f1, float %f2, float %f3, float %f4, float
|
|||
; CHECK-NEXT: # implicit-def: %YMM3
|
||||
; CHECK-NEXT: vmovaps %xmm1, %xmm3
|
||||
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm3
|
||||
; CHECK-NEXT: # implicit-def: %ZMM16
|
||||
; CHECK-NEXT: vmovaps %zmm3, %zmm16
|
||||
; CHECK-NEXT: vinsertf64x4 $1, %ymm2, %zmm16, %zmm16
|
||||
; CHECK-NEXT: vmovaps %zmm16, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: # implicit-def: %ZMM24
|
||||
; CHECK-NEXT: vmovaps %zmm3, %zmm24
|
||||
; CHECK-NEXT: vinsertf64x4 $1, %ymm2, %zmm24, %zmm24
|
||||
; CHECK-NEXT: vmovaps %zmm24, {{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %zmm0
|
||||
; CHECK-NEXT: vmovss %xmm15, {{[0-9]+}}(%rsp) # 4-byte Spill
|
||||
; CHECK-NEXT: vmovss %xmm8, {{[0-9]+}}(%rsp) # 4-byte Spill
|
||||
; CHECK-NEXT: vmovss %xmm9, {{[0-9]+}}(%rsp) # 4-byte Spill
|
||||
; CHECK-NEXT: vmovss %xmm10, {{[0-9]+}}(%rsp) # 4-byte Spill
|
||||
; CHECK-NEXT: vmovss %xmm11, {{[0-9]+}}(%rsp) # 4-byte Spill
|
||||
; CHECK-NEXT: vmovss %xmm12, {{[0-9]+}}(%rsp) # 4-byte Spill
|
||||
; CHECK-NEXT: vmovss %xmm13, {{[0-9]+}}(%rsp) # 4-byte Spill
|
||||
; CHECK-NEXT: vmovss %xmm14, (%rsp) # 4-byte Spill
|
||||
; CHECK-NEXT: movq %rbp, %rsp
|
||||
; CHECK-NEXT: popq %rbp
|
||||
; CHECK-NEXT: retq
|
||||
|
|
|
@ -1653,12 +1653,8 @@ define <4 x float> @test_mm_set1_ps(float %a0) nounwind {
|
|||
define void @test_mm_setcsr(i32 %a0) nounwind {
|
||||
; X32-LABEL: test_mm_setcsr:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: pushl %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl %esp, %ecx
|
||||
; X32-NEXT: movl %eax, (%esp)
|
||||
; X32-NEXT: ldmxcsr (%ecx)
|
||||
; X32-NEXT: popl %eax
|
||||
; X32-NEXT: leal 4(%esp), %eax
|
||||
; X32-NEXT: ldmxcsr (%eax)
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm_setcsr:
|
||||
|
|
|
@ -59,4 +59,4 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
|
|||
|
||||
; CHECK: Address Line Column File ISA Discriminator Flags
|
||||
; CHECK: ------------------ ------ ------ ------ --- ------------- -------------
|
||||
; CHECK: 0x0000000000000011 2 0 1 0 42 {{$}}
|
||||
; CHECK: 0x000000000000000a 2 0 1 0 42 {{$}}
|
||||
|
|
Loading…
Reference in New Issue