diff --git a/llvm/include/llvm/Target/TargetLowering.h b/llvm/include/llvm/Target/TargetLowering.h
index 69bfe707b7a6..c3fa3ccbe727 100644
--- a/llvm/include/llvm/Target/TargetLowering.h
+++ b/llvm/include/llvm/Target/TargetLowering.h
@@ -1152,6 +1152,15 @@ public:
     return false;
   }
 
+  /// Return true if a truncation from Ty1 to Ty2 is permitted when deciding
+  /// whether a call is in tail position. Typically this means that both results
+  /// would be assigned to the same register or stack slot, but it could mean
+  /// the target performs adequate checks of its own before proceeding with the
+  /// tail call.
+  virtual bool allowTruncateForTailCall(Type * /*Ty1*/, Type * /*Ty2*/) const {
+    return false;
+  }
+
   virtual bool isTruncateFree(EVT /*VT1*/, EVT /*VT2*/) const {
     return false;
   }
diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp
index 87e3808eb879..ca08b5b6e0d5 100644
--- a/llvm/lib/CodeGen/Analysis.cpp
+++ b/llvm/lib/CodeGen/Analysis.cpp
@@ -208,155 +208,265 @@ static bool isNoopBitcast(Type *T1, Type *T2,
           TLI.isTypeLegal(EVT::getEVT(T1)) && TLI.isTypeLegal(EVT::getEVT(T2)));
 }
 
-/// sameNoopInput - Return true if V1 == V2, else if either V1 or V2 is a noop
-/// (i.e., lowers to no machine code), look through it (and any transitive noop
-/// operands to it) and check if it has the same noop input value. This is
-/// used to determine if a tail call can be formed.
-static bool sameNoopInput(const Value *V1, const Value *V2,
-                          SmallVectorImpl<unsigned> &Els1,
-                          SmallVectorImpl<unsigned> &Els2,
-                          const TargetLoweringBase &TLI) {
-  using std::swap;
-  bool swapParity = false;
-  bool equalEls = Els1 == Els2;
+/// Look through operations that will be free to find the earliest source of
+/// this value.
+///
+/// @param ValLoc If V has aggregate type, we will be interested in a particular
+/// scalar component. This records its address; the reverse of this list gives a
+/// sequence of indices appropriate for an extractvalue to locate the important
+/// value. This value is updated during the function and on exit will indicate
+/// similar information for the Value returned.
+///
+/// @param DataBits If this function looks through truncate instructions, this
+/// will record the smallest size attained.
+static const Value *getNoopInput(const Value *V,
+                                 SmallVectorImpl<unsigned> &ValLoc,
+                                 unsigned &DataBits,
+                                 const TargetLoweringBase &TLI) {
   while (true) {
-    if ((equalEls && V1 == V2) || isa<UndefValue>(V1) || isa<UndefValue>(V2)) {
-      if (swapParity)
-        // Revert to original Els1 and Els2 to avoid confusing recursive calls
-        swap(Els1, Els2);
-      return true;
-    }
-
     // Try to look through V1; if V1 is not an instruction, it can't be looked
     // through.
-    const Instruction *I = dyn_cast<Instruction>(V1);
+    const Instruction *I = dyn_cast<Instruction>(V);
+    if (!I || I->getNumOperands() == 0) return V;
     const Value *NoopInput = 0;
-    if (I != 0 && I->getNumOperands() > 0) {
-      Value *Op = I->getOperand(0);
-      if (isa<TruncInst>(I)) {
-        // Look through truly no-op truncates.
-        if (TLI.isTruncateFree(Op->getType(), I->getType()))
-          NoopInput = Op;
-      } else if (isa<BitCastInst>(I)) {
-        // Look through truly no-op bitcasts.
-        if (isNoopBitcast(Op->getType(), I->getType(), TLI))
-          NoopInput = Op;
-      } else if (isa<GetElementPtrInst>(I)) {
-        // Look through getelementptr
-        if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
-          NoopInput = Op;
-      } else if (isa<IntToPtrInst>(I)) {
-        // Look through inttoptr.
-        // Make sure this isn't a truncating or extending cast. We could
-        // support this eventually, but don't bother for now.
-        if (!isa<VectorType>(I->getType()) &&
-            TLI.getPointerTy().getSizeInBits() ==
-              cast<IntegerType>(Op->getType())->getBitWidth())
-          NoopInput = Op;
-      } else if (isa<PtrToIntInst>(I)) {
-        // Look through ptrtoint.
-        // Make sure this isn't a truncating or extending cast. We could
-        // support this eventually, but don't bother for now.
-        if (!isa<VectorType>(I->getType()) &&
-            TLI.getPointerTy().getSizeInBits() ==
-              cast<IntegerType>(I->getType())->getBitWidth())
-          NoopInput = Op;
-      } else if (isa<CallInst>(I)) {
-        // Look through call
-        for (User::const_op_iterator i = I->op_begin(),
-             // Skip Callee
-             e = I->op_end() - 1;
-             i != e; ++i) {
-          unsigned attrInd = i - I->op_begin() + 1;
-          if (cast<CallInst>(I)->paramHasAttr(attrInd, Attribute::Returned) &&
-              isNoopBitcast((*i)->getType(), I->getType(), TLI)) {
-            NoopInput = *i;
-            break;
-          }
-        }
-      } else if (isa<InvokeInst>(I)) {
-        // Look through invoke
-        for (User::const_op_iterator i = I->op_begin(),
-             // Skip BB, BB, Callee
-             e = I->op_end() - 3;
-             i != e; ++i) {
-          unsigned attrInd = i - I->op_begin() + 1;
-          if (cast<InvokeInst>(I)->paramHasAttr(attrInd, Attribute::Returned) &&
-              isNoopBitcast((*i)->getType(), I->getType(), TLI)) {
-            NoopInput = *i;
-            break;
-          }
+
+    Value *Op = I->getOperand(0);
+    if (isa<BitCastInst>(I)) {
+      // Look through truly no-op bitcasts.
+      if (isNoopBitcast(Op->getType(), I->getType(), TLI))
+        NoopInput = Op;
+    } else if (isa<GetElementPtrInst>(I)) {
+      // Look through getelementptr
+      if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
+        NoopInput = Op;
+    } else if (isa<IntToPtrInst>(I)) {
+      // Look through inttoptr.
+      // Make sure this isn't a truncating or extending cast. We could
+      // support this eventually, but don't bother for now.
+      if (!isa<VectorType>(I->getType()) &&
+          TLI.getPointerTy().getSizeInBits() ==
+            cast<IntegerType>(Op->getType())->getBitWidth())
+        NoopInput = Op;
+    } else if (isa<PtrToIntInst>(I)) {
+      // Look through ptrtoint.
+      // Make sure this isn't a truncating or extending cast. We could
+      // support this eventually, but don't bother for now.
+      if (!isa<VectorType>(I->getType()) &&
+          TLI.getPointerTy().getSizeInBits() ==
+            cast<IntegerType>(I->getType())->getBitWidth())
+        NoopInput = Op;
+    } else if (isa<TruncInst>(I) &&
+               TLI.allowTruncateForTailCall(Op->getType(), I->getType())) {
+      DataBits = std::min(DataBits, I->getType()->getPrimitiveSizeInBits());
+      NoopInput = Op;
+    } else if (isa<CallInst>(I)) {
+      // Look through call (skipping callee)
+      for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 1;
+           i != e; ++i) {
+        unsigned attrInd = i - I->op_begin() + 1;
+        if (cast<CallInst>(I)->paramHasAttr(attrInd, Attribute::Returned) &&
+            isNoopBitcast((*i)->getType(), I->getType(), TLI)) {
+          NoopInput = *i;
+          break;
         }
       }
+    } else if (isa<InvokeInst>(I)) {
+      // Look through invoke (skipping BB, BB, Callee)
+      for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 3;
+           i != e; ++i) {
+        unsigned attrInd = i - I->op_begin() + 1;
+        if (cast<InvokeInst>(I)->paramHasAttr(attrInd, Attribute::Returned) &&
+            isNoopBitcast((*i)->getType(), I->getType(), TLI)) {
+          NoopInput = *i;
+          break;
+        }
+      }
+    } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(V)) {
+      // Value may come from either the aggregate or the scalar
+      ArrayRef<unsigned> InsertLoc = IVI->getIndices();
+      if (std::equal(InsertLoc.rbegin(), InsertLoc.rend(),
+                     ValLoc.rbegin())) {
+        // The type being inserted is a nested sub-type of the aggregate; we
+        // have to remove those initial indices to get the location we're
+        // interested in for the operand.
+        ValLoc.resize(ValLoc.size() - InsertLoc.size());
+        NoopInput = IVI->getInsertedValueOperand();
+      } else {
+        // The struct we're inserting into has the value we're interested in, no
+        // change of address.
+        NoopInput = Op;
+      }
+    } else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(V)) {
+      // The part we're interested in will inevitably be some sub-section of the
+      // previous aggregate. Combine the two paths to obtain the true address of
+      // our element.
+      ArrayRef<unsigned> ExtractLoc = EVI->getIndices();
+      std::copy(ExtractLoc.rbegin(), ExtractLoc.rend(),
+                std::back_inserter(ValLoc));
+      NoopInput = Op;
     }
+    // Terminate if we couldn't find anything to look through.
+    if (!NoopInput)
+      return V;
 
-    if (NoopInput) {
-      V1 = NoopInput;
-      continue;
-    }
-
-    // If we already swapped, avoid infinite loop
-    if (swapParity)
-      break;
-
-    // Otherwise, swap V1<->V2, Els1<->Els2
-    swap(V1, V2);
-    swap(Els1, Els2);
-    swapParity = !swapParity;
+    V = NoopInput;
   }
-
-  for (unsigned n = 0; n < 2; ++n) {
-    if (isa<InsertValueInst>(V1)) {
-      if (isa<StructType>(V1->getType())) {
-        // Look through insertvalue
-        unsigned i, e;
-        for (i = 0, e = cast<StructType>(V1->getType())->getNumElements();
-             i != e; ++i) {
-          const Value *InScalar = FindInsertedValue(const_cast<Value*>(V1), i);
-          if (InScalar == 0)
-            break;
-          Els1.push_back(i);
-          if (!sameNoopInput(InScalar, V2, Els1, Els2, TLI)) {
-            Els1.pop_back();
-            break;
-          }
-          Els1.pop_back();
-        }
-        if (i == e) {
-          if (swapParity)
-            swap(Els1, Els2);
-          return true;
-        }
-      }
-    } else if (!Els1.empty() && isa<ExtractValueInst>(V1)) {
-      const ExtractValueInst *EVI = cast<ExtractValueInst>(V1);
-      unsigned i = Els1.back();
-      // If the scalar value being inserted is an extractvalue of the right
-      // index from the call, then everything is good.
-      if (isa<StructType>(EVI->getOperand(0)->getType()) &&
-          EVI->getNumIndices() == 1 && EVI->getIndices()[0] == i) {
-        // Look through extractvalue
-        Els1.pop_back();
-        if (sameNoopInput(EVI->getOperand(0), V2, Els1, Els2, TLI)) {
-          Els1.push_back(i);
-          if (swapParity)
-            swap(Els1, Els2);
-          return true;
-        }
-        Els1.push_back(i);
-      }
-    }
-
-    swap(V1, V2);
-    swap(Els1, Els2);
-    swapParity = !swapParity;
-  }
-
-  if (swapParity)
-    swap(Els1, Els2);
-  return false;
 }
 
+/// Return true if this scalar return value only has bits discarded on its path
+/// from the "tail call" to the "ret". This includes the obvious noop
+/// instructions handled by getNoopInput above as well as free truncations (or
+/// extensions prior to the call).
+static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal,
+                                 SmallVectorImpl<unsigned> &RetIndices,
+                                 SmallVectorImpl<unsigned> &CallIndices,
+                                 const TargetLoweringBase &TLI) {
+
+  // Trace the sub-value needed by the return value as far back up the graph as
+  // possible, in the hope that it will intersect with the value produced by the
+  // call. In the simple case with no "returned" attribute, the hope is actually
+  // that we end up back at the tail call instruction itself.
+  unsigned BitsRequired = UINT_MAX;
+  RetVal = getNoopInput(RetVal, RetIndices, BitsRequired, TLI);
+
+  // If this slot in the value returned is undef, it doesn't matter what the
+  // call puts there, it'll be fine.
+  if (isa<UndefValue>(RetVal))
+    return true;
+
+  // Now do a similar search up through the graph to find where the value
+  // actually returned by the "tail call" comes from. In the simple case without
+  // a "returned" attribute, the search will be blocked immediately and the loop
+  // a Noop.
+  unsigned BitsProvided = UINT_MAX;
+  CallVal = getNoopInput(CallVal, CallIndices, BitsProvided, TLI);
+
+  // There's no hope if we can't actually trace them to (the same part of!) the
+  // same value.
+  if (CallVal != RetVal || CallIndices != RetIndices)
+    return false;
+
+  // However, intervening truncates may have made the call non-tail. Make sure
+  // all the bits that are needed by the "ret" have been provided by the "tail
+  // call". FIXME: with sufficiently cunning bit-tracking, we could look through
+  // extensions too.
+  if (BitsProvided < BitsRequired)
+    return false;
+
+  return true;
+}
+
+/// For an aggregate type, determine whether a given index is within bounds or
+/// not.
+static bool indexReallyValid(CompositeType *T, unsigned Idx) {
+  if (ArrayType *AT = dyn_cast<ArrayType>(T))
+    return Idx < AT->getNumElements();
+
+  return Idx < cast<StructType>(T)->getNumElements();
+}
+
+/// Move the given iterators to the next leaf type in depth first traversal.
+///
+/// Performs a depth-first traversal of the type as specified by its arguments,
+/// stopping at the next leaf node (which may be a legitimate scalar type or an
+/// empty struct or array).
+///
+/// @param SubTypes List of the partial components making up the type from
+/// outermost to innermost non-empty aggregate. The element currently
+/// represented is SubTypes.back()->getTypeAtIndex(Path.back() - 1).
+///
+/// @param Path Set of extractvalue indices leading from the outermost type
+/// (SubTypes[0]) to the leaf node currently represented.
+///
+/// @returns true if a new type was found, false otherwise. Calling this
+/// function again on a finished iterator will repeatedly return
+/// false. SubTypes.back()->getTypeAtIndex(Path.back()) is either an empty
+/// aggregate or a non-aggregate
+static bool
+advanceToNextLeafType(SmallVectorImpl<CompositeType *> &SubTypes,
+                      SmallVectorImpl<unsigned> &Path) {
+  // First march back up the tree until we can successfully increment one of the
+  // coordinates in Path.
+  while (!Path.empty() && !indexReallyValid(SubTypes.back(), Path.back() + 1)) {
+    Path.pop_back();
+    SubTypes.pop_back();
+  }
+
+  // If we reached the top, then the iterator is done.
+  if (Path.empty())
+    return false;
+
+  // We know there's *some* valid leaf now, so march back down the tree picking
+  // out the left-most element at each node.
+  ++Path.back();
+  Type *DeeperType = SubTypes.back()->getTypeAtIndex(Path.back());
+  while (DeeperType->isAggregateType()) {
+    CompositeType *CT = cast<CompositeType>(DeeperType);
+    if (!indexReallyValid(CT, 0))
+      return true;
+
+    SubTypes.push_back(CT);
+    Path.push_back(0);
+
+    DeeperType = CT->getTypeAtIndex(0U);
+  }
+
+  return true;
+}
+
+/// Find the first non-empty, scalar-like type in Next and setup the iterator
+/// components.
+///
+/// Assuming Next is an aggregate of some kind, this function will traverse the
+/// tree from left to right (i.e. depth-first) looking for the first
+/// non-aggregate type which will play a role in function return.
+///
+/// For example, if Next was {[0 x i64], {{}, i32, {}}, i32} then we would setup
+/// Path as [1, 1] and SubTypes as [Next, {{}, i32, {}}] to represent the first
+/// i32 in that type.
+static bool firstRealType(Type *Next,
+                          SmallVectorImpl<CompositeType *> &SubTypes,
+                          SmallVectorImpl<unsigned> &Path) {
+  // First initialise the iterator components to the first "leaf" node
+  // (i.e. node with no valid sub-type at any index, so {} does count as a leaf
+  // despite nominally being an aggregate).
+  while (Next->isAggregateType() &&
+         indexReallyValid(cast<CompositeType>(Next), 0)) {
+    SubTypes.push_back(cast<CompositeType>(Next));
+    Path.push_back(0);
+    Next = cast<CompositeType>(Next)->getTypeAtIndex(0U);
+  }
+
+  // If there's no Path now, Next was originally scalar already (or empty
+  // leaf). We're done.
+  if (Path.empty())
+    return true;
+
+  // Otherwise, use normal iteration to keep looking through the tree until we
+  // find a non-aggregate type.
+  while (SubTypes.back()->getTypeAtIndex(Path.back())->isAggregateType()) {
+    if (!advanceToNextLeafType(SubTypes, Path))
+      return false;
+  }
+
+  return true;
+}
+
+/// Set the iterator data-structures to the next non-empty, non-aggregate
+/// subtype.
+bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes,
+                  SmallVectorImpl<unsigned> &Path) {
+  do {
+    if (!advanceToNextLeafType(SubTypes, Path))
+      return false;
+
+    assert(!Path.empty() && "found a leaf but didn't set the path?");
+  } while (SubTypes.back()->getTypeAtIndex(Path.back())->isAggregateType());
+
+  return true;
+}
+
+
 /// Test if the given instruction is in a position to be optimized
 /// with a tail-call. This roughly means that it's in a block with
 /// a return and there's nothing that needs to be scheduled
@@ -422,7 +532,50 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS,
       CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
     return false;
 
-  // Otherwise, make sure the return value and I have the same value
-  SmallVector<unsigned, 4> Els1, Els2;
-  return sameNoopInput(Ret->getOperand(0), I, Els1, Els2, TLI);
+  const Value *RetVal = Ret->getOperand(0), *CallVal = I;
+  SmallVector<unsigned, 4> RetPath, CallPath;
+  SmallVector<CompositeType *, 4> RetSubTypes, CallSubTypes;
+
+  bool RetEmpty = !firstRealType(RetVal->getType(), RetSubTypes, RetPath);
+  bool CallEmpty = !firstRealType(CallVal->getType(), CallSubTypes, CallPath);
+
+  // Nothing's actually returned, it doesn't matter what the callee put there
+  // it's a valid tail call.
+  if (RetEmpty)
+    return true;
+
+  // Iterate pairwise through each of the value types making up the tail call
+  // and the corresponding return. For each one we want to know whether it's
+  // essentially going directly from the tail call to the ret, via operations
+  // that end up not generating any code.
+  //
+  // We allow a certain amount of covariance here. For example it's permitted
+  // for the tail call to define more bits than the ret actually cares about
+  // (e.g. via a truncate).
+  do {
+    if (CallEmpty) {
+      // We've exhausted the values produced by the tail call instruction, the
+      // rest are essentially undef. The type doesn't really matter, but we need
+      // *something*.
+      Type *SlotType = RetSubTypes.back()->getTypeAtIndex(RetPath.back());
+      CallVal = UndefValue::get(SlotType);
+    }
+
+    // The manipulations performed when we're looking through an insertvalue or
+    // an extractvalue would happen at the front of the RetPath list, so since
+    // we have to copy it anyway it's more efficient to create a reversed copy.
+    using std::copy;
+    SmallVector<unsigned, 4> TmpRetPath, TmpCallPath;
+    copy(RetPath.rbegin(), RetPath.rend(), std::back_inserter(TmpRetPath));
+    copy(CallPath.rbegin(), CallPath.rend(), std::back_inserter(TmpCallPath));
+
+    // Finally, we can check whether the value produced by the tail call at this
+    // index is compatible with the value we return.
+    if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath, TLI))
+      return false;
+
+    CallEmpty = !nextRealType(CallSubTypes, CallPath);
+  } while(nextRealType(RetSubTypes, RetPath));
+
+  return true;
 }
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 6cb126f68bab..567facaed993 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1504,6 +1504,19 @@ bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
   return ((VT1.getSimpleVT() == MVT::i64) && (VT2.getSimpleVT() == MVT::i32));
 }
 
+bool
+HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
+  // Assuming the caller does not have either a signext or zeroext modifier, and
+  // only one value is accepted, any reasonable truncation is allowed.
+  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+    return false;
+
+  // FIXME: in principle up to 64-bit could be made safe, but it would be very
+  // fragile at the moment: any support for multiple value returns would be
+  // liable to disallow tail calls involving i64 -> iN truncation in many cases.
+  return Ty1->getPrimitiveSizeInBits() <= 32;
+}
+
 SDValue
 HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
   SDValue Chain = Op.getOperand(0);
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index e00f7878a085..4fe01071563d 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -95,6 +95,8 @@ namespace llvm {
     virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
     virtual bool isTruncateFree(EVT VT1, EVT VT2) const;
 
+    virtual bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const;
+
     virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
 
     virtual const char *getTargetNodeName(unsigned Opcode) const;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 811334eafc2e..57e748b9c898 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -13321,6 +13321,20 @@ bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
   return NumBits1 > NumBits2;
 }
 
+bool X86TargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
+  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+    return false;
+
+  if (!isTypeLegal(EVT::getEVT(Ty1)))
+    return false;
+
+  assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
+
+  // Assuming the caller doesn't have a zeroext or signext return parameter,
+  // truncation all the way down to i1 is valid.
+  return true;
+}
+
 bool X86TargetLowering::isLegalICmpImmediate(int64_t Imm) const {
   return isInt<32>(Imm);
 }
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index e09104a2c96d..487d82947d70 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -662,6 +662,8 @@ namespace llvm {
     virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
     virtual bool isTruncateFree(EVT VT1, EVT VT2) const;
 
+    virtual bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const;
+
     /// isZExtFree - Return true if any actual instruction that defines a
     /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
     /// register. This does not necessarily include registers defined in
diff --git a/llvm/test/CodeGen/Hexagon/tail-call-trunc.ll b/llvm/test/CodeGen/Hexagon/tail-call-trunc.ll
new file mode 100644
index 000000000000..98214c7b1e97
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/tail-call-trunc.ll
@@ -0,0 +1,28 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+declare i32 @ret_i32()
+
+define i8 @test_i8() {
+; CHECK-LABEL: test_i8:
+; CHECK: jump ret_i32
+  %res = tail call i32 @ret_i32()
+  %val = trunc i32 %res to i8
+  ret i8 %val
+}
+
+define i16 @test_i16() {
+; CHECK-LABEL: test_i16:
+; CHECK: jump ret_i32
+  %res = tail call i32 @ret_i32()
+  %val = trunc i32 %res to i16
+  ret i16 %val
+}
+
+declare i64 @ret_i64()
+define i32 @test_i32() {
+; CHECK-LABEL: test_i32:
+; CHECK: call ret_i64
+  %res = tail call i64 @ret_i64()
+  %val = trunc i64 %res to i32
+  ret i32 42
+}
diff --git a/llvm/test/CodeGen/X86/returned-trunc-tail-calls.ll b/llvm/test/CodeGen/X86/returned-trunc-tail-calls.ll
new file mode 100644
index 000000000000..10bd3b673be7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/returned-trunc-tail-calls.ll
@@ -0,0 +1,97 @@
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s
+
+declare i32 @ret32(i32 returned)
+declare i64 @ret64(i64 returned)
+
+define i64 @test1(i64 %val) {
+; CHECK-LABEL: test1:
+; CHECK-NOT: jmp
+; CHECK: callq
+  %in = trunc i64 %val to i32
+  tail call i32 @ret32(i32 returned %in)
+  ret i64 %val
+}
+
+define i32 @test2(i64 %val) {
+; CHECK-LABEL: test2:
+; CHECK: jmp
+; CHECK-NOT: callq
+  %in = trunc i64 %val to i32
+  tail call i64 @ret64(i64 returned %val)
+  ret i32 %in
+}
+
+define i32 @test3(i64 %in) {
+; CHECK-LABEL: test3:
+; CHECK: jmp
+; CHECK-NOT: callq
+  %small = trunc i64 %in to i32
+  tail call i32 @ret32(i32 returned %small)
+  ret i32 %small
+}
+
+declare {i32, i8} @take_i32_i8({i32, i8} returned)
+define { i8, i8 } @test_nocommon_value({i32, i32} %in) {
+; CHECK-LABEL: test_nocommon_value
+; CHECK: jmp
+
+  %first = extractvalue {i32, i32} %in, 0
+  %first.trunc = trunc i32 %first to i8
+
+  %second = extractvalue {i32, i32} %in, 1
+  %second.trunc = trunc i32 %second to i8
+
+  %tmp = insertvalue {i32, i8} undef, i32 %first, 0
+  %callval = insertvalue {i32, i8} %tmp, i8 %second.trunc, 1
+  tail call {i32, i8} @take_i32_i8({i32, i8} returned %callval)
+
+  %restmp = insertvalue {i8, i8} undef, i8 %first.trunc, 0
+  %res = insertvalue {i8, i8} %restmp, i8 %second.trunc, 1
+  ret {i8, i8} %res
+}
+
+declare {i32, {i32, i32}} @give_i32_i32_i32()
+define {{i32, i32}, i32} @test_structs_different_shape() {
+; CHECK-LABEL: test_structs_different_shape
+; CHECK: jmp
+  %val = tail call {i32, {i32, i32}} @give_i32_i32_i32()
+
+  %first = extractvalue {i32, {i32, i32}} %val, 0
+  %second = extractvalue {i32, {i32, i32}} %val, 1, 0
+  %third = extractvalue {i32, {i32, i32}} %val, 1, 1
+
+  %restmp = insertvalue {{i32, i32}, i32} undef, i32 %first, 0, 0
+  %reseventmper = insertvalue {{i32, i32}, i32} %restmp, i32 %second, 0, 1
+  %res = insertvalue {{i32, i32}, i32} %reseventmper, i32 %third, 1
+
+  ret {{i32, i32}, i32} %res
+}
+
+define i64 @test_undef_asymmetry() {
+; CHECK: test_undef_asymmetry
+; CHECK-NOT: jmp
+  tail call i64 @ret64(i64 returned undef)
+  ret i64 2
+}
+
+define {{}, {{}, i32, {}}, [1 x i32]} @evil_empty_aggregates() {
+; CHECK-LABEL: evil_empty_aggregates
+; CHECK: jmp
+  %agg = tail call {i32, {i32, i32}} @give_i32_i32_i32()
+
+  %first = extractvalue {i32, {i32, i32}} %agg, 0
+  %second = extractvalue {i32, {i32, i32}} %agg, 1, 0
+
+  %restmp = insertvalue {{}, {{}, i32, {}}, [1 x i32]} undef, i32 %first, 1, 1
+  %res = insertvalue {{}, {{}, i32, {}}, [1 x i32]} %restmp, i32 %second, 2, 0
+  ret {{}, {{}, i32, {}}, [1 x i32]} %res
+}
+
+define i32 @structure_is_unimportant() {
+; CHECK-LABEL: structure_is_unimportant
+; CHECK: jmp
+  %val = tail call {i32, {i32, i32}} @give_i32_i32_i32()
+
+  %res = extractvalue {i32, {i32, i32}} %val, 0
+  ret i32 %res
+}
diff --git a/llvm/test/CodeGen/X86/tail-call-legality.ll b/llvm/test/CodeGen/X86/tail-call-legality.ll
new file mode 100644
index 000000000000..119610430b18
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tail-call-legality.ll
@@ -0,0 +1,32 @@
+; RUN: llc -march=x86 -o - < %s | FileCheck %s
+
+; This used to be classified as a tail call because of a mismatch in the
+; arguments seen by Analysis.cpp and ISelLowering. As seen by ISelLowering, they
+; both return {i32, i32, i32} (since i64 is illegal) which is fine for a tail
+; call.
+
+; As seen by Analysis.cpp: i64 -> i32 is a valid trunc, second i32 passes
+; straight through and the third is undef, also OK for a tail call.
+
+; Analysis.cpp was wrong.
+
+; FIXME: in principle we *could* support some tail calls involving truncations
+; of illegal types: a single "trunc i64 %whatever to i32" is probably valid
+; because of how the extra registers are laid out.
+
+declare {i64, i32} @test()
+
+define {i32, i32, i32} @test_pair_notail(i64 %in) {
+; CHECK-LABEL: test_pair_notail
+; CHECK-NOT: jmp
+
+  %whole = tail call {i64, i32} @test()
+  %first = extractvalue {i64, i32} %whole, 0
+  %first.trunc = trunc i64 %first to i32
+
+  %second = extractvalue {i64, i32} %whole, 1
+
+  %tmp = insertvalue {i32, i32, i32} undef, i32 %first.trunc, 0
+  %res = insertvalue {i32, i32, i32} %tmp, i32 %second, 1
+  ret {i32, i32, i32} %res
+}
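
For other backends wanting to opt in, a minimal sketch of an override, assuming a hypothetical MyTargetLowering whose scalar return values live in 64-bit registers (the class name and the 64-bit limit are illustrative only, not part of this patch); it simply mirrors the Hexagon/X86 implementations above:

bool MyTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
  // Only plain integer truncations are considered; vector, pointer and
  // floating-point types keep the conservative default of "not allowed".
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;

  // Anything that fits in a single (hypothetical) 64-bit return register is
  // discarded for free by the caller, so the truncation cannot invalidate a
  // tail call.
  return Ty1->getPrimitiveSizeInBits() <= 64;
}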