forked from OSchip/llvm-project
ARM: HFAs must be passed in consecutive registers
When using the ARM AAPCS, HFAs (Homogeneous Floating-point Aggregates) must be passed in a block of consecutive floating-point registers, or on the stack. This means that unused floating-point registers cannot be back-filled with part of an HFA; however, this can currently happen. This patch, along with the corresponding clang patch (http://reviews.llvm.org/D3083), prevents this. llvm-svn: 208413
This commit is contained in:
parent
d6a20e5115
commit
c24f2171ca
|
@ -112,6 +112,23 @@ public:
|
|||
return Ret;
|
||||
}
|
||||
|
||||
// Create a pending CCValAssign, i.e. one whose final location has not been
// decided yet. It is built through getReg() with 0 in the location slot
// (presumably a placeholder register number -- confirm against getReg).
// There is no need to differentiate between a pending CCValAssign and other
|
||||
// kinds, as they are stored in a different list.
|
||||
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT,
|
||||
LocInfo HTP) {
|
||||
return getReg(ValNo, ValVT, 0, LocVT, HTP);
|
||||
}
|
||||
|
||||
// Resolve this assignment to a register: store RegNo as the location and
// clear the isMem flag.
void convertToReg(unsigned RegNo) {
|
||||
Loc = RegNo;
|
||||
isMem = false;
|
||||
}
|
||||
|
||||
// Resolve this assignment to a memory location: store Offset as the location
// and set the isMem flag.
void convertToMem(unsigned Offset) {
|
||||
Loc = Offset;
|
||||
isMem = true;
|
||||
}
|
||||
|
||||
unsigned getValNo() const { return ValNo; }
|
||||
MVT getValVT() const { return ValVT; }
|
||||
|
||||
|
@ -164,6 +181,7 @@ private:
|
|||
|
||||
unsigned StackOffset;
|
||||
SmallVector<uint32_t, 16> UsedRegs;
|
||||
SmallVector<CCValAssign, 4> PendingLocs;
|
||||
|
||||
// ByValInfo and SmallVector<ByValInfo, 4> ByValRegs:
|
||||
//
|
||||
|
@ -317,6 +335,31 @@ public:
|
|||
return Reg;
|
||||
}
|
||||
|
||||
/// AllocateRegBlock - Attempt to allocate a block of RegsRequired consecutive
|
||||
/// registers. If this is not possible, return zero. Otherwise, return the first
|
||||
/// register of the block that were allocated, marking the entire block as allocated.
|
||||
unsigned AllocateRegBlock(const uint16_t *Regs, unsigned NumRegs, unsigned RegsRequired) {
|
||||
for (unsigned StartIdx = 0; StartIdx <= NumRegs - RegsRequired; ++StartIdx) {
|
||||
bool BlockAvailable = true;
|
||||
// Check for already-allocated regs in this block
|
||||
for (unsigned BlockIdx = 0; BlockIdx < RegsRequired; ++BlockIdx) {
|
||||
if (isAllocated(Regs[StartIdx + BlockIdx])) {
|
||||
BlockAvailable = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (BlockAvailable) {
|
||||
// Mark the entire block as allocated
|
||||
for (unsigned BlockIdx = 0; BlockIdx < RegsRequired; ++BlockIdx) {
|
||||
MarkAllocated(Regs[StartIdx + BlockIdx]);
|
||||
}
|
||||
return Regs[StartIdx];
|
||||
}
|
||||
}
|
||||
// No block was available
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Version of AllocateReg with list of registers to be shadowed.
|
||||
unsigned AllocateReg(const MCPhysReg *Regs, const MCPhysReg *ShadowRegs,
|
||||
unsigned NumRegs) {
|
||||
|
@ -411,6 +454,11 @@ public:
|
|||
|
||||
ParmContext getCallOrPrologue() const { return CallOrPrologue; }
|
||||
|
||||
// Get list of pending assignments
|
||||
SmallVectorImpl<llvm::CCValAssign> &getPendingLocs() {
|
||||
return PendingLocs;
|
||||
}
|
||||
|
||||
private:
|
||||
/// MarkAllocated - Mark a register and all of its aliases as allocated.
|
||||
void MarkAllocated(unsigned Reg);
|
||||
|
|
|
@ -47,8 +47,12 @@ namespace ISD {
|
|||
static const uint64_t InAllocaOffs = 12;
|
||||
static const uint64_t OrigAlign = 0x1FULL<<27;
|
||||
static const uint64_t OrigAlignOffs = 27;
|
||||
static const uint64_t ByValSize = 0xffffffffULL<<32; ///< Struct size
|
||||
static const uint64_t ByValSize = 0x3fffffffULL<<32; ///< Struct size
|
||||
static const uint64_t ByValSizeOffs = 32;
|
||||
static const uint64_t InConsecutiveRegsLast = 0x1ULL<<62; ///< Last part of an argument needing consecutive registers
|
||||
static const uint64_t InConsecutiveRegsLastOffs = 62;
|
||||
static const uint64_t InConsecutiveRegs = 0x1ULL<<63; ///< Part of an argument needing consecutive registers
|
||||
static const uint64_t InConsecutiveRegsOffs = 63;
|
||||
|
||||
static const uint64_t One = 1ULL; ///< 1 of this type, for shifts
|
||||
|
||||
|
@ -80,6 +84,12 @@ namespace ISD {
|
|||
bool isReturned() const { return Flags & Returned; }
|
||||
void setReturned() { Flags |= One << ReturnedOffs; }
|
||||
|
||||
// True if the InConsecutiveRegs bit is set in Flags.
bool isInConsecutiveRegs() const { return Flags & InConsecutiveRegs; }
|
||||
// Set the InConsecutiveRegs bit; there is no corresponding clear operation here.
void setInConsecutiveRegs() { Flags |= One << InConsecutiveRegsOffs; }
|
||||
|
||||
// True if the InConsecutiveRegsLast bit is set in Flags.
bool isInConsecutiveRegsLast() const { return Flags & InConsecutiveRegsLast; }
|
||||
// Set the InConsecutiveRegsLast bit.
void setInConsecutiveRegsLast() { Flags |= One << InConsecutiveRegsLastOffs; }
|
||||
|
||||
unsigned getByValAlign() const {
|
||||
return (unsigned)
|
||||
((One << ((Flags & ByValAlign) >> ByValAlignOffs)) / 2);
|
||||
|
|
|
@ -42,6 +42,11 @@ class CCIf<string predicate, CCAction A> : CCPredicateAction<A> {
|
|||
class CCIfByVal<CCAction A> : CCIf<"ArgFlags.isByVal()", A> {
|
||||
}
|
||||
|
||||
/// CCIfConsecutiveRegs - If the current argument has InConsecutiveRegs
|
||||
/// parameter attribute, apply Action A.
|
||||
class CCIfConsecutiveRegs<CCAction A> : CCIf<"ArgFlags.isInConsecutiveRegs()", A> {
|
||||
}
|
||||
|
||||
/// CCIfCC - Match if the current calling convention is 'CC'.
|
||||
class CCIfCC<string CC, CCAction A>
|
||||
: CCIf<!strconcat("State.getCallingConv() == ", CC), A> {}
|
||||
|
|
|
@ -2233,6 +2233,15 @@ public:
|
|||
return VT.bitsLT(MinVT) ? MinVT : VT;
|
||||
}
|
||||
|
||||
/// For some targets, an LLVM struct type must be broken down into multiple
|
||||
/// simple types, but the calling convention specifies that the entire struct
|
||||
/// must be passed in a block of consecutive registers.
|
||||
virtual bool
|
||||
functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv,
|
||||
bool isVarArg) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Returns a 0 terminated array of registers that can be safely used as
|
||||
/// scratch registers.
|
||||
virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const {
|
||||
|
|
|
@ -7128,8 +7128,13 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
|
|||
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
|
||||
SmallVector<EVT, 4> ValueVTs;
|
||||
ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
|
||||
for (unsigned Value = 0, NumValues = ValueVTs.size();
|
||||
Value != NumValues; ++Value) {
|
||||
Type *FinalType = Args[i].Ty;
|
||||
if (Args[i].isByVal)
|
||||
FinalType = cast<PointerType>(Args[i].Ty)->getElementType();
|
||||
bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
|
||||
FinalType, CLI.CallConv, CLI.IsVarArg);
|
||||
for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
|
||||
++Value) {
|
||||
EVT VT = ValueVTs[Value];
|
||||
Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext());
|
||||
SDValue Op = SDValue(Args[i].Node.getNode(),
|
||||
|
@ -7171,6 +7176,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
|
|||
}
|
||||
if (Args[i].isNest)
|
||||
Flags.setNest();
|
||||
if (NeedsRegBlock) {
|
||||
Flags.setInConsecutiveRegs();
|
||||
if (Value == NumValues - 1)
|
||||
Flags.setInConsecutiveRegsLast();
|
||||
}
|
||||
Flags.setOrigAlign(OriginalAlignment);
|
||||
|
||||
MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
|
||||
|
@ -7356,6 +7366,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
|
|||
ComputeValueVTs(*TLI, I->getType(), ValueVTs);
|
||||
bool isArgValueUsed = !I->use_empty();
|
||||
unsigned PartBase = 0;
|
||||
Type *FinalType = I->getType();
|
||||
if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal))
|
||||
FinalType = cast<PointerType>(FinalType)->getElementType();
|
||||
bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
|
||||
FinalType, F.getCallingConv(), F.isVarArg());
|
||||
for (unsigned Value = 0, NumValues = ValueVTs.size();
|
||||
Value != NumValues; ++Value) {
|
||||
EVT VT = ValueVTs[Value];
|
||||
|
@ -7397,6 +7412,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
|
|||
}
|
||||
if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
|
||||
Flags.setNest();
|
||||
if (NeedsRegBlock) {
|
||||
Flags.setInConsecutiveRegs();
|
||||
if (Value == NumValues - 1)
|
||||
Flags.setInConsecutiveRegsLast();
|
||||
}
|
||||
Flags.setOrigAlign(OriginalAlignment);
|
||||
|
||||
MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
|
||||
|
|
|
@ -160,6 +160,96 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
|
|||
State);
|
||||
}
|
||||
|
||||
// Register lists searched when allocating a homogeneous aggregate:
// CC_ARM_AAPCS_Custom_HA picks SRegList for f32 members, DRegList for f64
// members and QRegList for v2f64 members. SRegList is also used there to mark
// every VFP register as unavailable when the aggregate goes to the stack.
static const uint16_t SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3,
|
||||
ARM::S4, ARM::S5, ARM::S6, ARM::S7,
|
||||
ARM::S8, ARM::S9, ARM::S10, ARM::S11,
|
||||
ARM::S12, ARM::S13, ARM::S14, ARM::S15 };
|
||||
static const uint16_t DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3,
|
||||
ARM::D4, ARM::D5, ARM::D6, ARM::D7 };
|
||||
static const uint16_t QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 };
|
||||
|
||||
// Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA
|
||||
// has InConsecutiveRegs set, and that the last member also has
|
||||
// InConsecutiveRegsLast set. We must process all members of the HA before
|
||||
// we can allocate it, as we need to know the total number of registers that
|
||||
// will be needed in order to (attempt to) allocate a contiguous block.
|
||||
static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
|
||||
CCValAssign::LocInfo &LocInfo,
|
||||
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
|
||||
SmallVectorImpl<CCValAssign> &PendingHAMembers = State.getPendingLocs();
|
||||
|
||||
// AAPCS HFAs must have 1-4 elements, all of the same type
|
||||
assert(PendingHAMembers.size() < 4);
|
||||
if (PendingHAMembers.size() > 0)
|
||||
assert(PendingHAMembers[0].getLocVT() == LocVT);
|
||||
|
||||
// Add the argument to the list to be allocated once we know the size of the
|
||||
// HA
|
||||
PendingHAMembers.push_back(
|
||||
CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
|
||||
|
||||
if (ArgFlags.isInConsecutiveRegsLast()) {
|
||||
assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 4 &&
|
||||
"Homogeneous aggregates must have between 1 and 4 members");
|
||||
|
||||
// Try to allocate a contiguous block of registers, each of the correct
|
||||
// size to hold one member.
|
||||
const uint16_t *RegList;
|
||||
unsigned NumRegs;
|
||||
switch (LocVT.SimpleTy) {
|
||||
case MVT::f32:
|
||||
RegList = SRegList;
|
||||
NumRegs = 16;
|
||||
break;
|
||||
case MVT::f64:
|
||||
RegList = DRegList;
|
||||
NumRegs = 8;
|
||||
break;
|
||||
case MVT::v2f64:
|
||||
RegList = QRegList;
|
||||
NumRegs = 4;
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("Unexpected member type for HA");
|
||||
break;
|
||||
}
|
||||
|
||||
unsigned RegResult =
|
||||
State.AllocateRegBlock(RegList, NumRegs, PendingHAMembers.size());
|
||||
|
||||
if (RegResult) {
|
||||
for (SmallVectorImpl<CCValAssign>::iterator It = PendingHAMembers.begin();
|
||||
It != PendingHAMembers.end(); ++It) {
|
||||
It->convertToReg(RegResult);
|
||||
State.addLoc(*It);
|
||||
++RegResult;
|
||||
}
|
||||
PendingHAMembers.clear();
|
||||
return true;
|
||||
}
|
||||
|
||||
// Register allocation failed, fall back to the stack
|
||||
|
||||
// Mark all VFP regs as unavailable (AAPCS rule C.2.vfp)
|
||||
for (unsigned regNo = 0; regNo < 16; ++regNo)
|
||||
State.AllocateReg(SRegList[regNo]);
|
||||
|
||||
unsigned Size = LocVT.getSizeInBits() / 8;
|
||||
unsigned Align = LocVT.SimpleTy == MVT::v2f64 ? 8 : Size;
|
||||
|
||||
for (auto It : PendingHAMembers) {
|
||||
It.convertToMem(State.AllocateStack(Size, Align));
|
||||
State.addLoc(It);
|
||||
}
|
||||
|
||||
// All pending members have now been allocated
|
||||
PendingHAMembers.clear();
|
||||
}
|
||||
|
||||
// This will be allocated by the last member of the HA
|
||||
return true;
|
||||
}
|
||||
|
||||
} // End llvm namespace
|
||||
|
||||
#endif
|
||||
|
|
|
@ -174,6 +174,9 @@ def CC_ARM_AAPCS_VFP : CallingConv<[
|
|||
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
|
||||
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
|
||||
|
||||
// HFAs are passed in a contiguous block of registers, or on the stack
|
||||
CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_HA">>,
|
||||
|
||||
CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
|
||||
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
|
||||
CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
|
||||
|
|
|
@ -43,6 +43,7 @@
|
|||
#include "llvm/IR/Type.h"
|
||||
#include "llvm/MC/MCSectionMachO.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
#include "llvm/Target/TargetOptions.h"
|
||||
|
@ -1211,40 +1212,58 @@ static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
|
|||
|
||||
#include "ARMGenCallingConv.inc"
|
||||
|
||||
/// CCAssignFnForNode - Selects the correct CCAssignFn for a the
|
||||
/// given CallingConvention value.
|
||||
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
|
||||
bool Return,
|
||||
bool isVarArg) const {
|
||||
/// getEffectiveCallingConv - Get the effective calling convention, taking into
|
||||
/// account presence of floating point hardware and calling convention
|
||||
/// limitations, such as support for variadic functions.
|
||||
CallingConv::ID
|
||||
ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
|
||||
bool isVarArg) const {
|
||||
switch (CC) {
|
||||
default:
|
||||
llvm_unreachable("Unsupported calling convention");
|
||||
case CallingConv::Fast:
|
||||
if (Subtarget->hasVFP2() && !isVarArg) {
|
||||
if (!Subtarget->isAAPCS_ABI())
|
||||
return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
|
||||
// For AAPCS ABI targets, just use VFP variant of the calling convention.
|
||||
return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
|
||||
}
|
||||
// Fallthrough
|
||||
case CallingConv::C: {
|
||||
// Use target triple & subtarget features to do actual dispatch.
|
||||
case CallingConv::ARM_AAPCS:
|
||||
case CallingConv::ARM_APCS:
|
||||
case CallingConv::GHC:
|
||||
return CC;
|
||||
case CallingConv::ARM_AAPCS_VFP:
|
||||
return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
|
||||
case CallingConv::C:
|
||||
if (!Subtarget->isAAPCS_ABI())
|
||||
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
|
||||
return CallingConv::ARM_APCS;
|
||||
else if (Subtarget->hasVFP2() &&
|
||||
getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
|
||||
!isVarArg)
|
||||
return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
|
||||
return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
|
||||
return CallingConv::ARM_AAPCS_VFP;
|
||||
else
|
||||
return CallingConv::ARM_AAPCS;
|
||||
case CallingConv::Fast:
|
||||
if (!Subtarget->isAAPCS_ABI()) {
|
||||
if (Subtarget->hasVFP2() && !isVarArg)
|
||||
return CallingConv::Fast;
|
||||
return CallingConv::ARM_APCS;
|
||||
} else if (Subtarget->hasVFP2() && !isVarArg)
|
||||
return CallingConv::ARM_AAPCS_VFP;
|
||||
else
|
||||
return CallingConv::ARM_AAPCS;
|
||||
}
|
||||
case CallingConv::ARM_AAPCS_VFP:
|
||||
if (!isVarArg)
|
||||
return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
|
||||
// Fallthrough
|
||||
case CallingConv::ARM_AAPCS:
|
||||
return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
|
||||
}
|
||||
|
||||
/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
|
||||
/// CallingConvention.
|
||||
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
|
||||
bool Return,
|
||||
bool isVarArg) const {
|
||||
switch (getEffectiveCallingConv(CC, isVarArg)) {
|
||||
default:
|
||||
llvm_unreachable("Unsupported calling convention");
|
||||
case CallingConv::ARM_APCS:
|
||||
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
|
||||
case CallingConv::ARM_AAPCS:
|
||||
return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
|
||||
case CallingConv::ARM_AAPCS_VFP:
|
||||
return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
|
||||
case CallingConv::Fast:
|
||||
return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
|
||||
case CallingConv::GHC:
|
||||
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
|
||||
}
|
||||
|
@ -10628,3 +10647,77 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
|
|||
Val, Strex->getFunctionType()->getParamType(0)),
|
||||
Addr);
|
||||
}
|
||||
|
||||
// Base element kind of a homogeneous aggregate, as tracked by
// isHomogeneousAggregate while it walks a type.
enum HABaseType {
|
||||
// No base element has been seen yet.
HA_UNKNOWN = 0,
|
||||
// Base element is float.
HA_FLOAT,
|
||||
// Base element is double.
HA_DOUBLE,
|
||||
// Base element is a vector whose bit width is 64.
HA_VECT64,
|
||||
// Base element is a vector whose bit width is 128.
HA_VECT128
|
||||
};
|
||||
|
||||
/// Recursively test whether \p Ty is a homogeneous aggregate: a float, a
/// double, a 64-bit or 128-bit vector, or a struct/array built only from
/// elements sharing one such base kind, with 1 to 4 base elements in total.
///
/// \param Ty      The type to inspect.
/// \param Base    In/out: the base kind seen so far. Pass HA_UNKNOWN for a
///                fresh query; it is set once the first base element is found
///                and every later element must agree with it.
/// \param Members In/out: struct and array types add the element counts of
///                their children to it; scalar and vector types set it to 1.
/// \returns true if \p Ty is compatible with \p Base and the resulting member
///          count is in the range [1, 4].
static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
                                   uint64_t &Members) {
  if (const StructType *StructTy = dyn_cast<StructType>(Ty)) {
    // Every field must itself qualify; the counts of all fields add up.
    for (unsigned Field = 0; Field < StructTy->getNumElements(); ++Field) {
      uint64_t FieldMembers = 0;
      if (!isHomogeneousAggregate(StructTy->getElementType(Field), Base,
                                  FieldMembers))
        return false;
      Members += FieldMembers;
    }
    return Members > 0 && Members <= 4;
  }

  if (const ArrayType *ArrayTy = dyn_cast<ArrayType>(Ty)) {
    // The element type is inspected once and scaled by the array length.
    uint64_t ElementMembers = 0;
    if (!isHomogeneousAggregate(ArrayTy->getElementType(), Base,
                                ElementMembers))
      return false;
    Members += ElementMembers * ArrayTy->getNumElements();
    return Members > 0 && Members <= 4;
  }

  if (Ty->isFloatTy() || Ty->isDoubleTy()) {
    const HABaseType ScalarBase = Ty->isFloatTy() ? HA_FLOAT : HA_DOUBLE;
    // A scalar may start a new aggregate or extend one of the same kind.
    if (Base != HA_UNKNOWN && Base != ScalarBase)
      return false;
    Members = 1;
    Base = ScalarBase;
    return true;
  }

  if (const VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
    Members = 1;

    // Classify the vector by width; HA_UNKNOWN marks an unsupported width.
    HABaseType VecBase = HA_UNKNOWN;
    switch (VecTy->getBitWidth()) {
    case 64:
      VecBase = HA_VECT64;
      break;
    case 128:
      VecBase = HA_VECT128;
      break;
    default:
      break;
    }

    if (Base == HA_UNKNOWN) {
      // First base element: only the two supported widths may start an HA.
      if (VecBase == HA_UNKNOWN)
        return false;
      Base = VecBase;
      return true;
    }

    // A base kind is already fixed; the vector must match it exactly, so it
    // never mixes with float/double bases or with the other vector width.
    return Base == VecBase;
  }

  // Any other type contributes no members of its own.
  return Members > 0 && Members <= 4;
}
|
||||
|
||||
/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate.
|
||||
bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
|
||||
Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
|
||||
if (getEffectiveCallingConv(CallConv, isVarArg) ==
|
||||
CallingConv::ARM_AAPCS_VFP) {
|
||||
HABaseType Base = HA_UNKNOWN;
|
||||
uint64_t Members = 0;
|
||||
bool result = isHomogeneousAggregate(Ty, Base, Members);
|
||||
DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump(); dbgs() << "\n");
|
||||
return result;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -384,6 +384,11 @@ namespace llvm {
|
|||
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
|
||||
Type *Ty) const override;
|
||||
|
||||
/// \brief Returns true if an argument of type Ty needs to be passed in a
|
||||
/// contiguous block of registers in calling convention CallConv.
|
||||
bool functionArgumentNeedsConsecutiveRegisters(
|
||||
Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override;
|
||||
|
||||
Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
|
||||
AtomicOrdering Ord) const override;
|
||||
Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
|
||||
|
@ -424,6 +429,8 @@ namespace llvm {
|
|||
SDValue &Root, SelectionDAG &DAG,
|
||||
SDLoc dl) const;
|
||||
|
||||
CallingConv::ID getEffectiveCallingConv(CallingConv::ID CC,
|
||||
bool isVarArg) const;
|
||||
CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return,
|
||||
bool isVarArg) const;
|
||||
SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
|
||||
|
@ -577,7 +584,6 @@ namespace llvm {
|
|||
OtherModImm
|
||||
};
|
||||
|
||||
|
||||
namespace ARM {
|
||||
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
|
||||
const TargetLibraryInfo *libInfo);
|
||||
|
|
|
@ -0,0 +1,163 @@
|
|||
; RUN: llc < %s -float-abi=hard -debug-only arm-isel 2>&1 | FileCheck %s
|
||||
; RUN: llc < %s -float-abi=soft -debug-only arm-isel 2>&1 | FileCheck %s --check-prefix=SOFT
|
||||
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
|
||||
target triple = "armv7-none--eabi"
|
||||
|
||||
; SOFT-NOT: isHA
|
||||
|
||||
; CHECK: isHA: 1 { float }
|
||||
define void @f0b({ float } %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 1 { float, float }
|
||||
define void @f1({ float, float } %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 1 { float, float, float }
|
||||
define void @f1b({ float, float, float } %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 1 { float, float, float, float }
|
||||
define void @f1c({ float, float, float, float } %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 0 { float, float, float, float, float }
|
||||
define void @f2({ float, float, float, float, float } %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 1 { double }
|
||||
define void @f3({ double } %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 1 { double, double, double, double }
|
||||
define void @f4({ double, double, double, double } %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 0 { double, double, double, double, double }
|
||||
define void @f5({ double, double, double, double, double } %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 0 { i32, i32 }
|
||||
define void @f5b({ i32, i32 } %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 1 { [1 x float] }
|
||||
define void @f6({ [1 x float] } %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 1 { [4 x float] }
|
||||
define void @f7({ [4 x float] } %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 0 { [5 x float] }
|
||||
define void @f8({ [5 x float] } %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 1 [1 x float]
|
||||
define void @f6b([1 x float] %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 1 [4 x float]
|
||||
define void @f7b([4 x float] %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 0 [5 x float]
|
||||
define void @f8b([5 x float] %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 1 { [2 x float], [2 x float] }
|
||||
define void @f9({ [2 x float], [2 x float] } %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 1 { [1 x float], [3 x float] }
|
||||
define void @f9b({ [1 x float], [3 x float] } %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 0 { [3 x float], [3 x float] }
|
||||
define void @f10({ [3 x float], [3 x float] } %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 1 { <2 x float> }
|
||||
define void @f11({ <2 x float> } %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 0 { <3 x float> }
|
||||
define void @f12({ <3 x float> } %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 1 { <4 x float> }
|
||||
define void @f13({ <4 x float> } %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 1 { <2 x float>, <2 x float> }
|
||||
define void @f15({ <2 x float>, <2 x float> } %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 0 { <2 x float>, float }
|
||||
define void @f15b({ <2 x float>, float } %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 0 { <2 x float>, [2 x float] }
|
||||
define void @f15c({ <2 x float>, [2 x float] } %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 0 { <2 x float>, <4 x float> }
|
||||
define void @f16({ <2 x float>, <4 x float> } %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 1 { <2 x double> }
|
||||
define void @f17({ <2 x double> } %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 1 { <2 x i32> }
|
||||
define void @f18({ <2 x i32> } %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 1 { <2 x i64>, <4 x i32> }
|
||||
define void @f19({ <2 x i64>, <4 x i32> } %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 1 { [4 x <4 x float>] }
|
||||
define void @f20({ [4 x <4 x float>] } %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: isHA: 0 { [5 x <4 x float>] }
|
||||
define void @f21({ [5 x <4 x float>] } %a) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-NOT: isHA
|
||||
define void @f22({ float } %a, ...) {
|
||||
ret void
|
||||
}
|
||||
|
|
@ -0,0 +1,94 @@
|
|||
; RUN: llc < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
|
||||
target triple = "armv7-none--gnueabihf"
|
||||
|
||||
%struct.s = type { float, float }
|
||||
%union.t = type { [4 x float] }
|
||||
|
||||
; Equivalent C code:
|
||||
; struct s { float a; float b; };
|
||||
; float foo(float a, double b, struct s c) { return c.a; }
|
||||
; Argument allocation:
|
||||
; a -> s0
|
||||
; b -> d1
|
||||
; c -> s4, s5
|
||||
; s1 is unused
|
||||
; return in s0
|
||||
define float @test1(float %a, double %b, %struct.s %c) {
|
||||
entry:
|
||||
; CHECK-LABEL: test1
|
||||
; CHECK: vmov.f32 s0, s4
|
||||
; CHECK-NOT: vmov.f32 s0, s1
|
||||
|
||||
%result = extractvalue %struct.s %c, 0
|
||||
ret float %result
|
||||
}
|
||||
|
||||
; Equivalent C code:
|
||||
; union t { float a[4]; };
|
||||
; float foo(float a, double b, union t c) { return c.a[0]; }
|
||||
; Argument allocation:
|
||||
; a -> s0
|
||||
; b -> d1
|
||||
; c -> s4..s7
|
||||
define float @test2(float %a, double %b, %union.t %c) #0 {
|
||||
entry:
|
||||
; CHECK-LABEL: test2
|
||||
; CHECK: vmov.f32 s0, s4
|
||||
; CHECK-NOT: vmov.f32 s0, s1
|
||||
|
||||
%result = extractvalue %union.t %c, 0, 0
|
||||
ret float %result
|
||||
}
|
||||
|
||||
; Equivalent C code:
|
||||
; struct s { float a; float b; };
|
||||
; float foo(float a, double b, struct s c, float d) { return d; }
|
||||
; Argument allocation:
|
||||
; a -> s0
|
||||
; b -> d1
|
||||
; c -> s4, s5
|
||||
; d -> s1
|
||||
; return in s0
|
||||
define float @test3(float %a, double %b, %struct.s %c, float %d) {
|
||||
entry:
|
||||
; CHECK-LABEL: test3
|
||||
; CHECK: vmov.f32 s0, s1
|
||||
; CHECK-NOT: vmov.f32 s0, s5
|
||||
|
||||
ret float %d
|
||||
}
|
||||
|
||||
; Equivalent C code:
|
||||
; struct s { float a; float b; };
|
||||
; float foo(struct s a, struct s b) { return b.b; }
|
||||
; Argument allocation:
|
||||
; a -> s0, s1
|
||||
; b -> s2, s3
|
||||
; return in s0
|
||||
define float @test4(%struct.s %a, %struct.s %b) {
|
||||
entry:
|
||||
; CHECK-LABEL: test4
|
||||
; CHECK: vmov.f32 s0, s3
|
||||
|
||||
%result = extractvalue %struct.s %b, 1
|
||||
ret float %result
|
||||
}
|
||||
|
||||
; Equivalent C code:
|
||||
; struct s { float a; float b; };
|
||||
; float foo(struct s a, float b, struct s c) { return c.a; }
|
||||
; Argument allocation:
|
||||
; a -> s0, s1
|
||||
; b -> s2
|
||||
; c -> s3, s4
|
||||
; return in s0
|
||||
define float @test5(%struct.s %a, float %b, %struct.s %c) {
|
||||
entry:
|
||||
; CHECK-LABEL: test5
|
||||
; CHECK: vmov.f32 s0, s3
|
||||
|
||||
%result = extractvalue %struct.s %c, 0
|
||||
ret float %result
|
||||
}
|
Loading…
Reference in New Issue