From 27fd307b83f79fa014ce2f2ab74061d249c8ddbe Mon Sep 17 00:00:00 2001
From: Reid Kleckner
Date: Mon, 28 Jan 2019 21:28:43 +0000
Subject: [PATCH] [ARM] Deduplicate table generated CC analysis code

Create ARMCallingConv.cpp and emit code for calling convention analysis
from there.

llvm-svn: 352431
---
 llvm/lib/Target/ARM/ARMCallingConv.cpp  | 284 ++++++++++++++++++++++
 llvm/lib/Target/ARM/ARMCallingConv.h    | 299 +++----------------------
 llvm/lib/Target/ARM/ARMCallingConv.td   |   9 +
 llvm/lib/Target/ARM/ARMFastISel.cpp     |   2 -
 llvm/lib/Target/ARM/ARMISelLowering.cpp |   2 -
 llvm/lib/Target/ARM/CMakeLists.txt      |   1 +
 6 files changed, 323 insertions(+), 274 deletions(-)
 create mode 100644 llvm/lib/Target/ARM/ARMCallingConv.cpp

diff --git a/llvm/lib/Target/ARM/ARMCallingConv.cpp b/llvm/lib/Target/ARM/ARMCallingConv.cpp
new file mode 100644
index 000000000000..5ede7c67f7c2
--- /dev/null
+++ b/llvm/lib/Target/ARM/ARMCallingConv.cpp
@@ -0,0 +1,284 @@
+//=== ARMCallingConv.cpp - ARM Custom CC Routines ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom routines for the ARM Calling Convention that
+// aren't done by tablegen, and includes the table generated implementations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMCallingConv.h"
+#include "ARMSubtarget.h"
+#include "ARMRegisterInfo.h"
+using namespace llvm;
+
+// APCS f64 is in register pairs, possibly split to stack
+static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                          CCValAssign::LocInfo &LocInfo,
+                          CCState &State, bool CanFail) {
+  static const MCPhysReg RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+
+  // Try to get the first register.
+  if (unsigned Reg = State.AllocateReg(RegList))
+    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+  else {
+    // For the 2nd half of a v2f64, do not fail.
+    if (CanFail)
+      return false;
+
+    // Put the whole thing on the stack.
+    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+                                           State.AllocateStack(8, 4),
+                                           LocVT, LocInfo));
+    return true;
+  }
+
+  // Try to get the second register.
+  if (unsigned Reg = State.AllocateReg(RegList))
+    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+  else
+    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+                                           State.AllocateStack(4, 4),
+                                           LocVT, LocInfo));
+  return true;
+}
+
+static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                   CCValAssign::LocInfo &LocInfo,
+                                   ISD::ArgFlagsTy &ArgFlags,
+                                   CCState &State) {
+  if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
+    return false;
+  if (LocVT == MVT::v2f64 &&
+      !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
+    return false;
+  return true; // we handled it
+}
+
+// AAPCS f64 is in aligned register pairs
+static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                           CCValAssign::LocInfo &LocInfo,
+                           CCState &State, bool CanFail) {
+  static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };
+  static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };
+  static const MCPhysReg ShadowRegList[] = { ARM::R0, ARM::R1 };
+  static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+
+  unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList);
+  if (Reg == 0) {
+
+    // If we had only R3 unallocated, we still must waste it now.
+    Reg = State.AllocateReg(GPRArgRegs);
+    assert((!Reg || Reg == ARM::R3) && "Wrong GPRs usage for f64");
+
+    // For the 2nd half of a v2f64, do not just fail.
+    if (CanFail)
+      return false;
+
+    // Put the whole thing on the stack.
+    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+                                           State.AllocateStack(8, 8),
+                                           LocVT, LocInfo));
+    return true;
+  }
+
+  unsigned i;
+  for (i = 0; i < 2; ++i)
+    if (HiRegList[i] == Reg)
+      break;
+
+  unsigned T = State.AllocateReg(LoRegList[i]);
+  (void)T;
+  assert(T == LoRegList[i] && "Could not allocate register");
+
+  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+                                         LocVT, LocInfo));
+  return true;
+}
+
+static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                    CCValAssign::LocInfo &LocInfo,
+                                    ISD::ArgFlagsTy &ArgFlags,
+                                    CCState &State) {
+  if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
+    return false;
+  if (LocVT == MVT::v2f64 &&
+      !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
+    return false;
+  return true; // we handled it
+}
+
+static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                         CCValAssign::LocInfo &LocInfo, CCState &State) {
+  static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };
+  static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };
+
+  unsigned Reg = State.AllocateReg(HiRegList, LoRegList);
+  if (Reg == 0)
+    return false; // we didn't handle it
+
+  unsigned i;
+  for (i = 0; i < 2; ++i)
+    if (HiRegList[i] == Reg)
+      break;
+
+  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
+                                         LocVT, LocInfo));
+  return true;
+}
+
+static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                      CCValAssign::LocInfo &LocInfo,
+                                      ISD::ArgFlagsTy &ArgFlags,
+                                      CCState &State) {
+  if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
+    return false;
+  if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
+    return false;
+  return true; // we handled it
+}
+
+static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                       CCValAssign::LocInfo &LocInfo,
+                                       ISD::ArgFlagsTy &ArgFlags,
+                                       CCState &State) {
+  return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
+                                   State);
+}
+
+static const MCPhysReg RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+
+static const MCPhysReg SRegList[] = { ARM::S0,  ARM::S1,  ARM::S2,  ARM::S3,
+                                      ARM::S4,  ARM::S5,  ARM::S6,  ARM::S7,
+                                      ARM::S8,  ARM::S9,  ARM::S10, ARM::S11,
+                                      ARM::S12, ARM::S13, ARM::S14, ARM::S15 };
+static const MCPhysReg DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+                                      ARM::D4, ARM::D5, ARM::D6, ARM::D7 };
+static const MCPhysReg QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 };
+
+
+// Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA
+// has InConsecutiveRegs set, and that the last member also has
+// InConsecutiveRegsLast set. We must process all members of the HA before
+// we can allocate it, as we need to know the total number of registers that
+// will be needed in order to (attempt to) allocate a contiguous block.
+static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
+                                          MVT &LocVT,
+                                          CCValAssign::LocInfo &LocInfo,
+                                          ISD::ArgFlagsTy &ArgFlags,
+                                          CCState &State) {
+  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+  // AAPCS HFAs must have 1-4 elements, all of the same type
+  if (PendingMembers.size() > 0)
+    assert(PendingMembers[0].getLocVT() == LocVT);
+
+  // Add the argument to the list to be allocated once we know the size of the
+  // aggregate. Store the type's required alignment as extra info for later: in
+  // the [N x i64] case all trace has been removed by the time we actually get
+  // to do allocation.
+  PendingMembers.push_back(CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo,
+                                                   ArgFlags.getOrigAlign()));
+
+  if (!ArgFlags.isInConsecutiveRegsLast())
+    return true;
+
+  // Try to allocate a contiguous block of registers, each of the correct
+  // size to hold one member.
+  auto &DL = State.getMachineFunction().getDataLayout();
+  unsigned StackAlign = DL.getStackAlignment();
+  unsigned Align = std::min(PendingMembers[0].getExtraInfo(), StackAlign);
+
+  ArrayRef<MCPhysReg> RegList;
+  switch (LocVT.SimpleTy) {
+  case MVT::i32: {
+    RegList = RRegList;
+    unsigned RegIdx = State.getFirstUnallocated(RegList);
+
+    // First consume all registers that would give an unaligned object. Whether
+    // we go on stack or in regs, no one will be using them in the future.
+    unsigned RegAlign = alignTo(Align, 4) / 4;
+    while (RegIdx % RegAlign != 0 && RegIdx < RegList.size())
+      State.AllocateReg(RegList[RegIdx++]);
+
+    break;
+  }
+  case MVT::f16:
+  case MVT::f32:
+    RegList = SRegList;
+    break;
+  case MVT::v4f16:
+  case MVT::f64:
+    RegList = DRegList;
+    break;
+  case MVT::v8f16:
+  case MVT::v2f64:
+    RegList = QRegList;
+    break;
+  default:
+    llvm_unreachable("Unexpected member type for block aggregate");
+    break;
+  }
+
+  unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
+  if (RegResult) {
+    for (SmallVectorImpl<CCValAssign>::iterator It = PendingMembers.begin();
+         It != PendingMembers.end(); ++It) {
+      It->convertToReg(RegResult);
+      State.addLoc(*It);
+      ++RegResult;
+    }
+    PendingMembers.clear();
+    return true;
+  }
+
+  // Register allocation failed, we'll be needing the stack
+  unsigned Size = LocVT.getSizeInBits() / 8;
+  if (LocVT == MVT::i32 && State.getNextStackOffset() == 0) {
+    // If nothing else has used the stack until this point, a non-HFA aggregate
+    // can be split between regs and stack.
+    unsigned RegIdx = State.getFirstUnallocated(RegList);
+    for (auto &It : PendingMembers) {
+      if (RegIdx >= RegList.size())
+        It.convertToMem(State.AllocateStack(Size, Size));
+      else
+        It.convertToReg(State.AllocateReg(RegList[RegIdx++]));
+
+      State.addLoc(It);
+    }
+    PendingMembers.clear();
+    return true;
+  } else if (LocVT != MVT::i32)
+    RegList = SRegList;
+
+  // Mark all regs as unavailable (AAPCS rule C.2.vfp for VFP, C.6 for core)
+  for (auto Reg : RegList)
+    State.AllocateReg(Reg);
+
+  // After the first item has been allocated, the rest are packed as tightly as
+  // possible. (E.g. an incoming i64 would have starting Align of 8, but we'll
+  // be allocating a bunch of i32 slots).
+  unsigned RestAlign = std::min(Align, Size);
+
+  for (auto &It : PendingMembers) {
+    It.convertToMem(State.AllocateStack(Size, Align));
+    State.addLoc(It);
+    Align = RestAlign;
+  }
+
+  // All pending members have now been allocated
+  PendingMembers.clear();
+
+  // This will be allocated by the last member of the aggregate
+  return true;
+}
+
+// Include the table generated calling convention implementations.
+#include "ARMGenCallingConv.inc"
diff --git a/llvm/lib/Target/ARM/ARMCallingConv.h b/llvm/lib/Target/ARM/ARMCallingConv.h
index acda1256d3f3..615634551d90 100644
--- a/llvm/lib/Target/ARM/ARMCallingConv.h
+++ b/llvm/lib/Target/ARM/ARMCallingConv.h
@@ -6,286 +6,45 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file contains the custom routines for the ARM Calling Convention that
-// aren't done by tablegen.
+// This file declares the entry points for ARM calling convention analysis.
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_LIB_TARGET_ARM_ARMCALLINGCONV_H
 #define LLVM_LIB_TARGET_ARM_ARMCALLINGCONV_H
 
-#include "ARM.h"
-#include "ARMBaseInstrInfo.h"
-#include "ARMSubtarget.h"
 #include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/IR/CallingConv.h"
 
 namespace llvm {
 
-// APCS f64 is in register pairs, possibly split to stack
-static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                          CCValAssign::LocInfo &LocInfo,
-                          CCState &State, bool CanFail) {
-  static const MCPhysReg RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+bool CC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
+                  CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                  CCState &State);
+bool CC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT,
+                      CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                      CCState &State);
+bool CC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT,
+                 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                 CCState &State);
+bool CC_ARM_APCS_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
+                     CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                     CCState &State);
+bool FastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT,
+                     CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                     CCState &State);
+bool RetCC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
+                     CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                     CCState &State);
+bool RetCC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT,
+                         CCValAssign::LocInfo LocInfo,
+                         ISD::ArgFlagsTy ArgFlags, CCState &State);
+bool RetCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT,
+                    CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                    CCState &State);
+bool RetFastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT,
+                        CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+                        CCState &State);
 
-  // Try to get the first register.
-  if (unsigned Reg = State.AllocateReg(RegList))
-    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
-  else {
-    // For the 2nd half of a v2f64, do not fail.
-    if (CanFail)
-      return false;
-
-    // Put the whole thing on the stack.
-    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
-                                           State.AllocateStack(8, 4),
-                                           LocVT, LocInfo));
-    return true;
-  }
-
-  // Try to get the second register.
-  if (unsigned Reg = State.AllocateReg(RegList))
-    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
-  else
-    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
-                                           State.AllocateStack(4, 4),
-                                           LocVT, LocInfo));
-  return true;
-}
-
-static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                                   CCValAssign::LocInfo &LocInfo,
-                                   ISD::ArgFlagsTy &ArgFlags,
-                                   CCState &State) {
-  if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
-    return false;
-  if (LocVT == MVT::v2f64 &&
-      !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
-    return false;
-  return true; // we handled it
-}
-
-// AAPCS f64 is in aligned register pairs
-static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                           CCValAssign::LocInfo &LocInfo,
-                           CCState &State, bool CanFail) {
-  static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };
-  static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };
-  static const MCPhysReg ShadowRegList[] = { ARM::R0, ARM::R1 };
-  static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
-
-  unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList);
-  if (Reg == 0) {
-
-    // If we had only R3 unallocated, we still must waste it now.
-    Reg = State.AllocateReg(GPRArgRegs);
-    assert((!Reg || Reg == ARM::R3) && "Wrong GPRs usage for f64");
-
-    // For the 2nd half of a v2f64, do not just fail.
-    if (CanFail)
-      return false;
-
-    // Put the whole thing on the stack.
-    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
-                                           State.AllocateStack(8, 8),
-                                           LocVT, LocInfo));
-    return true;
-  }
-
-  unsigned i;
-  for (i = 0; i < 2; ++i)
-    if (HiRegList[i] == Reg)
-      break;
-
-  unsigned T = State.AllocateReg(LoRegList[i]);
-  (void)T;
-  assert(T == LoRegList[i] && "Could not allocate register");
-
-  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
-  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
-                                         LocVT, LocInfo));
-  return true;
-}
-
-static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                                    CCValAssign::LocInfo &LocInfo,
-                                    ISD::ArgFlagsTy &ArgFlags,
-                                    CCState &State) {
-  if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
-    return false;
-  if (LocVT == MVT::v2f64 &&
-      !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
-    return false;
-  return true; // we handled it
-}
-
-static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                         CCValAssign::LocInfo &LocInfo, CCState &State) {
-  static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };
-  static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };
-
-  unsigned Reg = State.AllocateReg(HiRegList, LoRegList);
-  if (Reg == 0)
-    return false; // we didn't handle it
-
-  unsigned i;
-  for (i = 0; i < 2; ++i)
-    if (HiRegList[i] == Reg)
-      break;
-
-  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
-  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
-                                         LocVT, LocInfo));
-  return true;
-}
-
-static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                                      CCValAssign::LocInfo &LocInfo,
-                                      ISD::ArgFlagsTy &ArgFlags,
-                                      CCState &State) {
-  if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
-    return false;
-  if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
-    return false;
-  return true; // we handled it
-}
-
-static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                                       CCValAssign::LocInfo &LocInfo,
-                                       ISD::ArgFlagsTy &ArgFlags,
-                                       CCState &State) {
-  return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
-                                   State);
-}
-
-static const MCPhysReg RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
-
-static const MCPhysReg SRegList[] = { ARM::S0,  ARM::S1,  ARM::S2,  ARM::S3,
-                                      ARM::S4,  ARM::S5,  ARM::S6,  ARM::S7,
-                                      ARM::S8,  ARM::S9,  ARM::S10, ARM::S11,
-                                      ARM::S12, ARM::S13, ARM::S14, ARM::S15 };
-static const MCPhysReg DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3,
-                                      ARM::D4, ARM::D5, ARM::D6, ARM::D7 };
-static const MCPhysReg QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 };
-
-
-// Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA
-// has InConsecutiveRegs set, and that the last member also has
-// InConsecutiveRegsLast set. We must process all members of the HA before
-// we can allocate it, as we need to know the total number of registers that
-// will be needed in order to (attempt to) allocate a contiguous block.
-static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
-                                          MVT &LocVT,
-                                          CCValAssign::LocInfo &LocInfo,
-                                          ISD::ArgFlagsTy &ArgFlags,
-                                          CCState &State) {
-  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
-
-  // AAPCS HFAs must have 1-4 elements, all of the same type
-  if (PendingMembers.size() > 0)
-    assert(PendingMembers[0].getLocVT() == LocVT);
-
-  // Add the argument to the list to be allocated once we know the size of the
-  // aggregate. Store the type's required alignment as extra info for later: in
-  // the [N x i64] case all trace has been removed by the time we actually get
-  // to do allocation.
-  PendingMembers.push_back(CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo,
-                                                   ArgFlags.getOrigAlign()));
-
-  if (!ArgFlags.isInConsecutiveRegsLast())
-    return true;
-
-  // Try to allocate a contiguous block of registers, each of the correct
-  // size to hold one member.
-  auto &DL = State.getMachineFunction().getDataLayout();
-  unsigned StackAlign = DL.getStackAlignment();
-  unsigned Align = std::min(PendingMembers[0].getExtraInfo(), StackAlign);
-
-  ArrayRef<MCPhysReg> RegList;
-  switch (LocVT.SimpleTy) {
-  case MVT::i32: {
-    RegList = RRegList;
-    unsigned RegIdx = State.getFirstUnallocated(RegList);
-
-    // First consume all registers that would give an unaligned object. Whether
-    // we go on stack or in regs, no one will be using them in the future.
-    unsigned RegAlign = alignTo(Align, 4) / 4;
-    while (RegIdx % RegAlign != 0 && RegIdx < RegList.size())
-      State.AllocateReg(RegList[RegIdx++]);
-
-    break;
-  }
-  case MVT::f16:
-  case MVT::f32:
-    RegList = SRegList;
-    break;
-  case MVT::v4f16:
-  case MVT::f64:
-    RegList = DRegList;
-    break;
-  case MVT::v8f16:
-  case MVT::v2f64:
-    RegList = QRegList;
-    break;
-  default:
-    llvm_unreachable("Unexpected member type for block aggregate");
-    break;
-  }
-
-  unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
-  if (RegResult) {
-    for (SmallVectorImpl<CCValAssign>::iterator It = PendingMembers.begin();
-         It != PendingMembers.end(); ++It) {
-      It->convertToReg(RegResult);
-      State.addLoc(*It);
-      ++RegResult;
-    }
-    PendingMembers.clear();
-    return true;
-  }
-
-  // Register allocation failed, we'll be needing the stack
-  unsigned Size = LocVT.getSizeInBits() / 8;
-  if (LocVT == MVT::i32 && State.getNextStackOffset() == 0) {
-    // If nothing else has used the stack until this point, a non-HFA aggregate
-    // can be split between regs and stack.
-    unsigned RegIdx = State.getFirstUnallocated(RegList);
-    for (auto &It : PendingMembers) {
-      if (RegIdx >= RegList.size())
-        It.convertToMem(State.AllocateStack(Size, Size));
-      else
-        It.convertToReg(State.AllocateReg(RegList[RegIdx++]));
-
-      State.addLoc(It);
-    }
-    PendingMembers.clear();
-    return true;
-  } else if (LocVT != MVT::i32)
-    RegList = SRegList;
-
-  // Mark all regs as unavailable (AAPCS rule C.2.vfp for VFP, C.6 for core)
-  for (auto Reg : RegList)
-    State.AllocateReg(Reg);
-
-  // After the first item has been allocated, the rest are packed as tightly as
-  // possible. (E.g. an incoming i64 would have starting Align of 8, but we'll
-  // be allocating a bunch of i32 slots).
-  unsigned RestAlign = std::min(Align, Size);
-
-  for (auto &It : PendingMembers) {
-    It.convertToMem(State.AllocateStack(Size, Align));
-    State.addLoc(It);
-    Align = RestAlign;
-  }
-
-  // All pending members have now been allocated
-  PendingMembers.clear();
-
-  // This will be allocated by the last member of the aggregate
-  return true;
-}
-
-} // End llvm namespace
+} // namespace llvm
 
 #endif
diff --git a/llvm/lib/Target/ARM/ARMCallingConv.td b/llvm/lib/Target/ARM/ARMCallingConv.td
index 1b8337fd6c80..ff5a5e28711c 100644
--- a/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -15,6 +15,7 @@ class CCIfAlign<string Align, CCAction A>:
 //===----------------------------------------------------------------------===//
 // ARM APCS Calling Convention
 //===----------------------------------------------------------------------===//
+let Entry = 1 in
 def CC_ARM_APCS : CallingConv<[
 
   // Handles byval parameters.
@@ -43,6 +44,7 @@ def CC_ARM_APCS : CallingConv<[
   CCIfType<[v2f64], CCAssignToStack<16, 4>>
 ]>;
 
+let Entry = 1 in
 def RetCC_ARM_APCS : CallingConv<[
   CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
   CCIfType<[f32], CCBitConvertToType<i32>>,
@@ -66,6 +68,7 @@ def RetCC_ARM_APCS : CallingConv<[
 //===----------------------------------------------------------------------===//
 // ARM APCS Calling Convention for FastCC (when VFP2 or later is available)
 //===----------------------------------------------------------------------===//
+let Entry = 1 in
 def FastCC_ARM_APCS : CallingConv<[
   // Handle all vector types as either f64 or v2f64.
   CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
@@ -85,6 +88,7 @@ def FastCC_ARM_APCS : CallingConv<[
   CCDelegateTo<CC_ARM_APCS>
 ]>;
 
+let Entry = 1 in
 def RetFastCC_ARM_APCS : CallingConv<[
   // Handle all vector types as either f64 or v2f64.
   CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
@@ -101,6 +105,7 @@ def RetFastCC_ARM_APCS : CallingConv<[
 // ARM APCS Calling Convention for GHC
 //===----------------------------------------------------------------------===//
 
+let Entry = 1 in
 def CC_ARM_APCS_GHC : CallingConv<[
   // Handle all vector types as either f64 or v2f64.
   CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
@@ -151,6 +156,7 @@ def RetCC_ARM_AAPCS_Common : CallingConv<[
 // ARM AAPCS (EABI) Calling Convention
 //===----------------------------------------------------------------------===//
 
+let Entry = 1 in
 def CC_ARM_AAPCS : CallingConv<[
   // Handles byval parameters.
   CCIfByVal<CCPassByVal<4, 4>>,
@@ -173,6 +179,7 @@ def CC_ARM_AAPCS : CallingConv<[
   CCDelegateTo<CC_ARM_AAPCS_Common>
 ]>;
 
+let Entry = 1 in
 def RetCC_ARM_AAPCS : CallingConv<[
   // Handle all vector types as either f64 or v2f64.
   CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
@@ -195,6 +202,7 @@ def RetCC_ARM_AAPCS : CallingConv<[
 // Also used for FastCC (when VFP2 or later is available)
 //===----------------------------------------------------------------------===//
 
+let Entry = 1 in
 def CC_ARM_AAPCS_VFP : CallingConv<[
   // Handles byval parameters.
   CCIfByVal<CCPassByVal<4, 4>>,
@@ -219,6 +227,7 @@ def CC_ARM_AAPCS_VFP : CallingConv<[
   CCDelegateTo<CC_ARM_AAPCS_Common>
 ]>;
 
+let Entry = 1 in
 def RetCC_ARM_AAPCS_VFP : CallingConv<[
   // Handle all vector types as either f64 or v2f64.
   CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp
index 4a8341b64acd..cc1849fcf99e 100644
--- a/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -244,8 +244,6 @@ class ARMFastISel final : public FastISel {
 
 } // end anonymous namespace
 
-#include "ARMGenCallingConv.inc"
-
 // DefinesOptionalPredicate - This is different from DefinesPredicate in that
 // we don't care about implicit defs here, just places we'll need to add a
 // default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 469ceb9c2134..36434b9a438c 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1591,8 +1591,6 @@ static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
 // Calling Convention Implementation
 //===----------------------------------------------------------------------===//
 
-#include "ARMGenCallingConv.inc"
-
 /// getEffectiveCallingConv - Get the effective calling convention, taking into
 /// account presence of floating point hardware and calling convention
 /// limitations, such as support for variadic functions.
diff --git a/llvm/lib/Target/ARM/CMakeLists.txt b/llvm/lib/Target/ARM/CMakeLists.txt
index d635c0add577..2b198827fccb 100644
--- a/llvm/lib/Target/ARM/CMakeLists.txt
+++ b/llvm/lib/Target/ARM/CMakeLists.txt
@@ -22,6 +22,7 @@ add_llvm_target(ARMCodeGen
   ARMAsmPrinter.cpp
   ARMBaseInstrInfo.cpp
   ARMBaseRegisterInfo.cpp
+  ARMCallingConv.cpp
   ARMCallLowering.cpp
   ARMCodeGenPrepare.cpp
   ARMConstantIslandPass.cpp
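
Usage sketch (illustrative, not part of the commit): with `let Entry = 1` on each TableGen definition, the generated CC_ARM_*/RetCC_ARM_* functions are emitted once in ARMCallingConv.cpp as external functions matching the declarations in ARMCallingConv.h, so ARMISelLowering.cpp and ARMFastISel.cpp link against one shared copy instead of each compiling its own static copy of ARMGenCallingConv.inc. A file inside lib/Target/ARM could then drive argument analysis roughly as follows; the helper name and its parameters are hypothetical, while CCState, CCAssignFn, and the entry points are the real APIs involved:

#include "ARMCallingConv.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Function.h"

using namespace llvm;

// Hypothetical helper: assign locations to outgoing call operands using one of
// the table-generated ARM calling conventions declared in ARMCallingConv.h.
static void analyzeARMCallOperands(MachineFunction &MF,
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                                   bool IsVarArg, bool UseVFP) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallingConv::C, IsVarArg, MF, ArgLocs,
                 MF.getFunction().getContext());

  // Each generated entry point matches llvm::CCAssignFn; by that contract a
  // CC function returns true only when it fails to assign a location.
  CCAssignFn *AssignFn = UseVFP ? CC_ARM_AAPCS_VFP : CC_ARM_AAPCS;

  // CCState runs the chosen convention over every operand, filling ArgLocs
  // with register/stack assignments, including the custom f64 and HFA
  // handling implemented in ARMCallingConv.cpp above.
  CCInfo.AnalyzeCallOperands(Outs, AssignFn);
}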