forked from OSchip/llvm-project
[X86] Vectorcall Calling Convention - Adding CodeGen Complete Support
The vectorcall calling convention specifies that arguments to functions are to be passed in registers, when possible. vectorcall uses more registers for arguments than fastcall or the default x64 calling convention use. The vectorcall calling convention is only supported in native code on x86 and x64 processors that include Streaming SIMD Extensions 2 (SSE2) and above. The current implementation does not handle Homogeneous Vector Aggregates (HVAs) correctly and this review attempts to fix it. This aubmit also includes additional lit tests to cover better HVAs corner cases. Differential Revision: https://reviews.llvm.org/D27392 llvm-svn: 290240
This commit is contained in:
parent
dcf5b72e20
commit
3b95157090
|
@ -296,6 +296,12 @@ public:
|
||||||
void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
|
void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||||
CCAssignFn Fn);
|
CCAssignFn Fn);
|
||||||
|
|
||||||
|
/// The function will invoke AnalyzeFormalArguments.
|
||||||
|
void AnalyzeArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||||
|
CCAssignFn Fn) {
|
||||||
|
AnalyzeFormalArguments(Ins, Fn);
|
||||||
|
}
|
||||||
|
|
||||||
/// AnalyzeReturn - Analyze the returned values of a return,
|
/// AnalyzeReturn - Analyze the returned values of a return,
|
||||||
/// incorporating info about the result values into this state.
|
/// incorporating info about the result values into this state.
|
||||||
void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
|
void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
|
||||||
|
@ -318,11 +324,22 @@ public:
|
||||||
SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
|
SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
|
||||||
CCAssignFn Fn);
|
CCAssignFn Fn);
|
||||||
|
|
||||||
|
/// The function will invoke AnalyzeCallOperands.
|
||||||
|
void AnalyzeArguments(const SmallVectorImpl<ISD::OutputArg> &Outs,
|
||||||
|
CCAssignFn Fn) {
|
||||||
|
AnalyzeCallOperands(Outs, Fn);
|
||||||
|
}
|
||||||
|
|
||||||
/// AnalyzeCallResult - Analyze the return values of a call,
|
/// AnalyzeCallResult - Analyze the return values of a call,
|
||||||
/// incorporating info about the passed values into this state.
|
/// incorporating info about the passed values into this state.
|
||||||
void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
|
void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||||
CCAssignFn Fn);
|
CCAssignFn Fn);
|
||||||
|
|
||||||
|
/// A shadow allocated register is a register that was allocated
|
||||||
|
/// but wasn't added to the location list (Locs).
|
||||||
|
/// \returns true if the register was allocated as shadow or false otherwise.
|
||||||
|
bool IsShadowAllocatedReg(unsigned Reg) const;
|
||||||
|
|
||||||
/// AnalyzeCallResult - Same as above except it's specialized for calls which
|
/// AnalyzeCallResult - Same as above except it's specialized for calls which
|
||||||
/// produce a single value.
|
/// produce a single value.
|
||||||
void AnalyzeCallResult(MVT VT, CCAssignFn Fn);
|
void AnalyzeCallResult(MVT VT, CCAssignFn Fn);
|
||||||
|
@ -521,6 +538,37 @@ public:
|
||||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||||
CCAssignFn CalleeFn, CCAssignFn CallerFn);
|
CCAssignFn CalleeFn, CCAssignFn CallerFn);
|
||||||
|
|
||||||
|
/// The function runs an additional analysis pass over function arguments.
|
||||||
|
/// It will mark each argument with the attribute flag SecArgPass.
|
||||||
|
/// After running, it will sort the locs list.
|
||||||
|
template <class T>
|
||||||
|
void AnalyzeArgumentsSecondPass(const SmallVectorImpl<T> &Args,
|
||||||
|
CCAssignFn Fn) {
|
||||||
|
unsigned NumFirstPassLocs = Locs.size();
|
||||||
|
|
||||||
|
/// Creates similar argument list to \p Args in which each argument is
|
||||||
|
/// marked using SecArgPass flag.
|
||||||
|
SmallVector<T, 16> SecPassArg;
|
||||||
|
// SmallVector<ISD::InputArg, 16> SecPassArg;
|
||||||
|
for (auto Arg : Args) {
|
||||||
|
Arg.Flags.setSecArgPass();
|
||||||
|
SecPassArg.push_back(Arg);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run the second argument pass
|
||||||
|
AnalyzeArguments(SecPassArg, Fn);
|
||||||
|
|
||||||
|
// Sort the locations of the arguments according to their original position.
|
||||||
|
SmallVector<CCValAssign, 16> TmpArgLocs;
|
||||||
|
std::swap(TmpArgLocs, Locs);
|
||||||
|
auto B = TmpArgLocs.begin(), E = TmpArgLocs.end();
|
||||||
|
std::merge(B, B + NumFirstPassLocs, B + NumFirstPassLocs, E,
|
||||||
|
std::back_inserter(Locs),
|
||||||
|
[](const CCValAssign &A, const CCValAssign &B) -> bool {
|
||||||
|
return A.getValNo() < B.getValNo();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// MarkAllocated - Mark a register and all of its aliases as allocated.
|
/// MarkAllocated - Mark a register and all of its aliases as allocated.
|
||||||
void MarkAllocated(unsigned Reg);
|
void MarkAllocated(unsigned Reg);
|
||||||
|
|
|
@ -51,6 +51,15 @@ namespace ISD {
|
||||||
static const uint64_t SwiftSelfOffs = 14;
|
static const uint64_t SwiftSelfOffs = 14;
|
||||||
static const uint64_t SwiftError = 1ULL<<15; ///< Swift error parameter
|
static const uint64_t SwiftError = 1ULL<<15; ///< Swift error parameter
|
||||||
static const uint64_t SwiftErrorOffs = 15;
|
static const uint64_t SwiftErrorOffs = 15;
|
||||||
|
static const uint64_t Hva = 1ULL << 16; ///< HVA field for
|
||||||
|
///< vectorcall
|
||||||
|
static const uint64_t HvaOffs = 16;
|
||||||
|
static const uint64_t HvaStart = 1ULL << 17; ///< HVA structure start
|
||||||
|
///< for vectorcall
|
||||||
|
static const uint64_t HvaStartOffs = 17;
|
||||||
|
static const uint64_t SecArgPass = 1ULL << 18; ///< Second argument
|
||||||
|
///< pass for vectorcall
|
||||||
|
static const uint64_t SecArgPassOffs = 18;
|
||||||
static const uint64_t OrigAlign = 0x1FULL<<27;
|
static const uint64_t OrigAlign = 0x1FULL<<27;
|
||||||
static const uint64_t OrigAlignOffs = 27;
|
static const uint64_t OrigAlignOffs = 27;
|
||||||
static const uint64_t ByValSize = 0x3fffffffULL<<32; ///< Struct size
|
static const uint64_t ByValSize = 0x3fffffffULL<<32; ///< Struct size
|
||||||
|
@ -91,6 +100,15 @@ namespace ISD {
|
||||||
bool isSwiftError() const { return Flags & SwiftError; }
|
bool isSwiftError() const { return Flags & SwiftError; }
|
||||||
void setSwiftError() { Flags |= One << SwiftErrorOffs; }
|
void setSwiftError() { Flags |= One << SwiftErrorOffs; }
|
||||||
|
|
||||||
|
bool isHva() const { return Flags & Hva; }
|
||||||
|
void setHva() { Flags |= One << HvaOffs; }
|
||||||
|
|
||||||
|
bool isHvaStart() const { return Flags & HvaStart; }
|
||||||
|
void setHvaStart() { Flags |= One << HvaStartOffs; }
|
||||||
|
|
||||||
|
bool isSecArgPass() const { return Flags & SecArgPass; }
|
||||||
|
void setSecArgPass() { Flags |= One << SecArgPassOffs; }
|
||||||
|
|
||||||
bool isNest() const { return Flags & Nest; }
|
bool isNest() const { return Flags & Nest; }
|
||||||
void setNest() { Flags |= One << NestOffs; }
|
void setNest() { Flags |= One << NestOffs; }
|
||||||
|
|
||||||
|
|
|
@ -23,6 +23,8 @@
|
||||||
#include "llvm/Target/TargetLowering.h"
|
#include "llvm/Target/TargetLowering.h"
|
||||||
#include "llvm/Target/TargetRegisterInfo.h"
|
#include "llvm/Target/TargetRegisterInfo.h"
|
||||||
#include "llvm/Target/TargetSubtargetInfo.h"
|
#include "llvm/Target/TargetSubtargetInfo.h"
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
|
CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
|
||||||
|
@ -64,6 +66,22 @@ void CCState::MarkAllocated(unsigned Reg) {
|
||||||
UsedRegs[*AI/32] |= 1 << (*AI&31);
|
UsedRegs[*AI/32] |= 1 << (*AI&31);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool CCState::IsShadowAllocatedReg(unsigned Reg) const {
|
||||||
|
if (!isAllocated(Reg))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
for (auto const &ValAssign : Locs) {
|
||||||
|
if (ValAssign.isRegLoc()) {
|
||||||
|
for (MCRegAliasIterator AI(ValAssign.getLocReg(), &TRI, true);
|
||||||
|
AI.isValid(); ++AI) {
|
||||||
|
if (*AI == Reg)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/// Analyze an array of argument values,
|
/// Analyze an array of argument values,
|
||||||
/// incorporating info about the formals into this state.
|
/// incorporating info about the formals into this state.
|
||||||
void
|
void
|
||||||
|
|
|
@ -7732,8 +7732,19 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
|
||||||
Flags.setZExt();
|
Flags.setZExt();
|
||||||
if (Args[i].isSExt)
|
if (Args[i].isSExt)
|
||||||
Flags.setSExt();
|
Flags.setSExt();
|
||||||
if (Args[i].isInReg)
|
if (Args[i].isInReg) {
|
||||||
|
// If we are using vectorcall calling convention, a structure that is
|
||||||
|
// passed InReg - is surely an HVA
|
||||||
|
if (CLI.CallConv == CallingConv::X86_VectorCall &&
|
||||||
|
isa<StructType>(FinalType)) {
|
||||||
|
// The first value of a structure is marked
|
||||||
|
if (0 == Value)
|
||||||
|
Flags.setHvaStart();
|
||||||
|
Flags.setHva();
|
||||||
|
}
|
||||||
|
// Set InReg Flag
|
||||||
Flags.setInReg();
|
Flags.setInReg();
|
||||||
|
}
|
||||||
if (Args[i].isSRet)
|
if (Args[i].isSRet)
|
||||||
Flags.setSRet();
|
Flags.setSRet();
|
||||||
if (Args[i].isSwiftSelf)
|
if (Args[i].isSwiftSelf)
|
||||||
|
@ -8019,8 +8030,19 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
|
||||||
Flags.setZExt();
|
Flags.setZExt();
|
||||||
if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
|
if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
|
||||||
Flags.setSExt();
|
Flags.setSExt();
|
||||||
if (F.getAttributes().hasAttribute(Idx, Attribute::InReg))
|
if (F.getAttributes().hasAttribute(Idx, Attribute::InReg)) {
|
||||||
|
// If we are using vectorcall calling convention, a structure that is
|
||||||
|
// passed InReg - is surely an HVA
|
||||||
|
if (F.getCallingConv() == CallingConv::X86_VectorCall &&
|
||||||
|
isa<StructType>(I->getType())) {
|
||||||
|
// The first value of a structure is marked
|
||||||
|
if (0 == Value)
|
||||||
|
Flags.setHvaStart();
|
||||||
|
Flags.setHva();
|
||||||
|
}
|
||||||
|
// Set InReg Flag
|
||||||
Flags.setInReg();
|
Flags.setInReg();
|
||||||
|
}
|
||||||
if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet))
|
if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet))
|
||||||
Flags.setSRet();
|
Flags.setSRet();
|
||||||
if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftSelf))
|
if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftSelf))
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
#include "MCTargetDesc/X86MCTargetDesc.h"
|
#include "MCTargetDesc/X86MCTargetDesc.h"
|
||||||
|
#include "X86Subtarget.h"
|
||||||
#include "llvm/CodeGen/CallingConvLower.h"
|
#include "llvm/CodeGen/CallingConvLower.h"
|
||||||
#include "llvm/IR/CallingConv.h"
|
#include "llvm/IR/CallingConv.h"
|
||||||
|
|
||||||
|
@ -39,14 +40,14 @@ bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
|
||||||
if (AvailableRegs.size() < RequiredGprsUponSplit)
|
if (AvailableRegs.size() < RequiredGprsUponSplit)
|
||||||
return false; // Not enough free registers - continue the search.
|
return false; // Not enough free registers - continue the search.
|
||||||
|
|
||||||
// Allocating the available registers
|
// Allocating the available registers.
|
||||||
for (unsigned I = 0; I < RequiredGprsUponSplit; I++) {
|
for (unsigned I = 0; I < RequiredGprsUponSplit; I++) {
|
||||||
|
|
||||||
// Marking the register as located
|
// Marking the register as located.
|
||||||
unsigned Reg = State.AllocateReg(AvailableRegs[I]);
|
unsigned Reg = State.AllocateReg(AvailableRegs[I]);
|
||||||
|
|
||||||
// Since we previously made sure that 2 registers are available
|
// Since we previously made sure that 2 registers are available
|
||||||
// we expect that a real register number will be returned
|
// we expect that a real register number will be returned.
|
||||||
assert(Reg && "Expecting a register will be available");
|
assert(Reg && "Expecting a register will be available");
|
||||||
|
|
||||||
// Assign the value to the allocated register
|
// Assign the value to the allocated register
|
||||||
|
@ -57,4 +58,151 @@ bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static ArrayRef<MCPhysReg> CC_X86_VectorCallGetSSEs(const MVT &ValVT) {
|
||||||
|
if (ValVT.is512BitVector()) {
|
||||||
|
static const MCPhysReg RegListZMM[] = {X86::ZMM0, X86::ZMM1, X86::ZMM2,
|
||||||
|
X86::ZMM3, X86::ZMM4, X86::ZMM5};
|
||||||
|
return RegListZMM;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ValVT.is256BitVector()) {
|
||||||
|
static const MCPhysReg RegListYMM[] = {X86::YMM0, X86::YMM1, X86::YMM2,
|
||||||
|
X86::YMM3, X86::YMM4, X86::YMM5};
|
||||||
|
return RegListYMM;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const MCPhysReg RegListXMM[] = {X86::XMM0, X86::XMM1, X86::XMM2,
|
||||||
|
X86::XMM3, X86::XMM4, X86::XMM5};
|
||||||
|
return RegListXMM;
|
||||||
|
}
|
||||||
|
|
||||||
|
static ArrayRef<MCPhysReg> CC_X86_64_VectorCallGetGPRs() {
|
||||||
|
static const MCPhysReg RegListGPR[] = {X86::RCX, X86::RDX, X86::R8, X86::R9};
|
||||||
|
return RegListGPR;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool CC_X86_VectorCallAssignRegister(unsigned &ValNo, MVT &ValVT,
|
||||||
|
MVT &LocVT,
|
||||||
|
CCValAssign::LocInfo &LocInfo,
|
||||||
|
ISD::ArgFlagsTy &ArgFlags,
|
||||||
|
CCState &State) {
|
||||||
|
|
||||||
|
ArrayRef<MCPhysReg> RegList = CC_X86_VectorCallGetSSEs(ValVT);
|
||||||
|
bool Is64bit = static_cast<const X86Subtarget &>(
|
||||||
|
State.getMachineFunction().getSubtarget())
|
||||||
|
.is64Bit();
|
||||||
|
|
||||||
|
for (auto Reg : RegList) {
|
||||||
|
// If the register is not marked as allocated - assign to it.
|
||||||
|
if (!State.isAllocated(Reg)) {
|
||||||
|
unsigned AssigedReg = State.AllocateReg(Reg);
|
||||||
|
assert(AssigedReg == Reg && "Expecting a valid register allocation");
|
||||||
|
State.addLoc(
|
||||||
|
CCValAssign::getReg(ValNo, ValVT, AssigedReg, LocVT, LocInfo));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
// If the register is marked as shadow allocated - assign to it.
|
||||||
|
if (Is64bit && State.IsShadowAllocatedReg(Reg)) {
|
||||||
|
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
llvm_unreachable("Clang should ensure that hva marked vectors will have "
|
||||||
|
"an available register.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
|
||||||
|
CCValAssign::LocInfo &LocInfo,
|
||||||
|
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
|
||||||
|
// On the second pass, go through the HVAs only.
|
||||||
|
if (ArgFlags.isSecArgPass()) {
|
||||||
|
if (ArgFlags.isHva())
|
||||||
|
return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo,
|
||||||
|
ArgFlags, State);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process only vector types as defined by vectorcall spec:
|
||||||
|
// "A vector type is either a floating-point type, for example,
|
||||||
|
// a float or double, or an SIMD vector type, for example, __m128 or __m256".
|
||||||
|
if (!(ValVT.isFloatingPoint() ||
|
||||||
|
(ValVT.isVector() && ValVT.getSizeInBits() >= 128))) {
|
||||||
|
// If R9 was already assigned it means that we are after the fourth element
|
||||||
|
// and because this is not an HVA / Vector type, we need to allocate
|
||||||
|
// shadow XMM register.
|
||||||
|
if (State.isAllocated(X86::R9)) {
|
||||||
|
// Assign shadow XMM register.
|
||||||
|
(void)State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT));
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!ArgFlags.isHva() || ArgFlags.isHvaStart()) {
|
||||||
|
// Assign shadow GPR register.
|
||||||
|
(void)State.AllocateReg(CC_X86_64_VectorCallGetGPRs());
|
||||||
|
|
||||||
|
// Assign XMM register - (shadow for HVA and non-shadow for non HVA).
|
||||||
|
if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) {
|
||||||
|
// In Vectorcall Calling convention, additional shadow stack can be
|
||||||
|
// created on top of the basic 32 bytes of win64.
|
||||||
|
// It can happen if the fifth or sixth argument is vector type or HVA.
|
||||||
|
// At that case for each argument a shadow stack of 8 bytes is allocated.
|
||||||
|
if (Reg == X86::XMM4 || Reg == X86::XMM5)
|
||||||
|
State.AllocateStack(8, 8);
|
||||||
|
|
||||||
|
if (!ArgFlags.isHva()) {
|
||||||
|
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
|
||||||
|
return true; // Allocated a register - Stop the search.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If this is an HVA - Stop the search,
|
||||||
|
// otherwise continue the search.
|
||||||
|
return ArgFlags.isHva();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
|
||||||
|
CCValAssign::LocInfo &LocInfo,
|
||||||
|
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
|
||||||
|
// On the second pass, go through the HVAs only.
|
||||||
|
if (ArgFlags.isSecArgPass()) {
|
||||||
|
if (ArgFlags.isHva())
|
||||||
|
return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo,
|
||||||
|
ArgFlags, State);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process only vector types as defined by vectorcall spec:
|
||||||
|
// "A vector type is either a floating point type, for example,
|
||||||
|
// a float or double, or an SIMD vector type, for example, __m128 or __m256".
|
||||||
|
if (!(ValVT.isFloatingPoint() ||
|
||||||
|
(ValVT.isVector() && ValVT.getSizeInBits() >= 128))) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ArgFlags.isHva())
|
||||||
|
return true; // If this is an HVA - Stop the search.
|
||||||
|
|
||||||
|
// Assign XMM register.
|
||||||
|
if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) {
|
||||||
|
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// In case we did not find an available XMM register for a vector -
|
||||||
|
// pass it indirectly.
|
||||||
|
// It is similar to CCPassIndirect, with the addition of inreg.
|
||||||
|
if (!ValVT.isFloatingPoint()) {
|
||||||
|
LocVT = MVT::i32;
|
||||||
|
LocInfo = CCValAssign::Indirect;
|
||||||
|
ArgFlags.setInReg();
|
||||||
|
}
|
||||||
|
|
||||||
|
return false; // No register was assigned - Continue the search.
|
||||||
|
}
|
||||||
|
|
||||||
} // End llvm namespace
|
} // End llvm namespace
|
||||||
|
|
|
@ -24,22 +24,29 @@ namespace llvm {
|
||||||
/// When regcall calling convention compiled to 32 bit arch, special treatment
|
/// When regcall calling convention compiled to 32 bit arch, special treatment
|
||||||
/// is required for 64 bit masks.
|
/// is required for 64 bit masks.
|
||||||
/// The value should be assigned to two GPRs.
|
/// The value should be assigned to two GPRs.
|
||||||
/// @return true if registers were allocated and false otherwise
|
/// \return true if registers were allocated and false otherwise.
|
||||||
bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
|
bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
|
||||||
CCValAssign::LocInfo &LocInfo,
|
CCValAssign::LocInfo &LocInfo,
|
||||||
ISD::ArgFlagsTy &ArgFlags, CCState &State);
|
ISD::ArgFlagsTy &ArgFlags, CCState &State);
|
||||||
|
|
||||||
inline bool CC_X86_32_VectorCallIndirect(unsigned &ValNo, MVT &ValVT,
|
/// Vectorcall calling convention has special handling for vector types or
|
||||||
MVT &LocVT,
|
/// HVA for 64 bit arch.
|
||||||
CCValAssign::LocInfo &LocInfo,
|
/// For HVAs shadow registers might be allocated on the first pass
|
||||||
ISD::ArgFlagsTy &ArgFlags,
|
/// and actual XMM registers are allocated on the second pass.
|
||||||
CCState &State) {
|
/// For vector types, actual XMM registers are allocated on the first pass.
|
||||||
// Similar to CCPassIndirect, with the addition of inreg.
|
/// \return true if registers were allocated and false otherwise.
|
||||||
LocVT = MVT::i32;
|
bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
|
||||||
LocInfo = CCValAssign::Indirect;
|
CCValAssign::LocInfo &LocInfo,
|
||||||
ArgFlags.setInReg();
|
ISD::ArgFlagsTy &ArgFlags, CCState &State);
|
||||||
return false; // Continue the search, but now for i32.
|
|
||||||
}
|
/// Vectorcall calling convention has special handling for vector types or
|
||||||
|
/// HVA for 32 bit arch.
|
||||||
|
/// For HVAs actual XMM registers are allocated on the second pass.
|
||||||
|
/// For vector types, actual XMM registers are allocated on the first pass.
|
||||||
|
/// \return true if registers were allocated and false otherwise.
|
||||||
|
bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
|
||||||
|
CCValAssign::LocInfo &LocInfo,
|
||||||
|
ISD::ArgFlagsTy &ArgFlags, CCState &State);
|
||||||
|
|
||||||
inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
|
inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
|
||||||
CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
|
CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
|
||||||
|
|
|
@ -308,20 +308,12 @@ def RetCC_X86_32_HiPE : CallingConv<[
|
||||||
CCIfType<[i32], CCAssignToReg<[ESI, EBP, EAX, EDX]>>
|
CCIfType<[i32], CCAssignToReg<[ESI, EBP, EAX, EDX]>>
|
||||||
]>;
|
]>;
|
||||||
|
|
||||||
// X86-32 HiPE return-value convention.
|
// X86-32 Vectorcall return-value convention.
|
||||||
def RetCC_X86_32_VectorCall : CallingConv<[
|
def RetCC_X86_32_VectorCall : CallingConv<[
|
||||||
// Vector types are returned in XMM0,XMM1,XMMM2 and XMM3.
|
// Floating Point types are returned in XMM0,XMM1,XMMM2 and XMM3.
|
||||||
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
|
CCIfType<[f32, f64, f128],
|
||||||
CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>,
|
CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>,
|
||||||
|
|
||||||
// 256-bit FP vectors
|
|
||||||
CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
|
|
||||||
CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>,
|
|
||||||
|
|
||||||
// 512-bit FP vectors
|
|
||||||
CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
|
|
||||||
CCAssignToReg<[ZMM0,ZMM1,ZMM2,ZMM3]>>,
|
|
||||||
|
|
||||||
// Return integers in the standard way.
|
// Return integers in the standard way.
|
||||||
CCDelegateTo<RetCC_X86Common>
|
CCDelegateTo<RetCC_X86Common>
|
||||||
]>;
|
]>;
|
||||||
|
@ -350,6 +342,16 @@ def RetCC_X86_Win64_C : CallingConv<[
|
||||||
CCDelegateTo<RetCC_X86_64_C>
|
CCDelegateTo<RetCC_X86_64_C>
|
||||||
]>;
|
]>;
|
||||||
|
|
||||||
|
// X86-64 vectorcall return-value convention.
|
||||||
|
def RetCC_X86_64_Vectorcall : CallingConv<[
|
||||||
|
// Vectorcall calling convention always returns FP values in XMMs.
|
||||||
|
CCIfType<[f32, f64, f128],
|
||||||
|
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>,
|
||||||
|
|
||||||
|
// Otherwise, everything is the same as Windows X86-64 C CC.
|
||||||
|
CCDelegateTo<RetCC_X86_Win64_C>
|
||||||
|
]>;
|
||||||
|
|
||||||
// X86-64 HiPE return-value convention.
|
// X86-64 HiPE return-value convention.
|
||||||
def RetCC_X86_64_HiPE : CallingConv<[
|
def RetCC_X86_64_HiPE : CallingConv<[
|
||||||
// Promote all types to i64
|
// Promote all types to i64
|
||||||
|
@ -447,6 +449,9 @@ def RetCC_X86_64 : CallingConv<[
|
||||||
CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo<RetCC_X86_Win64_C>>,
|
CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo<RetCC_X86_Win64_C>>,
|
||||||
CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo<RetCC_X86_64_C>>,
|
CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo<RetCC_X86_64_C>>,
|
||||||
|
|
||||||
|
// Handle Vectorcall CC
|
||||||
|
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<RetCC_X86_64_Vectorcall>>,
|
||||||
|
|
||||||
// Handle HHVM calls.
|
// Handle HHVM calls.
|
||||||
CCIfCC<"CallingConv::HHVM", CCDelegateTo<RetCC_X86_64_HHVM>>,
|
CCIfCC<"CallingConv::HHVM", CCDelegateTo<RetCC_X86_64_HHVM>>,
|
||||||
|
|
||||||
|
@ -626,18 +631,7 @@ def CC_X86_Win64_C : CallingConv<[
|
||||||
]>;
|
]>;
|
||||||
|
|
||||||
def CC_X86_Win64_VectorCall : CallingConv<[
|
def CC_X86_Win64_VectorCall : CallingConv<[
|
||||||
// The first 6 floating point and vector types of 128 bits or less use
|
CCCustom<"CC_X86_64_VectorCall">,
|
||||||
// XMM0-XMM5.
|
|
||||||
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
|
|
||||||
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5]>>,
|
|
||||||
|
|
||||||
// 256-bit vectors use YMM registers.
|
|
||||||
CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
|
|
||||||
CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5]>>,
|
|
||||||
|
|
||||||
// 512-bit vectors use ZMM registers.
|
|
||||||
CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
|
|
||||||
CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5]>>,
|
|
||||||
|
|
||||||
// Delegate to fastcall to handle integer types.
|
// Delegate to fastcall to handle integer types.
|
||||||
CCDelegateTo<CC_X86_Win64_C>
|
CCDelegateTo<CC_X86_Win64_C>
|
||||||
|
@ -847,25 +841,9 @@ def CC_X86_32_FastCall : CallingConv<[
|
||||||
CCDelegateTo<CC_X86_32_Common>
|
CCDelegateTo<CC_X86_32_Common>
|
||||||
]>;
|
]>;
|
||||||
|
|
||||||
def CC_X86_32_VectorCall : CallingConv<[
|
def CC_X86_Win32_VectorCall : CallingConv<[
|
||||||
// The first 6 floating point and vector types of 128 bits or less use
|
// Pass floating point in XMMs
|
||||||
// XMM0-XMM5.
|
CCCustom<"CC_X86_32_VectorCall">,
|
||||||
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
|
|
||||||
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5]>>,
|
|
||||||
|
|
||||||
// 256-bit vectors use YMM registers.
|
|
||||||
CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
|
|
||||||
CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5]>>,
|
|
||||||
|
|
||||||
// 512-bit vectors use ZMM registers.
|
|
||||||
CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
|
|
||||||
CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5]>>,
|
|
||||||
|
|
||||||
// Otherwise, pass it indirectly.
|
|
||||||
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64,
|
|
||||||
v32i8, v16i16, v8i32, v4i64, v8f32, v4f64,
|
|
||||||
v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
|
|
||||||
CCCustom<"CC_X86_32_VectorCallIndirect">>,
|
|
||||||
|
|
||||||
// Delegate to fastcall to handle integer types.
|
// Delegate to fastcall to handle integer types.
|
||||||
CCDelegateTo<CC_X86_32_FastCall>
|
CCDelegateTo<CC_X86_32_FastCall>
|
||||||
|
@ -999,7 +977,7 @@ def CC_X86_32 : CallingConv<[
|
||||||
CCIfCC<"CallingConv::X86_INTR", CCDelegateTo<CC_X86_32_Intr>>,
|
CCIfCC<"CallingConv::X86_INTR", CCDelegateTo<CC_X86_32_Intr>>,
|
||||||
CCIfSubtarget<"isTargetMCU()", CCDelegateTo<CC_X86_32_MCU>>,
|
CCIfSubtarget<"isTargetMCU()", CCDelegateTo<CC_X86_32_MCU>>,
|
||||||
CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
|
CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
|
||||||
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_32_VectorCall>>,
|
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_Win32_VectorCall>>,
|
||||||
CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
|
CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
|
||||||
CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
|
CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
|
||||||
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
|
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
#include "X86CallingConv.h"
|
#include "X86CallingConv.h"
|
||||||
#include "X86FrameLowering.h"
|
#include "X86FrameLowering.h"
|
||||||
#include "X86InstrBuilder.h"
|
#include "X86InstrBuilder.h"
|
||||||
|
#include "X86IntrinsicsInfo.h"
|
||||||
#include "X86MachineFunctionInfo.h"
|
#include "X86MachineFunctionInfo.h"
|
||||||
#include "X86ShuffleDecodeConstantPool.h"
|
#include "X86ShuffleDecodeConstantPool.h"
|
||||||
#include "X86TargetMachine.h"
|
#include "X86TargetMachine.h"
|
||||||
|
@ -53,10 +54,10 @@
|
||||||
#include "llvm/Support/ErrorHandling.h"
|
#include "llvm/Support/ErrorHandling.h"
|
||||||
#include "llvm/Support/MathExtras.h"
|
#include "llvm/Support/MathExtras.h"
|
||||||
#include "llvm/Target/TargetOptions.h"
|
#include "llvm/Target/TargetOptions.h"
|
||||||
#include "X86IntrinsicsInfo.h"
|
#include <algorithm>
|
||||||
#include <bitset>
|
#include <bitset>
|
||||||
#include <numeric>
|
|
||||||
#include <cctype>
|
#include <cctype>
|
||||||
|
#include <numeric>
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
#define DEBUG_TYPE "x86-isel"
|
#define DEBUG_TYPE "x86-isel"
|
||||||
|
@ -2781,6 +2782,13 @@ static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
|
||||||
return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
|
return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool isSortedByValueNo(const SmallVectorImpl<CCValAssign> &ArgLocs) {
|
||||||
|
return std::is_sorted(ArgLocs.begin(), ArgLocs.end(),
|
||||||
|
[](const CCValAssign &A, const CCValAssign &B) -> bool {
|
||||||
|
return A.getValNo() < B.getValNo();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
SDValue X86TargetLowering::LowerFormalArguments(
|
SDValue X86TargetLowering::LowerFormalArguments(
|
||||||
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
|
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
|
||||||
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
|
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
|
||||||
|
@ -2815,11 +2823,22 @@ SDValue X86TargetLowering::LowerFormalArguments(
|
||||||
SmallVector<CCValAssign, 16> ArgLocs;
|
SmallVector<CCValAssign, 16> ArgLocs;
|
||||||
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
|
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
|
||||||
|
|
||||||
// Allocate shadow area for Win64
|
// Allocate shadow area for Win64.
|
||||||
if (IsWin64)
|
if (IsWin64)
|
||||||
CCInfo.AllocateStack(32, 8);
|
CCInfo.AllocateStack(32, 8);
|
||||||
|
|
||||||
CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
|
CCInfo.AnalyzeArguments(Ins, CC_X86);
|
||||||
|
|
||||||
|
// In vectorcall calling convention a second pass is required for the HVA
|
||||||
|
// types.
|
||||||
|
if (CallingConv::X86_VectorCall == CallConv) {
|
||||||
|
CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
|
||||||
|
}
|
||||||
|
|
||||||
|
// The next loop assumes that the locations are in the same order of the
|
||||||
|
// input arguments.
|
||||||
|
assert(isSortedByValueNo(ArgLocs) &&
|
||||||
|
"Argument Location list must be sorted before lowering");
|
||||||
|
|
||||||
SDValue ArgValue;
|
SDValue ArgValue;
|
||||||
for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
|
for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
|
||||||
|
@ -3263,11 +3282,17 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||||
SmallVector<CCValAssign, 16> ArgLocs;
|
SmallVector<CCValAssign, 16> ArgLocs;
|
||||||
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
|
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
|
||||||
|
|
||||||
// Allocate shadow area for Win64
|
// Allocate shadow area for Win64.
|
||||||
if (IsWin64)
|
if (IsWin64)
|
||||||
CCInfo.AllocateStack(32, 8);
|
CCInfo.AllocateStack(32, 8);
|
||||||
|
|
||||||
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
|
CCInfo.AnalyzeArguments(Outs, CC_X86);
|
||||||
|
|
||||||
|
// In vectorcall calling convention a second pass is required for the HVA
|
||||||
|
// types.
|
||||||
|
if (CallingConv::X86_VectorCall == CallConv) {
|
||||||
|
CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
|
||||||
|
}
|
||||||
|
|
||||||
// Get a count of how many bytes are to be pushed on the stack.
|
// Get a count of how many bytes are to be pushed on the stack.
|
||||||
unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
|
unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
|
||||||
|
@ -3322,6 +3347,11 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||||
SmallVector<SDValue, 8> MemOpChains;
|
SmallVector<SDValue, 8> MemOpChains;
|
||||||
SDValue StackPtr;
|
SDValue StackPtr;
|
||||||
|
|
||||||
|
// The next loop assumes that the locations are in the same order of the
|
||||||
|
// input arguments.
|
||||||
|
assert(isSortedByValueNo(ArgLocs) &&
|
||||||
|
"Argument Location list must be sorted before lowering");
|
||||||
|
|
||||||
// Walk the register/memloc assignments, inserting copies/loads. In the case
|
// Walk the register/memloc assignments, inserting copies/loads. In the case
|
||||||
// of tail call optimization arguments are handle later.
|
// of tail call optimization arguments are handle later.
|
||||||
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
|
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
|
||||||
|
|
|
@ -6,14 +6,12 @@
|
||||||
define x86_vectorcallcc i32 @test_int_1() {
|
define x86_vectorcallcc i32 @test_int_1() {
|
||||||
ret i32 0
|
ret i32 0
|
||||||
}
|
}
|
||||||
|
|
||||||
; CHECK-LABEL: {{^}}test_int_1@@0:
|
; CHECK-LABEL: {{^}}test_int_1@@0:
|
||||||
; CHECK: xorl %eax, %eax
|
; CHECK: xorl %eax, %eax
|
||||||
|
|
||||||
define x86_vectorcallcc i32 @test_int_2(i32 inreg %a) {
|
define x86_vectorcallcc i32 @test_int_2(i32 inreg %a) {
|
||||||
ret i32 %a
|
ret i32 %a
|
||||||
}
|
}
|
||||||
|
|
||||||
; X86-LABEL: {{^}}test_int_2@@4:
|
; X86-LABEL: {{^}}test_int_2@@4:
|
||||||
; X64-LABEL: {{^}}test_int_2@@8:
|
; X64-LABEL: {{^}}test_int_2@@8:
|
||||||
; CHECK: movl %ecx, %eax
|
; CHECK: movl %ecx, %eax
|
||||||
|
@ -22,7 +20,6 @@ define x86_vectorcallcc i32 @test_int_3(i64 inreg %a) {
|
||||||
%at = trunc i64 %a to i32
|
%at = trunc i64 %a to i32
|
||||||
ret i32 %at
|
ret i32 %at
|
||||||
}
|
}
|
||||||
|
|
||||||
; X86-LABEL: {{^}}test_int_3@@8:
|
; X86-LABEL: {{^}}test_int_3@@8:
|
||||||
; X64-LABEL: {{^}}test_int_3@@8:
|
; X64-LABEL: {{^}}test_int_3@@8:
|
||||||
; CHECK: movl %ecx, %eax
|
; CHECK: movl %ecx, %eax
|
||||||
|
@ -31,10 +28,8 @@ define x86_vectorcallcc i32 @test_int_4(i32 inreg %a, i32 inreg %b) {
|
||||||
%s = add i32 %a, %b
|
%s = add i32 %a, %b
|
||||||
ret i32 %s
|
ret i32 %s
|
||||||
}
|
}
|
||||||
|
|
||||||
; X86-LABEL: {{^}}test_int_4@@8:
|
; X86-LABEL: {{^}}test_int_4@@8:
|
||||||
; X86: leal (%ecx,%edx), %eax
|
; X86: leal (%ecx,%edx), %eax
|
||||||
|
|
||||||
; X64-LABEL: {{^}}test_int_4@@16:
|
; X64-LABEL: {{^}}test_int_4@@16:
|
||||||
; X64: leal (%rcx,%rdx), %eax
|
; X64: leal (%rcx,%rdx), %eax
|
||||||
|
|
||||||
|
@ -90,4 +85,139 @@ define x86_vectorcallcc <16 x i8> @test_vec_2(
|
||||||
ret <16 x i8> %r
|
ret <16 x i8> %r
|
||||||
}
|
}
|
||||||
; CHECK-LABEL: {{^}}test_vec_2@@104:
|
; CHECK-LABEL: {{^}}test_vec_2@@104:
|
||||||
; CHECK: movaps (%{{[re]}}cx), %xmm0
|
; x64: movq {{[0-9]*}}(%rsp), %rax
|
||||||
|
; CHECK: movaps (%{{rax|ecx}}), %xmm0
|
||||||
|
|
||||||
|
%struct.HVA5 = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float> }
|
||||||
|
%struct.HVA4 = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
|
||||||
|
%struct.HVA3 = type { <4 x float>, <4 x float>, <4 x float> }
|
||||||
|
%struct.HVA2 = type { <4 x float>, <4 x float> }
|
||||||
|
|
||||||
|
define x86_vectorcallcc <4 x float> @test_mixed_1(i32 %a, %struct.HVA4 inreg %bb, i32 %c) {
|
||||||
|
entry:
|
||||||
|
%b = alloca %struct.HVA4, align 16
|
||||||
|
store %struct.HVA4 %bb, %struct.HVA4* %b, align 16
|
||||||
|
%w1 = getelementptr inbounds %struct.HVA4, %struct.HVA4* %b, i32 0, i32 1
|
||||||
|
%0 = load <4 x float>, <4 x float>* %w1, align 16
|
||||||
|
ret <4 x float> %0
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: test_mixed_1
|
||||||
|
; CHECK: movaps %xmm1, 16(%{{(e|r)}}sp)
|
||||||
|
; CHECK: movaps 16(%{{(e|r)}}sp), %xmm0
|
||||||
|
; CHECK: ret{{q|l}}
|
||||||
|
|
||||||
|
define x86_vectorcallcc <4 x float> @test_mixed_2(%struct.HVA4 inreg %a, %struct.HVA4* %b, <4 x float> %c) {
|
||||||
|
entry:
|
||||||
|
%c.addr = alloca <4 x float>, align 16
|
||||||
|
store <4 x float> %c, <4 x float>* %c.addr, align 16
|
||||||
|
%0 = load <4 x float>, <4 x float>* %c.addr, align 16
|
||||||
|
ret <4 x float> %0
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: test_mixed_2
|
||||||
|
; X86: movaps %xmm0, (%esp)
|
||||||
|
; X64: movaps %xmm2, %xmm0
|
||||||
|
; CHECK: ret{{[ql]}}
|
||||||
|
|
||||||
|
define x86_vectorcallcc <4 x float> @test_mixed_3(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* %f) {
|
||||||
|
entry:
|
||||||
|
%x = getelementptr inbounds %struct.HVA2, %struct.HVA2* %f, i32 0, i32 0
|
||||||
|
%0 = load <4 x float>, <4 x float>* %x, align 16
|
||||||
|
ret <4 x float> %0
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: test_mixed_3
|
||||||
|
; CHECK: movaps (%{{[re][ac]}}x), %xmm0
|
||||||
|
; CHECK: ret{{[ql]}}
|
||||||
|
|
||||||
|
define x86_vectorcallcc <4 x float> @test_mixed_4(%struct.HVA4 inreg %a, %struct.HVA2* %bb, <4 x float> %c) {
|
||||||
|
entry:
|
||||||
|
%y4 = getelementptr inbounds %struct.HVA2, %struct.HVA2* %bb, i32 0, i32 1
|
||||||
|
%0 = load <4 x float>, <4 x float>* %y4, align 16
|
||||||
|
ret <4 x float> %0
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: test_mixed_4
|
||||||
|
; X86: movaps 16(%eax), %xmm0
|
||||||
|
; X64: movaps 16(%rdx), %xmm0
|
||||||
|
; CHECK: ret{{[ql]}}
|
||||||
|
|
||||||
|
define x86_vectorcallcc <4 x float> @test_mixed_5(%struct.HVA3 inreg %a, %struct.HVA3* %b, <4 x float> %c, %struct.HVA2 inreg %dd) {
|
||||||
|
entry:
|
||||||
|
%d = alloca %struct.HVA2, align 16
|
||||||
|
store %struct.HVA2 %dd, %struct.HVA2* %d, align 16
|
||||||
|
%y5 = getelementptr inbounds %struct.HVA2, %struct.HVA2* %d, i32 0, i32 1
|
||||||
|
%0 = load <4 x float>, <4 x float>* %y5, align 16
|
||||||
|
ret <4 x float> %0
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: test_mixed_5
|
||||||
|
; CHECK: movaps %xmm5, 16(%{{(e|r)}}sp)
|
||||||
|
; CHECK: movaps 16(%{{(e|r)}}sp), %xmm0
|
||||||
|
; CHECK: ret{{[ql]}}
|
||||||
|
|
||||||
|
define x86_vectorcallcc %struct.HVA4 @test_mixed_6(%struct.HVA4 inreg %a, %struct.HVA4* %b) {
|
||||||
|
entry:
|
||||||
|
%retval = alloca %struct.HVA4, align 16
|
||||||
|
%0 = bitcast %struct.HVA4* %retval to i8*
|
||||||
|
%1 = bitcast %struct.HVA4* %b to i8*
|
||||||
|
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 64, i32 16, i1 false)
|
||||||
|
%2 = load %struct.HVA4, %struct.HVA4* %retval, align 16
|
||||||
|
ret %struct.HVA4 %2
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: test_mixed_6
|
||||||
|
; CHECK: movaps (%{{[re]}}sp), %xmm0
|
||||||
|
; CHECK: movaps 16(%{{[re]}}sp), %xmm1
|
||||||
|
; CHECK: movaps 32(%{{[re]}}sp), %xmm2
|
||||||
|
; CHECK: movaps 48(%{{[re]}}sp), %xmm3
|
||||||
|
; CHECK: ret{{[ql]}}
|
||||||
|
|
||||||
|
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
|
||||||
|
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
|
||||||
|
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32, i1)
|
||||||
|
|
||||||
|
define x86_vectorcallcc void @test_mixed_7(%struct.HVA5* noalias sret %agg.result) {
|
||||||
|
entry:
|
||||||
|
%a = alloca %struct.HVA5, align 16
|
||||||
|
%0 = bitcast %struct.HVA5* %a to i8*
|
||||||
|
call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 80, i32 16, i1 false)
|
||||||
|
%1 = bitcast %struct.HVA5* %agg.result to i8*
|
||||||
|
%2 = bitcast %struct.HVA5* %a to i8*
|
||||||
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 80, i32 16, i1 false)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: test_mixed_7
|
||||||
|
; CHECK: movaps %xmm{{[0-9]}}, 64(%{{rcx|eax}})
|
||||||
|
; CHECK: movaps %xmm{{[0-9]}}, 48(%{{rcx|eax}})
|
||||||
|
; CHECK: movaps %xmm{{[0-9]}}, 32(%{{rcx|eax}})
|
||||||
|
; CHECK: movaps %xmm{{[0-9]}}, 16(%{{rcx|eax}})
|
||||||
|
; CHECK: movaps %xmm{{[0-9]}}, (%{{rcx|eax}})
|
||||||
|
; X64: mov{{[ql]}} %rcx, %rax
|
||||||
|
; CHECK: ret{{[ql]}}
|
||||||
|
|
||||||
|
define x86_vectorcallcc <4 x float> @test_mixed_8(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 %e, <4 x float> %f) {
|
||||||
|
entry:
|
||||||
|
%f.addr = alloca <4 x float>, align 16
|
||||||
|
store <4 x float> %f, <4 x float>* %f.addr, align 16
|
||||||
|
%0 = load <4 x float>, <4 x float>* %f.addr, align 16
|
||||||
|
ret <4 x float> %0
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: test_mixed_8
|
||||||
|
; X86: movaps %xmm4, %xmm0
|
||||||
|
; X64: movaps %xmm5, %xmm0
|
||||||
|
; CHECK: ret{{[ql]}}
|
||||||
|
|
||||||
|
%struct.HFA4 = type { double, double, double, double }
|
||||||
|
declare x86_vectorcallcc double @test_mixed_9_callee(%struct.HFA4 %x, double %y)
|
||||||
|
|
||||||
|
define x86_vectorcallcc double @test_mixed_9_caller(%struct.HFA4 inreg %b) {
|
||||||
|
entry:
|
||||||
|
%call = call x86_vectorcallcc double @test_mixed_9_callee(%struct.HFA4 inreg %b, double 3.000000e+00)
|
||||||
|
%add = fadd double 1.000000e+00, %call
|
||||||
|
ret double %add
|
||||||
|
}
|
||||||
|
; CHECK-LABEL: test_mixed_9_caller
|
||||||
|
; CHECK: movaps %xmm3, %xmm4
|
||||||
|
; CHECK: movaps %xmm2, %xmm3
|
||||||
|
; CHECK: movaps %xmm1, %xmm2
|
||||||
|
; X32: movasd %xmm0, %xmm1
|
||||||
|
; X64: movapd %xmm5, %xmm1
|
||||||
|
; CHECK: call{{l|q}} test_mixed_9_callee@@40
|
||||||
|
; CHECK: addsd {{.*}}, %xmm0
|
||||||
|
; CHECK: ret{{l|q}}
|
||||||
|
|
Loading…
Reference in New Issue