[X86] RegCall - Handling v64i1 in 32/64 bit target
The register calling convention (regcall) defines new behavior for v64i1 types: values of this type are passed and returned in GPRs. However, on a 32 bit target the value must be split across two GPRs, because each GPR is only 32 bits wide.

Differential Revision: https://reviews.llvm.org/D26181

llvm-svn: 287217
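For orientation only (not part of the patch): the sketch below models, in plain C++, what the convention requires for a 64 bit mask on IA-32. The value travels as two 32 bit GPR-sized halves and is reassembled on the other side. The helper names are made up for illustration.

```cpp
#include <cstdint>
#include <utility>

// Split a 64 bit mask (the scalar view of v64i1 / __mmask64) into the two
// 32 bit halves that would occupy two GPRs under regcall on a 32 bit target.
static std::pair<uint32_t, uint32_t> splitMask64(uint64_t Mask) {
  uint32_t Lo = static_cast<uint32_t>(Mask);       // bits 0..31, first GPR
  uint32_t Hi = static_cast<uint32_t>(Mask >> 32); // bits 32..63, second GPR
  return {Lo, Hi};
}

// Reassemble the mask from the two halves, mirroring the callee/return side.
static uint64_t joinMask64(uint32_t Lo, uint32_t Hi) {
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}

int main() {
  uint64_t Mask = 0x8000000000000001ULL;
  auto Halves = splitMask64(Mask);
  return joinMask64(Halves.first, Halves.second) == Mask ? 0 : 1;
}
```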
@@ -54,6 +54,7 @@ set(sources
   X86VZeroUpper.cpp
   X86WinAllocaExpander.cpp
   X86WinEHState.cpp
+  X86CallingConv.cpp
   ${GLOBAL_ISEL_BUILD_FILES}
   )
@@ -0,0 +1,60 @@
//=== X86CallingConv.cpp - X86 Custom Calling Convention Impl -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of custom routines for the X86
// Calling Convention that aren't done by tablegen.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/IR/CallingConv.h"

namespace llvm {

bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                   CCValAssign::LocInfo &LocInfo,
                                   ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  // List of GPR registers that are available to store values in regcall
  // calling convention.
  static const MCPhysReg RegList[] = {X86::EAX, X86::ECX, X86::EDX, X86::EDI,
                                      X86::ESI};

  // The vector will save all the available registers for allocation.
  SmallVector<unsigned, 5> AvailableRegs;

  // Search for the available registers.
  for (auto Reg : RegList) {
    if (!State.isAllocated(Reg))
      AvailableRegs.push_back(Reg);
  }

  const size_t RequiredGprsUponSplit = 2;
  if (AvailableRegs.size() < RequiredGprsUponSplit)
    return false; // Not enough free registers - continue the search.

  // Allocate the available registers.
  for (unsigned I = 0; I < RequiredGprsUponSplit; I++) {

    // Mark the register as allocated.
    unsigned Reg = State.AllocateReg(AvailableRegs[I]);

    // Since we previously made sure that 2 registers are available
    // we expect that a real register number will be returned.
    assert(Reg && "Expecting a register will be available");

    // Assign the value to the allocated register.
    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  }

  // Successfully allocated registers - stop scanning next rules.
  return true;
}

} // End llvm namespace
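The rule above can be read as a greedy scan: collect the candidate GPRs {EAX, ECX, EDX, EDI, ESI} that are still free, succeed only if at least two are left, and otherwise fall through so the next calling-convention rule (for example, stack assignment) gets a chance. A minimal standalone model of that decision, with a hypothetical isAllocated predicate standing in for CCState, is shown below.

```cpp
#include <array>
#include <functional>
#include <optional>
#include <utility>
#include <vector>

enum class Gpr32 { EAX, ECX, EDX, EDI, ESI };

// Pick the first two free candidate GPRs for the split v64i1 value, or
// return nullopt so the caller can fall through to the next rule.
std::optional<std::pair<Gpr32, Gpr32>>
assignTwoRegs(const std::function<bool(Gpr32)> &isAllocated) {
  static const std::array<Gpr32, 5> Candidates = {
      Gpr32::EAX, Gpr32::ECX, Gpr32::EDX, Gpr32::EDI, Gpr32::ESI};

  std::vector<Gpr32> Free;
  for (Gpr32 R : Candidates)
    if (!isAllocated(R))
      Free.push_back(R);

  if (Free.size() < 2)
    return std::nullopt; // not enough free registers for the split
  return std::make_pair(Free[0], Free[1]);
}
```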
@@ -21,6 +21,14 @@

 namespace llvm {

+/// When the regcall calling convention is compiled for a 32 bit arch, special
+/// treatment is required for 64 bit masks: the value should be assigned to
+/// two GPRs.
+/// @return true if registers were allocated and false otherwise.
+bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                   CCValAssign::LocInfo &LocInfo,
+                                   ISD::ArgFlagsTy &ArgFlags, CCState &State);
+
 inline bool CC_X86_32_VectorCallIndirect(unsigned &ValNo, MVT &ValVT,
                                          MVT &LocVT,
                                          CCValAssign::LocInfo &LocInfo,
@@ -77,14 +77,19 @@ def CC_#NAME : CallingConv<[
     // bool, char, int, enum, long, pointer --> GPR
     CCIfType<[i32], CCAssignToReg<RC.GPR_32>>,

-    // TODO: Handle the case of mask types (v*i1)
-    // TODO: Handle the case of 32 bit machine with v64i1 argument
-    //       (split to 2 registers)
-    CCIfType<[v8i1, v16i1, v32i1, v64i1], CCCustom<"CC_X86_RegCall_Error">>,
-
     // long long, __int64 --> GPR
     CCIfType<[i64], CCAssignToReg<RC.GPR_64>>,

+    // __mmask64 (v64i1) --> GPR64 (for x64) or 2 x GPR32 (for IA32)
+    CCIfType<[v64i1], CCPromoteToType<i64>>,
+    CCIfSubtarget<"is64Bit()", CCIfType<[i64],
+                                        CCAssignToReg<RC.GPR_64>>>,
+    CCIfSubtarget<"is32Bit()", CCIfType<[i64],
+                                        CCCustom<"CC_X86_32_RegCall_Assign2Regs">>>,
+
+    // TODO: Handle the case of mask types (v*i1)
+    CCIfType<[v8i1, v16i1, v32i1], CCCustom<"CC_X86_RegCall_Error">>,
+
     // TODO: Handle the case of long double (f80)
     CCIfType<[f80], CCCustom<"CC_X86_RegCall_Error">>,

@@ -116,7 +121,7 @@ def CC_#NAME : CallingConv<[

     // In 32 bit, assign 64/32 bit values to 8/4 byte stack
     CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
-    CCIfType<[f64], CCAssignToStack<8, 4>>,
+    CCIfType<[i64, f64], CCAssignToStack<8, 4>>,

     // MMX type gets 8 byte slot in stack , while alignment depends on target
     CCIfSubtarget<"is64Bit()", CCIfType<[x86mmx], CCAssignToStack<8, 8>>>,

@@ -147,14 +152,19 @@ def RetCC_#NAME : CallingConv<[
     CCIfType<[i16], CCAssignToReg<RC.GPR_16>>,
     CCIfType<[i32], CCAssignToReg<RC.GPR_32>>,

-    // TODO: Handle the case of mask types (v*i1)
-    // TODO: Handle the case of 32 bit machine with v64i1 argument
-    //       (split to 2 registers)
-    CCIfType<[v8i1, v16i1, v32i1, v64i1], CCCustom<"CC_X86_RegCall_Error">>,
-
     // long long, __int64 --> GPR
     CCIfType<[i64], CCAssignToReg<RC.GPR_64>>,

+    // __mmask64 (v64i1) --> GPR64 (for x64) or 2 x GPR32 (for IA32)
+    CCIfType<[v64i1], CCPromoteToType<i64>>,
+    CCIfSubtarget<"is64Bit()", CCIfType<[i64],
+                                        CCAssignToReg<RC.GPR_64>>>,
+    CCIfSubtarget<"is32Bit()", CCIfType<[i64],
+                                        CCCustom<"CC_X86_32_RegCall_Assign2Regs">>>,
+
+    // TODO: Handle the case of mask types (v*i1)
+    CCIfType<[v8i1, v16i1, v32i1], CCCustom<"CC_X86_RegCall_Error">>,
+
     // long double --> FP
     CCIfType<[f80], CCAssignToReg<[FP0]>>,
@@ -2094,6 +2094,46 @@ const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
   return ScratchRegs;
 }

+/// Lowers mask values (v*i1) to the local register values
+static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
+                               const SDLoc &Dl, SelectionDAG &DAG) {
+  EVT ValVT = ValArg.getValueType();
+
+  if (ValVT == MVT::v64i1 && ValLoc == MVT::i64) {
+    // One stage lowering is required
+    // bitcast: v64i1 -> i64
+    return DAG.getBitcast(MVT::i64, ValArg);
+  } else
+    return DAG.getNode(ISD::SIGN_EXTEND, Dl, ValLoc, ValArg);
+}
+
+/// Breaks v64i1 value into two registers and adds the new node to the DAG
+static void Passv64i1ArgInRegs(
+    const SDLoc &Dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg,
+    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, CCValAssign &VA,
+    CCValAssign &NextVA, const X86Subtarget &Subtarget) {
+  assert((Subtarget.hasBWI() || Subtarget.hasBMI()) &&
+         "Expected AVX512BW or AVX512BMI target!");
+  assert(Subtarget.is32Bit() && "Expecting 32 bit target");
+  assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
+  assert(VA.isRegLoc() && NextVA.isRegLoc() &&
+         "The value should reside in two registers");
+
+  // Before splitting the value we cast it to i64
+  Arg = DAG.getBitcast(MVT::i64, Arg);
+
+  // Splitting the value into two i32 types
+  SDValue Lo, Hi;
+  Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
+                   DAG.getConstant(0, Dl, MVT::i32));
+  Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
+                   DAG.getConstant(1, Dl, MVT::i32));
+
+  // Attach the two i32 values to the corresponding registers
+  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
+  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
+}
+
 SDValue
 X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                bool isVarArg,
@@ -2118,10 +2158,11 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                      MVT::i32));

   // Copy the result values into the output registers.
-  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
-    CCValAssign &VA = RVLocs[i];
+  for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
+       ++I, ++OutsIndex) {
+    CCValAssign &VA = RVLocs[I];
     assert(VA.isRegLoc() && "Can only return in registers!");
-    SDValue ValToCopy = OutVals[i];
+    SDValue ValToCopy = OutVals[OutsIndex];
     EVT ValVT = ValToCopy.getValueType();

     // Promote values to the appropriate types.
@@ -2131,7 +2172,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
       ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
     else if (VA.getLocInfo() == CCValAssign::AExt) {
       if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
-        ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
+        ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
       else
         ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
     }
@@ -2184,9 +2225,27 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
       }
     }

-    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
-    Flag = Chain.getValue(1);
-    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+    SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+
+    if (VA.needsCustom()) {
+      assert(VA.getValVT() == MVT::v64i1 &&
+             "Currently the only custom case is when we split v64i1 to 2 regs");
+
+      Passv64i1ArgInRegs(dl, DAG, Chain, ValToCopy, RegsToPass, VA, RVLocs[++I],
+                         Subtarget);
+
+      assert(2 == RegsToPass.size() &&
+             "Expecting two registers after Pass64BitArgInRegs");
+    } else {
+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
+    }
+
+    // Add nodes to the DAG and add the values into the RetOps list
+    for (auto &Reg : RegsToPass) {
+      Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, Flag);
+      Flag = Chain.getValue(1);
+      RetOps.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
+    }
   }

   // Swift calling convention does not require we copy the sret argument
@@ -2314,6 +2373,83 @@ EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
   return VT.bitsLT(MinVT) ? MinVT : VT;
 }

+/// Reads two 32 bit registers and creates a 64 bit mask value.
+/// @param VA The current 32 bit value that needs to be assigned.
+/// @param NextVA The next 32 bit value that needs to be assigned.
+/// @param Root The parent DAG node.
+/// @param [in,out] InFlag Represents the SDValue in the parent DAG node used
+///                        for glue purposes. In case the DAG is already using
+///                        a physical register instead of a virtual one, we
+///                        should glue our new SDValue to the InFlag SDValue.
+/// @return a new SDValue of size 64 bit.
+static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
+                                SDValue &Root, SelectionDAG &DAG,
+                                const SDLoc &Dl, const X86Subtarget &Subtarget,
+                                SDValue *InFlag = nullptr) {
+  assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
+  assert(Subtarget.is32Bit() && "Expecting 32 bit target");
+  assert(VA.getValVT() == MVT::v64i1 &&
+         "Expecting first location of 64 bit width type");
+  assert(NextVA.getValVT() == VA.getValVT() &&
+         "The locations should have the same type");
+  assert(VA.isRegLoc() && NextVA.isRegLoc() &&
+         "The values should reside in two registers");
+
+  SDValue Lo, Hi;
+  unsigned Reg;
+  SDValue ArgValueLo, ArgValueHi;
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  const TargetRegisterClass *RC = &X86::GR32RegClass;
+
+  // Read a 32 bit value from the registers
+  if (nullptr == InFlag) {
+    // When no physical register is present,
+    // create an intermediate virtual register
+    Reg = MF.addLiveIn(VA.getLocReg(), RC);
+    ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
+    Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
+    ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
+  } else {
+    // When a physical register is available read the value from it and glue
+    // the reads together.
+    ArgValueLo =
+        DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
+    *InFlag = ArgValueLo.getValue(2);
+    ArgValueHi =
+        DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
+    *InFlag = ArgValueHi.getValue(2);
+  }
+
+  // Convert the low i32 value into a v32i1 value
+  Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
+
+  // Convert the high i32 value into a v32i1 value
+  Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
+
+  // Concatenate the two values together
+  return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
+}
+
+static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
+                               const EVT &ValLoc, const SDLoc &Dl,
+                               SelectionDAG &DAG) {
+  assert((ValLoc == MVT::i64 || ValLoc == MVT::i32) &&
+         "Expecting register location of size 32/64 bit");
+
+  // Currently not referenced - will be used in other mask lowering
+  (void)Dl;
+
+  // In the case of v64i1 no special handling is required due to two reasons:
+  // In a 32 bit machine, this case is handled by getv64i1Argument
+  // In a 64 bit machine, there is no need to truncate the value, only bitcast
+  if (ValVT == MVT::v64i1 && ValLoc == MVT::i32) {
+    llvm_unreachable("Expecting only i64 locations");
+  }
+
+  return DAG.getBitcast(ValVT, ValArg);
+}
+
 /// Lower the result values of a call into the
 /// appropriate copies out of appropriate physical registers.
 ///
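getv64i1Argument above performs the callee-side join: each 32 bit half is bitcast to v32i1 and the two halves are concatenated into a v64i1, which AVX512BW can express with kunpckdq (see the checks in the new test at the end of this commit). A small std::bitset model of that concatenation, for illustration only:

```cpp
#include <bitset>

// Model of CONCAT_VECTORS(v32i1 Lo, v32i1 Hi) -> v64i1: the low half keeps
// bits 0..31 and the high half supplies bits 32..63.
std::bitset<64> concatMasks(const std::bitset<32> &Lo,
                            const std::bitset<32> &Hi) {
  std::bitset<64> Result(Lo.to_ullong());
  Result |= std::bitset<64>(Hi.to_ullong()) << 32;
  return Result;
}
```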
@@ -2330,13 +2466,14 @@ SDValue X86TargetLowering::LowerCallResult(
   CCInfo.AnalyzeCallResult(Ins, RetCC_X86);

   // Copy all of the result registers out of their specified physreg.
-  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
-    CCValAssign &VA = RVLocs[i];
+  for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
+       ++I, ++InsIndex) {
+    CCValAssign &VA = RVLocs[I];
     EVT CopyVT = VA.getLocVT();

     // If this is x86-64, and we disabled SSE, we can't return FP values
     if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
-        ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget.hasSSE1())) {
+        ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
       report_fatal_error("SSE register return with SSE disabled");
     }
@@ -2351,19 +2488,33 @@ SDValue X86TargetLowering::LowerCallResult(
       RoundAfterCopy = (CopyVT != VA.getLocVT());
     }

-    Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
-                               CopyVT, InFlag).getValue(1);
-    SDValue Val = Chain.getValue(0);
+    SDValue Val;
+    if (VA.needsCustom()) {
+      assert(VA.getValVT() == MVT::v64i1 &&
+             "Currently the only custom case is when we split v64i1 to 2 regs");
+      Val =
+          getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
+    } else {
+      Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
+                  .getValue(1);
+      Val = Chain.getValue(0);
+      InFlag = Chain.getValue(2);
+    }

     if (RoundAfterCopy)
       Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
                         // This truncation won't change the value.
                         DAG.getIntPtrConstant(1, dl));

-    if (VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1)
-      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+    if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) {
+      if (VA.getValVT().isVector() &&
+          (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::i64)) {
+        // promoting a mask type (v*i1) into a register of type i64/i32
+        Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
+      } else
+        Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+    }

-    InFlag = Chain.getValue(2);
     InVals.push_back(Val);
   }
@@ -2431,7 +2582,8 @@ static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
 /// Return true if the calling convention is one that we can guarantee TCO for.
 static bool canGuaranteeTCO(CallingConv::ID CC) {
   return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
-          CC == CallingConv::HiPE || CC == CallingConv::HHVM);
+          CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
+          CC == CallingConv::HHVM);
 }

 /// Return true if we might ever do TCO for calls with this calling convention.
@@ -2486,9 +2638,11 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
   EVT ValVT;

   // If value is passed by pointer we have address passed instead of the value
-  // itself.
-  bool ExtendedInMem = VA.isExtInLoc() &&
-                       VA.getValVT().getScalarType() == MVT::i1;
+  // itself. No need to extend if the mask value and location share the same
+  // absolute size.
+  bool ExtendedInMem =
+      VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
+      VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();

   if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
     ValVT = VA.getLocVT();
@@ -2612,8 +2766,9 @@ SDValue X86TargetLowering::LowerFormalArguments(
   bool Is64Bit = Subtarget.is64Bit();
   bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);

-  assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
-         "Var args not supported with calling convention fastcc, ghc or hipe");
+  assert(
+      !(isVarArg && canGuaranteeTCO(CallConv)) &&
+      "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");

   if (CallConv == CallingConv::X86_INTR) {
     bool isLegal = Ins.size() == 1 ||
@@ -2633,53 +2788,59 @@ SDValue X86TargetLowering::LowerFormalArguments(

   CCInfo.AnalyzeFormalArguments(Ins, CC_X86);

-  unsigned LastVal = ~0U;
   SDValue ArgValue;
-  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
-    CCValAssign &VA = ArgLocs[i];
-    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
-    // places.
-    assert(VA.getValNo() != LastVal &&
-           "Don't support value assigned to multiple locs yet");
-    (void)LastVal;
-    LastVal = VA.getValNo();
+  for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
+       ++I, ++InsIndex) {
+    assert(InsIndex < Ins.size() && "Invalid Ins index");
+    CCValAssign &VA = ArgLocs[I];

     if (VA.isRegLoc()) {
       EVT RegVT = VA.getLocVT();
-      const TargetRegisterClass *RC;
-      if (RegVT == MVT::i32)
-        RC = &X86::GR32RegClass;
-      else if (Is64Bit && RegVT == MVT::i64)
-        RC = &X86::GR64RegClass;
-      else if (RegVT == MVT::f32)
-        RC = &X86::FR32RegClass;
-      else if (RegVT == MVT::f64)
-        RC = &X86::FR64RegClass;
-      else if (RegVT == MVT::f128)
-        RC = &X86::FR128RegClass;
-      else if (RegVT.is512BitVector())
-        RC = &X86::VR512RegClass;
-      else if (RegVT.is256BitVector())
-        RC = &X86::VR256RegClass;
-      else if (RegVT.is128BitVector())
-        RC = &X86::VR128RegClass;
-      else if (RegVT == MVT::x86mmx)
-        RC = &X86::VR64RegClass;
-      else if (RegVT == MVT::i1)
-        RC = &X86::VK1RegClass;
-      else if (RegVT == MVT::v8i1)
-        RC = &X86::VK8RegClass;
-      else if (RegVT == MVT::v16i1)
-        RC = &X86::VK16RegClass;
-      else if (RegVT == MVT::v32i1)
-        RC = &X86::VK32RegClass;
-      else if (RegVT == MVT::v64i1)
-        RC = &X86::VK64RegClass;
-      else
-        llvm_unreachable("Unknown argument type!");
-
-      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
-      ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+      if (VA.needsCustom()) {
+        assert(
+            VA.getValVT() == MVT::v64i1 &&
+            "Currently the only custom case is when we split v64i1 to 2 regs");
+
+        // v64i1 values, in regcall calling convention, that are
+        // compiled to 32 bit arch, are split up into two registers.
+        ArgValue =
+            getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
+      } else {
+        const TargetRegisterClass *RC;
+        if (RegVT == MVT::i32)
+          RC = &X86::GR32RegClass;
+        else if (Is64Bit && RegVT == MVT::i64)
+          RC = &X86::GR64RegClass;
+        else if (RegVT == MVT::f32)
+          RC = &X86::FR32RegClass;
+        else if (RegVT == MVT::f64)
+          RC = &X86::FR64RegClass;
+        else if (RegVT == MVT::f128)
+          RC = &X86::FR128RegClass;
+        else if (RegVT.is512BitVector())
+          RC = &X86::VR512RegClass;
+        else if (RegVT.is256BitVector())
+          RC = &X86::VR256RegClass;
+        else if (RegVT.is128BitVector())
+          RC = &X86::VR128RegClass;
+        else if (RegVT == MVT::x86mmx)
+          RC = &X86::VR64RegClass;
+        else if (RegVT == MVT::i1)
+          RC = &X86::VK1RegClass;
+        else if (RegVT == MVT::v8i1)
+          RC = &X86::VK8RegClass;
+        else if (RegVT == MVT::v16i1)
+          RC = &X86::VK16RegClass;
+        else if (RegVT == MVT::v32i1)
+          RC = &X86::VK32RegClass;
+        else if (RegVT == MVT::v64i1)
+          RC = &X86::VK64RegClass;
+        else
+          llvm_unreachable("Unknown argument type!");
+
+        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+      }

       // If this is an 8 or 16-bit value, it is really passed promoted to 32
       // bits. Insert an assert[sz]ext to capture this, then truncate to the
@@ -2697,12 +2858,18 @@ SDValue X86TargetLowering::LowerFormalArguments(
         // Handle MMX values passed in XMM regs.
         if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
           ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
-        else
+        else if (VA.getValVT().isVector() &&
+                 VA.getValVT().getScalarType() == MVT::i1 &&
+                 ((RegVT == MVT::i32) || (RegVT == MVT::i64))) {
+          // Promoting a mask type (v*i1) into a register of type i64/i32
+          ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
+        } else
           ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
       }
     } else {
       assert(VA.isMemLoc());
-      ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
+      ArgValue =
+          LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
     }

     // If value is passed via pointer - do a load.
@@ -2713,7 +2880,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
     InVals.push_back(ArgValue);
   }

-  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+  for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
     // Swift calling convention does not require we copy the sret argument
     // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
     if (CallConv == CallingConv::Swift)
@@ -2723,14 +2890,14 @@ SDValue X86TargetLowering::LowerFormalArguments(
     // sret argument into %rax/%eax (depending on ABI) for the return. Save
     // the argument into a virtual register so that we can access it from the
     // return points.
-    if (Ins[i].Flags.isSRet()) {
+    if (Ins[I].Flags.isSRet()) {
       unsigned Reg = FuncInfo->getSRetReturnReg();
       if (!Reg) {
         MVT PtrTy = getPointerTy(DAG.getDataLayout());
         Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
         FuncInfo->setSRetReturnReg(Reg);
       }
-      SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]);
+      SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
       break;
     }
@@ -3122,15 +3289,17 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   // Walk the register/memloc assignments, inserting copies/loads. In the case
   // of tail call optimization arguments are handle later.
   const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
-  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+  for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
+       ++I, ++OutIndex) {
+    assert(OutIndex < Outs.size() && "Invalid Out index");
     // Skip inalloca arguments, they have already been written.
-    ISD::ArgFlagsTy Flags = Outs[i].Flags;
+    ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
     if (Flags.isInAlloca())
       continue;

-    CCValAssign &VA = ArgLocs[i];
+    CCValAssign &VA = ArgLocs[I];
     EVT RegVT = VA.getLocVT();
-    SDValue Arg = OutVals[i];
+    SDValue Arg = OutVals[OutIndex];
     bool isByVal = Flags.isByVal();

     // Promote the value if needed.
@@ -3146,7 +3315,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     case CCValAssign::AExt:
       if (Arg.getValueType().isVector() &&
           Arg.getValueType().getVectorElementType() == MVT::i1)
-        Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
+        Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
       else if (RegVT.is128BitVector()) {
         // Special case: passing MMX values in XMM registers.
         Arg = DAG.getBitcast(MVT::i64, Arg);
@@ -3170,7 +3339,13 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
       }
     }

-    if (VA.isRegLoc()) {
+    if (VA.needsCustom()) {
+      assert(VA.getValVT() == MVT::v64i1 &&
+             "Currently the only custom case is when we split v64i1 to 2 regs");
+      // Split v64i1 value into two registers
+      Passv64i1ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++I],
+                         Subtarget);
+    } else if (VA.isRegLoc()) {
       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
       if (isVarArg && IsWin64) {
         // Win64 ABI requires argument XMM reg to be copied to the corresponding
@@ -3270,13 +3445,25 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     SmallVector<SDValue, 8> MemOpChains2;
     SDValue FIN;
     int FI = 0;
-    for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
-      CCValAssign &VA = ArgLocs[i];
-      if (VA.isRegLoc())
+    for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
+         ++I, ++OutsIndex) {
+      CCValAssign &VA = ArgLocs[I];
+
+      if (VA.isRegLoc()) {
+        if (VA.needsCustom()) {
+          assert((CallConv == CallingConv::X86_RegCall) &&
+                 "Expecting custom case only in regcall calling convention");
+          // This means that we are in special case where one argument was
+          // passed through two register locations - Skip the next location
+          ++I;
+        }
+
         continue;
+      }
+
       assert(VA.isMemLoc());
-      SDValue Arg = OutVals[i];
-      ISD::ArgFlagsTy Flags = Outs[i].Flags;
+      SDValue Arg = OutVals[OutsIndex];
+      ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
       // Skip inalloca arguments. They don't require any work.
       if (Flags.isInAlloca())
         continue;
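Because a split v64i1 argument occupies two consecutive CCValAssign locations while corresponding to a single entry in Outs/Ins, the loops in the hunks above keep two indices and advance the location index an extra time whenever a custom location is seen. A standalone sketch of that bookkeeping (with a hypothetical Loc type, not LLVM's CCValAssign):

```cpp
#include <cstddef>
#include <vector>

struct Loc {
  bool NeedsCustom; // first of the two locations of a split v64i1 value
};

// For every original argument, count how many location entries it consumed.
// A custom location uses two consecutive entries in Locs but maps back to a
// single source argument, so the location index is advanced an extra time.
std::vector<int> locationsPerArgument(const std::vector<Loc> &Locs) {
  std::vector<int> Counts;
  for (std::size_t I = 0; I != Locs.size(); ++I) {
    int Used = 1;
    if (Locs[I].NeedsCustom) {
      ++I; // the next location belongs to the same argument - skip it
      Used = 2;
    }
    Counts.push_back(Used);
  }
  return Counts;
}
```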
@@ -0,0 +1,195 @@
; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+avx512bw | FileCheck --check-prefix=X32 %s
; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+avx512bw | FileCheck --check-prefix=WIN64 %s
; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+avx512bw | FileCheck --check-prefix=LINUXOSX64 %s

; X32-LABEL: test_argv64i1:
; X32: kmovd %edx, %k0
; X32: kmovd %edi, %k1
; X32: kmovd %eax, %k1
; X32: kmovd %ecx, %k2
; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
; X32: ad{{d|c}}l {{([0-9])*}}(%ebp), %e{{a|c}}x
; X32: retl

; WIN64-LABEL: test_argv64i1:
; WIN64: addq %rcx, %rax
; WIN64: addq %rdx, %rax
; WIN64: addq %rdi, %rax
; WIN64: addq %rsi, %rax
; WIN64: addq %r8, %rax
; WIN64: addq %r9, %rax
; WIN64: addq %r10, %rax
; WIN64: addq %r11, %rax
; WIN64: addq %r12, %rax
; WIN64: addq %r14, %rax
; WIN64: addq %r15, %rax
; WIN64: addq {{([0-9])*}}(%rsp), %rax
; WIN64: retq

; LINUXOSX64-LABEL: test_argv64i1:
; LINUXOSX64: addq %rcx, %rax
; LINUXOSX64: addq %rdx, %rax
; LINUXOSX64: addq %rdi, %rax
; LINUXOSX64: addq %rsi, %rax
; LINUXOSX64: addq %r8, %rax
; LINUXOSX64: addq %r9, %rax
; LINUXOSX64: addq %r12, %rax
; LINUXOSX64: addq %r13, %rax
; LINUXOSX64: addq %r14, %rax
; LINUXOSX64: addq %r15, %rax
; LINUXOSX64: addq {{([0-9])*}}(%rsp), %rax
; LINUXOSX64: addq {{([0-9])*}}(%rsp), %rax
; LINUXOSX64: retq

; Test regcall when receiving arguments of v64i1 type
define x86_regcallcc i64 @test_argv64i1(<64 x i1> %x0, <64 x i1> %x1, <64 x i1> %x2,
                                         <64 x i1> %x3, <64 x i1> %x4, <64 x i1> %x5,
                                         <64 x i1> %x6, <64 x i1> %x7, <64 x i1> %x8,
                                         <64 x i1> %x9, <64 x i1> %x10, <64 x i1> %x11,
                                         <64 x i1> %x12) {
  %y0 = bitcast <64 x i1> %x0 to i64
  %y1 = bitcast <64 x i1> %x1 to i64
  %y2 = bitcast <64 x i1> %x2 to i64
  %y3 = bitcast <64 x i1> %x3 to i64
  %y4 = bitcast <64 x i1> %x4 to i64
  %y5 = bitcast <64 x i1> %x5 to i64
  %y6 = bitcast <64 x i1> %x6 to i64
  %y7 = bitcast <64 x i1> %x7 to i64
  %y8 = bitcast <64 x i1> %x8 to i64
  %y9 = bitcast <64 x i1> %x9 to i64
  %y10 = bitcast <64 x i1> %x10 to i64
  %y11 = bitcast <64 x i1> %x11 to i64
  %y12 = bitcast <64 x i1> %x12 to i64
  %add1 = add i64 %y0, %y1
  %add2 = add i64 %add1, %y2
  %add3 = add i64 %add2, %y3
  %add4 = add i64 %add3, %y4
  %add5 = add i64 %add4, %y5
  %add6 = add i64 %add5, %y6
  %add7 = add i64 %add6, %y7
  %add8 = add i64 %add7, %y8
  %add9 = add i64 %add8, %y9
  %add10 = add i64 %add9, %y10
  %add11 = add i64 %add10, %y11
  %add12 = add i64 %add11, %y12
  ret i64 %add12
}

; X32-LABEL: caller_argv64i1:
; X32: movl $2, %eax
; X32: movl $1, %ecx
; X32: movl $2, %edx
; X32: movl $1, %edi
; X32: pushl ${{1|2}}
; X32: pushl ${{1|2}}
; X32: pushl ${{1|2}}
; X32: pushl ${{1|2}}
; X32: pushl ${{1|2}}
; X32: pushl ${{1|2}}
; X32: pushl ${{1|2}}
; X32: pushl ${{1|2}}
; X32: pushl ${{1|2}}
; X32: pushl ${{1|2}}
; X32: pushl ${{1|2}}
; X32: pushl ${{1|2}}
; X32: pushl ${{1|2}}
; X32: pushl ${{1|2}}
; X32: pushl ${{1|2}}
; X32: pushl ${{1|2}}
; X32: pushl ${{1|2}}
; X32: pushl ${{1|2}}
; X32: pushl ${{1|2}}
; X32: pushl ${{1|2}}
; X32: pushl ${{1|2}}
; X32: pushl ${{1|2}}
; X32: call{{.*}} _test_argv64i1

; WIN64-LABEL: caller_argv64i1:
; WIN64: movabsq $4294967298, %rax
; WIN64: movq %rax, (%rsp)
; WIN64: movq %rax, %rcx
; WIN64: movq %rax, %rdx
; WIN64: movq %rax, %rdi
; WIN64: movq %rax, %rsi
; WIN64: movq %rax, %r8
; WIN64: movq %rax, %r9
; WIN64: movq %rax, %r10
; WIN64: movq %rax, %r11
; WIN64: movq %rax, %r12
; WIN64: movq %rax, %r14
; WIN64: movq %rax, %r15
; WIN64: callq test_argv64i1

; LINUXOSX64-LABEL: caller_argv64i1:
; LINUXOSX64: movabsq $4294967298, %rax
; LINUXOSX64: movq %rax, %rcx
; LINUXOSX64: movq %rax, %rdx
; LINUXOSX64: movq %rax, %rdi
; LINUXOSX64: movq %rax, %rsi
; LINUXOSX64: movq %rax, %r8
; LINUXOSX64: movq %rax, %r9
; LINUXOSX64: movq %rax, %r12
; LINUXOSX64: movq %rax, %r13
; LINUXOSX64: movq %rax, %r14
; LINUXOSX64: movq %rax, %r15
; LINUXOSX64: call{{.*}} test_argv64i1

; Test regcall when passing arguments of v64i1 type
define x86_regcallcc i64 @caller_argv64i1() #0 {
entry:
  %v0 = bitcast i64 4294967298 to <64 x i1>
  %call = call x86_regcallcc i64 @test_argv64i1(<64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
                                                <64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
                                                <64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
                                                <64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
                                                <64 x i1> %v0)
  ret i64 %call
}

; X32-LABEL: test_retv64i1:
; X32: mov{{.*}} $2, %eax
; X32: mov{{.*}} $1, %ecx
; X32: ret{{.*}}

; WIN64-LABEL: test_retv64i1:
; WIN64: mov{{.*}} $4294967298, %rax
; WIN64: ret{{.*}}

; Test regcall when returning v64i1 type
define x86_regcallcc <64 x i1> @test_retv64i1() {
  %a = bitcast i64 4294967298 to <64 x i1>
  ret <64 x i1> %a
}

; X32-LABEL: caller_retv64i1:
; X32: call{{.*}} _test_retv64i1
; X32: kmov{{.*}} %eax, %k0
; X32: kmov{{.*}} %ecx, %k1
; X32: kunpckdq %k0, %k1, %k0

; Test regcall when processing result of v64i1 type
define x86_regcallcc <64 x i1> @caller_retv64i1() #0 {
entry:
  %call = call x86_regcallcc <64 x i1> @test_retv64i1()
  ret <64 x i1> %call
}