forked from OSchip/llvm-project
880 lines
34 KiB
C++
880 lines
34 KiB
C++
//===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file implements the interfaces that VE uses to lower LLVM code into a
|
|
// selection DAG.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "VEISelLowering.h"
|
|
#include "MCTargetDesc/VEMCExpr.h"
|
|
#include "VEMachineFunctionInfo.h"
|
|
#include "VERegisterInfo.h"
|
|
#include "VETargetMachine.h"
|
|
#include "llvm/ADT/StringSwitch.h"
|
|
#include "llvm/CodeGen/CallingConvLower.h"
|
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
|
#include "llvm/CodeGen/MachineFunction.h"
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
#include "llvm/CodeGen/MachineModuleInfo.h"
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
#include "llvm/CodeGen/SelectionDAG.h"
|
|
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
|
|
#include "llvm/IR/DerivedTypes.h"
|
|
#include "llvm/IR/Function.h"
|
|
#include "llvm/IR/Module.h"
|
|
#include "llvm/Support/ErrorHandling.h"
|
|
#include "llvm/Support/KnownBits.h"
|
|
using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "ve-lower"
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Calling Convention Implementation
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
static bool allocateFloat(unsigned ValNo, MVT ValVT, MVT LocVT,
|
|
CCValAssign::LocInfo LocInfo,
|
|
ISD::ArgFlagsTy ArgFlags, CCState &State) {
|
|
switch (LocVT.SimpleTy) {
|
|
case MVT::f32: {
|
|
// Allocate stack like below
|
|
// 0 4
|
|
// +------+------+
|
|
// | empty| float|
|
|
// +------+------+
|
|
// Use align=8 for dummy area to align the beginning of these 2 area.
|
|
State.AllocateStack(4, 8); // for empty area
|
|
// Use align=4 for value to place it at just after the dummy area.
|
|
unsigned Offset = State.AllocateStack(4, 4); // for float value area
|
|
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
|
|
return true;
|
|
}
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
#include "VEGenCallingConv.inc"
|
|
|
|
/// Report whether the return values described by \p Outs can be assigned
/// locations by the RetCC_VE return convention.
bool VETargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  // Scratch state; the computed locations are discarded, only the verdict of
  // CheckReturn is used.
  SmallVector<CCValAssign, 16> Locs;
  CCState State(CallConv, IsVarArg, MF, Locs, Context);
  CCAssignFn *AssignFn = RetCC_VE;
  return State.CheckReturn(Outs, AssignFn);
}
|
|
|
|
/// Lower a function return.
///
/// Each value in \p OutVals is extended as requested by its RetCC_VE
/// location, copied into the assigned register, and the copies are glued
/// together in front of a VEISD::RET_FLAG node.
SDValue
VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                              bool IsVarArg,
                              const SmallVectorImpl<ISD::OutputArg> &Outs,
                              const SmallVectorImpl<SDValue> &OutVals,
                              const SDLoc &DL, SelectionDAG &DAG) const {
  // Locations assigned to the return values.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_VE);

  SDValue Glue;
  // Operand 0 is the chain; it is patched once all copies are emitted.
  SmallVector<SDValue, 4> RetOps(1, Chain);

  for (unsigned Idx = 0, NumLocs = RVLocs.size(); Idx != NumLocs; ++Idx) {
    CCValAssign &Loc = RVLocs[Idx];
    assert(Loc.isRegLoc() && "Can only return in registers!");
    SDValue Val = OutVals[Idx];

    // Integer return values must be sign or zero extended by the callee.
    const CCValAssign::LocInfo Info = Loc.getLocInfo();
    if (Info == CCValAssign::SExt)
      Val = DAG.getNode(ISD::SIGN_EXTEND, DL, Loc.getLocVT(), Val);
    else if (Info == CCValAssign::ZExt)
      Val = DAG.getNode(ISD::ZERO_EXTEND, DL, Loc.getLocVT(), Val);
    else if (Info == CCValAssign::AExt)
      Val = DAG.getNode(ISD::ANY_EXTEND, DL, Loc.getLocVT(), Val);
    else if (Info != CCValAssign::Full)
      llvm_unreachable("Unknown loc info!");

    assert(!Loc.needsCustom() && "Unexpected custom lowering");

    Chain = DAG.getCopyToReg(Chain, DL, Loc.getLocReg(), Val, Glue);

    // Thread the glue through so all emitted copies stay stuck together.
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(Loc.getLocReg(), Loc.getLocVT()));
  }

  // Install the final chain.
  RetOps[0] = Chain;

  // Append the glue only when at least one copy produced it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(VEISD::RET_FLAG, DL, MVT::Other, RetOps);
}
|
|
|
|
/// Lower the incoming (formal) arguments of the current function.
///
/// Register arguments become CopyFromReg nodes of a new live-in virtual
/// register (plus Assert?ext / TRUNCATE as the location requires); stack
/// arguments become loads from fixed frame objects. One value per assigned
/// location is appended to \p InVals. For variadic functions the frame
/// offset used by va_start is recorded in VEMachineFunctionInfo.
SDValue VETargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // Base offset of the incoming arguments stack space.
  const unsigned ArgsBaseOffset = 176;
  // Size of the preserved arguments area.
  const unsigned ArgsPreserved = 64;

  // Assign locations according to CC_VE. The preserved area is allocated up
  // front so that the stack offsets CC_VE computes already account for it.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AllocateStack(ArgsPreserved, 8);
  CCInfo.AnalyzeFormalArguments(Ins, CC_VE);

  for (CCValAssign &VA : ArgLocs) {
    if (!VA.isRegLoc()) {
      // The registers are exhausted; this argument was passed on the stack.
      assert(VA.isMemLoc());
      // Offsets from the calling convention are relative to the beginning of
      // the arguments area at %fp+176.
      const unsigned FrameOffset = VA.getLocMemOffset() + ArgsBaseOffset;
      const unsigned ByteSize = VA.getValVT().getSizeInBits() / 8;
      int FI = MF.getFrameInfo().CreateFixedObject(ByteSize, FrameOffset, true);
      InVals.push_back(
          DAG.getLoad(VA.getValVT(), DL, Chain,
                      DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
                      MachinePointerInfo::getFixedStack(MF, FI)));
      continue;
    }

    // This argument is passed in a register. Create a virtual register for
    // the (possibly promoted) live-in value and read it.
    unsigned VReg =
        MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT()));
    SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());

    // Get the high bits for i32 struct elements.
    if (VA.getValVT() == MVT::i32 && VA.needsCustom())
      Arg = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Arg,
                        DAG.getConstant(32, DL, MVT::i32));

    // The caller promoted the argument, so record that with an Assert?ext
    // node; this keeps the value from being promoted again in this function.
    const CCValAssign::LocInfo Info = VA.getLocInfo();
    if (Info == CCValAssign::SExt)
      Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
                        DAG.getValueType(VA.getValVT()));
    else if (Info == CCValAssign::ZExt)
      Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
                        DAG.getValueType(VA.getValVT()));

    // Truncate the register down to the argument type.
    if (VA.isExtInLoc())
      Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);

    InVals.push_back(Arg);
  }

  if (IsVarArg) {
    // Some variable arguments may have been passed in registers, and the
    // va_start intrinsic needs the offset to the first variable argument.
    // Every assigned location is counted as one 8-byte slot.
    // TODO: need to calculate offset correctly once we support f128.
    const unsigned ArgOffset = ArgLocs.size() * 8;
    VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
    // Skip the 176 bytes of register save area.
    FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);
  }

  return Chain;
}
|
|
|
|
// FIXME? Maybe this could be a TableGen attribute on some registers and
|
|
// this table could be generated automatically from RegInfo.
|
|
Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
|
|
const MachineFunction &MF) const {
|
|
Register Reg = StringSwitch<Register>(RegName)
|
|
.Case("sp", VE::SX11) // Stack pointer
|
|
.Case("fp", VE::SX9) // Frame pointer
|
|
.Case("sl", VE::SX8) // Stack limit
|
|
.Case("lr", VE::SX10) // Link regsiter
|
|
.Case("tp", VE::SX14) // Thread pointer
|
|
.Case("outer", VE::SX12) // Outer regiser
|
|
.Case("info", VE::SX17) // Info area register
|
|
.Case("got", VE::SX15) // Global offset table register
|
|
.Case("plt", VE::SX16) // Procedure linkage table register
|
|
.Default(0);
|
|
|
|
if (Reg)
|
|
return Reg;
|
|
|
|
report_fatal_error("Invalid register name global variable");
|
|
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// TargetLowering Implementation
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Lower an outgoing call.
///
/// The sequence built here is: CALLSEQ_START, stores of stack-passed
/// arguments, glued CopyToReg nodes for the callee address (SX12) and the
/// register arguments, the VEISD::CALL node, CALLSEQ_END, and finally
/// CopyFromReg nodes that fetch the results, which are appended to
/// \p InVals. For variadic calls every register argument is additionally
/// stored to the stack using the CC_VE2 assignment.
SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                    SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc DL = CLI.DL;
  SDValue Chain = CLI.Chain;
  MachineFunction &MF = DAG.getMachineFunction();
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // VE target does not yet support tail call optimization.
  CLI.IsTailCall = false;

  // Base offset of the outgoing arguments stack space.
  const unsigned ArgsBaseOffset = 176;
  // Size of the preserved arguments area.
  const unsigned ArgsPreserved = 8 * 8u;

  // Assign a location to every call operand. The preserved area is allocated
  // first so that the stack offsets computed by CC_VE account for it.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CLI.CallConv, CLI.IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AllocateStack(ArgsPreserved, 8);
  CCInfo.AnalyzeCallOperands(CLI.Outs, CC_VE);

  // VE requires to use both register and stack for varargs or no-prototyped
  // functions.
  const bool UseBoth = CLI.IsVarArg;

  // Second assignment (CC_VE2), consulted only when storing to BOTH.
  SmallVector<CCValAssign, 16> ArgLocs2;
  CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, MF, ArgLocs2, *DAG.getContext());
  if (UseBoth)
    CCInfo2.AnalyzeCallOperands(CLI.Outs, CC_VE2);

  // Outgoing argument area size, kept 16-byte aligned.
  unsigned ArgsSize = CCInfo.getNextStackOffset();
  ArgsSize = alignTo(ArgsSize, 16);

  // Adjust the stack pointer to make room for the arguments.
  // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
  // with more than 6 arguments.
  Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);

  // Registers to pass and their values; emitted later as CopyToReg nodes
  // glued to the call.
  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;

  // Chains of the stores that copy arguments to the stack. They must follow
  // the stack pointer adjustment above and precede the call itself.
  SmallVector<SDValue, 8> MemOpChains;

  // VE needs the address of the callee in a register, so prepare to copy it
  // to SX12. GlobalAddress / ExternalSymbol callees are turned into target
  // nodes so that legalization leaves them alone.
  SDValue Callee = CLI.Callee;

  const bool IsPICCall = isPositionIndependent();

  // PC-relative references to external symbols should go through $stub.
  // If so, we need to prepare GlobalBaseReg first.
  const TargetMachine &TM = DAG.getTarget();
  const Module *Mod = MF.getFunction().getParent();
  auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Callee);
  const GlobalValue *GV = CalleeG ? CalleeG->getGlobal() : nullptr;
  const bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
  const bool UsePlt = !Local;

  // Only look for an ExternalSymbol when the callee is not a GlobalAddress.
  ExternalSymbolSDNode *CalleeES =
      CalleeG ? nullptr : dyn_cast<ExternalSymbolSDNode>(Callee);

  // Turn the symbolic callee into a value node containing its address. Any
  // other kind of callee is used as is.
  if (CalleeG || CalleeES) {
    if (!IsPICCall) {
      Callee =
          makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
    } else {
      if (UsePlt)
        Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
      if (CalleeG)
        Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
      else
        Callee = DAG.getTargetExternalSymbol(CalleeES->getSymbol(), PtrVT, 0);
      Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
    }
  }

  RegsToPass.push_back(std::make_pair(VE::SX12, Callee));

  for (unsigned ArgIdx = 0, NumArgs = ArgLocs.size(); ArgIdx != NumArgs;
       ++ArgIdx) {
    // Location currently being materialized. Starts as the CC_VE location
    // and may switch to the CC_VE2 one below.
    const CCValAssign *Loc = &ArgLocs[ArgIdx];
    SDValue Arg = CLI.OutVals[ArgIdx];

    // Promote the value if needed.
    switch (Loc->getLocInfo()) {
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, Loc->getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, Loc->getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, Loc->getLocVT(), Arg);
      break;
    default:
      llvm_unreachable("Unknown location info!");
    }

    if (Loc->isRegLoc()) {
      RegsToPass.push_back(std::make_pair(Loc->getLocReg(), Arg));
      if (!UseBoth)
        continue;
      // Also store this register argument using its CC_VE2 location.
      Loc = &ArgLocs2[ArgIdx];
    }

    assert(Loc->isMemLoc());

    // Create a store off the stack pointer for this argument. The argument
    // area starts at %fp+176 in the callee frame, %sp+176 in ours.
    SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
    SDValue PtrOff =
        DAG.getIntPtrConstant(Loc->getLocMemOffset() + ArgsBaseOffset, DL);
    PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
    MemOpChains.push_back(
        DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));
  }

  // Emit all stores, make sure they occur before the call.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  // Copy the outgoing values into their registers. The copies are chained
  // and glued together so that the physical registers stay live up to the
  // call.
  SDValue InGlue;
  for (const auto &RegAndVal : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, RegAndVal.first, RegAndVal.second,
                             InGlue);
    InGlue = Chain.getValue(1);
  }

  // Operands of the call node: chain, then every register that is passed.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  for (const auto &RegAndVal : RegsToPass)
    Ops.push_back(
        DAG.getRegister(RegAndVal.first, RegAndVal.second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CLI.CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Glue the CopyToReg nodes to the call, which consumes the registers.
  if (InGlue.getNode())
    Ops.push_back(InGlue);

  // Now the call itself.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops);
  InGlue = Chain.getValue(1);

  // Revert the stack pointer immediately after the call.
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, DL, true),
                             DAG.getIntPtrConstant(0, DL, true), InGlue, DL);
  InGlue = Chain.getValue(1);

  // Now extract the return values. This is more or less the same as
  // LowerFormalArguments.

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RVInfo(CLI.CallConv, CLI.IsVarArg, MF, RVLocs, *DAG.getContext());

  // Set inreg flag manually for codegen generated library calls that
  // return float.
  if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CS)
    CLI.Ins[0].Flags.setInReg();

  RVInfo.AnalyzeCallResult(CLI.Ins, RetCC_VE);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned ResIdx = 0; ResIdx != RVLocs.size(); ++ResIdx) {
    CCValAssign &VA = RVLocs[ResIdx];
    unsigned Reg = VA.getLocReg();

    // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
    // reside in the same register in the high and low bits. Reuse the
    // previous CopyFromReg node in that case to avoid duplicate copies.
    SDValue RV;
    if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
      if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
        RV = Chain.getValue(0);

    // But usually we'll create a new CopyFromReg for a different register.
    if (!RV.getNode()) {
      RV = DAG.getCopyFromReg(Chain, DL, Reg, VA.getLocVT(), InGlue);
      Chain = RV.getValue(1);
      InGlue = Chain.getValue(2);
    }

    // Get the high bits for i32 struct elements.
    if (VA.getValVT() == MVT::i32 && VA.needsCustom())
      RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV,
                       DAG.getConstant(32, DL, MVT::i32));

    // The callee promoted the return value, so record that with an
    // Assert?ext node; this keeps the value from being promoted again in
    // this function.
    const CCValAssign::LocInfo Info = VA.getLocInfo();
    if (Info == CCValAssign::SExt)
      RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
                       DAG.getValueType(VA.getValVT()));
    else if (Info == CCValAssign::ZExt)
      RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
                       DAG.getValueType(VA.getValVT()));

    // Truncate the register down to the return value type.
    if (VA.isExtInLoc())
      RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);

    InVals.push_back(RV);
  }

  return Chain;
}
|
|
|
|
/// isFPImmLegal - Returns true if the target can instruction select the
|
|
/// specified FP immediate natively. If false, the legalizer will
|
|
/// materialize the FP immediate as a load from a constant pool.
|
|
bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
|
|
bool ForCodeSize) const {
|
|
return VT == MVT::f32 || VT == MVT::f64;
|
|
}
|
|
|
|
/// Determine if the target supports unaligned memory accesses.
|
|
///
|
|
/// This function returns true if the target allows unaligned memory accesses
|
|
/// of the specified type in the given address space. If true, it also returns
|
|
/// whether the unaligned memory access is "fast" in the last argument by
|
|
/// reference. This is used, for example, in situations where an array
|
|
/// copy/move/set is converted to a sequence of store operations. Its use
|
|
/// helps to ensure that such replacements don't generate code that causes an
|
|
/// alignment error (trap) on the target machine.
|
|
bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
|
|
unsigned AddrSpace,
|
|
unsigned Align,
|
|
MachineMemOperand::Flags,
|
|
bool *Fast) const {
|
|
if (Fast) {
|
|
// It's fast anytime on VE
|
|
*Fast = true;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/// Configure the lowering rules for VE: boolean representation, register
/// classes, and the legalization action of each operation that is not
/// natively legal.
VETargetLowering::VETargetLowering(const TargetMachine &TM,
                                   const VESubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  // Instructions which use registers as conditionals examine all the
  // bits (as does the pseudo SELECT_CC expansion). It should not matter much
  // whether ZeroOrOneBooleanContent or ZeroOrNegativeOneBooleanContent is
  // used, so the former is chosen arbitrarily.
  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrOneBooleanContent);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &VE::I32RegClass);
  addRegisterClass(MVT::i64, &VE::I64RegClass);
  addRegisterClass(MVT::f32, &VE::F32RegClass);
  addRegisterClass(MVT::f64, &VE::I64RegClass);

  /// Load & Store {
  for (MVT ValFPVT : MVT::fp_valuetypes()) {
    for (MVT MemFPVT : MVT::fp_valuetypes()) {
      // Turn FP extload into load/fpextend
      setLoadExtAction(ISD::EXTLOAD, ValFPVT, MemFPVT, Expand);
      // Turn FP truncstore into trunc + store.
      setTruncStoreAction(ValFPVT, MemFPVT, Expand);
    }
  }

  // VE doesn't have i1 sign extending load
  for (MVT VT : MVT::integer_valuetypes()) {
    for (unsigned ExtKind : {ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD})
      setLoadExtAction(ExtKind, VT, MVT::i1, Promote);
    setTruncStoreAction(VT, MVT::i1, Expand);
  }
  /// } Load & Store

  // Custom legalize address nodes into LO/HI parts.
  MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
  for (unsigned AddrOpc :
       {ISD::BlockAddress, ISD::GlobalAddress, ISD::GlobalTLSAddress})
    setOperationAction(AddrOpc, PtrVT, Custom);

  /// VAARG handling {
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  // VAARG needs to be lowered to access with 8 bytes alignment.
  setOperationAction(ISD::VAARG, MVT::Other, Custom);
  // Use the default implementation.
  for (unsigned VAOpc : {ISD::VACOPY, ISD::VAEND})
    setOperationAction(VAOpc, MVT::Other, Expand);
  /// } VAARG handling

  /// Int Ops {
  for (MVT IntVT : {MVT::i32, MVT::i64}) {
    // VE has no REM or DIVREM operations; CTTZ and the rotates are expanded
    // as well.
    for (unsigned ExpandOpc : {ISD::UREM, ISD::SREM, ISD::SDIVREM,
                               ISD::UDIVREM, ISD::CTTZ, ISD::ROTL, ISD::ROTR})
      setOperationAction(ExpandOpc, IntVT, Expand);

    // Use isel patterns for i32 and i64
    for (unsigned LegalOpc : {ISD::BSWAP, ISD::CTLZ, ISD::CTPOP})
      setOperationAction(LegalOpc, IntVT, Legal);

    // Use isel patterns for i64, Promote i32
    setOperationAction(ISD::BITREVERSE, IntVT,
                       (IntVT == MVT::i32) ? Promote : Legal);
  }
  /// } Int Ops

  /// Conversion {
  // VE doesn't have instructions for fp<->uint, so expand them by llvm
  for (unsigned ConvOpc : {ISD::FP_TO_UINT, ISD::UINT_TO_FP})
    setOperationAction(ConvOpc, MVT::i32, Promote); // use i64
  for (unsigned ConvOpc : {ISD::FP_TO_UINT, ISD::UINT_TO_FP})
    setOperationAction(ConvOpc, MVT::i64, Expand);

  // fp16 not supported
  for (MVT FPVT : MVT::fp_valuetypes()) {
    setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
    setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
  }
  /// } Conversion

  setStackPointerRegisterToSaveRestore(VE::SX11);

  // Set function alignment to 16 bytes
  setMinFunctionAlignment(Align(16));

  // VE stores all argument by 8 bytes alignment
  setMinStackArgumentAlignment(Align(8));

  computeRegisterProperties(Subtarget->getRegisterInfo());
}
|
|
|
|
const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|
#define TARGET_NODE_CASE(NAME) \
|
|
case VEISD::NAME: \
|
|
return "VEISD::" #NAME;
|
|
switch ((VEISD::NodeType)Opcode) {
|
|
case VEISD::FIRST_NUMBER:
|
|
break;
|
|
TARGET_NODE_CASE(Lo)
|
|
TARGET_NODE_CASE(Hi)
|
|
TARGET_NODE_CASE(GETFUNPLT)
|
|
TARGET_NODE_CASE(GETTLSADDR)
|
|
TARGET_NODE_CASE(CALL)
|
|
TARGET_NODE_CASE(RET_FLAG)
|
|
TARGET_NODE_CASE(GLOBAL_BASE_REG)
|
|
}
|
|
#undef TARGET_NODE_CASE
|
|
return nullptr;
|
|
}
|
|
|
|
/// Result type of SETCC nodes: always i32, independent of the operand type
/// \p VT.
EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
                                         EVT VT) const {
  const EVT ResultVT = MVT::i32;
  return ResultVT;
}
|
|
|
|
// Convert to a target node and set target flags.
|
|
SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
|
|
SelectionDAG &DAG) const {
|
|
if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
|
|
return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
|
|
GA->getValueType(0), GA->getOffset(), TF);
|
|
|
|
if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
|
|
return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
|
|
0, TF);
|
|
|
|
if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
|
|
return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
|
|
TF);
|
|
|
|
llvm_unreachable("Unhandled address SDNode");
|
|
}
|
|
|
|
// Split Op into high and low parts according to HiTF and LoTF.
|
|
// Return an ADD node combining the parts.
|
|
SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
|
|
SelectionDAG &DAG) const {
|
|
SDLoc DL(Op);
|
|
EVT VT = Op.getValueType();
|
|
SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
|
|
SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
|
|
return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
|
|
}
|
|
|
|
// Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
|
|
// or ExternalSymbol SDNode.
|
|
SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
|
|
SDLoc DL(Op);
|
|
EVT PtrVT = Op.getValueType();
|
|
|
|
// Handle PIC mode first. VE needs a got load for every variable!
|
|
if (isPositionIndependent()) {
|
|
// GLOBAL_BASE_REG codegen'ed with call. Inform MFI that this
|
|
// function has calls.
|
|
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
|
|
MFI.setHasCalls(true);
|
|
auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op);
|
|
|
|
if (isa<ConstantPoolSDNode>(Op) ||
|
|
(GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
|
|
// Create following instructions for local linkage PIC code.
|
|
// lea %s35, %gotoff_lo(.LCPI0_0)
|
|
// and %s35, %s35, (32)0
|
|
// lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35)
|
|
// adds.l %s35, %s15, %s35 ; %s15 is GOT
|
|
// FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15)
|
|
SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
|
|
VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
|
|
SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
|
|
return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
|
|
}
|
|
// Create following instructions for not local linkage PIC code.
|
|
// lea %s35, %got_lo(.LCPI0_0)
|
|
// and %s35, %s35, (32)0
|
|
// lea.sl %s35, %got_hi(.LCPI0_0)(%s35)
|
|
// adds.l %s35, %s15, %s35 ; %s15 is GOT
|
|
// ld %s35, (,%s35)
|
|
// FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15)
|
|
SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32,
|
|
VEMCExpr::VK_VE_GOT_LO32, DAG);
|
|
SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
|
|
SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
|
|
return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), AbsAddr,
|
|
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
|
|
}
|
|
|
|
// This is one of the absolute code models.
|
|
switch (getTargetMachine().getCodeModel()) {
|
|
default:
|
|
llvm_unreachable("Unsupported absolute code model");
|
|
case CodeModel::Small:
|
|
case CodeModel::Medium:
|
|
case CodeModel::Large:
|
|
// abs64.
|
|
return makeHiLoPair(Op, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
|
|
}
|
|
}
|
|
|
|
/// Custom Lower {
|
|
|
|
/// Custom lowering of ISD::GlobalAddress; delegates to makeAddress.
SDValue VETargetLowering::LowerGlobalAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue Address = makeAddress(Op, DAG);
  return Address;
}
|
|
|
|
/// Custom lowering of ISD::BlockAddress; delegates to makeAddress.
SDValue VETargetLowering::LowerBlockAddress(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDValue Address = makeAddress(Op, DAG);
  return Address;
}
|
|
|
|
/// Lower a TLS address with the general dynamic model.
///
/// Builds the node sequence
///     callseq_start (64, 0)
///     VEISD::GETTLSADDR label, regmask          ; produces chain and glue
///     callseq_end   (64, 0)
///     CopyFromReg   $sx0
/// and returns the CopyFromReg value. The function is also marked as having
/// calls, and in PIC mode the global base register is requested.
SDValue
VETargetLowering::LowerToTLSGeneralDynamicModel(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MachineFunction &MF = DAG.getMachineFunction();

  SDValue Label = withTargetFlags(Op, 0, DAG);
  EVT PtrVT = Op.getValueType();

  // Lowering the machine isd will make sure everything is in the right
  // location.
  SDValue Chain = DAG.getEntryNode();
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  const uint32_t *Mask =
      Subtarget->getRegisterInfo()->getCallPreservedMask(MF, CallingConv::C);

  Chain = DAG.getCALLSEQ_START(Chain, 64, 0, DL);
  SDValue RegMask = DAG.getRegisterMask(Mask);
  SDValue Args[] = {Chain, Label, RegMask, Chain.getValue(1)};
  Chain = DAG.getNode(VEISD::GETTLSADDR, DL, NodeTys, Args);

  SDValue CallGlue = Chain.getValue(1);
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(64, DL, true),
                             DAG.getIntPtrConstant(0, DL, true), CallGlue, DL);

  SDValue EndGlue = Chain.getValue(1);
  Chain = DAG.getCopyFromReg(Chain, DL, VE::SX0, PtrVT, EndGlue);

  // GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  MF.getFrameInfo().setHasCalls(true);

  // Also generate code to prepare a GOT register if it is PIC.
  if (isPositionIndependent())
    Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);

  return Chain;
}
|
|
|
|
/// Custom lowering of ISD::GlobalTLSAddress.
SDValue VETargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  // The current implementation of nld (2.26) doesn't accept the local exec
  // model code described in VE-tls_v1.1.pdf (*1) as input, so the general
  // dynamic model code sequence is always generated.
  //
  // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
  SDValue Address = LowerToTLSGeneralDynamicModel(Op, DAG);
  return Address;
}
|
|
|
|
/// Custom lowering of ISD::VASTART.
///
/// Stores the address "SX9 + VarArgsFrameOffset" into the memory location
/// given by operand 1; the offset is the one recorded in
/// VEMachineFunctionInfo.
SDValue VETargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MachineFunction &MF = DAG.getMachineFunction();
  VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // Need frame address to find the address of VarArgsFrameIndex.
  MF.getFrameInfo().setFrameAddressIsTaken(true);

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  SDValue FirstVarArgAddr =
      DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT),
                  DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));

  SDValue InChain = Op.getOperand(0);
  SDValue VAListPtr = Op.getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(InChain, DL, FirstVarArgAddr, VAListPtr,
                      MachinePointerInfo(SV));
}
|
|
|
|
/// Custom lowering of ISD::VAARG.
///
/// Loads the current va_list pointer, stores it back advanced by 8, and
/// loads the argument of type VT from the old pointer. An f32 argument is
/// read from offset 4 of its slot:
///     0      4
///     +------+------+
///     | empty| float|
///     +------+------+
SDValue VETargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  SDLoc DL(Node);
  EVT VT = Node->getValueType(0);
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  EVT PtrVT = VAListPtr.getValueType();
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();

  // Current position in the variable argument area.
  SDValue VAList =
      DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV));
  SDValue Chain = VAList.getValue(1);

  // Increment the pointer, VAList, by 8 to the next vaarg. This is the same
  // for every type.
  SDValue NextPtr =
      DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));

  if (VT == MVT::f32) {
    // float --> the value sits in the second half of the slot, so adjust
    // VAList to point at it.
    unsigned InternalOffset = 4;
    VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
                         DAG.getConstant(InternalOffset, DL, PtrVT));
  }

  // Store the incremented VAList to the legalized pointer.
  InChain = DAG.getStore(Chain, DL, NextPtr, VAListPtr, MachinePointerInfo(SV));

  // Load the actual argument out of the pointer VAList.
  // We can't count on greater alignment than the word size.
  const unsigned LoadAlign =
      std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8;
  return DAG.getLoad(VT, DL, InChain, VAList, MachinePointerInfo(), LoadAlign);
}
|
|
|
|
/// Entry point for operations marked Custom: dispatch on the opcode of
/// \p Op to the matching Lower* routine. Any other opcode is unreachable.
SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  const unsigned Opcode = Op.getOpcode();
  switch (Opcode) {
  case ISD::BlockAddress:
    return LowerBlockAddress(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return LowerGlobalTLSAddress(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);
  case ISD::VAARG:
    return LowerVAARG(Op, DAG);
  default:
    llvm_unreachable("Should not custom lower this!");
  }
}
|
|
/// } Custom Lower
|