forked from OSchip/llvm-project
469 lines
17 KiB
C++
469 lines
17 KiB
C++
//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISCV ------===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file defines an instruction selector for the RISCV target.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "RISCV.h"
|
|
#include "MCTargetDesc/RISCVMCTargetDesc.h"
|
|
#include "RISCVTargetMachine.h"
|
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
|
#include "llvm/CodeGen/SelectionDAGISel.h"
|
|
#include "llvm/Support/Debug.h"
|
|
#include "llvm/Support/MathExtras.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "riscv-isel"
|
|
|
|
// RISCV-specific code to select RISCV machine instructions for
|
|
// SelectionDAG operations.
|
|
namespace {
|
|
/// SelectionDAG-based instruction selector for RISCV. Nodes that the
/// TableGen-generated matcher does not handle are selected by hand in
/// Select(); PostprocessISelDAG() then runs a handful of peepholes over the
/// selected DAG.
class RISCVDAGToDAGISel final : public SelectionDAGISel {
  // Subtarget of the function currently being selected. It is (re)assigned at
  // the top of runOnMachineFunction and stays null until then, so an
  // accidental early use dereferences null instead of an indeterminate
  // pointer.
  const RISCVSubtarget *Subtarget = nullptr;

public:
  explicit RISCVDAGToDAGISel(RISCVTargetMachine &TargetMachine)
      : SelectionDAGISel(TargetMachine) {}

  StringRef getPassName() const override {
    return "RISCV DAG->DAG Pattern Instruction Selection";
  }

  // Caches the function's subtarget before delegating to the generic driver.
  bool runOnMachineFunction(MachineFunction &MF) override {
    Subtarget = &MF.getSubtarget<RISCVSubtarget>();
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  void PostprocessISelDAG() override;

  void Select(SDNode *Node) override;

  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Matches a frame-index address; on success Base receives the equivalent
  // target frame index.
  bool SelectAddrFI(SDValue Addr, SDValue &Base);

// Include the pieces autogenerated from the target description.
#include "RISCVGenDAGISel.inc"

private:
  // Peepholes run by PostprocessISelDAG().
  void doPeepholeLoadStoreADDI();
  void doPeepholeGlobalAddiLuiOffset();
  void doPeepholeBuildPairF64SplitF64();
};
|
|
}
|
|
|
|
// Override of the SelectionDAGISel post-processing hook: runs the RISCV
// peepholes, always in this order.
void RISCVDAGToDAGISel::PostprocessISelDAG() {
  // Fold ADDI immediates into the offset field of loads and stores.
  doPeepholeLoadStoreADDI();

  // Fold constant offsets into "LUI %hi(global); ADDI %lo(global)" sequences.
  doPeepholeGlobalAddiLuiOffset();

  // Drop SplitF64 nodes that merely undo a BuildPairF64.
  doPeepholeBuildPairF64SplitF64();
}
|
|
|
|
// Selects a machine instruction for Node. A few opcodes are handled by hand
// here; everything else is forwarded to the TableGen-generated SelectCode().
void RISCVDAGToDAGISel::Select(SDNode *Node) {
  // Nodes that already carry a machine opcode were selected earlier; just
  // mark them as done.
  if (Node->isMachineOpcode()) {
    LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
    Node->setNodeId(-1);
    return;
  }

  const MVT XLenVT = Subtarget->getXLenVT();
  const EVT VT = Node->getValueType(0);

  switch (Node->getOpcode()) {
  case ISD::Constant: {
    // Only XLen-wide zero constants get special treatment; any other
    // constant goes through the generated matcher below.
    if (VT != XLenVT || !cast<ConstantSDNode>(Node)->isNullValue())
      break;
    // Materialize zero as a copy from X0 so the coalescer can propagate it
    // into the using instructions.
    SDValue ZeroCopy = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                              SDLoc(Node), RISCV::X0, XLenVT);
    ReplaceNode(Node, ZeroCopy.getNode());
    return;
  }
  case ISD::FrameIndex: {
    // A frame index becomes "ADDI <target frame index>, 0".
    SDLoc DL(Node);
    SDValue ZeroImm = CurDAG->getTargetConstant(0, DL, XLenVT);
    const int FrameIdx = cast<FrameIndexSDNode>(Node)->getIndex();
    SDValue TargetFI = CurDAG->getTargetFrameIndex(FrameIdx, VT);
    ReplaceNode(Node,
                CurDAG->getMachineNode(RISCV::ADDI, DL, VT, TargetFI, ZeroImm));
    return;
  }
  default:
    break;
  }

  // Everything else is handled by the auto-generated selection code.
  SelectCode(Node);
}
|
|
|
|
bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
|
|
const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
|
|
switch (ConstraintID) {
|
|
case InlineAsm::Constraint_i:
|
|
case InlineAsm::Constraint_m:
|
|
// We just support simple memory operands that have a single address
|
|
// operand and need no special handling.
|
|
OutOps.push_back(Op);
|
|
return false;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
// Matches Addr when it is a frame index. On success Base is set to the
// corresponding XLen-typed target frame index and true is returned; otherwise
// Base is left untouched and false is returned.
bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) {
  auto *FrameIdxNode = dyn_cast<FrameIndexSDNode>(Addr);
  if (!FrameIdxNode)
    return false;

  Base = CurDAG->getTargetFrameIndex(FrameIdxNode->getIndex(),
                                     Subtarget->getXLenVT());
  return true;
}
|
|
|
|
// Detect the pattern lui %hi(global) --> ADDI %lo(global)
|
|
// HiLUI LoADDI
|
|
static bool detectLuiAddiGlobal(SDNode *Tail, unsigned &Idx, SDValue &LoADDI,
|
|
SDValue &HiLUI, GlobalAddressSDNode *&GAlo,
|
|
GlobalAddressSDNode *&GAhi) {
|
|
// Try to detect the pattern on every operand of the tail instruction.
|
|
for (Idx = 0; Idx < Tail->getNumOperands(); Idx++) {
|
|
LoADDI = Tail->getOperand(Idx);
|
|
// LoADDI should only be used by one instruction (Tail).
|
|
if (!LoADDI->isMachineOpcode() ||
|
|
!(LoADDI->getMachineOpcode() == RISCV::ADDI) ||
|
|
!isa<GlobalAddressSDNode>(LoADDI->getOperand(1)) ||
|
|
!LoADDI->hasOneUse())
|
|
continue;
|
|
// Check for existence of %lo target flag.
|
|
GAlo = cast<GlobalAddressSDNode>(LoADDI->getOperand(1));
|
|
if (!(GAlo->getTargetFlags() == RISCVII::MO_LO) ||
|
|
!(GAlo->getOffset() == 0))
|
|
return false;
|
|
// Check for existence of %hi target flag.
|
|
HiLUI = LoADDI->getOperand(0);
|
|
if (!HiLUI->isMachineOpcode() ||
|
|
!(HiLUI->getMachineOpcode() == RISCV::LUI) ||
|
|
!isa<GlobalAddressSDNode>(HiLUI->getOperand(0)) || !HiLUI->hasOneUse())
|
|
return false;
|
|
GAhi = cast<GlobalAddressSDNode>(HiLUI->getOperand(0));
|
|
if (!(GAhi->getTargetFlags() == RISCVII::MO_HI) ||
|
|
!(GAhi->getOffset() == 0))
|
|
return false;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Matches OffsetLUI against "LUI hi_offset" with a constant operand, i.e. an
// offset whose low 12 bits are zero. On success stores the value the LUI
// produces in Offset and returns true; otherwise returns false and leaves
// Offset untouched.
static bool matchLuiOffset(SDValue &OffsetLUI, int64_t &Offset) {
  if (!OffsetLUI->isMachineOpcode() ||
      OffsetLUI->getMachineOpcode() != RISCV::LUI)
    return false;
  auto *HiImm = dyn_cast<ConstantSDNode>(OffsetLUI->getOperand(0));
  if (!HiImm)
    return false;

  // LUI places its immediate in bits [31:12] and the 32-bit result is
  // sign-extended to XLEN, so the value is the sign-extension from bit 31 of
  // (imm << 12). Shift as unsigned to avoid left-shifting a negative value.
  Offset =
      SignExtend64<32>(static_cast<uint64_t>(HiImm->getSExtValue()) << 12);
  LLVM_DEBUG(dbgs() << " Detected \" LUI Offset_hi\"\n");
  return true;
}
|
|
|
|
// Matches OffsetLoADDI against "ADDI (LUI hi_offset), lo_offset" where both
// immediates are constants and both instructions have a single use. On
// success stores the materialized constant in Offset and returns true;
// otherwise returns false and leaves Offset untouched.
static bool matchAddiLuiOffset(SDValue &OffsetLoADDI, int64_t &Offset) {
  // The ADDI must be used by the tail instruction only.
  if (!OffsetLoADDI->isMachineOpcode() ||
      OffsetLoADDI->getMachineOpcode() != RISCV::ADDI ||
      !isa<ConstantSDNode>(OffsetLoADDI->getOperand(1)) ||
      !OffsetLoADDI->hasOneUse())
    return false;
  // The ADDI immediate is a signed 12-bit quantity. Read it sign-extended:
  // zero-extending a negative immediate held in a 32-bit constant would turn
  // it into a large positive number and corrupt the folded offset.
  const int64_t OffLo =
      cast<ConstantSDNode>(OffsetLoADDI->getOperand(1))->getSExtValue();

  // The LUI must be used by the ADDI only.
  SDValue OffsetHiLUI = OffsetLoADDI->getOperand(0);
  if (!OffsetHiLUI->isMachineOpcode() ||
      OffsetHiLUI->getMachineOpcode() != RISCV::LUI ||
      !isa<ConstantSDNode>(OffsetHiLUI->getOperand(0)) ||
      !OffsetHiLUI->hasOneUse())
    return false;
  const int64_t OffHi =
      cast<ConstantSDNode>(OffsetHiLUI->getOperand(0))->getSExtValue();

  // LUI yields (imm << 12) sign-extended from bit 31; shift as unsigned to
  // avoid left-shifting a negative value.
  Offset = SignExtend64<32>(static_cast<uint64_t>(OffHi) << 12) + OffLo;
  LLVM_DEBUG(dbgs() << " Detected \" ADDI (LUI Offset_hi), Offset_lo\"\n");
  return true;
}
|
|
|
|
// Rewrites the "LUI %hi(global); ADDI %lo(global)" pair so that both global
// address operands carry Offset, then redirects every user of Tail's first
// result to GlobalLoADDI. Tail is left without users of that result; the
// caller is expected to clean up dead nodes.
static void updateTailInstrUsers(SDNode *Tail, SelectionDAG *CurDAG,
                                 GlobalAddressSDNode *GAhi,
                                 GlobalAddressSDNode *GAlo,
                                 SDValue &GlobalHiLUI, SDValue &GlobalLoADDI,
                                 int64_t Offset) {
  // Build replacement %hi/%lo operands that include the offset.
  SDLoc Loc(Tail->getOperand(1));
  SDValue HiWithOffset = CurDAG->getTargetGlobalAddress(
      GAhi->getGlobal(), Loc, GlobalHiLUI.getValueType(), Offset,
      RISCVII::MO_HI);
  SDValue LoWithOffset = CurDAG->getTargetGlobalAddress(
      GAlo->getGlobal(), Loc, GlobalLoADDI.getValueType(), Offset,
      RISCVII::MO_LO);

  // Install them on the existing LUI and ADDI nodes.
  CurDAG->UpdateNodeOperands(GlobalHiLUI.getNode(), HiWithOffset);
  CurDAG->UpdateNodeOperands(GlobalLoADDI.getNode(), GlobalHiLUI,
                             LoWithOffset);

  // The ADDI now computes what Tail used to; forward Tail's users to it.
  SDValue TailResult(Tail, 0);
  CurDAG->ReplaceAllUsesOfValuesWith(&TailResult, &GlobalLoADDI, 1);
}
|
|
|
|
// TODO: This transformation might be better implemented in a Machine Function
|
|
// Pass as discussed here: https://reviews.llvm.org/D45748.
|
|
//
|
|
// Merge the offset of address calculation into the offset field
|
|
// of a global address node in a global address lowering sequence ("LUI
|
|
// %hi(global) --> add %lo(global)") under the following conditions: 1) The
|
|
// offset field in the global address lowering sequence is zero. 2) The lowered
|
|
// global address is only used in one node, referred to as "Tail".
|
|
|
|
// This peephole does the following transformations to merge the offset:
|
|
|
|
// 1) ADDI (ADDI (LUI %hi(global)) %lo(global)), offset
|
|
// --->
|
|
// ADDI (LUI %hi(global + offset)) %lo(global + offset).
|
|
//
|
|
// This generates:
|
|
// lui a0, hi (global + offset)
|
|
// add a0, a0, lo (global + offset)
|
|
// Instead of
|
|
// lui a0, hi (global)
|
|
// addi a0, lo (global)
|
|
// addi a0, offset
|
|
// This pattern is for cases when the offset is small enough to fit in the
|
|
// immediate field of ADDI (less than 12 bits).
|
|
|
|
// 2) ADD ((ADDI (LUI %hi(global)) %lo(global)), (LUI hi_offset))
|
|
// --->
|
|
// offset = hi_offset << 12
|
|
// ADDI (LUI %hi(global + offset)) %lo(global + offset)
|
|
|
|
// Which generates the ASM:
|
|
// lui a0, hi(global + offset)
|
|
// addi a0, lo(global + offset)
|
|
// Instead of:
|
|
// lui a0, hi(global)
|
|
// addi a0, lo(global)
|
|
// lui a1, (offset)
|
|
// add a0, a0, a1
|
|
|
|
// This pattern is for cases when the offset doesn't fit in an immediate field
|
|
// of ADDI but the lower 12 bits are all zeros.
|
|
|
|
// 3) ADD ((ADDI (LUI %hi(global)) %lo(global)), (ADDI lo_offset, (LUI
|
|
// hi_offset)))
|
|
// --->
|
|
// ADDI (LUI %hi(global + offset)) %lo(global + offset)
|
|
// Which generates the ASM:
|
|
// lui a1, %hi(global + offhi20<<12 + offlo12)
|
|
// addi a1, %lo(global + offhi20<<12 + offlo12)
|
|
// Instead of:
|
|
// lui a0, hi(global)
|
|
// addi a0, lo(global)
|
|
// lui a1, (offhi20)
|
|
// addi a1, (offlo12)
|
|
// add a0, a0, a1
|
|
// This pattern is for cases when the offset doesn't fit in an immediate field
|
|
// of ADDI and both the lower 12 bits and high 20 bits are non zero.
|
|
void RISCVDAGToDAGISel::doPeepholeGlobalAddiLuiOffset() {
|
|
SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
|
|
++Position;
|
|
SelectionDAG::allnodes_iterator Begin(CurDAG->allnodes_begin());
|
|
while (Position != Begin) {
|
|
SDNode *Tail = &*--Position;
|
|
// Skip dead nodes and any non-machine opcodes.
|
|
if (Tail->use_empty() || !Tail->isMachineOpcode())
|
|
continue;
|
|
// The tail instruction can be an ADD or an ADDI.
|
|
if (!Tail->isMachineOpcode() || !(Tail->getMachineOpcode() == RISCV::ADD ||
|
|
Tail->getMachineOpcode() == RISCV::ADDI))
|
|
continue;
|
|
// First detect the global address part of pattern:
|
|
// (lui %hi(global) --> Addi %lo(global))
|
|
unsigned GlobalLoADDiIdx;
|
|
SDValue GlobalLoADDI;
|
|
SDValue GlobalHiLUI;
|
|
GlobalAddressSDNode *GAhi;
|
|
GlobalAddressSDNode *GAlo;
|
|
if (!detectLuiAddiGlobal(Tail, GlobalLoADDiIdx, GlobalLoADDI, GlobalHiLUI,
|
|
GAlo, GAhi))
|
|
continue;
|
|
LLVM_DEBUG(dbgs() << " Detected \"ADDI LUI %hi(global), %lo(global)\n");
|
|
// Detect the offset part for the address calculation by looking at the
|
|
// other operand of the tail instruction:
|
|
int64_t Offset;
|
|
if (Tail->getMachineOpcode() == RISCV::ADD) {
|
|
// If the Tail is an ADD instruction, the offset can be in two forms:
|
|
// 1) LUI hi_Offset followed by:
|
|
// ADDI lo_offset
|
|
// This happens in case the offset has non zero bits in
|
|
// both hi 20 and lo 12 bits.
|
|
// 2) LUI (offset20)
|
|
// This happens in case the lower 12 bits of the offset are zeros.
|
|
SDValue OffsetVal = Tail->getOperand(1 - GlobalLoADDiIdx);
|
|
if (!matchAddiLuiOffset(OffsetVal, Offset) &&
|
|
!matchLuiOffset(OffsetVal, Offset))
|
|
continue;
|
|
} else
|
|
// The Tail is an ADDI instruction:
|
|
Offset = cast<ConstantSDNode>(Tail->getOperand(1 - GlobalLoADDiIdx))
|
|
->getSExtValue();
|
|
|
|
LLVM_DEBUG(
|
|
dbgs()
|
|
<< " Fold offset value into global offset of LUI %hi and ADDI %lo\n");
|
|
LLVM_DEBUG(dbgs() << "\tTail:");
|
|
LLVM_DEBUG(Tail->dump(CurDAG));
|
|
LLVM_DEBUG(dbgs() << "\tGlobalHiLUI:");
|
|
LLVM_DEBUG(GlobalHiLUI->dump(CurDAG));
|
|
LLVM_DEBUG(dbgs() << "\tGlobalLoADDI:");
|
|
LLVM_DEBUG(GlobalLoADDI->dump(CurDAG));
|
|
LLVM_DEBUG(dbgs() << "\n");
|
|
updateTailInstrUsers(Tail, CurDAG, GAhi, GAlo, GlobalHiLUI, GlobalLoADDI,
|
|
Offset);
|
|
}
|
|
CurDAG->RemoveDeadNodes();
|
|
}
|
|
|
|
// Merge an ADDI into the offset of a load/store instruction where possible.
|
|
// (load (add base, off), 0) -> (load base, off)
|
|
// (store val, (add base, off)) -> (store val, base, off)
|
|
void RISCVDAGToDAGISel::doPeepholeLoadStoreADDI() {
|
|
SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
|
|
++Position;
|
|
|
|
while (Position != CurDAG->allnodes_begin()) {
|
|
SDNode *N = &*--Position;
|
|
// Skip dead nodes and any non-machine opcodes.
|
|
if (N->use_empty() || !N->isMachineOpcode())
|
|
continue;
|
|
|
|
int OffsetOpIdx;
|
|
int BaseOpIdx;
|
|
|
|
// Only attempt this optimisation for I-type loads and S-type stores.
|
|
switch (N->getMachineOpcode()) {
|
|
default:
|
|
continue;
|
|
case RISCV::LB:
|
|
case RISCV::LH:
|
|
case RISCV::LW:
|
|
case RISCV::LBU:
|
|
case RISCV::LHU:
|
|
case RISCV::LWU:
|
|
case RISCV::LD:
|
|
case RISCV::FLW:
|
|
case RISCV::FLD:
|
|
BaseOpIdx = 0;
|
|
OffsetOpIdx = 1;
|
|
break;
|
|
case RISCV::SB:
|
|
case RISCV::SH:
|
|
case RISCV::SW:
|
|
case RISCV::SD:
|
|
case RISCV::FSW:
|
|
case RISCV::FSD:
|
|
BaseOpIdx = 1;
|
|
OffsetOpIdx = 2;
|
|
break;
|
|
}
|
|
|
|
// Currently, the load/store offset must be 0 to be considered for this
|
|
// peephole optimisation.
|
|
if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)) ||
|
|
N->getConstantOperandVal(OffsetOpIdx) != 0)
|
|
continue;
|
|
|
|
SDValue Base = N->getOperand(BaseOpIdx);
|
|
|
|
// If the base is an ADDI, we can merge it in to the load/store.
|
|
if (!Base.isMachineOpcode() || Base.getMachineOpcode() != RISCV::ADDI)
|
|
continue;
|
|
|
|
SDValue ImmOperand = Base.getOperand(1);
|
|
|
|
if (auto Const = dyn_cast<ConstantSDNode>(ImmOperand)) {
|
|
ImmOperand = CurDAG->getTargetConstant(
|
|
Const->getSExtValue(), SDLoc(ImmOperand), ImmOperand.getValueType());
|
|
} else if (auto GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) {
|
|
ImmOperand = CurDAG->getTargetGlobalAddress(
|
|
GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(),
|
|
GA->getOffset(), GA->getTargetFlags());
|
|
} else {
|
|
continue;
|
|
}
|
|
|
|
LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
|
|
LLVM_DEBUG(Base->dump(CurDAG));
|
|
LLVM_DEBUG(dbgs() << "\nN: ");
|
|
LLVM_DEBUG(N->dump(CurDAG));
|
|
LLVM_DEBUG(dbgs() << "\n");
|
|
|
|
// Modify the offset operand of the load/store.
|
|
if (BaseOpIdx == 0) // Load
|
|
CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand,
|
|
N->getOperand(2));
|
|
else // Store
|
|
CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0),
|
|
ImmOperand, N->getOperand(3));
|
|
|
|
// The add-immediate may now be dead, in which case remove it.
|
|
if (Base.getNode()->use_empty())
|
|
CurDAG->RemoveDeadNode(Base.getNode());
|
|
}
|
|
}
|
|
|
|
// Remove redundant BuildPairF64+SplitF64 pairs. i.e. cases where an f64 is
|
|
// built of two i32 values, only to be split apart again. This must be done
|
|
// here as a peephole optimisation as the DAG has not been fully legalized at
|
|
// the point BuildPairF64/SplitF64 nodes are created in RISCVISelLowering, so
|
|
// some nodes would not yet have been replaced with libcalls.
|
|
void RISCVDAGToDAGISel::doPeepholeBuildPairF64SplitF64() {
|
|
SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
|
|
++Position;
|
|
|
|
while (Position != CurDAG->allnodes_begin()) {
|
|
SDNode *N = &*--Position;
|
|
// Skip dead nodes and any nodes other than SplitF64Pseudo.
|
|
if (N->use_empty() || !N->isMachineOpcode() ||
|
|
!(N->getMachineOpcode() == RISCV::SplitF64Pseudo))
|
|
continue;
|
|
|
|
// If the operand to SplitF64 is a BuildPairF64, the split operation is
|
|
// redundant. Just use the operands to BuildPairF64 as the result.
|
|
SDValue F64Val = N->getOperand(0);
|
|
if (F64Val.isMachineOpcode() &&
|
|
F64Val.getMachineOpcode() == RISCV::BuildPairF64Pseudo) {
|
|
LLVM_DEBUG(
|
|
dbgs() << "Removing redundant SplitF64Pseudo and replacing uses "
|
|
"with BuildPairF64Pseudo operands:\n");
|
|
LLVM_DEBUG(dbgs() << "N: ");
|
|
LLVM_DEBUG(N->dump(CurDAG));
|
|
LLVM_DEBUG(dbgs() << "F64Val: ");
|
|
LLVM_DEBUG(F64Val->dump(CurDAG));
|
|
LLVM_DEBUG(dbgs() << "\n");
|
|
SDValue From[] = {SDValue(N, 0), SDValue(N, 1)};
|
|
SDValue To[] = {F64Val.getOperand(0), F64Val.getOperand(1)};
|
|
CurDAG->ReplaceAllUsesOfValuesWith(From, To, 2);
|
|
}
|
|
}
|
|
CurDAG->RemoveDeadNodes();
|
|
}
|
|
|
|
// This pass converts a legalized DAG into a RISCV-specific DAG, ready
|
|
// for instruction scheduling.
|
|
FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM) {
|
|
return new RISCVDAGToDAGISel(TM);
|
|
}
|