llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

469 lines
17 KiB
C++

//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISCV ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the RISCV target.
//
//===----------------------------------------------------------------------===//
#include "RISCV.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "RISCVTargetMachine.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "riscv-isel"
// RISCV-specific code to select RISCV machine instructions for
// SelectionDAG operations.
namespace {
class RISCVDAGToDAGISel final : public SelectionDAGISel {
const RISCVSubtarget *Subtarget;
public:
explicit RISCVDAGToDAGISel(RISCVTargetMachine &TargetMachine)
: SelectionDAGISel(TargetMachine) {}
StringRef getPassName() const override {
return "RISCV DAG->DAG Pattern Instruction Selection";
}
bool runOnMachineFunction(MachineFunction &MF) override {
Subtarget = &MF.getSubtarget<RISCVSubtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
}
void PostprocessISelDAG() override;
void Select(SDNode *Node) override;
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
bool SelectAddrFI(SDValue Addr, SDValue &Base);
// Include the pieces autogenerated from the target description.
#include "RISCVGenDAGISel.inc"
private:
void doPeepholeLoadStoreADDI();
void doPeepholeGlobalAddiLuiOffset();
void doPeepholeBuildPairF64SplitF64();
};
}
void RISCVDAGToDAGISel::PostprocessISelDAG() {
doPeepholeLoadStoreADDI();
doPeepholeGlobalAddiLuiOffset();
doPeepholeBuildPairF64SplitF64();
}
void RISCVDAGToDAGISel::Select(SDNode *Node) {
unsigned Opcode = Node->getOpcode();
MVT XLenVT = Subtarget->getXLenVT();
// If we have a custom node, we have already selected
if (Node->isMachineOpcode()) {
LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
Node->setNodeId(-1);
return;
}
// Instruction Selection not handled by the auto-generated tablegen selection
// should be handled here.
EVT VT = Node->getValueType(0);
if (Opcode == ISD::Constant && VT == XLenVT) {
auto *ConstNode = cast<ConstantSDNode>(Node);
// Materialize zero constants as copies from X0. This allows the coalescer
// to propagate these into other instructions.
if (ConstNode->isNullValue()) {
SDValue New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
RISCV::X0, XLenVT);
ReplaceNode(Node, New.getNode());
return;
}
}
if (Opcode == ISD::FrameIndex) {
SDLoc DL(Node);
SDValue Imm = CurDAG->getTargetConstant(0, DL, XLenVT);
int FI = cast<FrameIndexSDNode>(Node)->getIndex();
EVT VT = Node->getValueType(0);
SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT);
ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ADDI, DL, VT, TFI, Imm));
return;
}
// Select the default instruction.
SelectCode(Node);
}
bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
switch (ConstraintID) {
case InlineAsm::Constraint_i:
case InlineAsm::Constraint_m:
// We just support simple memory operands that have a single address
// operand and need no special handling.
OutOps.push_back(Op);
return false;
default:
break;
}
return true;
}
bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) {
if (auto FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
return true;
}
return false;
}
// Detect the pattern lui %hi(global) --> ADDI %lo(global)
// HiLUI LoADDI
static bool detectLuiAddiGlobal(SDNode *Tail, unsigned &Idx, SDValue &LoADDI,
SDValue &HiLUI, GlobalAddressSDNode *&GAlo,
GlobalAddressSDNode *&GAhi) {
// Try to detect the pattern on every operand of the tail instruction.
for (Idx = 0; Idx < Tail->getNumOperands(); Idx++) {
LoADDI = Tail->getOperand(Idx);
// LoADDI should only be used by one instruction (Tail).
if (!LoADDI->isMachineOpcode() ||
!(LoADDI->getMachineOpcode() == RISCV::ADDI) ||
!isa<GlobalAddressSDNode>(LoADDI->getOperand(1)) ||
!LoADDI->hasOneUse())
continue;
// Check for existence of %lo target flag.
GAlo = cast<GlobalAddressSDNode>(LoADDI->getOperand(1));
if (!(GAlo->getTargetFlags() == RISCVII::MO_LO) ||
!(GAlo->getOffset() == 0))
return false;
// Check for existence of %hi target flag.
HiLUI = LoADDI->getOperand(0);
if (!HiLUI->isMachineOpcode() ||
!(HiLUI->getMachineOpcode() == RISCV::LUI) ||
!isa<GlobalAddressSDNode>(HiLUI->getOperand(0)) || !HiLUI->hasOneUse())
return false;
GAhi = cast<GlobalAddressSDNode>(HiLUI->getOperand(0));
if (!(GAhi->getTargetFlags() == RISCVII::MO_HI) ||
!(GAhi->getOffset() == 0))
return false;
return true;
}
return false;
}
static bool matchLuiOffset(SDValue &OffsetLUI, int64_t &Offset) {
if (!OffsetLUI->isMachineOpcode() ||
!(OffsetLUI->getMachineOpcode() == RISCV::LUI) ||
!isa<ConstantSDNode>(OffsetLUI->getOperand(0)))
return false;
Offset = cast<ConstantSDNode>(OffsetLUI->getOperand(0))->getSExtValue();
Offset = Offset << 12;
LLVM_DEBUG(dbgs() << " Detected \" LUI Offset_hi\"\n");
return true;
}
static bool matchAddiLuiOffset(SDValue &OffsetLoADDI, int64_t &Offset) {
// LoADDI should only be used by the tail instruction only.
if (!OffsetLoADDI->isMachineOpcode() ||
!(OffsetLoADDI->getMachineOpcode() == RISCV::ADDI) ||
!isa<ConstantSDNode>(OffsetLoADDI->getOperand(1)) ||
!OffsetLoADDI->hasOneUse())
return false;
int64_t OffLo =
cast<ConstantSDNode>(OffsetLoADDI->getOperand(1))->getZExtValue();
// HiLUI should only be used by the loADDI.
SDValue OffsetHiLUI = (OffsetLoADDI->getOperand(0));
if (!OffsetHiLUI->isMachineOpcode() ||
!(OffsetHiLUI->getMachineOpcode() == RISCV::LUI) ||
!isa<ConstantSDNode>(OffsetHiLUI->getOperand(0)) ||
!OffsetHiLUI->hasOneUse())
return false;
int64_t OffHi =
cast<ConstantSDNode>(OffsetHiLUI->getOperand(0))->getSExtValue();
Offset = (OffHi << 12) + OffLo;
LLVM_DEBUG(dbgs() << " Detected \" ADDI (LUI Offset_hi), Offset_lo\"\n");
return true;
}
static void updateTailInstrUsers(SDNode *Tail, SelectionDAG *CurDAG,
GlobalAddressSDNode *GAhi,
GlobalAddressSDNode *GAlo,
SDValue &GlobalHiLUI, SDValue &GlobalLoADDI,
int64_t Offset) {
// Update the offset in GAhi and GAlo.
SDLoc DL(Tail->getOperand(1));
SDValue GAHiNew = CurDAG->getTargetGlobalAddress(GAhi->getGlobal(), DL,
GlobalHiLUI.getValueType(),
Offset, RISCVII::MO_HI);
SDValue GALoNew = CurDAG->getTargetGlobalAddress(GAlo->getGlobal(), DL,
GlobalLoADDI.getValueType(),
Offset, RISCVII::MO_LO);
CurDAG->UpdateNodeOperands(GlobalHiLUI.getNode(), GAHiNew);
CurDAG->UpdateNodeOperands(GlobalLoADDI.getNode(), GlobalHiLUI, GALoNew);
// Update all uses of the Tail with the GlobalLoADDI. After
// this Tail will be a dead node.
SDValue From = SDValue(Tail, 0);
CurDAG->ReplaceAllUsesOfValuesWith(&From, &GlobalLoADDI, 1);
}
// TODO: This transformation might be better implemeted in a Machine Funtion
// Pass as discussed here: https://reviews.llvm.org/D45748.
//
// Merge the offset of address calculation into the offset field
// of a global address node in a global address lowering sequence ("LUI
// %hi(global) --> add %lo(global)") under the following conditions: 1) The
// offset field in the global address lowering sequence is zero. 2) The lowered
// global address is only used in one node, referred to as "Tail".
// This peephole does the following transformations to merge the offset:
// 1) ADDI (ADDI (LUI %hi(global)) %lo(global)), offset
// --->
// ADDI (LUI %hi(global + offset)) %lo(global + offset).
//
// This generates:
// lui a0, hi (global + offset)
// add a0, a0, lo (global + offset)
// Instead of
// lui a0, hi (global)
// addi a0, hi (global)
// addi a0, offset
// This pattern is for cases when the offset is small enough to fit in the
// immediate filed of ADDI (less than 12 bits).
// 2) ADD ((ADDI (LUI %hi(global)) %lo(global)), (LUI hi_offset))
// --->
// offset = hi_offset << 12
// ADDI (LUI %hi(global + offset)) %lo(global + offset)
// Which generates the ASM:
// lui a0, hi(global + offset)
// addi a0, lo(global + offset)
// Instead of:
// lui a0, hi(global)
// addi a0, lo(global)
// lui a1, (offset)
// add a0, a0, a1
// This pattern is for cases when the offset doesn't fit in an immediate field
// of ADDI but the lower 12 bits are all zeros.
// 3) ADD ((ADDI (LUI %hi(global)) %lo(global)), (ADDI lo_offset, (LUI
// hi_offset)))
// --->
// ADDI (LUI %hi(global + offset)) %lo(global + offset)
// Which generates the ASM:
// lui a1, %hi(global + offhi20<<12 + offlo12)
// addi a1, %lo(global + offhi20<<12 + offlo12)
// Instead of:
// lui a0, hi(global)
// addi a0, lo(global)
// lui a1, (offhi20)
// addi a1, (offlo12)
// add a0, a0, a1
// This pattern is for cases when the offset doesn't fit in an immediate field
// of ADDI and both the lower 1 bits and high 20 bits are non zero.
void RISCVDAGToDAGISel::doPeepholeGlobalAddiLuiOffset() {
SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
++Position;
SelectionDAG::allnodes_iterator Begin(CurDAG->allnodes_begin());
while (Position != Begin) {
SDNode *Tail = &*--Position;
// Skip dead nodes and any non-machine opcodes.
if (Tail->use_empty() || !Tail->isMachineOpcode())
continue;
// The tail instruction can be an ADD or an ADDI.
if (!Tail->isMachineOpcode() || !(Tail->getMachineOpcode() == RISCV::ADD ||
Tail->getMachineOpcode() == RISCV::ADDI))
continue;
// First detect the global address part of pattern:
// (lui %hi(global) --> Addi %lo(global))
unsigned GlobalLoADDiIdx;
SDValue GlobalLoADDI;
SDValue GlobalHiLUI;
GlobalAddressSDNode *GAhi;
GlobalAddressSDNode *GAlo;
if (!detectLuiAddiGlobal(Tail, GlobalLoADDiIdx, GlobalLoADDI, GlobalHiLUI,
GAlo, GAhi))
continue;
LLVM_DEBUG(dbgs() << " Detected \"ADDI LUI %hi(global), %lo(global)\n");
// Detect the offset part for the address calculation by looking at the
// other operand of the tail instruction:
int64_t Offset;
if (Tail->getMachineOpcode() == RISCV::ADD) {
// If the Tail is an ADD instruction, the offset can be in two forms:
// 1) LUI hi_Offset followed by:
// ADDI lo_offset
// This happens in case the offset has non zero bits in
// both hi 20 and lo 12 bits.
// 2) LUI (offset20)
// This happens in case the lower 12 bits of the offset are zeros.
SDValue OffsetVal = Tail->getOperand(1 - GlobalLoADDiIdx);
if (!matchAddiLuiOffset(OffsetVal, Offset) &&
!matchLuiOffset(OffsetVal, Offset))
continue;
} else
// The Tail is an ADDI instruction:
Offset = cast<ConstantSDNode>(Tail->getOperand(1 - GlobalLoADDiIdx))
->getSExtValue();
LLVM_DEBUG(
dbgs()
<< " Fold offset value into global offset of LUI %hi and ADDI %lo\n");
LLVM_DEBUG(dbgs() << "\tTail:");
LLVM_DEBUG(Tail->dump(CurDAG));
LLVM_DEBUG(dbgs() << "\tGlobalHiLUI:");
LLVM_DEBUG(GlobalHiLUI->dump(CurDAG));
LLVM_DEBUG(dbgs() << "\tGlobalLoADDI:");
LLVM_DEBUG(GlobalLoADDI->dump(CurDAG));
LLVM_DEBUG(dbgs() << "\n");
updateTailInstrUsers(Tail, CurDAG, GAhi, GAlo, GlobalHiLUI, GlobalLoADDI,
Offset);
}
CurDAG->RemoveDeadNodes();
}
// Merge an ADDI into the offset of a load/store instruction where possible.
// (load (add base, off), 0) -> (load base, off)
// (store val, (add base, off)) -> (store val, base, off)
void RISCVDAGToDAGISel::doPeepholeLoadStoreADDI() {
SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
++Position;
while (Position != CurDAG->allnodes_begin()) {
SDNode *N = &*--Position;
// Skip dead nodes and any non-machine opcodes.
if (N->use_empty() || !N->isMachineOpcode())
continue;
int OffsetOpIdx;
int BaseOpIdx;
// Only attempt this optimisation for I-type loads and S-type stores.
switch (N->getMachineOpcode()) {
default:
continue;
case RISCV::LB:
case RISCV::LH:
case RISCV::LW:
case RISCV::LBU:
case RISCV::LHU:
case RISCV::LWU:
case RISCV::LD:
case RISCV::FLW:
case RISCV::FLD:
BaseOpIdx = 0;
OffsetOpIdx = 1;
break;
case RISCV::SB:
case RISCV::SH:
case RISCV::SW:
case RISCV::SD:
case RISCV::FSW:
case RISCV::FSD:
BaseOpIdx = 1;
OffsetOpIdx = 2;
break;
}
// Currently, the load/store offset must be 0 to be considered for this
// peephole optimisation.
if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)) ||
N->getConstantOperandVal(OffsetOpIdx) != 0)
continue;
SDValue Base = N->getOperand(BaseOpIdx);
// If the base is an ADDI, we can merge it in to the load/store.
if (!Base.isMachineOpcode() || Base.getMachineOpcode() != RISCV::ADDI)
continue;
SDValue ImmOperand = Base.getOperand(1);
if (auto Const = dyn_cast<ConstantSDNode>(ImmOperand)) {
ImmOperand = CurDAG->getTargetConstant(
Const->getSExtValue(), SDLoc(ImmOperand), ImmOperand.getValueType());
} else if (auto GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) {
ImmOperand = CurDAG->getTargetGlobalAddress(
GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(),
GA->getOffset(), GA->getTargetFlags());
} else {
continue;
}
LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
LLVM_DEBUG(Base->dump(CurDAG));
LLVM_DEBUG(dbgs() << "\nN: ");
LLVM_DEBUG(N->dump(CurDAG));
LLVM_DEBUG(dbgs() << "\n");
// Modify the offset operand of the load/store.
if (BaseOpIdx == 0) // Load
CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand,
N->getOperand(2));
else // Store
CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0),
ImmOperand, N->getOperand(3));
// The add-immediate may now be dead, in which case remove it.
if (Base.getNode()->use_empty())
CurDAG->RemoveDeadNode(Base.getNode());
}
}
// Remove redundant BuildPairF64+SplitF64 pairs. i.e. cases where an f64 is
// built of two i32 values, only to be split apart again. This must be done
// here as a peephole optimisation as the DAG has not been fully legalized at
// the point BuildPairF64/SplitF64 nodes are created in RISCVISelLowering, so
// some nodes would not yet have been replaced with libcalls.
void RISCVDAGToDAGISel::doPeepholeBuildPairF64SplitF64() {
SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
++Position;
while (Position != CurDAG->allnodes_begin()) {
SDNode *N = &*--Position;
// Skip dead nodes and any nodes other than SplitF64Pseudo.
if (N->use_empty() || !N->isMachineOpcode() ||
!(N->getMachineOpcode() == RISCV::SplitF64Pseudo))
continue;
// If the operand to SplitF64 is a BuildPairF64, the split operation is
// redundant. Just use the operands to BuildPairF64 as the result.
SDValue F64Val = N->getOperand(0);
if (F64Val.isMachineOpcode() &&
F64Val.getMachineOpcode() == RISCV::BuildPairF64Pseudo) {
LLVM_DEBUG(
dbgs() << "Removing redundant SplitF64Pseudo and replacing uses "
"with BuildPairF64Pseudo operands:\n");
LLVM_DEBUG(dbgs() << "N: ");
LLVM_DEBUG(N->dump(CurDAG));
LLVM_DEBUG(dbgs() << "F64Val: ");
LLVM_DEBUG(F64Val->dump(CurDAG));
LLVM_DEBUG(dbgs() << "\n");
SDValue From[] = {SDValue(N, 0), SDValue(N, 1)};
SDValue To[] = {F64Val.getOperand(0), F64Val.getOperand(1)};
CurDAG->ReplaceAllUsesOfValuesWith(From, To, 2);
}
}
CurDAG->RemoveDeadNodes();
}
// This pass converts a legalized DAG into a RISCV-specific DAG, ready
// for instruction scheduling.
FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM) {
return new RISCVDAGToDAGISel(TM);
}