[RISCV] Split the pseudo instruction splitting pass
Extracts the atomic pseudo-instruction expansion from `riscv-expand-pseudo` / `RISCVExpandPseudo` into its own pass, `riscv-expand-atomic-pseudo` / `RISCVExpandAtomicPseudo`. This allows the expansion of atomic operations to continue to happen late (the new pass is added in `addPreEmitPass2`, so those expansions still happen in the same place), while the remaining pseudo-instructions can now be expanded earlier and benefit from more optimization passes. The non-atomic expansion pass is now added in `addPreSched2`.

Differential Revision: https://reviews.llvm.org/D79635
parent d1df560231
commit 2cb0644f90
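For orientation before the diffs below: the two expansion passes now run at different points of the machine pipeline. A simplified sketch of the relevant hook order (an assumption based on the standard TargetPassConfig::addMachinePasses() sequence, abbreviated; this sketch is not part of the patch):

    // Simplified post-register-allocation pipeline order (abbreviated):
    //
    //   ... register allocation ...
    //   ... prologue/epilogue insertion ...
    //   addPreSched2();     // RISCVExpandPseudo now runs here, so its output
    //                       // is visible to the post-RA scheduler and the
    //                       // later machine passes.
    //   ... post-RA scheduling, block placement ...
    //   addPreEmitPass();   // branch relaxation
    //   ...
    //   addPreEmitPass2();  // RISCVExpandAtomicPseudo stays here, right
    //                       // before emission, keeping LR/SC loops intact.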
llvm/lib/Target/RISCV/CMakeLists.txt
@@ -19,6 +19,7 @@ add_public_tablegen_target(RISCVCommonTableGen)

 add_llvm_target(RISCVCodeGen
   RISCVAsmPrinter.cpp
   RISCVCallLowering.cpp
+  RISCVExpandAtomicPseudoInsts.cpp
   RISCVExpandPseudoInsts.cpp
   RISCVFrameLowering.cpp
   RISCVInstrInfo.cpp
llvm/lib/Target/RISCV/RISCV.h
@@ -43,6 +43,9 @@ void initializeRISCVMergeBaseOffsetOptPass(PassRegistry &);

 FunctionPass *createRISCVExpandPseudoPass();
 void initializeRISCVExpandPseudoPass(PassRegistry &);

+FunctionPass *createRISCVExpandAtomicPseudoPass();
+void initializeRISCVExpandAtomicPseudoPass(PassRegistry &);
+
 InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &,
                                                     RISCVSubtarget &,
                                                     RISCVRegisterBankInfo &);
llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp (new file)
@@ -0,0 +1,615 @@

//===-- RISCVExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands atomic pseudo instructions into
// target instructions. This pass should be run at the last possible moment,
// avoiding the possibility for other passes to break the requirements for
// forward progress in the LR/SC block.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVInstrInfo.h"
#include "RISCVTargetMachine.h"

#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

#define RISCV_EXPAND_ATOMIC_PSEUDO_NAME                                        \
  "RISCV atomic pseudo instruction expansion pass"

namespace {

class RISCVExpandAtomicPseudo : public MachineFunctionPass {
public:
  const RISCVInstrInfo *TII;
  static char ID;

  RISCVExpandAtomicPseudo() : MachineFunctionPass(ID) {
    initializeRISCVExpandAtomicPseudoPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return RISCV_EXPAND_ATOMIC_PSEUDO_NAME;
  }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicBinOp(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
                         bool IsMasked, int Width,
                         MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            AtomicRMWInst::BinOp, bool IsMasked, int Width,
                            MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, bool IsMasked,
                           int Width, MachineBasicBlock::iterator &NextMBBI);
};

char RISCVExpandAtomicPseudo::ID = 0;

bool RISCVExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const RISCVInstrInfo *>(MF.getSubtarget().getInstrInfo());
  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

bool RISCVExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       MachineBasicBlock::iterator &NextMBBI) {
  switch (MBBI->getOpcode()) {
  case RISCV::PseudoAtomicLoadNand32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadNand64:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
                             NextMBBI);
  case RISCV::PseudoMaskedAtomicSwap32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
                             NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadAdd32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadSub32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadNand32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32,
                             NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadUMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadUMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
                                NextMBBI);
  case RISCV::PseudoCmpXchg32:
    return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
  case RISCV::PseudoCmpXchg64:
    return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
  case RISCV::PseudoMaskedCmpXchg32:
    return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
  }

  return false;
}

static unsigned getLRForRMW32(AtomicOrdering Ordering) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::LR_W;
  case AtomicOrdering::Acquire:
    return RISCV::LR_W_AQ;
  case AtomicOrdering::Release:
    return RISCV::LR_W;
  case AtomicOrdering::AcquireRelease:
    return RISCV::LR_W_AQ;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::LR_W_AQ_RL;
  }
}

static unsigned getSCForRMW32(AtomicOrdering Ordering) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::SC_W;
  case AtomicOrdering::Acquire:
    return RISCV::SC_W;
  case AtomicOrdering::Release:
    return RISCV::SC_W_RL;
  case AtomicOrdering::AcquireRelease:
    return RISCV::SC_W_RL;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::SC_W_AQ_RL;
  }
}

static unsigned getLRForRMW64(AtomicOrdering Ordering) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::LR_D;
  case AtomicOrdering::Acquire:
    return RISCV::LR_D_AQ;
  case AtomicOrdering::Release:
    return RISCV::LR_D;
  case AtomicOrdering::AcquireRelease:
    return RISCV::LR_D_AQ;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::LR_D_AQ_RL;
  }
}

static unsigned getSCForRMW64(AtomicOrdering Ordering) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::SC_D;
  case AtomicOrdering::Acquire:
    return RISCV::SC_D;
  case AtomicOrdering::Release:
    return RISCV::SC_D_RL;
  case AtomicOrdering::AcquireRelease:
    return RISCV::SC_D_RL;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::SC_D_AQ_RL;
  }
}

static unsigned getLRForRMW(AtomicOrdering Ordering, int Width) {
  if (Width == 32)
    return getLRForRMW32(Ordering);
  if (Width == 64)
    return getLRForRMW64(Ordering);
  llvm_unreachable("Unexpected LR width\n");
}

static unsigned getSCForRMW(AtomicOrdering Ordering, int Width) {
  if (Width == 32)
    return getSCForRMW32(Ordering);
  if (Width == 64)
    return getSCForRMW64(Ordering);
  llvm_unreachable("Unexpected SC width\n");
}

static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
                                   DebugLoc DL, MachineBasicBlock *ThisMBB,
                                   MachineBasicBlock *LoopMBB,
                                   MachineBasicBlock *DoneMBB,
                                   AtomicRMWInst::BinOp BinOp, int Width) {
  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register IncrReg = MI.getOperand(3).getReg();
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(4).getImm());

  // .loop:
  //   lr.[w|d] dest, (addr)
  //   binop scratch, dest, val
  //   sc.[w|d] scratch, scratch, (addr)
  //   bnez scratch, loop
  BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
      .addReg(AddrReg);
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Nand:
    BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
        .addReg(ScratchReg)
        .addImm(-1);
    break;
  }
  BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
      .addReg(AddrReg)
      .addReg(ScratchReg);
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(ScratchReg)
      .addReg(RISCV::X0)
      .addMBB(LoopMBB);
}

static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL,
                              MachineBasicBlock *MBB, Register DestReg,
                              Register OldValReg, Register NewValReg,
                              Register MaskReg, Register ScratchReg) {
  assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
  assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
  assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");

  // We select bits from newval and oldval using:
  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
  // r = oldval ^ ((oldval ^ newval) & masktargetdata);
  BuildMI(MBB, DL, TII->get(RISCV::XOR), ScratchReg)
      .addReg(OldValReg)
      .addReg(NewValReg);
  BuildMI(MBB, DL, TII->get(RISCV::AND), ScratchReg)
      .addReg(ScratchReg)
      .addReg(MaskReg);
  BuildMI(MBB, DL, TII->get(RISCV::XOR), DestReg)
      .addReg(OldValReg)
      .addReg(ScratchReg);
}

static void doMaskedAtomicBinOpExpansion(
    const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
    MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB,
    MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) {
  assert(Width == 32 && "Should never need to expand masked 64-bit operations");
  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register IncrReg = MI.getOperand(3).getReg();
  Register MaskReg = MI.getOperand(4).getReg();
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(5).getImm());

  // .loop:
  //   lr.w destreg, (alignedaddr)
  //   binop scratch, destreg, incr
  //   xor scratch, destreg, scratch
  //   and scratch, scratch, masktargetdata
  //   xor scratch, destreg, scratch
  //   sc.w scratch, scratch, (alignedaddr)
  //   bnez scratch, loop
  BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
      .addReg(AddrReg);
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Xchg:
    BuildMI(LoopMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
        .addReg(IncrReg)
        .addImm(0);
    break;
  case AtomicRMWInst::Add:
    BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Sub:
    BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Nand:
    BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
        .addReg(ScratchReg)
        .addImm(-1);
    break;
  }

  insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
                    ScratchReg);

  BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
      .addReg(AddrReg)
      .addReg(ScratchReg);
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(ScratchReg)
      .addReg(RISCV::X0)
      .addMBB(LoopMBB);
}

bool RISCVExpandAtomicPseudo::expandAtomicBinOp(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();

  MachineFunction *MF = MBB.getParent();
  auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopMBB);
  MF->insert(++LoopMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopMBB);

  if (!IsMasked)
    doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width);
  else
    doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
                                 Width);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
                       MachineBasicBlock *MBB, Register ValReg,
                       Register ShamtReg) {
  BuildMI(MBB, DL, TII->get(RISCV::SLL), ValReg)
      .addReg(ValReg)
      .addReg(ShamtReg);
  BuildMI(MBB, DL, TII->get(RISCV::SRA), ValReg)
      .addReg(ValReg)
      .addReg(ShamtReg);
}

bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
    MachineBasicBlock::iterator &NextMBBI) {
  assert(IsMasked == true &&
         "Should only need to expand masked atomic max/min");
  assert(Width == 32 && "Should never need to expand masked 64-bit operations");

  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopHeadMBB);
  MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
  MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
  LoopHeadMBB->addSuccessor(LoopTailMBB);
  LoopIfBodyMBB->addSuccessor(LoopTailMBB);
  LoopTailMBB->addSuccessor(LoopHeadMBB);
  LoopTailMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopHeadMBB);

  Register DestReg = MI.getOperand(0).getReg();
  Register Scratch1Reg = MI.getOperand(1).getReg();
  Register Scratch2Reg = MI.getOperand(2).getReg();
  Register AddrReg = MI.getOperand(3).getReg();
  Register IncrReg = MI.getOperand(4).getReg();
  Register MaskReg = MI.getOperand(5).getReg();
  bool IsSigned = BinOp == AtomicRMWInst::Min || BinOp == AtomicRMWInst::Max;
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(IsSigned ? 7 : 6).getImm());

  //
  // .loophead:
  //   lr.w destreg, (alignedaddr)
  //   and scratch2, destreg, mask
  //   mv scratch1, destreg
  //   [sext scratch2 if signed min/max]
  //   ifnochangeneeded scratch2, incr, .looptail
  BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
      .addReg(AddrReg);
  BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), Scratch2Reg)
      .addReg(DestReg)
      .addReg(MaskReg);
  BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), Scratch1Reg)
      .addReg(DestReg)
      .addImm(0);

  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Max: {
    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
        .addReg(Scratch2Reg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  }
  case AtomicRMWInst::Min: {
    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
        .addReg(IncrReg)
        .addReg(Scratch2Reg)
        .addMBB(LoopTailMBB);
    break;
  }
  case AtomicRMWInst::UMax:
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
        .addReg(Scratch2Reg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  case AtomicRMWInst::UMin:
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
        .addReg(IncrReg)
        .addReg(Scratch2Reg)
        .addMBB(LoopTailMBB);
    break;
  }

  // .loopifbody:
  //   xor scratch1, destreg, incr
  //   and scratch1, scratch1, mask
  //   xor scratch1, destreg, scratch1
  insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
                    MaskReg, Scratch1Reg);

  // .looptail:
  //   sc.w scratch1, scratch1, (addr)
  //   bnez scratch1, loop
  BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), Scratch1Reg)
      .addReg(AddrReg)
      .addReg(Scratch1Reg);
  BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
      .addReg(Scratch1Reg)
      .addReg(RISCV::X0)
      .addMBB(LoopHeadMBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
  computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
    int Width, MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopHeadMBB);
  MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopHeadMBB->addSuccessor(LoopTailMBB);
  LoopHeadMBB->addSuccessor(DoneMBB);
  LoopTailMBB->addSuccessor(DoneMBB);
  LoopTailMBB->addSuccessor(LoopHeadMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopHeadMBB);

  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register CmpValReg = MI.getOperand(3).getReg();
  Register NewValReg = MI.getOperand(4).getReg();
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());

  if (!IsMasked) {
    // .loophead:
    //   lr.[w|d] dest, (addr)
    //   bne dest, cmpval, done
    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
        .addReg(AddrReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
        .addReg(DestReg)
        .addReg(CmpValReg)
        .addMBB(DoneMBB);
    // .looptail:
    //   sc.[w|d] scratch, newval, (addr)
    //   bnez scratch, loophead
    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
        .addReg(AddrReg)
        .addReg(NewValReg);
    BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(RISCV::X0)
        .addMBB(LoopHeadMBB);
  } else {
    // .loophead:
    //   lr.w dest, (addr)
    //   and scratch, dest, mask
    //   bne scratch, cmpval, done
    Register MaskReg = MI.getOperand(5).getReg();
    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
        .addReg(AddrReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(MaskReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(CmpValReg)
        .addMBB(DoneMBB);

    // .looptail:
    //   xor scratch, dest, newval
    //   and scratch, scratch, mask
    //   xor scratch, dest, scratch
    //   sc.w scratch, scratch, (addr)
    //   bnez scratch, loophead
    insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg,
                      MaskReg, ScratchReg);
    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
        .addReg(AddrReg)
        .addReg(ScratchReg);
    BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(RISCV::X0)
        .addMBB(LoopHeadMBB);
  }

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

} // end of anonymous namespace

INITIALIZE_PASS(RISCVExpandAtomicPseudo, "riscv-expand-atomic-pseudo",
                RISCV_EXPAND_ATOMIC_PSEUDO_NAME, false, false)

namespace llvm {

FunctionPass *createRISCVExpandAtomicPseudoPass() {
  return new RISCVExpandAtomicPseudo();
}

} // end of namespace llvm
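Two of the bit tricks in the file above are easier to check in scalar form. A minimal standalone C++ sketch (illustrative only; the helper names are hypothetical and not part of the patch):

    #include <cassert>
    #include <cstdint>

    // insertMaskedMerge's identity: r = oldval ^ ((oldval ^ newval) & mask)
    // takes bits from newval where mask is 1 and from oldval where mask is 0,
    // in three ALU ops and without disturbing bits outside the mask.
    static uint32_t maskedMerge(uint32_t OldVal, uint32_t NewVal, uint32_t Mask) {
      return OldVal ^ ((OldVal ^ NewVal) & Mask);
    }

    // insertSext's shift pair: shifting left then arithmetic-right by the same
    // amount sign-extends a narrow field whose sign bit lands in bit 31 after
    // the left shift (the masked-atomic lowering supplies the shift amount).
    static int32_t signExtendField(uint32_t Val, unsigned Shamt) {
      return static_cast<int32_t>(Val << Shamt) >> Shamt;
    }

    int main() {
      // Replace only the low byte of 0xAABBCCDD with 0x11.
      assert(maskedMerge(0xAABBCCDD, 0x00000011, 0x000000FF) == 0xAABBCC11);
      // An all-zero mask keeps the old value; an all-ones mask takes the new.
      assert(maskedMerge(0x12345678, 0x9ABCDEF0, 0u) == 0x12345678);
      assert(maskedMerge(0x12345678, 0x9ABCDEF0, ~0u) == 0x9ABCDEF0);
      // 0x80 held as an i8 field in the low byte is -128 once sign-extended.
      assert(signExtendField(0x00000080, 24) == -128);
      return 0;
    }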
llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -43,17 +43,6 @@ private:
   bool expandMBB(MachineBasicBlock &MBB);
   bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                 MachineBasicBlock::iterator &NextMBBI);
-  bool expandAtomicBinOp(MachineBasicBlock &MBB,
-                         MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
-                         bool IsMasked, int Width,
-                         MachineBasicBlock::iterator &NextMBBI);
-  bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MBBI,
-                            AtomicRMWInst::BinOp, bool IsMasked, int Width,
-                            MachineBasicBlock::iterator &NextMBBI);
-  bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator MBBI, bool IsMasked,
-                           int Width, MachineBasicBlock::iterator &NextMBBI);
   bool expandAuipcInstPair(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            MachineBasicBlock::iterator &NextMBBI,
@@ -99,40 +88,6 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  MachineBasicBlock::iterator &NextMBBI) {
   switch (MBBI->getOpcode()) {
-  case RISCV::PseudoAtomicLoadNand32:
-    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
-                             NextMBBI);
-  case RISCV::PseudoAtomicLoadNand64:
-    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
-                             NextMBBI);
-  case RISCV::PseudoMaskedAtomicSwap32:
-    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
-                             NextMBBI);
-  case RISCV::PseudoMaskedAtomicLoadAdd32:
-    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI);
-  case RISCV::PseudoMaskedAtomicLoadSub32:
-    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI);
-  case RISCV::PseudoMaskedAtomicLoadNand32:
-    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32,
-                             NextMBBI);
-  case RISCV::PseudoMaskedAtomicLoadMax32:
-    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
-                                NextMBBI);
-  case RISCV::PseudoMaskedAtomicLoadMin32:
-    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
-                                NextMBBI);
-  case RISCV::PseudoMaskedAtomicLoadUMax32:
-    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
-                                NextMBBI);
-  case RISCV::PseudoMaskedAtomicLoadUMin32:
-    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
-                                NextMBBI);
-  case RISCV::PseudoCmpXchg32:
-    return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
-  case RISCV::PseudoCmpXchg64:
-    return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
-  case RISCV::PseudoMaskedCmpXchg32:
-    return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
   case RISCV::PseudoLLA:
     return expandLoadLocalAddress(MBB, MBBI, NextMBBI);
   case RISCV::PseudoLA:
@@ -146,481 +101,6 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
   return false;
 }
 
-static unsigned getLRForRMW32(AtomicOrdering Ordering) {
-  switch (Ordering) {
-  default:
-    llvm_unreachable("Unexpected AtomicOrdering");
-  case AtomicOrdering::Monotonic:
-    return RISCV::LR_W;
-  case AtomicOrdering::Acquire:
-    return RISCV::LR_W_AQ;
-  case AtomicOrdering::Release:
-    return RISCV::LR_W;
-  case AtomicOrdering::AcquireRelease:
-    return RISCV::LR_W_AQ;
-  case AtomicOrdering::SequentiallyConsistent:
-    return RISCV::LR_W_AQ_RL;
-  }
-}
-
-static unsigned getSCForRMW32(AtomicOrdering Ordering) {
-  switch (Ordering) {
-  default:
-    llvm_unreachable("Unexpected AtomicOrdering");
-  case AtomicOrdering::Monotonic:
-    return RISCV::SC_W;
-  case AtomicOrdering::Acquire:
-    return RISCV::SC_W;
-  case AtomicOrdering::Release:
-    return RISCV::SC_W_RL;
-  case AtomicOrdering::AcquireRelease:
-    return RISCV::SC_W_RL;
-  case AtomicOrdering::SequentiallyConsistent:
-    return RISCV::SC_W_AQ_RL;
-  }
-}
-
-static unsigned getLRForRMW64(AtomicOrdering Ordering) {
-  switch (Ordering) {
-  default:
-    llvm_unreachable("Unexpected AtomicOrdering");
-  case AtomicOrdering::Monotonic:
-    return RISCV::LR_D;
-  case AtomicOrdering::Acquire:
-    return RISCV::LR_D_AQ;
-  case AtomicOrdering::Release:
-    return RISCV::LR_D;
-  case AtomicOrdering::AcquireRelease:
-    return RISCV::LR_D_AQ;
-  case AtomicOrdering::SequentiallyConsistent:
-    return RISCV::LR_D_AQ_RL;
-  }
-}
-
-static unsigned getSCForRMW64(AtomicOrdering Ordering) {
-  switch (Ordering) {
-  default:
-    llvm_unreachable("Unexpected AtomicOrdering");
-  case AtomicOrdering::Monotonic:
-    return RISCV::SC_D;
-  case AtomicOrdering::Acquire:
-    return RISCV::SC_D;
-  case AtomicOrdering::Release:
-    return RISCV::SC_D_RL;
-  case AtomicOrdering::AcquireRelease:
-    return RISCV::SC_D_RL;
-  case AtomicOrdering::SequentiallyConsistent:
-    return RISCV::SC_D_AQ_RL;
-  }
-}
-
-static unsigned getLRForRMW(AtomicOrdering Ordering, int Width) {
-  if (Width == 32)
-    return getLRForRMW32(Ordering);
-  if (Width == 64)
-    return getLRForRMW64(Ordering);
-  llvm_unreachable("Unexpected LR width\n");
-}
-
-static unsigned getSCForRMW(AtomicOrdering Ordering, int Width) {
-  if (Width == 32)
-    return getSCForRMW32(Ordering);
-  if (Width == 64)
-    return getSCForRMW64(Ordering);
-  llvm_unreachable("Unexpected SC width\n");
-}
-
-static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
-                                   DebugLoc DL, MachineBasicBlock *ThisMBB,
-                                   MachineBasicBlock *LoopMBB,
-                                   MachineBasicBlock *DoneMBB,
-                                   AtomicRMWInst::BinOp BinOp, int Width) {
-  Register DestReg = MI.getOperand(0).getReg();
-  Register ScratchReg = MI.getOperand(1).getReg();
-  Register AddrReg = MI.getOperand(2).getReg();
-  Register IncrReg = MI.getOperand(3).getReg();
-  AtomicOrdering Ordering =
-      static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
-
-  // .loop:
-  //   lr.[w|d] dest, (addr)
-  //   binop scratch, dest, val
-  //   sc.[w|d] scratch, scratch, (addr)
-  //   bnez scratch, loop
-  BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
-      .addReg(AddrReg);
-  switch (BinOp) {
-  default:
-    llvm_unreachable("Unexpected AtomicRMW BinOp");
-  case AtomicRMWInst::Nand:
-    BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
-        .addReg(DestReg)
-        .addReg(IncrReg);
-    BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
-        .addReg(ScratchReg)
-        .addImm(-1);
-    break;
-  }
-  BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
-      .addReg(AddrReg)
-      .addReg(ScratchReg);
-  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
-      .addReg(ScratchReg)
-      .addReg(RISCV::X0)
-      .addMBB(LoopMBB);
-}
-
-static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL,
-                              MachineBasicBlock *MBB, Register DestReg,
-                              Register OldValReg, Register NewValReg,
-                              Register MaskReg, Register ScratchReg) {
-  assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
-  assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
-  assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");
-
-  // We select bits from newval and oldval using:
-  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
-  // r = oldval ^ ((oldval ^ newval) & masktargetdata);
-  BuildMI(MBB, DL, TII->get(RISCV::XOR), ScratchReg)
-      .addReg(OldValReg)
-      .addReg(NewValReg);
-  BuildMI(MBB, DL, TII->get(RISCV::AND), ScratchReg)
-      .addReg(ScratchReg)
-      .addReg(MaskReg);
-  BuildMI(MBB, DL, TII->get(RISCV::XOR), DestReg)
-      .addReg(OldValReg)
-      .addReg(ScratchReg);
-}
-
-static void doMaskedAtomicBinOpExpansion(
-    const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
-    MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB,
-    MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) {
-  assert(Width == 32 && "Should never need to expand masked 64-bit operations");
-  Register DestReg = MI.getOperand(0).getReg();
-  Register ScratchReg = MI.getOperand(1).getReg();
-  Register AddrReg = MI.getOperand(2).getReg();
-  Register IncrReg = MI.getOperand(3).getReg();
-  Register MaskReg = MI.getOperand(4).getReg();
-  AtomicOrdering Ordering =
-      static_cast<AtomicOrdering>(MI.getOperand(5).getImm());
-
-  // .loop:
-  //   lr.w destreg, (alignedaddr)
-  //   binop scratch, destreg, incr
-  //   xor scratch, destreg, scratch
-  //   and scratch, scratch, masktargetdata
-  //   xor scratch, destreg, scratch
-  //   sc.w scratch, scratch, (alignedaddr)
-  //   bnez scratch, loop
-  BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
-      .addReg(AddrReg);
-  switch (BinOp) {
-  default:
-    llvm_unreachable("Unexpected AtomicRMW BinOp");
-  case AtomicRMWInst::Xchg:
-    BuildMI(LoopMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
-        .addReg(IncrReg)
-        .addImm(0);
-    break;
-  case AtomicRMWInst::Add:
-    BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
-        .addReg(DestReg)
-        .addReg(IncrReg);
-    break;
-  case AtomicRMWInst::Sub:
-    BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
-        .addReg(DestReg)
-        .addReg(IncrReg);
-    break;
-  case AtomicRMWInst::Nand:
-    BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
-        .addReg(DestReg)
-        .addReg(IncrReg);
-    BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
-        .addReg(ScratchReg)
-        .addImm(-1);
-    break;
-  }
-
-  insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
-                    ScratchReg);
-
-  BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
-      .addReg(AddrReg)
-      .addReg(ScratchReg);
-  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
-      .addReg(ScratchReg)
-      .addReg(RISCV::X0)
-      .addMBB(LoopMBB);
-}
-
-bool RISCVExpandPseudo::expandAtomicBinOp(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
-    MachineBasicBlock::iterator &NextMBBI) {
-  MachineInstr &MI = *MBBI;
-  DebugLoc DL = MI.getDebugLoc();
-
-  MachineFunction *MF = MBB.getParent();
-  auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
-  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
-
-  // Insert new MBBs.
-  MF->insert(++MBB.getIterator(), LoopMBB);
-  MF->insert(++LoopMBB->getIterator(), DoneMBB);
-
-  // Set up successors and transfer remaining instructions to DoneMBB.
-  LoopMBB->addSuccessor(LoopMBB);
-  LoopMBB->addSuccessor(DoneMBB);
-  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
-  DoneMBB->transferSuccessors(&MBB);
-  MBB.addSuccessor(LoopMBB);
-
-  if (!IsMasked)
-    doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width);
-  else
-    doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
-                                 Width);
-
-  NextMBBI = MBB.end();
-  MI.eraseFromParent();
-
-  LivePhysRegs LiveRegs;
-  computeAndAddLiveIns(LiveRegs, *LoopMBB);
-  computeAndAddLiveIns(LiveRegs, *DoneMBB);
-
-  return true;
-}
-
-static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
-                       MachineBasicBlock *MBB, Register ValReg,
-                       Register ShamtReg) {
-  BuildMI(MBB, DL, TII->get(RISCV::SLL), ValReg)
-      .addReg(ValReg)
-      .addReg(ShamtReg);
-  BuildMI(MBB, DL, TII->get(RISCV::SRA), ValReg)
-      .addReg(ValReg)
-      .addReg(ShamtReg);
-}
-
-bool RISCVExpandPseudo::expandAtomicMinMaxOp(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
-    MachineBasicBlock::iterator &NextMBBI) {
-  assert(IsMasked == true &&
-         "Should only need to expand masked atomic max/min");
-  assert(Width == 32 && "Should never need to expand masked 64-bit operations");
-
-  MachineInstr &MI = *MBBI;
-  DebugLoc DL = MI.getDebugLoc();
-  MachineFunction *MF = MBB.getParent();
-  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
-  auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
-  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
-  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
-
-  // Insert new MBBs.
-  MF->insert(++MBB.getIterator(), LoopHeadMBB);
-  MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
-  MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
-  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
-
-  // Set up successors and transfer remaining instructions to DoneMBB.
-  LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
-  LoopHeadMBB->addSuccessor(LoopTailMBB);
-  LoopIfBodyMBB->addSuccessor(LoopTailMBB);
-  LoopTailMBB->addSuccessor(LoopHeadMBB);
-  LoopTailMBB->addSuccessor(DoneMBB);
-  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
-  DoneMBB->transferSuccessors(&MBB);
-  MBB.addSuccessor(LoopHeadMBB);
-
-  Register DestReg = MI.getOperand(0).getReg();
-  Register Scratch1Reg = MI.getOperand(1).getReg();
-  Register Scratch2Reg = MI.getOperand(2).getReg();
-  Register AddrReg = MI.getOperand(3).getReg();
-  Register IncrReg = MI.getOperand(4).getReg();
-  Register MaskReg = MI.getOperand(5).getReg();
-  bool IsSigned = BinOp == AtomicRMWInst::Min || BinOp == AtomicRMWInst::Max;
-  AtomicOrdering Ordering =
-      static_cast<AtomicOrdering>(MI.getOperand(IsSigned ? 7 : 6).getImm());
-
-  //
-  // .loophead:
-  //   lr.w destreg, (alignedaddr)
-  //   and scratch2, destreg, mask
-  //   mv scratch1, destreg
-  //   [sext scratch2 if signed min/max]
-  //   ifnochangeneeded scratch2, incr, .looptail
-  BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
-      .addReg(AddrReg);
-  BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), Scratch2Reg)
-      .addReg(DestReg)
-      .addReg(MaskReg);
-  BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), Scratch1Reg)
-      .addReg(DestReg)
-      .addImm(0);
-
-  switch (BinOp) {
-  default:
-    llvm_unreachable("Unexpected AtomicRMW BinOp");
-  case AtomicRMWInst::Max: {
-    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
-    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
-        .addReg(Scratch2Reg)
-        .addReg(IncrReg)
-        .addMBB(LoopTailMBB);
-    break;
-  }
-  case AtomicRMWInst::Min: {
-    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
-    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
-        .addReg(IncrReg)
-        .addReg(Scratch2Reg)
-        .addMBB(LoopTailMBB);
-    break;
-  }
-  case AtomicRMWInst::UMax:
-    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
-        .addReg(Scratch2Reg)
-        .addReg(IncrReg)
-        .addMBB(LoopTailMBB);
-    break;
-  case AtomicRMWInst::UMin:
-    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
-        .addReg(IncrReg)
-        .addReg(Scratch2Reg)
-        .addMBB(LoopTailMBB);
-    break;
-  }
-
-  // .loopifbody:
-  //   xor scratch1, destreg, incr
-  //   and scratch1, scratch1, mask
-  //   xor scratch1, destreg, scratch1
-  insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
-                    MaskReg, Scratch1Reg);
-
-  // .looptail:
-  //   sc.w scratch1, scratch1, (addr)
-  //   bnez scratch1, loop
-  BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), Scratch1Reg)
-      .addReg(AddrReg)
-      .addReg(Scratch1Reg);
-  BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
-      .addReg(Scratch1Reg)
-      .addReg(RISCV::X0)
-      .addMBB(LoopHeadMBB);
-
-  NextMBBI = MBB.end();
-  MI.eraseFromParent();
-
-  LivePhysRegs LiveRegs;
-  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
-  computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
-  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
-  computeAndAddLiveIns(LiveRegs, *DoneMBB);
-
-  return true;
-}
-
-bool RISCVExpandPseudo::expandAtomicCmpXchg(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
-    int Width, MachineBasicBlock::iterator &NextMBBI) {
-  MachineInstr &MI = *MBBI;
-  DebugLoc DL = MI.getDebugLoc();
-  MachineFunction *MF = MBB.getParent();
-  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
-  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
-  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
-
-  // Insert new MBBs.
-  MF->insert(++MBB.getIterator(), LoopHeadMBB);
-  MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
-  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
-
-  // Set up successors and transfer remaining instructions to DoneMBB.
-  LoopHeadMBB->addSuccessor(LoopTailMBB);
-  LoopHeadMBB->addSuccessor(DoneMBB);
-  LoopTailMBB->addSuccessor(DoneMBB);
-  LoopTailMBB->addSuccessor(LoopHeadMBB);
-  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
-  DoneMBB->transferSuccessors(&MBB);
-  MBB.addSuccessor(LoopHeadMBB);
-
-  Register DestReg = MI.getOperand(0).getReg();
-  Register ScratchReg = MI.getOperand(1).getReg();
-  Register AddrReg = MI.getOperand(2).getReg();
-  Register CmpValReg = MI.getOperand(3).getReg();
-  Register NewValReg = MI.getOperand(4).getReg();
-  AtomicOrdering Ordering =
-      static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());
-
-  if (!IsMasked) {
-    // .loophead:
-    //   lr.[w|d] dest, (addr)
-    //   bne dest, cmpval, done
-    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
-        .addReg(AddrReg);
-    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
-        .addReg(DestReg)
-        .addReg(CmpValReg)
-        .addMBB(DoneMBB);
-    // .looptail:
-    //   sc.[w|d] scratch, newval, (addr)
-    //   bnez scratch, loophead
-    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
-        .addReg(AddrReg)
-        .addReg(NewValReg);
-    BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
-        .addReg(ScratchReg)
-        .addReg(RISCV::X0)
-        .addMBB(LoopHeadMBB);
-  } else {
-    // .loophead:
-    //   lr.w dest, (addr)
-    //   and scratch, dest, mask
-    //   bne scratch, cmpval, done
-    Register MaskReg = MI.getOperand(5).getReg();
-    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
-        .addReg(AddrReg);
-    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg)
-        .addReg(DestReg)
-        .addReg(MaskReg);
-    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
-        .addReg(ScratchReg)
-        .addReg(CmpValReg)
-        .addMBB(DoneMBB);
-
-    // .looptail:
-    //   xor scratch, dest, newval
-    //   and scratch, scratch, mask
-    //   xor scratch, dest, scratch
-    //   sc.w scratch, scratch, (adrr)
-    //   bnez scratch, loophead
-    insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg,
-                      MaskReg, ScratchReg);
-    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
-        .addReg(AddrReg)
-        .addReg(ScratchReg);
-    BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
-        .addReg(ScratchReg)
-        .addReg(RISCV::X0)
-        .addMBB(LoopHeadMBB);
-  }
-
-  NextMBBI = MBB.end();
-  MI.eraseFromParent();
-
-  LivePhysRegs LiveRegs;
-  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
-  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
-  computeAndAddLiveIns(LiveRegs, *DoneMBB);
-
-  return true;
-}
-
 bool RISCVExpandPseudo::expandAuipcInstPair(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
     MachineBasicBlock::iterator &NextMBBI, unsigned FlagsHi,
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -128,6 +128,7 @@ public:
   bool addGlobalInstructionSelect() override;
   void addPreEmitPass() override;
   void addPreEmitPass2() override;
+  void addPreSched2() override;
   void addPreRegAlloc() override;
 };
 }
@@ -167,13 +168,15 @@ bool RISCVPassConfig::addGlobalInstructionSelect() {
   return false;
 }
 
+void RISCVPassConfig::addPreSched2() { addPass(createRISCVExpandPseudoPass()); }
+
 void RISCVPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); }
 
 void RISCVPassConfig::addPreEmitPass2() {
   // Schedule the expansion of AMOs at the last possible moment, avoiding the
   // possibility for other passes to break the requirements for forward
   // progress in the LR/SC block.
-  addPass(createRISCVExpandPseudoPass());
+  addPass(createRISCVExpandAtomicPseudoPass());
 }
 
 void RISCVPassConfig::addPreRegAlloc() {
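One note before the test update below: moving the non-atomic expansion earlier does not change what the atomic expansion must guarantee. As a reference point, the LR/SC loop emitted for PseudoAtomicLoadNand32 implements the same contract as this portable std::atomic sketch (an illustration only, not the code the backend emits):

    #include <atomic>
    #include <cstdint>

    // atomicrmw nand: atomically store ~(old & incr) and return the old value.
    // compare_exchange_weak plays the role of the SC that may fail, sending
    // execution back around the loop just like "bnez scratch, loop".
    uint32_t atomicLoadNand32(std::atomic<uint32_t> &Addr, uint32_t Incr) {
      uint32_t Old = Addr.load(std::memory_order_relaxed);
      while (!Addr.compare_exchange_weak(Old, ~(Old & Incr),
                                         std::memory_order_seq_cst,
                                         std::memory_order_relaxed)) {
        // Old has been reloaded with the current value; retry.
      }
      return Old;
    }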
@@ -82,23 +82,23 @@ define signext i32 @lower_blockaddress_displ(i32 signext %w) nounwind {
 ; RV32I-MEDIUM:       # %bb.0: # %entry
 ; RV32I-MEDIUM-NEXT:    addi sp, sp, -16
 ; RV32I-MEDIUM-NEXT:    sw ra, 12(sp)
-; RV32I-MEDIUM-NEXT:  .LBB2_5: # %entry
+; RV32I-MEDIUM-NEXT:  .LBB2_1: # %entry
 ; RV32I-MEDIUM-NEXT:    # Label of block must be emitted
 ; RV32I-MEDIUM-NEXT:    auipc a1, %pcrel_hi(.Ltmp0)
-; RV32I-MEDIUM-NEXT:    addi a1, a1, %pcrel_lo(.LBB2_5)
+; RV32I-MEDIUM-NEXT:    addi a1, a1, %pcrel_lo(.LBB2_1)
 ; RV32I-MEDIUM-NEXT:    addi a2, zero, 101
 ; RV32I-MEDIUM-NEXT:    sw a1, 8(sp)
-; RV32I-MEDIUM-NEXT:    blt a0, a2, .LBB2_3
-; RV32I-MEDIUM-NEXT:  # %bb.1: # %if.then
+; RV32I-MEDIUM-NEXT:    blt a0, a2, .LBB2_4
+; RV32I-MEDIUM-NEXT:  # %bb.2: # %if.then
 ; RV32I-MEDIUM-NEXT:    lw a0, 8(sp)
 ; RV32I-MEDIUM-NEXT:    jr a0
 ; RV32I-MEDIUM-NEXT:  .Ltmp0: # Block address taken
-; RV32I-MEDIUM-NEXT:  .LBB2_2: # %return
+; RV32I-MEDIUM-NEXT:  .LBB2_3: # %return
 ; RV32I-MEDIUM-NEXT:    addi a0, zero, 4
-; RV32I-MEDIUM-NEXT:    j .LBB2_4
-; RV32I-MEDIUM-NEXT:  .LBB2_3: # %return.clone
+; RV32I-MEDIUM-NEXT:    j .LBB2_5
+; RV32I-MEDIUM-NEXT:  .LBB2_4: # %return.clone
 ; RV32I-MEDIUM-NEXT:    addi a0, zero, 3
-; RV32I-MEDIUM-NEXT:  .LBB2_4: # %.split
+; RV32I-MEDIUM-NEXT:  .LBB2_5: # %.split
 ; RV32I-MEDIUM-NEXT:    lw ra, 12(sp)
 ; RV32I-MEDIUM-NEXT:    addi sp, sp, 16
 ; RV32I-MEDIUM-NEXT:    ret