forked from OSchip/llvm-project
[PowerPC] Generate inlined quadword lock free atomic operations via AtomicExpand
This patch uses AtomicExpandPass to implement quadword lock-free atomic operations. It adopts the method introduced in https://reviews.llvm.org/D47882, which expands atomic operations post-RA to avoid spilling that might prevent LL/SC forward progress. Reviewed By: jsji Differential Revision: https://reviews.llvm.org/D103614
This commit is contained in:
parent
de79ba9f9a
commit
b9c3941cd6
|
@ -1600,3 +1600,23 @@ let TargetPrefix = "ppc" in {
|
|||
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PowerPC Atomic Intrinsic Definitions.
|
||||
// Intrinsics used for 128-bit atomic operations. Each one works on 64-bit
// halves: it returns two i64 values and takes a pointer followed by i64
// operands.
let TargetPrefix = "ppc" in {
|
||||
// Shared signature of the 128-bit atomic read-modify-write intrinsics:
// (ptr, i64, i64) -> {i64, i64}. The intrinsic only accesses memory through
// its arguments (IntrArgMemOnly) and does not capture the pointer argument.
class AtomicRMW128Intrinsic
|
||||
: Intrinsic<[llvm_i64_ty, llvm_i64_ty],
|
||||
[llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty],
|
||||
[IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
|
||||
// One intrinsic per supported read-modify-write operation.
def int_ppc_atomicrmw_xchg_i128 : AtomicRMW128Intrinsic;
|
||||
def int_ppc_atomicrmw_add_i128 : AtomicRMW128Intrinsic;
|
||||
def int_ppc_atomicrmw_sub_i128 : AtomicRMW128Intrinsic;
|
||||
def int_ppc_atomicrmw_and_i128 : AtomicRMW128Intrinsic;
|
||||
def int_ppc_atomicrmw_or_i128 : AtomicRMW128Intrinsic;
|
||||
def int_ppc_atomicrmw_xor_i128 : AtomicRMW128Intrinsic;
|
||||
def int_ppc_atomicrmw_nand_i128 : AtomicRMW128Intrinsic;
|
||||
// 128-bit compare-and-exchange: takes a pointer and four i64 operands and
// returns two i64 values. Same memory and capture attributes as the
// read-modify-write intrinsics above.
def int_ppc_cmpxchg_i128 : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
|
||||
[llvm_ptr_ty,
|
||||
llvm_i64_ty, llvm_i64_ty,
|
||||
llvm_i64_ty, llvm_i64_ty],
|
||||
[IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
|
||||
}
|
||||
|
|
|
@ -689,6 +689,8 @@ static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
|
|||
// Fast path: the value already has the word type, so the address is used
// as-is, no shift is required and the mask selects every bit.
if (PMV.ValueType == PMV.WordType) {
  PMV.AlignedAddr = Addr;
  PMV.AlignedAddrAlignment = AddrAlign;
  PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
  // Ask for sign extension so the all-ones mask also covers types wider than
  // 64 bits (e.g. i128). Without it the 64-bit argument is zero-extended and
  // the upper bits of the mask stay clear.
  PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned=*/true);
  return PMV;
}
|
||||
|
||||
|
|
|
@ -27,6 +27,7 @@ add_llvm_target(PowerPCCodeGen
|
|||
PPCCallingConv.cpp
|
||||
PPCCCState.cpp
|
||||
PPCCTRLoops.cpp
|
||||
PPCExpandAtomicPseudoInsts.cpp
|
||||
PPCHazardRecognizers.cpp
|
||||
PPCInstrInfo.cpp
|
||||
PPCISelDAGToDAG.cpp
|
||||
|
|
|
@ -52,6 +52,7 @@ FunctionPass *createPPCCTRLoops();
|
|||
FunctionPass *createPPCBoolRetToIntPass();
|
||||
FunctionPass *createPPCExpandISELPass();
|
||||
FunctionPass *createPPCPreEmitPeepholePass();
|
||||
FunctionPass *createPPCExpandAtomicPseudoPass();
|
||||
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
|
||||
AsmPrinter &AP);
|
||||
bool LowerPPCMachineOperandToMCOperand(const MachineOperand &MO,
|
||||
|
@ -75,6 +76,7 @@ FunctionPass *createPPCCTRLoops();
|
|||
void initializePPCPreEmitPeepholePass(PassRegistry &);
|
||||
void initializePPCTLSDynamicCallPass(PassRegistry &);
|
||||
void initializePPCMIPeepholePass(PassRegistry&);
|
||||
void initializePPCExpandAtomicPseudoPass(PassRegistry &);
|
||||
|
||||
extern char &PPCVSXFMAMutateID;
|
||||
|
||||
|
|
|
@ -161,6 +161,9 @@ def FeatureDirectMove :
|
|||
def FeaturePartwordAtomic : SubtargetFeature<"partword-atomics",
|
||||
"HasPartwordAtomics", "true",
|
||||
"Enable l[bh]arx and st[bh]cx.">;
|
||||
def FeatureQuadwordAtomic : SubtargetFeature<"quadword-atomics",
|
||||
"HasQuadwordAtomics", "true",
|
||||
"Enable lqarx and stqcx.">;
|
||||
def FeatureInvariantFunctionDescriptors :
|
||||
SubtargetFeature<"invariant-function-descriptors",
|
||||
"HasInvariantFunctionDescriptors", "true",
|
||||
|
@ -331,6 +334,7 @@ def ProcessorFeatures {
|
|||
FeatureDirectMove,
|
||||
FeatureICBT,
|
||||
FeaturePartwordAtomic,
|
||||
FeatureQuadwordAtomic,
|
||||
FeaturePredictableSelectIsExpensive
|
||||
];
|
||||
|
||||
|
|
|
@ -0,0 +1,306 @@
|
|||
//===-- PPCExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. -----===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains a pass that expands atomic pseudo instructions into
|
||||
// target instructions post RA. This way the LL/SC loop is treated as a single
|
||||
// unit, which makes spilling inside the LL/SC loop unlikely.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "MCTargetDesc/PPCPredicates.h"
|
||||
#include "PPC.h"
|
||||
#include "PPCInstrInfo.h"
|
||||
#include "PPCTargetMachine.h"
|
||||
|
||||
#include "llvm/CodeGen/LivePhysRegs.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "ppc-atomic-expand"
|
||||
|
||||
namespace {
|
||||
|
||||
class PPCExpandAtomicPseudo : public MachineFunctionPass {
|
||||
public:
|
||||
const PPCInstrInfo *TII;
|
||||
const PPCRegisterInfo *TRI;
|
||||
static char ID;
|
||||
|
||||
PPCExpandAtomicPseudo() : MachineFunctionPass(ID) {
|
||||
initializePPCExpandAtomicPseudoPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||
|
||||
private:
|
||||
bool expandMI(MachineBasicBlock &MBB, MachineInstr &MI,
|
||||
MachineBasicBlock::iterator &NMBBI);
|
||||
bool expandAtomicRMW128(MachineBasicBlock &MBB, MachineInstr &MI,
|
||||
MachineBasicBlock::iterator &NMBBI);
|
||||
bool expandAtomicCmpSwap128(MachineBasicBlock &MBB, MachineInstr &MI,
|
||||
MachineBasicBlock::iterator &NMBBI);
|
||||
};
|
||||
|
||||
static void PairedCopy(const PPCInstrInfo *TII, MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
|
||||
Register Dest0, Register Dest1, Register Src0,
|
||||
Register Src1) {
|
||||
const MCInstrDesc &OR = TII->get(PPC::OR8);
|
||||
const MCInstrDesc &XOR = TII->get(PPC::XOR8);
|
||||
if (Dest0 == Src1 && Dest1 == Src0) {
|
||||
// The most tricky case, swapping values.
|
||||
BuildMI(MBB, MBBI, DL, XOR, Dest0).addReg(Dest0).addReg(Dest1);
|
||||
BuildMI(MBB, MBBI, DL, XOR, Dest1).addReg(Dest0).addReg(Dest1);
|
||||
BuildMI(MBB, MBBI, DL, XOR, Dest0).addReg(Dest0).addReg(Dest1);
|
||||
} else if (Dest0 != Src0 || Dest1 != Src1) {
|
||||
if (Dest0 == Src1 || Dest1 != Src0) {
|
||||
BuildMI(MBB, MBBI, DL, OR, Dest1).addReg(Src1).addReg(Src1);
|
||||
BuildMI(MBB, MBBI, DL, OR, Dest0).addReg(Src0).addReg(Src0);
|
||||
} else {
|
||||
BuildMI(MBB, MBBI, DL, OR, Dest0).addReg(Src0).addReg(Src0);
|
||||
BuildMI(MBB, MBBI, DL, OR, Dest1).addReg(Src1).addReg(Src1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Visit every instruction of \p MF and hand it to expandMI. Blocks are
/// renumbered if anything was expanded. Returns true if the function changed.
bool PPCExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
  TRI = &TII->getRegisterInfo();

  bool AnyExpanded = false;
  for (MachineBasicBlock &Block : MF) {
    MachineBasicBlock::iterator Cur = Block.begin();
    const MachineBasicBlock::iterator Last = Block.end();
    while (Cur != Last) {
      // Compute the successor up front; expandMI may overwrite it when it
      // rewrites the block.
      MachineBasicBlock::iterator Next = std::next(Cur);
      if (expandMI(Block, *Cur, Next))
        AnyExpanded = true;
      Cur = Next;
    }
  }

  if (!AnyExpanded)
    return false;
  MF.RenumberBlocks();
  return true;
}
|
||||
|
||||
/// Route \p MI to the expansion routine for its opcode. Instructions that are
/// not 128-bit atomic pseudos are left alone and false is returned.
bool PPCExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB, MachineInstr &MI,
                                     MachineBasicBlock::iterator &NMBBI) {
  const unsigned Opc = MI.getOpcode();

  if (Opc == PPC::ATOMIC_CMP_SWAP_I128)
    return expandAtomicCmpSwap128(MBB, MI, NMBBI);

  const bool IsRMW128 =
      Opc == PPC::ATOMIC_SWAP_I128 || Opc == PPC::ATOMIC_LOAD_ADD_I128 ||
      Opc == PPC::ATOMIC_LOAD_SUB_I128 || Opc == PPC::ATOMIC_LOAD_XOR_I128 ||
      Opc == PPC::ATOMIC_LOAD_NAND_I128 || Opc == PPC::ATOMIC_LOAD_AND_I128 ||
      Opc == PPC::ATOMIC_LOAD_OR_I128;
  if (!IsRMW128)
    return false;
  return expandAtomicRMW128(MBB, MI, NMBBI);
}
|
||||
|
||||
/// Expand a 128-bit atomic read-modify-write pseudo into an LQARX/STQCX loop.
///
/// Operands of \p MI as read below: 0 is the register pair that receives the
/// loaded value, 1 is the scratch register pair holding the value to store,
/// 2 and 3 are the two address registers, 4 and 5 are the low and high halves
/// of the operand value.
///
/// \p MBB is split after \p MI: the trailing instructions move into a new exit
/// block and a new loop block is placed between \p MBB and the exit block.
/// \p NMBBI is set to MBB.end() because nothing is left in \p MBB after
/// \p MI. Always returns true.
bool PPCExpandAtomicPseudo::expandAtomicRMW128(
|
||||
MachineBasicBlock &MBB, MachineInstr &MI,
|
||||
MachineBasicBlock::iterator &NMBBI) {
|
||||
const MCInstrDesc &LL = TII->get(PPC::LQARX);
|
||||
const MCInstrDesc &SC = TII->get(PPC::STQCX);
|
||||
DebugLoc DL = MI.getDebugLoc();
|
||||
MachineFunction *MF = MBB.getParent();
|
||||
const BasicBlock *BB = MBB.getBasicBlock();
|
||||
// Create layout of control flow.
|
||||
MachineFunction::iterator MFI = ++MBB.getIterator();
|
||||
MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(BB);
|
||||
MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB);
|
||||
MF->insert(MFI, LoopMBB);
|
||||
MF->insert(MFI, ExitMBB);
|
||||
// Everything that followed MI now lives in ExitMBB, which also inherits the
// successors of MBB.
ExitMBB->splice(ExitMBB->begin(), &MBB, std::next(MI.getIterator()),
|
||||
MBB.end());
|
||||
ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
|
||||
MBB.addSuccessor(LoopMBB);
|
||||
|
||||
// The generated control flow looks like this (shown for an add):
|
||||
// MBB:
|
||||
// ...
|
||||
// LoopMBB:
|
||||
// lqarx in, ptr
|
||||
// addc out.sub_x1, in.sub_x1, op.sub_x1
|
||||
// adde out.sub_x0, in.sub_x0, op.sub_x0
|
||||
// stqcx out, ptr
|
||||
// bne- LoopMBB
|
||||
// ExitMBB:
|
||||
// ...
|
||||
Register Old = MI.getOperand(0).getReg();
|
||||
Register OldHi = TRI->getSubReg(Old, PPC::sub_gp8_x0);
|
||||
Register OldLo = TRI->getSubReg(Old, PPC::sub_gp8_x1);
|
||||
Register Scratch = MI.getOperand(1).getReg();
|
||||
Register ScratchHi = TRI->getSubReg(Scratch, PPC::sub_gp8_x0);
|
||||
Register ScratchLo = TRI->getSubReg(Scratch, PPC::sub_gp8_x1);
|
||||
Register RA = MI.getOperand(2).getReg();
|
||||
Register RB = MI.getOperand(3).getReg();
|
||||
Register IncrLo = MI.getOperand(4).getReg();
|
||||
Register IncrHi = MI.getOperand(5).getReg();
|
||||
unsigned RMWOpcode = MI.getOpcode();
|
||||
|
||||
MachineBasicBlock *CurrentMBB = LoopMBB;
|
||||
// Load the current memory value into Old.
BuildMI(CurrentMBB, DL, LL, Old).addReg(RA).addReg(RB);
|
||||
|
||||
// Compute the value to store into Scratch, one 64-bit half at a time.
switch (RMWOpcode) {
|
||||
case PPC::ATOMIC_SWAP_I128:
|
||||
PairedCopy(TII, *CurrentMBB, CurrentMBB->end(), DL, ScratchHi, ScratchLo,
|
||||
IncrHi, IncrLo);
|
||||
break;
|
||||
case PPC::ATOMIC_LOAD_ADD_I128:
|
||||
// Low half first (ADDC8), then the high half (ADDE8).
BuildMI(CurrentMBB, DL, TII->get(PPC::ADDC8), ScratchLo)
|
||||
.addReg(IncrLo)
|
||||
.addReg(OldLo);
|
||||
BuildMI(CurrentMBB, DL, TII->get(PPC::ADDE8), ScratchHi)
|
||||
.addReg(IncrHi)
|
||||
.addReg(OldHi);
|
||||
break;
|
||||
case PPC::ATOMIC_LOAD_SUB_I128:
|
||||
// Low half first (SUBFC8), then the high half (SUBFE8).
BuildMI(CurrentMBB, DL, TII->get(PPC::SUBFC8), ScratchLo)
|
||||
.addReg(IncrLo)
|
||||
.addReg(OldLo);
|
||||
BuildMI(CurrentMBB, DL, TII->get(PPC::SUBFE8), ScratchHi)
|
||||
.addReg(IncrHi)
|
||||
.addReg(OldHi);
|
||||
break;
|
||||
|
||||
// Bitwise operations apply the same instruction independently to each half.
#define TRIVIAL_ATOMICRMW(Opcode, Instr) \
|
||||
case Opcode: \
|
||||
BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchLo) \
|
||||
.addReg(IncrLo) \
|
||||
.addReg(OldLo); \
|
||||
BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchHi) \
|
||||
.addReg(IncrHi) \
|
||||
.addReg(OldHi); \
|
||||
break
|
||||
|
||||
TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_OR_I128, PPC::OR8);
|
||||
TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_XOR_I128, PPC::XOR8);
|
||||
TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_AND_I128, PPC::AND8);
|
||||
TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_NAND_I128, PPC::NAND8);
|
||||
#undef TRIVIAL_ATOMICRMW
|
||||
default:
|
||||
llvm_unreachable("Unhandled atomic RMW operation");
|
||||
}
|
||||
// Try to store Scratch; branch back to the loop head when CR0 reports
// "not equal" after the store-conditional.
BuildMI(CurrentMBB, DL, SC).addReg(Scratch).addReg(RA).addReg(RB);
|
||||
BuildMI(CurrentMBB, DL, TII->get(PPC::BCC))
|
||||
.addImm(PPC::PRED_NE)
|
||||
.addReg(PPC::CR0)
|
||||
.addMBB(LoopMBB);
|
||||
CurrentMBB->addSuccessor(LoopMBB);
|
||||
CurrentMBB->addSuccessor(ExitMBB);
|
||||
recomputeLiveIns(*LoopMBB);
|
||||
recomputeLiveIns(*ExitMBB);
|
||||
// MI is the last instruction left in MBB, so the caller has nothing more to
// visit in this block.
NMBBI = MBB.end();
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Expand the 128-bit atomic compare-and-swap pseudo into an LQARX/STQCX
/// sequence.
///
/// Operands of \p MI as read below: 0 is the register pair that receives the
/// loaded value, 1 is a scratch register pair, 2 and 3 are the two address
/// registers, 4 and 5 are the low and high halves of the expected value, 6 and
/// 7 are the low and high halves of the new value.
///
/// \p MBB is split after \p MI and four new blocks (loop/compare, success,
/// failure, exit) are inserted after it. \p NMBBI is set to MBB.end() because
/// nothing is left in \p MBB after \p MI. Always returns true.
bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128(
|
||||
MachineBasicBlock &MBB, MachineInstr &MI,
|
||||
MachineBasicBlock::iterator &NMBBI) {
|
||||
const MCInstrDesc &LL = TII->get(PPC::LQARX);
|
||||
const MCInstrDesc &SC = TII->get(PPC::STQCX);
|
||||
DebugLoc DL = MI.getDebugLoc();
|
||||
MachineFunction *MF = MBB.getParent();
|
||||
const BasicBlock *BB = MBB.getBasicBlock();
|
||||
Register Old = MI.getOperand(0).getReg();
|
||||
Register OldHi = TRI->getSubReg(Old, PPC::sub_gp8_x0);
|
||||
Register OldLo = TRI->getSubReg(Old, PPC::sub_gp8_x1);
|
||||
Register Scratch = MI.getOperand(1).getReg();
|
||||
Register ScratchHi = TRI->getSubReg(Scratch, PPC::sub_gp8_x0);
|
||||
Register ScratchLo = TRI->getSubReg(Scratch, PPC::sub_gp8_x1);
|
||||
Register RA = MI.getOperand(2).getReg();
|
||||
Register RB = MI.getOperand(3).getReg();
|
||||
Register CmpLo = MI.getOperand(4).getReg();
|
||||
Register CmpHi = MI.getOperand(5).getReg();
|
||||
Register NewLo = MI.getOperand(6).getReg();
|
||||
Register NewHi = MI.getOperand(7).getReg();
|
||||
// Create layout of control flow.
|
||||
// loop:
|
||||
// old = lqarx ptr
|
||||
// <compare old, cmp>
|
||||
// bne 0, fail
|
||||
// succ:
|
||||
// stqcx new ptr
|
||||
// bne 0, loop
|
||||
// b exit
|
||||
// fail:
|
||||
// stqcx old ptr
|
||||
// exit:
|
||||
// ....
|
||||
MachineFunction::iterator MFI = ++MBB.getIterator();
|
||||
MachineBasicBlock *LoopCmpMBB = MF->CreateMachineBasicBlock(BB);
|
||||
MachineBasicBlock *CmpSuccMBB = MF->CreateMachineBasicBlock(BB);
|
||||
MachineBasicBlock *CmpFailMBB = MF->CreateMachineBasicBlock(BB);
|
||||
MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB);
|
||||
MF->insert(MFI, LoopCmpMBB);
|
||||
MF->insert(MFI, CmpSuccMBB);
|
||||
MF->insert(MFI, CmpFailMBB);
|
||||
MF->insert(MFI, ExitMBB);
|
||||
// Everything that followed MI now lives in ExitMBB, which also inherits the
// successors of MBB.
ExitMBB->splice(ExitMBB->begin(), &MBB, std::next(MI.getIterator()),
|
||||
MBB.end());
|
||||
ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
|
||||
MBB.addSuccessor(LoopCmpMBB);
|
||||
// Build loop.
|
||||
MachineBasicBlock *CurrentMBB = LoopCmpMBB;
|
||||
BuildMI(CurrentMBB, DL, LL, Old).addReg(RA).addReg(RB);
|
||||
// Compare the loaded value with the expected one: XOR each half into the
// scratch registers, then OR the two results together. The branch below
// tests CR0 and goes to the failure block on "not equal".
BuildMI(CurrentMBB, DL, TII->get(PPC::XOR8), ScratchLo)
|
||||
.addReg(OldLo)
|
||||
.addReg(CmpLo);
|
||||
BuildMI(CurrentMBB, DL, TII->get(PPC::XOR8), ScratchHi)
|
||||
.addReg(OldHi)
|
||||
.addReg(CmpHi);
|
||||
BuildMI(CurrentMBB, DL, TII->get(PPC::OR8_rec), ScratchLo)
|
||||
.addReg(ScratchLo)
|
||||
.addReg(ScratchHi);
|
||||
BuildMI(CurrentMBB, DL, TII->get(PPC::BCC))
|
||||
.addImm(PPC::PRED_NE)
|
||||
.addReg(PPC::CR0)
|
||||
.addMBB(CmpFailMBB);
|
||||
CurrentMBB->addSuccessor(CmpSuccMBB);
|
||||
CurrentMBB->addSuccessor(CmpFailMBB);
|
||||
// Build succ.
|
||||
CurrentMBB = CmpSuccMBB;
|
||||
// Move the new value into the scratch pair and try to store it; retry from
// the loop head when the store-conditional leaves CR0 "not equal".
PairedCopy(TII, *CurrentMBB, CurrentMBB->end(), DL, ScratchHi, ScratchLo,
|
||||
NewHi, NewLo);
|
||||
BuildMI(CurrentMBB, DL, SC).addReg(Scratch).addReg(RA).addReg(RB);
|
||||
BuildMI(CurrentMBB, DL, TII->get(PPC::BCC))
|
||||
.addImm(PPC::PRED_NE)
|
||||
.addReg(PPC::CR0)
|
||||
.addMBB(LoopCmpMBB);
|
||||
BuildMI(CurrentMBB, DL, TII->get(PPC::B)).addMBB(ExitMBB);
|
||||
CurrentMBB->addSuccessor(LoopCmpMBB);
|
||||
CurrentMBB->addSuccessor(ExitMBB);
|
||||
// Build fail: store the value that was just loaded back to memory, then fall
// through to the exit block.
// NOTE(review): presumably this completes the LL/SC pair on the failure
// path - confirm.
CurrentMBB = CmpFailMBB;
|
||||
BuildMI(CurrentMBB, DL, SC).addReg(Old).addReg(RA).addReg(RB);
|
||||
CurrentMBB->addSuccessor(ExitMBB);
|
||||
|
||||
recomputeLiveIns(*LoopCmpMBB);
|
||||
recomputeLiveIns(*CmpSuccMBB);
|
||||
recomputeLiveIns(*CmpFailMBB);
|
||||
recomputeLiveIns(*ExitMBB);
|
||||
// MI is the last instruction left in MBB, so the caller has nothing more to
// visit in this block.
NMBBI = MBB.end();
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
INITIALIZE_PASS(PPCExpandAtomicPseudo, DEBUG_TYPE, "PowerPC Expand Atomic",
|
||||
false, false)
|
||||
|
||||
char PPCExpandAtomicPseudo::ID = 0;
|
||||
/// Factory for the atomic pseudo expansion pass. The returned pass is heap
/// allocated.
FunctionPass *llvm::createPPCExpandAtomicPseudoPass() {
  FunctionPass *ExpandPass = new PPCExpandAtomicPseudo();
  return ExpandPass;
}
|
|
@ -121,6 +121,11 @@ cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
|
|||
static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
|
||||
cl::desc("use absolute jump tables on ppc"), cl::Hidden);
|
||||
|
||||
// Hidden command-line switch, off by default. The lowering code in this file
// only uses quadword lock-free atomics when this flag is set and the subtarget
// reports quadword atomics support.
static cl::opt<bool> EnableQuadwordAtomics(
|
||||
"ppc-quadword-atomics",
|
||||
cl::desc("enable quadword lock-free atomic operations"), cl::init(false),
|
||||
cl::Hidden);
|
||||
|
||||
STATISTIC(NumTailCalls, "Number of tail calls");
|
||||
STATISTIC(NumSiblingCalls, "Number of sibling calls");
|
||||
STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
|
||||
|
@ -1281,6 +1286,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
|
|||
setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
|
||||
}
|
||||
|
||||
if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics())
|
||||
setMaxAtomicSizeInBitsSupported(128);
|
||||
|
||||
setBooleanContents(ZeroOrOneBooleanContent);
|
||||
|
||||
if (Subtarget.hasAltivec()) {
|
||||
|
@ -12628,6 +12636,17 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
|
|||
} else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
|
||||
MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
|
||||
return emitProbedAlloca(MI, BB);
|
||||
} else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
|
||||
DebugLoc DL = MI.getDebugLoc();
|
||||
Register Src = MI.getOperand(2).getReg();
|
||||
Register Lo = MI.getOperand(0).getReg();
|
||||
Register Hi = MI.getOperand(1).getReg();
|
||||
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
|
||||
.addDef(Lo)
|
||||
.addUse(Src, 0, PPC::sub_gp8_x1);
|
||||
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
|
||||
.addDef(Hi)
|
||||
.addUse(Src, 0, PPC::sub_gp8_x0);
|
||||
} else {
|
||||
llvm_unreachable("Unexpected instr type to insert");
|
||||
}
|
||||
|
@ -16042,6 +16061,22 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
|||
MachineFunction &MF,
|
||||
unsigned Intrinsic) const {
|
||||
switch (Intrinsic) {
|
||||
case Intrinsic::ppc_atomicrmw_xchg_i128:
|
||||
case Intrinsic::ppc_atomicrmw_add_i128:
|
||||
case Intrinsic::ppc_atomicrmw_sub_i128:
|
||||
case Intrinsic::ppc_atomicrmw_nand_i128:
|
||||
case Intrinsic::ppc_atomicrmw_and_i128:
|
||||
case Intrinsic::ppc_atomicrmw_or_i128:
|
||||
case Intrinsic::ppc_atomicrmw_xor_i128:
|
||||
case Intrinsic::ppc_cmpxchg_i128:
|
||||
Info.opc = ISD::INTRINSIC_W_CHAIN;
|
||||
Info.memVT = MVT::i128;
|
||||
Info.ptrVal = I.getArgOperand(0);
|
||||
Info.offset = 0;
|
||||
Info.align = Align(16);
|
||||
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
|
||||
MachineMemOperand::MOVolatile;
|
||||
return true;
|
||||
case Intrinsic::ppc_altivec_lvx:
|
||||
case Intrinsic::ppc_altivec_lvxl:
|
||||
case Intrinsic::ppc_altivec_lvebx:
|
||||
|
@ -17457,3 +17492,102 @@ CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
|
|||
return CC_PPC64_ELF_FIS;
|
||||
}
|
||||
}
|
||||
|
||||
/// Select the IR-level expansion for an atomicrmw instruction. Only 128-bit
/// non-floating-point operations are expanded (to the masked intrinsic form),
/// and only when quadword atomics are both enabled and supported by the
/// subtarget.
TargetLowering::AtomicExpansionKind
PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  const bool QuadwordUsable =
      EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics();
  const bool IsQuadwordInt =
      !AI->isFloatingPointOperation() &&
      AI->getType()->getPrimitiveSizeInBits() == 128;
  return (QuadwordUsable && IsQuadwordInt)
             ? AtomicExpansionKind::MaskedIntrinsic
             : AtomicExpansionKind::None;
}
|
||||
|
||||
/// Select the IR-level expansion for a cmpxchg instruction. 128-bit
/// operations are expanded to the masked intrinsic form when quadword atomics
/// are both enabled and supported by the subtarget; everything else is left
/// alone.
TargetLowering::AtomicExpansionKind
PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
  // Use the type of the value being stored instead of the pointer's pointee
  // type, so the query does not depend on typed pointers.
  unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
  if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() && Size == 128)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}
|
||||
|
||||
/// Map an atomicrmw operation to the matching 128-bit PowerPC intrinsic.
/// Operations without a 128-bit intrinsic are not expected here.
static Intrinsic::ID
getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
  switch (BinOp) {
  case AtomicRMWInst::Xchg:
    return Intrinsic::ppc_atomicrmw_xchg_i128;
  case AtomicRMWInst::Add:
    return Intrinsic::ppc_atomicrmw_add_i128;
  case AtomicRMWInst::Sub:
    return Intrinsic::ppc_atomicrmw_sub_i128;
  case AtomicRMWInst::And:
    return Intrinsic::ppc_atomicrmw_and_i128;
  case AtomicRMWInst::Or:
    return Intrinsic::ppc_atomicrmw_or_i128;
  case AtomicRMWInst::Xor:
    return Intrinsic::ppc_atomicrmw_xor_i128;
  case AtomicRMWInst::Nand:
    return Intrinsic::ppc_atomicrmw_nand_i128;
  default:
    break;
  }
  llvm_unreachable("Unexpected AtomicRMW BinOp");
}
|
||||
|
||||
/// Emit a call to the 128-bit atomic read-modify-write intrinsic that matches
/// \p AI and return the result as a single 128-bit value.
///
/// \p Incr is split into two i64 halves (low, high) for the call, and the two
/// i64 results (low, high) are recombined into one value of \p Incr's type.
/// \p Mask, \p ShiftAmt and \p Ord are not used by this implementation.
Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  assert(EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() &&
         "Only support quadword now");
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  // Take the value type from the operand rather than from the pointer's
  // pointee type, so this does not depend on typed pointers.
  Type *ValTy = Incr->getType();
  assert(ValTy->getPrimitiveSizeInBits() == 128);
  Function *RMW = Intrinsic::getDeclaration(
      M, getIntrinsicForAtomicRMWBinOp128(AI->getOperation()));
  Type *Int64Ty = Type::getInt64Ty(M->getContext());
  // Split the 128-bit operand into its low and high 64-bit halves.
  Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
  Value *IncrHi =
      Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
  Value *Addr =
      Builder.CreateBitCast(AlignedAddr, Type::getInt8PtrTy(M->getContext()));
  Value *LoHi = Builder.CreateCall(RMW, {Addr, IncrLo, IncrHi});
  // Reassemble the 128-bit result: lo | (hi << 64).
  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
  return Builder.CreateOr(
      Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
}
|
||||
|
||||
/// Emit a call to the 128-bit compare-and-exchange intrinsic, bracketed by
/// the leading and trailing fences for \p Ord, and return the result as a
/// single 128-bit value.
///
/// \p CmpVal and \p NewVal are each split into two i64 halves (low, high) for
/// the call, and the two i64 results (low, high) are recombined into one value
/// of \p CmpVal's type. \p Mask is not used by this implementation.
Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  assert(EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() &&
         "Only support quadword now");
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  // Take the value type from the operand rather than from the pointer's
  // pointee type, so this does not depend on typed pointers.
  Type *ValTy = CmpVal->getType();
  assert(ValTy->getPrimitiveSizeInBits() == 128);
  Function *IntCmpXchg =
      Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
  Type *Int64Ty = Type::getInt64Ty(M->getContext());
  // Split the expected and the new value into low and high 64-bit halves.
  Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
  Value *CmpHi =
      Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
  Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
  Value *NewHi =
      Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
  Value *Addr =
      Builder.CreateBitCast(AlignedAddr, Type::getInt8PtrTy(M->getContext()));
  emitLeadingFence(Builder, CI, Ord);
  Value *LoHi =
      Builder.CreateCall(IntCmpXchg, {Addr, CmpLo, CmpHi, NewLo, NewHi});
  emitTrailingFence(Builder, CI, Ord);
  // Reassemble the 128-bit result: lo | (hi << 64).
  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
  return Builder.CreateOr(
      Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
}
|
||||
|
|
|
@ -876,6 +876,23 @@ namespace llvm {
|
|||
Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
|
||||
AtomicOrdering Ord) const override;
|
||||
|
||||
TargetLowering::AtomicExpansionKind
|
||||
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
|
||||
|
||||
TargetLowering::AtomicExpansionKind
|
||||
shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
|
||||
|
||||
Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder,
|
||||
AtomicRMWInst *AI, Value *AlignedAddr,
|
||||
Value *Incr, Value *Mask,
|
||||
Value *ShiftAmt,
|
||||
AtomicOrdering Ord) const override;
|
||||
Value *emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder,
|
||||
AtomicCmpXchgInst *CI,
|
||||
Value *AlignedAddr, Value *CmpVal,
|
||||
Value *NewVal, Value *Mask,
|
||||
AtomicOrdering Ord) const override;
|
||||
|
||||
MachineBasicBlock *
|
||||
EmitInstrWithCustomInserter(MachineInstr &MI,
|
||||
MachineBasicBlock *MBB) const override;
|
||||
|
|
|
@ -304,6 +304,88 @@ def STQCX : XForm_1_memOp<31, 182, (outs), (ins g8prc:$RSp, memrr:$dst),
|
|||
isPPC64, isRecordForm;
|
||||
}
|
||||
|
||||
def SPLIT_QUADWORD : PPCCustomInserterPseudo<(outs g8rc:$lo, g8rc:$hi),
|
||||
(ins g8prc:$src),
|
||||
"#SPLIT_QUADWORD", []>;
|
||||
class AtomicRMW128<string asmstr>
|
||||
: PPCPostRAExpPseudo<(outs g8prc:$RTp, g8prc:$scratch),
|
||||
(ins memrr:$ptr, g8rc:$incr_lo, g8rc:$incr_hi),
|
||||
asmstr, []>;
|
||||
// We have to keep values in MI's uses during LL/SC looping as they are,
|
||||
// so set both $RTp and $scratch earlyclobber.
|
||||
let mayStore = 1, mayLoad = 1,
|
||||
Defs = [CR0],
|
||||
Constraints = "@earlyclobber $scratch,@earlyclobber $RTp" in {
|
||||
// Atomic pseudo instructions expanded post-ra.
|
||||
def ATOMIC_SWAP_I128 : AtomicRMW128<"#ATOMIC_SWAP_I128">;
|
||||
def ATOMIC_LOAD_ADD_I128 : AtomicRMW128<"#ATOMIC_LOAD_ADD_I128">;
|
||||
def ATOMIC_LOAD_SUB_I128 : AtomicRMW128<"#ATOMIC_LOAD_SUB_I128">;
|
||||
def ATOMIC_LOAD_AND_I128 : AtomicRMW128<"#ATOMIC_LOAD_AND_I128">;
|
||||
def ATOMIC_LOAD_XOR_I128 : AtomicRMW128<"#ATOMIC_LOAD_XOR_I128">;
|
||||
def ATOMIC_LOAD_OR_I128 : AtomicRMW128<"#ATOMIC_LOAD_OR_I128">;
|
||||
def ATOMIC_LOAD_NAND_I128 : AtomicRMW128<"#ATOMIC_LOAD_NAND_I128">;
|
||||
|
||||
def ATOMIC_CMP_SWAP_I128 : PPCPostRAExpPseudo<
|
||||
(outs g8prc:$RTp, g8prc:$scratch),
|
||||
(ins memrr:$ptr, g8rc:$cmp_lo, g8rc:$cmp_hi,
|
||||
g8rc:$new_lo, g8rc:$new_hi),
|
||||
"#ATOMIC_CMP_SWAP_I128", []>;
|
||||
}
|
||||
|
||||
def : Pat<(int_ppc_atomicrmw_add_i128 ForceXForm:$ptr,
|
||||
i64:$incr_lo,
|
||||
i64:$incr_hi),
|
||||
(SPLIT_QUADWORD (ATOMIC_LOAD_ADD_I128 memrr:$ptr,
|
||||
g8rc:$incr_lo,
|
||||
g8rc:$incr_hi))>;
|
||||
def : Pat<(int_ppc_atomicrmw_sub_i128 ForceXForm:$ptr,
|
||||
i64:$incr_lo,
|
||||
i64:$incr_hi),
|
||||
(SPLIT_QUADWORD (ATOMIC_LOAD_SUB_I128 memrr:$ptr,
|
||||
g8rc:$incr_lo,
|
||||
g8rc:$incr_hi))>;
|
||||
def : Pat<(int_ppc_atomicrmw_xor_i128 ForceXForm:$ptr,
|
||||
i64:$incr_lo,
|
||||
i64:$incr_hi),
|
||||
(SPLIT_QUADWORD (ATOMIC_LOAD_XOR_I128 memrr:$ptr,
|
||||
g8rc:$incr_lo,
|
||||
g8rc:$incr_hi))>;
|
||||
def : Pat<(int_ppc_atomicrmw_and_i128 ForceXForm:$ptr,
|
||||
i64:$incr_lo,
|
||||
i64:$incr_hi),
|
||||
(SPLIT_QUADWORD (ATOMIC_LOAD_AND_I128 memrr:$ptr,
|
||||
g8rc:$incr_lo,
|
||||
g8rc:$incr_hi))>;
|
||||
def : Pat<(int_ppc_atomicrmw_nand_i128 ForceXForm:$ptr,
|
||||
i64:$incr_lo,
|
||||
i64:$incr_hi),
|
||||
(SPLIT_QUADWORD (ATOMIC_LOAD_NAND_I128 memrr:$ptr,
|
||||
g8rc:$incr_lo,
|
||||
g8rc:$incr_hi))>;
|
||||
def : Pat<(int_ppc_atomicrmw_or_i128 ForceXForm:$ptr,
|
||||
i64:$incr_lo,
|
||||
i64:$incr_hi),
|
||||
(SPLIT_QUADWORD (ATOMIC_LOAD_OR_I128 memrr:$ptr,
|
||||
g8rc:$incr_lo,
|
||||
g8rc:$incr_hi))>;
|
||||
def : Pat<(int_ppc_atomicrmw_xchg_i128 ForceXForm:$ptr,
|
||||
i64:$incr_lo,
|
||||
i64:$incr_hi),
|
||||
(SPLIT_QUADWORD (ATOMIC_SWAP_I128 memrr:$ptr,
|
||||
g8rc:$incr_lo,
|
||||
g8rc:$incr_hi))>;
|
||||
def : Pat<(int_ppc_cmpxchg_i128 ForceXForm:$ptr,
|
||||
i64:$cmp_lo,
|
||||
i64:$cmp_hi,
|
||||
i64:$new_lo,
|
||||
i64:$new_hi),
|
||||
(SPLIT_QUADWORD (ATOMIC_CMP_SWAP_I128
|
||||
memrr:$ptr,
|
||||
g8rc:$cmp_lo,
|
||||
g8rc:$cmp_hi,
|
||||
g8rc:$new_lo,
|
||||
g8rc:$new_hi))>;
|
||||
|
||||
let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
|
||||
def STDAT : X_RD5_RS5_IM5<31, 742, (outs), (ins g8rc:$rS, g8rc:$rA, u5imm:$FC),
|
||||
"stdat $rS, $rA, $FC", IIC_LdStStore>, isPPC64,
|
||||
|
|
|
@ -1170,6 +1170,7 @@ def IsE500 : Predicate<"Subtarget->isE500()">;
|
|||
def HasSPE : Predicate<"Subtarget->hasSPE()">;
|
||||
def HasICBT : Predicate<"Subtarget->hasICBT()">;
|
||||
def HasPartwordAtomics : Predicate<"Subtarget->hasPartwordAtomics()">;
|
||||
def HasQuadwordAtomics : Predicate<"Subtarget->hasQuadwordAtomics()">;
|
||||
def NoNaNsFPMath
|
||||
: Predicate<"Subtarget->getTargetMachine().Options.NoNaNsFPMath">;
|
||||
def NaNsFPMath
|
||||
|
|
|
@ -139,6 +139,7 @@ protected:
|
|||
bool HasICBT;
|
||||
bool HasInvariantFunctionDescriptors;
|
||||
bool HasPartwordAtomics;
|
||||
bool HasQuadwordAtomics;
|
||||
bool HasDirectMove;
|
||||
bool HasHTM;
|
||||
bool HasFloat128;
|
||||
|
@ -302,6 +303,7 @@ public:
|
|||
bool usePPCPreRASchedStrategy() const { return UsePPCPreRASchedStrategy; }
|
||||
bool usePPCPostRASchedStrategy() const { return UsePPCPostRASchedStrategy; }
|
||||
bool hasPartwordAtomics() const { return HasPartwordAtomics; }
|
||||
bool hasQuadwordAtomics() const { return HasQuadwordAtomics; }
|
||||
bool hasDirectMove() const { return HasDirectMove; }
|
||||
|
||||
Align getPlatformStackAlignment() const {
|
||||
|
|
|
@ -123,6 +123,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTarget() {
|
|||
initializePPCTLSDynamicCallPass(PR);
|
||||
initializePPCMIPeepholePass(PR);
|
||||
initializePPCLowerMASSVEntriesPass(PR);
|
||||
initializePPCExpandAtomicPseudoPass(PR);
|
||||
initializeGlobalISel(PR);
|
||||
}
|
||||
|
||||
|
@ -539,6 +540,10 @@ void PPCPassConfig::addPreEmitPass() {
|
|||
}
|
||||
|
||||
void PPCPassConfig::addPreEmitPass2() {
|
||||
// Schedule the expansion of AMOs at the last possible moment, avoiding the
|
||||
// possibility for other passes to break the requirements for forward
|
||||
// progress in the LL/SC block.
|
||||
addPass(createPPCExpandAtomicPseudoPass());
|
||||
// Must run branch selection immediately preceding the asm printer.
|
||||
addPass(createPPCBranchSelectionPass());
|
||||
}
|
||||
|
|
|
@ -197,6 +197,7 @@
|
|||
; CHECK-NEXT: Contiguously Lay Out Funclets
|
||||
; CHECK-NEXT: StackMap Liveness Analysis
|
||||
; CHECK-NEXT: Live DEBUG_VALUE analysis
|
||||
; CHECK-NEXT: PowerPC Expand Atomic
|
||||
; CHECK-NEXT: PowerPC Branch Selector
|
||||
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
|
||||
; CHECK-NEXT: Machine Optimization Remark Emitter
|
||||
|
|
|
@ -0,0 +1,452 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown -mcpu=pwr8 \
|
||||
; RUN: -ppc-asm-full-reg-names -ppc-quadword-atomics \
|
||||
; RUN: -ppc-track-subreg-liveness < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown -mcpu=pwr7 \
|
||||
; RUN: -ppc-asm-full-reg-names -ppc-quadword-atomics \
|
||||
; RUN: -ppc-track-subreg-liveness < %s | FileCheck --check-prefix=PWR7 %s
|
||||
|
||||
|
||||
define i128 @swap(i128* %a, i128 %x) {
|
||||
; CHECK-LABEL: swap:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: sync
|
||||
; CHECK-NEXT: .LBB0_1: # %entry
|
||||
; CHECK-NEXT: #
|
||||
; CHECK-NEXT: lqarx r6, 0, r3
|
||||
; CHECK-NEXT: mr r9, r5
|
||||
; CHECK-NEXT: mr r8, r4
|
||||
; CHECK-NEXT: stqcx. r8, 0, r3
|
||||
; CHECK-NEXT: bne cr0, .LBB0_1
|
||||
; CHECK-NEXT: # %bb.2: # %entry
|
||||
; CHECK-NEXT: lwsync
|
||||
; CHECK-NEXT: mr r3, r6
|
||||
; CHECK-NEXT: mr r4, r7
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; PWR7-LABEL: swap:
|
||||
; PWR7: # %bb.0: # %entry
|
||||
; PWR7-NEXT: mflr r0
|
||||
; PWR7-NEXT: std r0, 16(r1)
|
||||
; PWR7-NEXT: stdu r1, -112(r1)
|
||||
; PWR7-NEXT: .cfi_def_cfa_offset 112
|
||||
; PWR7-NEXT: .cfi_offset lr, 16
|
||||
; PWR7-NEXT: sync
|
||||
; PWR7-NEXT: bl __sync_lock_test_and_set_16
|
||||
; PWR7-NEXT: nop
|
||||
; PWR7-NEXT: lwsync
|
||||
; PWR7-NEXT: addi r1, r1, 112
|
||||
; PWR7-NEXT: ld r0, 16(r1)
|
||||
; PWR7-NEXT: mtlr r0
|
||||
; PWR7-NEXT: blr
|
||||
entry:
|
||||
%0 = atomicrmw xchg i128* %a, i128 %x seq_cst, align 16
|
||||
ret i128 %0
|
||||
}
|
||||
|
||||
define i128 @add(i128* %a, i128 %x) {
|
||||
; CHECK-LABEL: add:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: sync
|
||||
; CHECK-NEXT: .LBB1_1: # %entry
|
||||
; CHECK-NEXT: #
|
||||
; CHECK-NEXT: lqarx r6, 0, r3
|
||||
; CHECK-NEXT: addc r9, r5, r7
|
||||
; CHECK-NEXT: adde r8, r4, r6
|
||||
; CHECK-NEXT: stqcx. r8, 0, r3
|
||||
; CHECK-NEXT: bne cr0, .LBB1_1
|
||||
; CHECK-NEXT: # %bb.2: # %entry
|
||||
; CHECK-NEXT: lwsync
|
||||
; CHECK-NEXT: mr r3, r6
|
||||
; CHECK-NEXT: mr r4, r7
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; PWR7-LABEL: add:
|
||||
; PWR7: # %bb.0: # %entry
|
||||
; PWR7-NEXT: mflr r0
|
||||
; PWR7-NEXT: std r0, 16(r1)
|
||||
; PWR7-NEXT: stdu r1, -112(r1)
|
||||
; PWR7-NEXT: .cfi_def_cfa_offset 112
|
||||
; PWR7-NEXT: .cfi_offset lr, 16
|
||||
; PWR7-NEXT: sync
|
||||
; PWR7-NEXT: bl __sync_fetch_and_add_16
|
||||
; PWR7-NEXT: nop
|
||||
; PWR7-NEXT: lwsync
|
||||
; PWR7-NEXT: addi r1, r1, 112
|
||||
; PWR7-NEXT: ld r0, 16(r1)
|
||||
; PWR7-NEXT: mtlr r0
|
||||
; PWR7-NEXT: blr
|
||||
entry:
|
||||
%0 = atomicrmw add i128* %a, i128 %x seq_cst, align 16
|
||||
ret i128 %0
|
||||
}
|
||||
|
||||
define i128 @sub(i128* %a, i128 %x) {
|
||||
; CHECK-LABEL: sub:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: sync
|
||||
; CHECK-NEXT: .LBB2_1: # %entry
|
||||
; CHECK-NEXT: #
|
||||
; CHECK-NEXT: lqarx r6, 0, r3
|
||||
; CHECK-NEXT: subc r9, r7, r5
|
||||
; CHECK-NEXT: subfe r8, r4, r6
|
||||
; CHECK-NEXT: stqcx. r8, 0, r3
|
||||
; CHECK-NEXT: bne cr0, .LBB2_1
|
||||
; CHECK-NEXT: # %bb.2: # %entry
|
||||
; CHECK-NEXT: lwsync
|
||||
; CHECK-NEXT: mr r3, r6
|
||||
; CHECK-NEXT: mr r4, r7
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; PWR7-LABEL: sub:
|
||||
; PWR7: # %bb.0: # %entry
|
||||
; PWR7-NEXT: mflr r0
|
||||
; PWR7-NEXT: std r0, 16(r1)
|
||||
; PWR7-NEXT: stdu r1, -112(r1)
|
||||
; PWR7-NEXT: .cfi_def_cfa_offset 112
|
||||
; PWR7-NEXT: .cfi_offset lr, 16
|
||||
; PWR7-NEXT: sync
|
||||
; PWR7-NEXT: bl __sync_fetch_and_sub_16
|
||||
; PWR7-NEXT: nop
|
||||
; PWR7-NEXT: lwsync
|
||||
; PWR7-NEXT: addi r1, r1, 112
|
||||
; PWR7-NEXT: ld r0, 16(r1)
|
||||
; PWR7-NEXT: mtlr r0
|
||||
; PWR7-NEXT: blr
|
||||
entry:
|
||||
%0 = atomicrmw sub i128* %a, i128 %x seq_cst, align 16
|
||||
ret i128 %0
|
||||
}
|
||||
|
||||
define i128 @and(i128* %a, i128 %x) {
|
||||
; CHECK-LABEL: and:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: sync
|
||||
; CHECK-NEXT: .LBB3_1: # %entry
|
||||
; CHECK-NEXT: #
|
||||
; CHECK-NEXT: lqarx r6, 0, r3
|
||||
; CHECK-NEXT: and r9, r5, r7
|
||||
; CHECK-NEXT: and r8, r4, r6
|
||||
; CHECK-NEXT: stqcx. r8, 0, r3
|
||||
; CHECK-NEXT: bne cr0, .LBB3_1
|
||||
; CHECK-NEXT: # %bb.2: # %entry
|
||||
; CHECK-NEXT: lwsync
|
||||
; CHECK-NEXT: mr r3, r6
|
||||
; CHECK-NEXT: mr r4, r7
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; PWR7-LABEL: and:
|
||||
; PWR7: # %bb.0: # %entry
|
||||
; PWR7-NEXT: mflr r0
|
||||
; PWR7-NEXT: std r0, 16(r1)
|
||||
; PWR7-NEXT: stdu r1, -112(r1)
|
||||
; PWR7-NEXT: .cfi_def_cfa_offset 112
|
||||
; PWR7-NEXT: .cfi_offset lr, 16
|
||||
; PWR7-NEXT: sync
|
||||
; PWR7-NEXT: bl __sync_fetch_and_and_16
|
||||
; PWR7-NEXT: nop
|
||||
; PWR7-NEXT: lwsync
|
||||
; PWR7-NEXT: addi r1, r1, 112
|
||||
; PWR7-NEXT: ld r0, 16(r1)
|
||||
; PWR7-NEXT: mtlr r0
|
||||
; PWR7-NEXT: blr
|
||||
entry:
|
||||
%0 = atomicrmw and i128* %a, i128 %x seq_cst, align 16
|
||||
ret i128 %0
|
||||
}
|
||||
|
||||
define i128 @or(i128* %a, i128 %x) {
|
||||
; CHECK-LABEL: or:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: sync
|
||||
; CHECK-NEXT: .LBB4_1: # %entry
|
||||
; CHECK-NEXT: #
|
||||
; CHECK-NEXT: lqarx r6, 0, r3
|
||||
; CHECK-NEXT: or r9, r5, r7
|
||||
; CHECK-NEXT: or r8, r4, r6
|
||||
; CHECK-NEXT: stqcx. r8, 0, r3
|
||||
; CHECK-NEXT: bne cr0, .LBB4_1
|
||||
; CHECK-NEXT: # %bb.2: # %entry
|
||||
; CHECK-NEXT: lwsync
|
||||
; CHECK-NEXT: mr r3, r6
|
||||
; CHECK-NEXT: mr r4, r7
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; PWR7-LABEL: or:
|
||||
; PWR7: # %bb.0: # %entry
|
||||
; PWR7-NEXT: mflr r0
|
||||
; PWR7-NEXT: std r0, 16(r1)
|
||||
; PWR7-NEXT: stdu r1, -112(r1)
|
||||
; PWR7-NEXT: .cfi_def_cfa_offset 112
|
||||
; PWR7-NEXT: .cfi_offset lr, 16
|
||||
; PWR7-NEXT: sync
|
||||
; PWR7-NEXT: bl __sync_fetch_and_or_16
|
||||
; PWR7-NEXT: nop
|
||||
; PWR7-NEXT: lwsync
|
||||
; PWR7-NEXT: addi r1, r1, 112
|
||||
; PWR7-NEXT: ld r0, 16(r1)
|
||||
; PWR7-NEXT: mtlr r0
|
||||
; PWR7-NEXT: blr
|
||||
entry:
|
||||
%0 = atomicrmw or i128* %a, i128 %x seq_cst, align 16
|
||||
ret i128 %0
|
||||
}
|
||||
|
||||
define i128 @xor(i128* %a, i128 %x) {
|
||||
; CHECK-LABEL: xor:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: sync
|
||||
; CHECK-NEXT: .LBB5_1: # %entry
|
||||
; CHECK-NEXT: #
|
||||
; CHECK-NEXT: lqarx r6, 0, r3
|
||||
; CHECK-NEXT: xor r9, r5, r7
|
||||
; CHECK-NEXT: xor r8, r4, r6
|
||||
; CHECK-NEXT: stqcx. r8, 0, r3
|
||||
; CHECK-NEXT: bne cr0, .LBB5_1
|
||||
; CHECK-NEXT: # %bb.2: # %entry
|
||||
; CHECK-NEXT: lwsync
|
||||
; CHECK-NEXT: mr r3, r6
|
||||
; CHECK-NEXT: mr r4, r7
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; PWR7-LABEL: xor:
|
||||
; PWR7: # %bb.0: # %entry
|
||||
; PWR7-NEXT: mflr r0
|
||||
; PWR7-NEXT: std r0, 16(r1)
|
||||
; PWR7-NEXT: stdu r1, -112(r1)
|
||||
; PWR7-NEXT: .cfi_def_cfa_offset 112
|
||||
; PWR7-NEXT: .cfi_offset lr, 16
|
||||
; PWR7-NEXT: sync
|
||||
; PWR7-NEXT: bl __sync_fetch_and_xor_16
|
||||
; PWR7-NEXT: nop
|
||||
; PWR7-NEXT: lwsync
|
||||
; PWR7-NEXT: addi r1, r1, 112
|
||||
; PWR7-NEXT: ld r0, 16(r1)
|
||||
; PWR7-NEXT: mtlr r0
|
||||
; PWR7-NEXT: blr
|
||||
entry:
|
||||
%0 = atomicrmw xor i128* %a, i128 %x seq_cst, align 16
|
||||
ret i128 %0
|
||||
}
|
||||
|
||||
define i128 @nand(i128* %a, i128 %x) {
|
||||
; CHECK-LABEL: nand:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: sync
|
||||
; CHECK-NEXT: .LBB6_1: # %entry
|
||||
; CHECK-NEXT: #
|
||||
; CHECK-NEXT: lqarx r6, 0, r3
|
||||
; CHECK-NEXT: nand r9, r5, r7
|
||||
; CHECK-NEXT: nand r8, r4, r6
|
||||
; CHECK-NEXT: stqcx. r8, 0, r3
|
||||
; CHECK-NEXT: bne cr0, .LBB6_1
|
||||
; CHECK-NEXT: # %bb.2: # %entry
|
||||
; CHECK-NEXT: lwsync
|
||||
; CHECK-NEXT: mr r3, r6
|
||||
; CHECK-NEXT: mr r4, r7
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; PWR7-LABEL: nand:
|
||||
; PWR7: # %bb.0: # %entry
|
||||
; PWR7-NEXT: mflr r0
|
||||
; PWR7-NEXT: std r0, 16(r1)
|
||||
; PWR7-NEXT: stdu r1, -112(r1)
|
||||
; PWR7-NEXT: .cfi_def_cfa_offset 112
|
||||
; PWR7-NEXT: .cfi_offset lr, 16
|
||||
; PWR7-NEXT: sync
|
||||
; PWR7-NEXT: bl __sync_fetch_and_nand_16
|
||||
; PWR7-NEXT: nop
|
||||
; PWR7-NEXT: lwsync
|
||||
; PWR7-NEXT: addi r1, r1, 112
|
||||
; PWR7-NEXT: ld r0, 16(r1)
|
||||
; PWR7-NEXT: mtlr r0
|
||||
; PWR7-NEXT: blr
|
||||
entry:
|
||||
%0 = atomicrmw nand i128* %a, i128 %x seq_cst, align 16
|
||||
ret i128 %0
|
||||
}
|
||||
|
||||
;; CmpXchg
|
||||
define i128 @cas_weak_acquire_acquire(i128* %a, i128 %cmp, i128 %new) {
|
||||
; CHECK-LABEL: cas_weak_acquire_acquire:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: .LBB7_1: # %entry
|
||||
; CHECK-NEXT: #
|
||||
; CHECK-NEXT: lqarx r8, 0, r3
|
||||
; CHECK-NEXT: xor r11, r9, r5
|
||||
; CHECK-NEXT: xor r10, r8, r4
|
||||
; CHECK-NEXT: or. r11, r11, r10
|
||||
; CHECK-NEXT: bne cr0, .LBB7_3
|
||||
; CHECK-NEXT: # %bb.2: # %entry
|
||||
; CHECK-NEXT: #
|
||||
; CHECK-NEXT: mr r11, r7
|
||||
; CHECK-NEXT: mr r10, r6
|
||||
; CHECK-NEXT: stqcx. r10, 0, r3
|
||||
; CHECK-NEXT: bne cr0, .LBB7_1
|
||||
; CHECK-NEXT: b .LBB7_4
|
||||
; CHECK-NEXT: .LBB7_3: # %entry
|
||||
; CHECK-NEXT: stqcx. r8, 0, r3
|
||||
; CHECK-NEXT: .LBB7_4: # %entry
|
||||
; CHECK-NEXT: lwsync
|
||||
; CHECK-NEXT: mr r3, r8
|
||||
; CHECK-NEXT: mr r4, r9
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; PWR7-LABEL: cas_weak_acquire_acquire:
|
||||
; PWR7: # %bb.0: # %entry
|
||||
; PWR7-NEXT: mflr r0
|
||||
; PWR7-NEXT: std r0, 16(r1)
|
||||
; PWR7-NEXT: stdu r1, -112(r1)
|
||||
; PWR7-NEXT: .cfi_def_cfa_offset 112
|
||||
; PWR7-NEXT: .cfi_offset lr, 16
|
||||
; PWR7-NEXT: bl __sync_val_compare_and_swap_16
|
||||
; PWR7-NEXT: nop
|
||||
; PWR7-NEXT: lwsync
|
||||
; PWR7-NEXT: addi r1, r1, 112
|
||||
; PWR7-NEXT: ld r0, 16(r1)
|
||||
; PWR7-NEXT: mtlr r0
|
||||
; PWR7-NEXT: blr
|
||||
entry:
|
||||
%0 = cmpxchg weak i128* %a, i128 %cmp, i128 %new acquire acquire
|
||||
%1 = extractvalue { i128, i1 } %0, 0
|
||||
ret i128 %1
|
||||
}
|
||||
|
||||
define i128 @cas_weak_release_monotonic(i128* %a, i128 %cmp, i128 %new) {
|
||||
; CHECK-LABEL: cas_weak_release_monotonic:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lwsync
|
||||
; CHECK-NEXT: .LBB8_1: # %entry
|
||||
; CHECK-NEXT: #
|
||||
; CHECK-NEXT: lqarx r8, 0, r3
|
||||
; CHECK-NEXT: xor r11, r9, r5
|
||||
; CHECK-NEXT: xor r10, r8, r4
|
||||
; CHECK-NEXT: or. r11, r11, r10
|
||||
; CHECK-NEXT: bne cr0, .LBB8_3
|
||||
; CHECK-NEXT: # %bb.2: # %entry
|
||||
; CHECK-NEXT: #
|
||||
; CHECK-NEXT: mr r11, r7
|
||||
; CHECK-NEXT: mr r10, r6
|
||||
; CHECK-NEXT: stqcx. r10, 0, r3
|
||||
; CHECK-NEXT: bne cr0, .LBB8_1
|
||||
; CHECK-NEXT: b .LBB8_4
|
||||
; CHECK-NEXT: .LBB8_3: # %entry
|
||||
; CHECK-NEXT: stqcx. r8, 0, r3
|
||||
; CHECK-NEXT: .LBB8_4: # %entry
|
||||
; CHECK-NEXT: mr r3, r8
|
||||
; CHECK-NEXT: mr r4, r9
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; PWR7-LABEL: cas_weak_release_monotonic:
|
||||
; PWR7: # %bb.0: # %entry
|
||||
; PWR7-NEXT: mflr r0
|
||||
; PWR7-NEXT: std r0, 16(r1)
|
||||
; PWR7-NEXT: stdu r1, -112(r1)
|
||||
; PWR7-NEXT: .cfi_def_cfa_offset 112
|
||||
; PWR7-NEXT: .cfi_offset lr, 16
|
||||
; PWR7-NEXT: lwsync
|
||||
; PWR7-NEXT: bl __sync_val_compare_and_swap_16
|
||||
; PWR7-NEXT: nop
|
||||
; PWR7-NEXT: addi r1, r1, 112
|
||||
; PWR7-NEXT: ld r0, 16(r1)
|
||||
; PWR7-NEXT: mtlr r0
|
||||
; PWR7-NEXT: blr
|
||||
entry:
|
||||
%0 = cmpxchg weak i128* %a, i128 %cmp, i128 %new release monotonic
|
||||
%1 = extractvalue { i128, i1 } %0, 0
|
||||
ret i128 %1
|
||||
}
|
||||
|
||||
define i128 @cas_sc_sc(i128* %a, i128 %cmp, i128 %new) {
|
||||
; CHECK-LABEL: cas_sc_sc:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: sync
|
||||
; CHECK-NEXT: .LBB9_1: # %entry
|
||||
; CHECK-NEXT: #
|
||||
; CHECK-NEXT: lqarx r8, 0, r3
|
||||
; CHECK-NEXT: xor r11, r9, r5
|
||||
; CHECK-NEXT: xor r10, r8, r4
|
||||
; CHECK-NEXT: or. r11, r11, r10
|
||||
; CHECK-NEXT: bne cr0, .LBB9_3
|
||||
; CHECK-NEXT: # %bb.2: # %entry
|
||||
; CHECK-NEXT: #
|
||||
; CHECK-NEXT: mr r11, r7
|
||||
; CHECK-NEXT: mr r10, r6
|
||||
; CHECK-NEXT: stqcx. r10, 0, r3
|
||||
; CHECK-NEXT: bne cr0, .LBB9_1
|
||||
; CHECK-NEXT: b .LBB9_4
|
||||
; CHECK-NEXT: .LBB9_3: # %entry
|
||||
; CHECK-NEXT: stqcx. r8, 0, r3
|
||||
; CHECK-NEXT: .LBB9_4: # %entry
|
||||
; CHECK-NEXT: lwsync
|
||||
; CHECK-NEXT: mr r3, r8
|
||||
; CHECK-NEXT: mr r4, r9
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; PWR7-LABEL: cas_sc_sc:
|
||||
; PWR7: # %bb.0: # %entry
|
||||
; PWR7-NEXT: mflr r0
|
||||
; PWR7-NEXT: std r0, 16(r1)
|
||||
; PWR7-NEXT: stdu r1, -112(r1)
|
||||
; PWR7-NEXT: .cfi_def_cfa_offset 112
|
||||
; PWR7-NEXT: .cfi_offset lr, 16
|
||||
; PWR7-NEXT: sync
|
||||
; PWR7-NEXT: bl __sync_val_compare_and_swap_16
|
||||
; PWR7-NEXT: nop
|
||||
; PWR7-NEXT: lwsync
|
||||
; PWR7-NEXT: addi r1, r1, 112
|
||||
; PWR7-NEXT: ld r0, 16(r1)
|
||||
; PWR7-NEXT: mtlr r0
|
||||
; PWR7-NEXT: blr
|
||||
entry:
|
||||
%0 = cmpxchg i128* %a, i128 %cmp, i128 %new seq_cst seq_cst
|
||||
%1 = extractvalue { i128, i1 } %0, 0
|
||||
ret i128 %1
|
||||
}
|
||||
|
||||
define i128 @cas_acqrel_acquire(i128* %a, i128 %cmp, i128 %new) {
|
||||
; CHECK-LABEL: cas_acqrel_acquire:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lwsync
|
||||
; CHECK-NEXT: .LBB10_1: # %entry
|
||||
; CHECK-NEXT: #
|
||||
; CHECK-NEXT: lqarx r8, 0, r3
|
||||
; CHECK-NEXT: xor r11, r9, r5
|
||||
; CHECK-NEXT: xor r10, r8, r4
|
||||
; CHECK-NEXT: or. r11, r11, r10
|
||||
; CHECK-NEXT: bne cr0, .LBB10_3
|
||||
; CHECK-NEXT: # %bb.2: # %entry
|
||||
; CHECK-NEXT: #
|
||||
; CHECK-NEXT: mr r11, r7
|
||||
; CHECK-NEXT: mr r10, r6
|
||||
; CHECK-NEXT: stqcx. r10, 0, r3
|
||||
; CHECK-NEXT: bne cr0, .LBB10_1
|
||||
; CHECK-NEXT: b .LBB10_4
|
||||
; CHECK-NEXT: .LBB10_3: # %entry
|
||||
; CHECK-NEXT: stqcx. r8, 0, r3
|
||||
; CHECK-NEXT: .LBB10_4: # %entry
|
||||
; CHECK-NEXT: lwsync
|
||||
; CHECK-NEXT: mr r3, r8
|
||||
; CHECK-NEXT: mr r4, r9
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; PWR7-LABEL: cas_acqrel_acquire:
|
||||
; PWR7: # %bb.0: # %entry
|
||||
; PWR7-NEXT: mflr r0
|
||||
; PWR7-NEXT: std r0, 16(r1)
|
||||
; PWR7-NEXT: stdu r1, -112(r1)
|
||||
; PWR7-NEXT: .cfi_def_cfa_offset 112
|
||||
; PWR7-NEXT: .cfi_offset lr, 16
|
||||
; PWR7-NEXT: lwsync
|
||||
; PWR7-NEXT: bl __sync_val_compare_and_swap_16
|
||||
; PWR7-NEXT: nop
|
||||
; PWR7-NEXT: lwsync
|
||||
; PWR7-NEXT: addi r1, r1, 112
|
||||
; PWR7-NEXT: ld r0, 16(r1)
|
||||
; PWR7-NEXT: mtlr r0
|
||||
; PWR7-NEXT: blr
|
||||
entry:
|
||||
%0 = cmpxchg i128* %a, i128 %cmp, i128 %new acq_rel acquire
|
||||
%1 = extractvalue { i128, i1 } %0, 0
|
||||
ret i128 %1
|
||||
}
|
Loading…
Reference in New Issue