[NVPTX] Enable AtomicExpandPass for NVPTX

This patch enables `AtomicExpandPass` for NVPTX. Depends on D125652.

Reviewed By: tra

Differential Revision: https://reviews.llvm.org/D125639
commit ecf5b78053
parent ff60a0a364
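For illustration only (not part of this change): with AtomicExpandPass in the pipeline, an atomicrmw that the target cannot lower natively, for example a 64-bit bitwise and on sm_30 where hasAtomBitwise64() is false, is rewritten in IR into a compare-and-swap retry loop roughly like the sketch below. The function name, value names, and block labels are made up for the example.

define i64 @and_i64_sketch(ptr %p, i64 %v) {
entry:
  %init = load i64, ptr %p, align 8
  br label %atomicrmw.start

atomicrmw.start:
  %loaded = phi i64 [ %init, %entry ], [ %old, %atomicrmw.start ]
  %new = and i64 %loaded, %v
  ; retry until the word has not changed between the load and the cas
  %pair = cmpxchg ptr %p, i64 %loaded, i64 %new monotonic monotonic, align 8
  %old = extractvalue { i64, i1 } %pair, 0
  %ok = extractvalue { i64, i1 } %pair, 1
  br i1 %ok, label %atomicrmw.end, label %atomicrmw.start

atomicrmw.end:
  ret i64 %old
}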
@@ -589,6 +589,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
  // Now deduce the information based on the above mentioned
  // actions
  computeRegisterProperties(STI.getRegisterInfo());

  setMinCmpXchgSizeInBits(32);
}

const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
@@ -5125,6 +5127,67 @@ void NVPTXTargetLowering::ReplaceNodeResults(
  }
}

NVPTXTargetLowering::AtomicExpansionKind
NVPTXTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  Type *Ty = AI->getValOperand()->getType();

  if (AI->isFloatingPointOperation()) {
    if (AI->getOperation() == AtomicRMWInst::BinOp::FAdd) {
      if (Ty->isFloatTy())
        return AtomicExpansionKind::None;
      if (Ty->isDoubleTy() && STI.hasAtomAddF64())
        return AtomicExpansionKind::None;
    }
    return AtomicExpansionKind::CmpXChg;
  }

  assert(Ty->isIntegerTy() && "Ty should be integer at this point");
  auto ITy = cast<llvm::IntegerType>(Ty);

  switch (AI->getOperation()) {
  default:
    return AtomicExpansionKind::CmpXChg;
  case AtomicRMWInst::BinOp::And:
  case AtomicRMWInst::BinOp::Or:
  case AtomicRMWInst::BinOp::Xor:
  case AtomicRMWInst::BinOp::Xchg:
    switch (ITy->getBitWidth()) {
    case 8:
    case 16:
      return AtomicExpansionKind::CmpXChg;
    case 32:
      return AtomicExpansionKind::None;
    case 64:
      if (STI.hasAtomBitwise64())
        return AtomicExpansionKind::None;
      return AtomicExpansionKind::CmpXChg;
    default:
      llvm_unreachable("unsupported width encountered");
    }
  case AtomicRMWInst::BinOp::Add:
  case AtomicRMWInst::BinOp::Sub:
  case AtomicRMWInst::BinOp::Max:
  case AtomicRMWInst::BinOp::Min:
  case AtomicRMWInst::BinOp::UMax:
  case AtomicRMWInst::BinOp::UMin:
    switch (ITy->getBitWidth()) {
    case 8:
    case 16:
      return AtomicExpansionKind::CmpXChg;
    case 32:
      return AtomicExpansionKind::None;
    case 64:
      if (STI.hasAtomMinMax64())
        return AtomicExpansionKind::None;
      return AtomicExpansionKind::CmpXChg;
    default:
      llvm_unreachable("unsupported width encountered");
    }
  }

  return AtomicExpansionKind::CmpXChg;
}

// Pin NVPTXTargetObjectFile's vtables to this file.
NVPTXTargetObjectFile::~NVPTXTargetObjectFile() = default;
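Note (illustration only, not part of the diff): in the hunk above, 8- and 16-bit operations always return CmpXChg, and together with setMinCmpXchgSizeInBits(32) from the first hunk this tells AtomicExpandPass that the narrowest usable compare-and-swap is 32 bits wide. The expander therefore rewrites a sub-word atomicrmw into shift/mask arithmetic plus a cmpxchg loop on the containing 32-bit word, which is why the i8/i16 tests below expect atom.cas.b32. A rough IR sketch of that expansion for an i8 xchg follows; the value names, labels, and little-endian offset math are assumptions of this example, not code produced verbatim by the pass.

define i8 @xchg_i8_sketch(ptr %p, i8 %v) {
entry:
  ; align the address down to a 4-byte boundary and compute the bit offset
  %addr = ptrtoint ptr %p to i64
  %aligned = and i64 %addr, -4
  %word.ptr = inttoptr i64 %aligned to ptr
  %ofs = and i64 %addr, 3
  %shift64 = shl i64 %ofs, 3
  %shift = trunc i64 %shift64 to i32
  %mask = shl i32 255, %shift
  %inv.mask = xor i32 %mask, -1
  %val.ext = zext i8 %v to i32
  %val.shifted = shl i32 %val.ext, %shift
  %init = load i32, ptr %word.ptr, align 4
  br label %loop

loop:
  ; splice the new byte into the containing word and retry via 32-bit cas
  %loaded = phi i32 [ %init, %entry ], [ %old, %loop ]
  %keep = and i32 %loaded, %inv.mask
  %new = or i32 %keep, %val.shifted
  %pair = cmpxchg ptr %word.ptr, i32 %loaded, i32 %new monotonic monotonic, align 4
  %old = extractvalue { i32, i1 } %pair, 0
  %ok = extractvalue { i32, i1 } %pair, 1
  br i1 %ok, label %end, label %loop

end:
  %old.shifted = lshr i32 %old, %shift
  %res = trunc i32 %old.shifted to i8
  ret i8 %res
}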
@@ -561,6 +561,17 @@ public:
  // instruction, so we say that ctlz is cheap to speculate.
  bool isCheapToSpeculateCtlz() const override { return true; }

  AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override {
    return AtomicExpansionKind::None;
  }

  AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override {
    return AtomicExpansionKind::None;
  }

  AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

private:
  const NVPTXSubtarget &STI; // cache the subtarget here
  SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
@@ -330,6 +330,8 @@ void NVPTXPassConfig::addIRPasses() {
    addStraightLineScalarOptimizationPasses();
  }

  addPass(createAtomicExpandPass());

  // === LSR and other generic IR passes ===
  TargetPassConfig::addIRPasses();
  // EarlyCSE is not always strong enough to clean up what LSR produces. For
@@ -0,0 +1,165 @@
; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 | FileCheck %s --check-prefixes=ALL,SM30
; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 | FileCheck %s --check-prefixes=ALL,SM60
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_60 | %ptxas-verify %}

; CHECK-LABEL: fadd_double
define void @fadd_double(ptr %0, double %1) {
entry:
  ; SM30: atom.cas.b64
  ; SM60: atom.add.f64
  %2 = atomicrmw fadd ptr %0, double %1 monotonic, align 8
  ret void
}

; CHECK-LABEL: fadd_float
define void @fadd_float(ptr %0, float %1) {
entry:
  ; ALL: atom.add.f32
  %2 = atomicrmw fadd ptr %0, float %1 monotonic, align 4
  ret void
}

; CHECK-LABEL: bitwise_i32
define void @bitwise_i32(ptr %0, i32 %1) {
entry:
  ; ALL: atom.and.b32
  %2 = atomicrmw and ptr %0, i32 %1 monotonic, align 4
  ; ALL: atom.or.b32
  %3 = atomicrmw or ptr %0, i32 %1 monotonic, align 4
  ; ALL: atom.xor.b32
  %4 = atomicrmw xor ptr %0, i32 %1 monotonic, align 4
  ; ALL: atom.exch.b32
  %5 = atomicrmw xchg ptr %0, i32 %1 monotonic, align 4
  ret void
}

; CHECK-LABEL: bitwise_i64
define void @bitwise_i64(ptr %0, i64 %1) {
entry:
  ; SM30: atom.cas.b64
  ; SM60: atom.and.b64
  %2 = atomicrmw and ptr %0, i64 %1 monotonic, align 8
  ; SM30: atom.cas.b64
  ; SM60: atom.or.b64
  %3 = atomicrmw or ptr %0, i64 %1 monotonic, align 8
  ; SM30: atom.cas.b64
  ; SM60: atom.xor.b64
  %4 = atomicrmw xor ptr %0, i64 %1 monotonic, align 8
  ; SM30: atom.cas.b64
  ; SM60: atom.exch.b64
  %5 = atomicrmw xchg ptr %0, i64 %1 monotonic, align 8
  ret void
}

; CHECK-LABEL: minmax_i32
define void @minmax_i32(ptr %0, i32 %1) {
entry:
  ; ALL: atom.min.s32
  %2 = atomicrmw min ptr %0, i32 %1 monotonic, align 4
  ; ALL: atom.max.s32
  %3 = atomicrmw max ptr %0, i32 %1 monotonic, align 4
  ; ALL: atom.min.u32
  %4 = atomicrmw umin ptr %0, i32 %1 monotonic, align 4
  ; ALL: atom.max.u32
  %5 = atomicrmw umax ptr %0, i32 %1 monotonic, align 4
  ret void
}

; CHECK-LABEL: minmax_i64
define void @minmax_i64(ptr %0, i64 %1) {
entry:
  ; SM30: atom.cas.b64
  ; SM60: atom.min.s64
  %2 = atomicrmw min ptr %0, i64 %1 monotonic, align 8
  ; SM30: atom.cas.b64
  ; SM60: atom.max.s64
  %3 = atomicrmw max ptr %0, i64 %1 monotonic, align 8
  ; SM30: atom.cas.b64
  ; SM60: atom.min.u64
  %4 = atomicrmw umin ptr %0, i64 %1 monotonic, align 8
  ; SM30: atom.cas.b64
  ; SM60: atom.max.u64
  %5 = atomicrmw umax ptr %0, i64 %1 monotonic, align 8
  ret void
}

; CHECK-LABEL: bitwise_i8
define void @bitwise_i8(ptr %0, i8 %1) {
entry:
  ; ALL: atom.and.b32
  %2 = atomicrmw and ptr %0, i8 %1 monotonic, align 1
  ; ALL: atom.or.b32
  %3 = atomicrmw or ptr %0, i8 %1 monotonic, align 1
  ; ALL: atom.xor.b32
  %4 = atomicrmw xor ptr %0, i8 %1 monotonic, align 1
  ; ALL: atom.cas.b32
  %5 = atomicrmw xchg ptr %0, i8 %1 monotonic, align 1
  ret void
}

; CHECK-LABEL: minmax_i8
define void @minmax_i8(ptr %0, i8 %1) {
entry:
  ; ALL: atom.cas.b32
  %2 = atomicrmw min ptr %0, i8 %1 monotonic, align 1
  ; ALL: atom.cas.b32
  %3 = atomicrmw max ptr %0, i8 %1 monotonic, align 1
  ; ALL: atom.cas.b32
  %4 = atomicrmw umin ptr %0, i8 %1 monotonic, align 1
  ; ALL: atom.cas.b32
  %5 = atomicrmw umax ptr %0, i8 %1 monotonic, align 1
  ret void
}

; CHECK-LABEL: bitwise_i16
define void @bitwise_i16(ptr %0, i16 %1) {
entry:
  ; ALL: atom.and.b32
  %2 = atomicrmw and ptr %0, i16 %1 monotonic, align 2
  ; ALL: atom.or.b32
  %3 = atomicrmw or ptr %0, i16 %1 monotonic, align 2
  ; ALL: atom.xor.b32
  %4 = atomicrmw xor ptr %0, i16 %1 monotonic, align 2
  ; ALL: atom.cas.b32
  %5 = atomicrmw xchg ptr %0, i16 %1 monotonic, align 2
  ret void
}

; CHECK-LABEL: minmax_i16
define void @minmax_i16(ptr %0, i16 %1) {
entry:
  ; ALL: atom.cas.b32
  %2 = atomicrmw min ptr %0, i16 %1 monotonic, align 2
  ; ALL: atom.cas.b32
  %3 = atomicrmw max ptr %0, i16 %1 monotonic, align 2
  ; ALL: atom.cas.b32
  %4 = atomicrmw umin ptr %0, i16 %1 monotonic, align 2
  ; ALL: atom.cas.b32
  %5 = atomicrmw umax ptr %0, i16 %1 monotonic, align 2
  ret void
}

; TODO: We might still want to test other types, such as i128. Currently the
; backend doesn't support them. Atomic expand only supports expansion to cas of
; the same bitwidth, which means even after expansion, the back end still
; doesn't support the instruction. Here we still put the tests. Remove the
; comment once we have proper support, either from atomic expand or backend.

; define void @bitwise_i128(ptr %0, i128 %1) {
; entry:
;   %2 = atomicrmw and ptr %0, i128 %1 monotonic, align 16
;   %3 = atomicrmw or ptr %0, i128 %1 monotonic, align 16
;   %4 = atomicrmw xor ptr %0, i128 %1 monotonic, align 16
;   %5 = atomicrmw xchg ptr %0, i128 %1 monotonic, align 16
;   ret void
; }

; define void @minmax_i128(ptr %0, i128 %1) {
; entry:
;   %2 = atomicrmw min ptr %0, i128 %1 monotonic, align 16
;   %3 = atomicrmw max ptr %0, i128 %1 monotonic, align 16
;   %4 = atomicrmw umin ptr %0, i128 %1 monotonic, align 16
;   %5 = atomicrmw umax ptr %0, i128 %1 monotonic, align 16
;   ret void
; }