AMDGPU/GlobalISel: Handle atomic_inc/atomic_dec

The intermediate instruction drops the extra volatile argument. We are
missing an atomic ordering on these.
This commit is contained in:
Matt Arsenault 2020-01-17 20:51:01 -05:00 committed by Matt Arsenault
parent 9c928649a0
commit a722cbf77c
10 changed files with 3829 additions and 164 deletions

View File

@ -119,6 +119,12 @@ def : GINodeEquiv<G_ATOMICRMW_FADD, atomic_load_fadd_glue>;
// Each record pairs an AMDGPU-specific generic opcode with a SelectionDAG node.
def : GINodeEquiv<G_AMDGPU_FFBH_U32, AMDGPUffbh_u32>;
def : GINodeEquiv<G_AMDGPU_ATOMIC_CMPXCHG, AMDGPUatomic_cmp_swap>;
// FIXME: Check MMO is atomic
// The atomic inc/dec generic opcodes are each paired with two nodes: the SI*
// node and the *_glue node.
// NOTE(review): presumably so that patterns written against either node are
// usable by the GlobalISel selector - confirm.
def : GINodeEquiv<G_AMDGPU_ATOMIC_INC, SIatomic_inc>;
def : GINodeEquiv<G_AMDGPU_ATOMIC_DEC, SIatomic_dec>;
def : GINodeEquiv<G_AMDGPU_ATOMIC_INC, atomic_inc_glue>;
def : GINodeEquiv<G_AMDGPU_ATOMIC_DEC, atomic_dec_glue>;
class GISelSop2Pat <
SDPatternOperator node,

View File

@ -1956,6 +1956,10 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
return selectG_PTR_MASK(I);
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
return selectG_EXTRACT_VECTOR_ELT(I);
case AMDGPU::G_AMDGPU_ATOMIC_INC:
case AMDGPU::G_AMDGPU_ATOMIC_DEC:
initM0(I);
return selectImpl(I, *CoverageInfo);
default:
return selectImpl(I, *CoverageInfo);
}

View File

@ -2340,6 +2340,22 @@ bool AMDGPULegalizerInfo::legalizeRawBufferStore(MachineInstr &MI,
return Ty == S32;
}
/// Replace an amdgcn atomic inc/dec intrinsic instruction with the matching
/// G_AMDGPU_ATOMIC_INC / G_AMDGPU_ATOMIC_DEC generic instruction.
///
/// Only operand 0 (the result) and operands 2 and 3 of \p MI are carried over
/// to the new instruction; every other operand of \p MI is dropped. The
/// memory operands of \p MI are cloned onto the replacement, and \p MI is
/// erased afterwards.
///
/// \param MI    The intrinsic instruction being legalized; erased on return.
/// \param B     Builder used to emit the replacement at the position of MI.
/// \param IsInc true selects the INC opcode, false selects the DEC opcode.
/// \returns true unconditionally.
bool AMDGPULegalizerInfo::legalizeAtomicIncDec(MachineInstr &MI,
                                               MachineIRBuilder &B,
                                               bool IsInc) const {
  unsigned Opc = AMDGPU::G_AMDGPU_ATOMIC_DEC;
  if (IsInc)
    Opc = AMDGPU::G_AMDGPU_ATOMIC_INC;

  // Emit the replacement at the location of the instruction it replaces.
  B.setInstr(MI);
  auto NewMI = B.buildInstr(Opc);
  NewMI.addDef(MI.getOperand(0).getReg());
  NewMI.addUse(MI.getOperand(2).getReg());
  NewMI.addUse(MI.getOperand(3).getReg());
  NewMI.cloneMemRefs(MI);

  // MI is dead once the replacement has copied everything it needs from it.
  MI.eraseFromParent();
  return true;
}
// FIXME: Needs observer like custom
bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
@ -2458,6 +2474,10 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
return legalizeRawBufferStore(MI, MRI, B, false);
case Intrinsic::amdgcn_raw_buffer_store_format:
return legalizeRawBufferStore(MI, MRI, B, true);
case Intrinsic::amdgcn_atomic_inc:
return legalizeAtomicIncDec(MI, B, true);
case Intrinsic::amdgcn_atomic_dec:
return legalizeAtomicIncDec(MI, B, false);
default:
return true;
}

View File

@ -106,6 +106,10 @@ public:
Register Reg) const;
bool legalizeRawBufferStore(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B, bool IsFormat) const;
/// Legalize an amdgcn atomic inc/dec intrinsic instruction \p MI using builder
/// \p B. \p IsInc is true for the increment intrinsic and false for the
/// decrement intrinsic.
bool legalizeAtomicIncDec(MachineInstr &MI, MachineIRBuilder &B,
bool IsInc) const;
bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const override;

View File

@ -3183,8 +3183,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_ds_fadd:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax:
case Intrinsic::amdgcn_atomic_inc:
case Intrinsic::amdgcn_atomic_dec:
return getDefaultMappingAllVGPR(MI);
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap: {
@ -3380,7 +3378,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_ATOMICRMW_UMIN:
case AMDGPU::G_ATOMICRMW_FADD:
case AMDGPU::G_ATOMIC_CMPXCHG:
case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG: {
case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG:
case AMDGPU::G_AMDGPU_ATOMIC_INC:
case AMDGPU::G_AMDGPU_ATOMIC_DEC: {
return getDefaultMappingAllVGPR(MI);
}
case AMDGPU::G_BRCOND: {

View File

@ -2150,8 +2150,13 @@ def G_AMDGPU_FFBH_U32 : AMDGPUGenericInstruction {
// operands.
def G_AMDGPU_ATOMIC_CMPXCHG : AMDGPUGenericInstruction {
// Single result of type0, named $oldval.
let OutOperandList = (outs type0:$oldval);
// NOTE(review): the next two lines are the before/after forms of the same
// line in this change; the second spells the operand name $cmpval_newval.
// Inputs are a pointer ($addr) and one type0 operand.
let InOperandList = (ins ptype1:$addr, type0:$cmpval_nnenwval);
let InOperandList = (ins ptype1:$addr, type0:$cmpval_newval);
// Marked as both loading and storing, with no other side effects.
let hasSideEffects = 0;
let mayLoad = 1;
let mayStore = 1;
}
// Generic atomic increment/decrement opcodes, placed in the AMDGPU namespace.
// Both derive from G_ATOMICRMW_OP and override nothing beyond the namespace.
let Namespace = "AMDGPU" in {
def G_AMDGPU_ATOMIC_INC : G_ATOMICRMW_OP;
def G_AMDGPU_ATOMIC_DEC : G_ATOMICRMW_OP;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,80 +0,0 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s
---
# amdgcn.atomic.dec on a p3 pointer where both the pointer and the s32 data
# value arrive in SGPRs. The checks expect each of them to be copied to a VGPR
# before the intrinsic, and the result to be assigned to the vgpr bank.
name: atomic_dec_p3_ss
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1
; CHECK-LABEL: name: atomic_dec_p3_ss
; CHECK: liveins: $sgpr0, $sgpr1
; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.dec), [[COPY2]](p3), [[COPY3]](s32), 0, 0, 0
%0:_(p3) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.dec), %0, %1, 0, 0, 0
...
---
# amdgcn.atomic.dec on a p3 pointer that is already in a VGPR, with the s32
# data value in an SGPR. The checks expect only the data value to be copied to
# a VGPR; the pointer is used directly.
name: atomic_dec_p3_vs
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0, $sgpr0
; CHECK-LABEL: name: atomic_dec_p3_vs
; CHECK: liveins: $vgpr0, $sgpr0
; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.dec), [[COPY]](p3), [[COPY2]](s32), 0, 0, 0
%0:_(p3) = COPY $vgpr0
%1:_(s32) = COPY $sgpr0
%2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.dec), %0, %1, 0, 0, 0
...
---
# amdgcn.atomic.dec on a p1 pointer held in the $sgpr0_sgpr1 pair, with the s32
# data value in $sgpr2. The checks expect both operands to be copied to VGPRs
# before the intrinsic, and the result to be assigned to the vgpr bank.
name: atomic_dec_p1_ss
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1, $sgpr2
; CHECK-LABEL: name: atomic_dec_p1_ss
; CHECK: liveins: $sgpr0_sgpr1, $sgpr2
; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1)
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.dec), [[COPY2]](p1), [[COPY3]](s32), 0, 0, 0
%0:_(p1) = COPY $sgpr0_sgpr1
%1:_(s32) = COPY $sgpr2
%2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.dec), %0, %1, 0, 0, 0
...
---
# amdgcn.atomic.dec on a p1 pointer already held in the $vgpr0_vgpr1 pair, with
# the s32 data value in $sgpr2. The checks expect only the data value to be
# copied to a VGPR; the pointer is used directly.
name: atomic_dec_p1_vs
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1, $sgpr2
; CHECK-LABEL: name: atomic_dec_p1_vs
; CHECK: liveins: $vgpr0_vgpr1, $sgpr2
; CHECK: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.dec), [[COPY]](p1), [[COPY2]](s32), 0, 0, 0
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = COPY $sgpr2
%2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.dec), %0, %1, 0, 0, 0
...

View File

@ -1,80 +0,0 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s
---
# amdgcn.atomic.inc on a p3 pointer where both the pointer and the s32 data
# value arrive in SGPRs. The checks expect each of them to be copied to a VGPR
# before the intrinsic, and the result to be assigned to the vgpr bank.
name: atomic_inc_p3_ss
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1
; CHECK-LABEL: name: atomic_inc_p3_ss
; CHECK: liveins: $sgpr0, $sgpr1
; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.inc), [[COPY2]](p3), [[COPY3]](s32), 0, 0, 0
%0:_(p3) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.inc), %0, %1, 0, 0, 0
...
---
# amdgcn.atomic.inc on a p3 pointer that is already in a VGPR, with the s32
# data value in an SGPR. The checks expect only the data value to be copied to
# a VGPR; the pointer is used directly.
name: atomic_inc_p3_vs
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0, $sgpr0
; CHECK-LABEL: name: atomic_inc_p3_vs
; CHECK: liveins: $vgpr0, $sgpr0
; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.inc), [[COPY]](p3), [[COPY2]](s32), 0, 0, 0
%0:_(p3) = COPY $vgpr0
%1:_(s32) = COPY $sgpr0
%2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.inc), %0, %1, 0, 0, 0
...
---
# amdgcn.atomic.inc on a p1 pointer held in the $sgpr0_sgpr1 pair, with the s32
# data value in $sgpr2. The checks expect both operands to be copied to VGPRs
# before the intrinsic, and the result to be assigned to the vgpr bank.
name: atomic_inc_p1_ss
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1, $sgpr2
; CHECK-LABEL: name: atomic_inc_p1_ss
; CHECK: liveins: $sgpr0_sgpr1, $sgpr2
; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1)
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.inc), [[COPY2]](p1), [[COPY3]](s32), 0, 0, 0
%0:_(p1) = COPY $sgpr0_sgpr1
%1:_(s32) = COPY $sgpr2
%2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.inc), %0, %1, 0, 0, 0
...
---
# amdgcn.atomic.inc on a p1 pointer already held in the $vgpr0_vgpr1 pair, with
# the s32 data value in $sgpr2. The checks expect only the data value to be
# copied to a VGPR; the pointer is used directly.
name: atomic_inc_p1_vs
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1, $sgpr2
; CHECK-LABEL: name: atomic_inc_p1_vs
; CHECK: liveins: $vgpr0_vgpr1, $sgpr2
; CHECK: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.inc), [[COPY]](p1), [[COPY2]](s32), 0, 0, 0
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = COPY $sgpr2
%2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.inc), %0, %1, 0, 0, 0
...