forked from OSchip/llvm-project
AMDGPU/GlobalISel: Handle atomic_inc/atomic_dec
The intermediate instruction drops the extra volatile argument. We are missing an atomic ordering on these.
This commit is contained in:
parent
9c928649a0
commit
a722cbf77c
llvm
|
@ -119,6 +119,12 @@ def : GINodeEquiv<G_ATOMICRMW_FADD, atomic_load_fadd_glue>;
|
|||
def : GINodeEquiv<G_AMDGPU_FFBH_U32, AMDGPUffbh_u32>;
|
||||
def : GINodeEquiv<G_AMDGPU_ATOMIC_CMPXCHG, AMDGPUatomic_cmp_swap>;
|
||||
|
||||
// FIXME: Check MMO is atomic
|
||||
def : GINodeEquiv<G_AMDGPU_ATOMIC_INC, SIatomic_inc>;
|
||||
def : GINodeEquiv<G_AMDGPU_ATOMIC_DEC, SIatomic_dec>;
|
||||
def : GINodeEquiv<G_AMDGPU_ATOMIC_INC, atomic_inc_glue>;
|
||||
def : GINodeEquiv<G_AMDGPU_ATOMIC_DEC, atomic_dec_glue>;
|
||||
|
||||
|
||||
class GISelSop2Pat <
|
||||
SDPatternOperator node,
|
||||
|
|
|
@ -1956,6 +1956,10 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
|
|||
return selectG_PTR_MASK(I);
|
||||
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
|
||||
return selectG_EXTRACT_VECTOR_ELT(I);
|
||||
case AMDGPU::G_AMDGPU_ATOMIC_INC:
|
||||
case AMDGPU::G_AMDGPU_ATOMIC_DEC:
|
||||
initM0(I);
|
||||
return selectImpl(I, *CoverageInfo);
|
||||
default:
|
||||
return selectImpl(I, *CoverageInfo);
|
||||
}
|
||||
|
|
|
@ -2340,6 +2340,22 @@ bool AMDGPULegalizerInfo::legalizeRawBufferStore(MachineInstr &MI,
|
|||
return Ty == S32;
|
||||
}
|
||||
|
||||
/// Replace an amdgcn atomic inc/dec intrinsic instruction with the matching
/// target generic instruction.
///
/// \param MI    The intrinsic instruction being legalized; it is erased.
/// \param B     Builder used to emit the replacement right at \p MI.
/// \param IsInc Selects G_AMDGPU_ATOMIC_INC when true, G_AMDGPU_ATOMIC_DEC
///              otherwise.
/// \returns true unconditionally.
bool AMDGPULegalizerInfo::legalizeAtomicIncDec(MachineInstr &MI,
                                               MachineIRBuilder &B,
                                               bool IsInc) const {
  unsigned NewOpcode = AMDGPU::G_AMDGPU_ATOMIC_DEC;
  if (IsInc)
    NewOpcode = AMDGPU::G_AMDGPU_ATOMIC_INC;

  B.setInstr(MI);

  // Only operand 0 (the result) and operands 2 and 3 are carried over; the
  // remaining operands of the intrinsic are dropped. The memory operands are
  // copied from the original instruction.
  auto Replacement = B.buildInstr(NewOpcode);
  Replacement.addDef(MI.getOperand(0).getReg());
  Replacement.addUse(MI.getOperand(2).getReg());
  Replacement.addUse(MI.getOperand(3).getReg());
  Replacement.cloneMemRefs(MI);

  MI.eraseFromParent();
  return true;
}
|
||||
|
||||
// FIXME: Needs observer like custom
|
||||
bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
|
||||
MachineRegisterInfo &MRI,
|
||||
MachineIRBuilder &B) const {
|
||||
|
@ -2458,6 +2474,10 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
|
|||
return legalizeRawBufferStore(MI, MRI, B, false);
|
||||
case Intrinsic::amdgcn_raw_buffer_store_format:
|
||||
return legalizeRawBufferStore(MI, MRI, B, true);
|
||||
case Intrinsic::amdgcn_atomic_inc:
|
||||
return legalizeAtomicIncDec(MI, B, true);
|
||||
case Intrinsic::amdgcn_atomic_dec:
|
||||
return legalizeAtomicIncDec(MI, B, false);
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -106,6 +106,10 @@ public:
|
|||
Register Reg) const;
|
||||
bool legalizeRawBufferStore(MachineInstr &MI, MachineRegisterInfo &MRI,
|
||||
MachineIRBuilder &B, bool IsFormat) const;
|
||||
|
||||
bool legalizeAtomicIncDec(MachineInstr &MI, MachineIRBuilder &B,
|
||||
bool IsInc) const;
|
||||
|
||||
bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
|
||||
MachineIRBuilder &B) const override;
|
||||
|
||||
|
|
|
@ -3183,8 +3183,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
|||
case Intrinsic::amdgcn_ds_fadd:
|
||||
case Intrinsic::amdgcn_ds_fmin:
|
||||
case Intrinsic::amdgcn_ds_fmax:
|
||||
case Intrinsic::amdgcn_atomic_inc:
|
||||
case Intrinsic::amdgcn_atomic_dec:
|
||||
return getDefaultMappingAllVGPR(MI);
|
||||
case Intrinsic::amdgcn_ds_ordered_add:
|
||||
case Intrinsic::amdgcn_ds_ordered_swap: {
|
||||
|
@ -3380,7 +3378,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
|||
case AMDGPU::G_ATOMICRMW_UMIN:
|
||||
case AMDGPU::G_ATOMICRMW_FADD:
|
||||
case AMDGPU::G_ATOMIC_CMPXCHG:
|
||||
case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG: {
|
||||
case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG:
|
||||
case AMDGPU::G_AMDGPU_ATOMIC_INC:
|
||||
case AMDGPU::G_AMDGPU_ATOMIC_DEC: {
|
||||
return getDefaultMappingAllVGPR(MI);
|
||||
}
|
||||
case AMDGPU::G_BRCOND: {
|
||||
|
|
|
@ -2150,8 +2150,13 @@ def G_AMDGPU_FFBH_U32 : AMDGPUGenericInstruction {
|
|||
// operands.
|
||||
def G_AMDGPU_ATOMIC_CMPXCHG : AMDGPUGenericInstruction {
|
||||
let OutOperandList = (outs type0:$oldval);
|
||||
let InOperandList = (ins ptype1:$addr, type0:$cmpval_nnenwval);
|
||||
let InOperandList = (ins ptype1:$addr, type0:$cmpval_newval);
|
||||
let hasSideEffects = 0;
|
||||
let mayLoad = 1;
|
||||
let mayStore = 1;
|
||||
}
|
||||
|
||||
// Target generic opcodes for atomic increment/decrement, declared in the
// AMDGPU namespace and sharing the G_ATOMICRMW_OP instruction class.
let Namespace = "AMDGPU" in {
|
||||
def G_AMDGPU_ATOMIC_INC : G_ATOMICRMW_OP;
|
||||
def G_AMDGPU_ATOMIC_DEC : G_ATOMICRMW_OP;
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,80 +0,0 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s
|
||||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s
|
||||
|
||||
---
|
||||
name: atomic_dec_p3_ss
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1
|
||||
; CHECK-LABEL: name: atomic_dec_p3_ss
|
||||
; CHECK: liveins: $sgpr0, $sgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.dec), [[COPY2]](p3), [[COPY3]](s32), 0, 0, 0
|
||||
%0:_(p3) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $sgpr1
|
||||
%2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.dec), %0, %1, 0, 0, 0
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: atomic_dec_p3_vs
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $sgpr0
|
||||
; CHECK-LABEL: name: atomic_dec_p3_vs
|
||||
; CHECK: liveins: $vgpr0, $sgpr0
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.dec), [[COPY]](p3), [[COPY2]](s32), 0, 0, 0
|
||||
%0:_(p3) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $sgpr0
|
||||
%2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.dec), %0, %1, 0, 0, 0
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: atomic_dec_p1_ss
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1, $sgpr2
|
||||
; CHECK-LABEL: name: atomic_dec_p1_ss
|
||||
; CHECK: liveins: $sgpr0_sgpr1, $sgpr2
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1)
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.dec), [[COPY2]](p1), [[COPY3]](s32), 0, 0, 0
|
||||
%0:_(p1) = COPY $sgpr0_sgpr1
|
||||
%1:_(s32) = COPY $sgpr2
|
||||
%2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.dec), %0, %1, 0, 0, 0
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: atomic_dec_p1_vs
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1, $sgpr2
|
||||
; CHECK-LABEL: name: atomic_dec_p1_vs
|
||||
; CHECK: liveins: $vgpr0_vgpr1, $sgpr2
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.dec), [[COPY]](p1), [[COPY2]](s32), 0, 0, 0
|
||||
%0:_(p1) = COPY $vgpr0_vgpr1
|
||||
%1:_(s32) = COPY $sgpr2
|
||||
%2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.dec), %0, %1, 0, 0, 0
|
||||
...
|
|
@ -1,80 +0,0 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s
|
||||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s
|
||||
|
||||
---
|
||||
name: atomic_inc_p3_ss
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1
|
||||
; CHECK-LABEL: name: atomic_inc_p3_ss
|
||||
; CHECK: liveins: $sgpr0, $sgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.inc), [[COPY2]](p3), [[COPY3]](s32), 0, 0, 0
|
||||
%0:_(p3) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $sgpr1
|
||||
%2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.inc), %0, %1, 0, 0, 0
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: atomic_inc_p3_vs
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $sgpr0
|
||||
; CHECK-LABEL: name: atomic_inc_p3_vs
|
||||
; CHECK: liveins: $vgpr0, $sgpr0
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.inc), [[COPY]](p3), [[COPY2]](s32), 0, 0, 0
|
||||
%0:_(p3) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $sgpr0
|
||||
%2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.inc), %0, %1, 0, 0, 0
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: atomic_inc_p1_ss
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1, $sgpr2
|
||||
; CHECK-LABEL: name: atomic_inc_p1_ss
|
||||
; CHECK: liveins: $sgpr0_sgpr1, $sgpr2
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1)
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.inc), [[COPY2]](p1), [[COPY3]](s32), 0, 0, 0
|
||||
%0:_(p1) = COPY $sgpr0_sgpr1
|
||||
%1:_(s32) = COPY $sgpr2
|
||||
%2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.inc), %0, %1, 0, 0, 0
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: atomic_inc_p1_vs
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1, $sgpr2
|
||||
; CHECK-LABEL: name: atomic_inc_p1_vs
|
||||
; CHECK: liveins: $vgpr0_vgpr1, $sgpr2
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.inc), [[COPY]](p1), [[COPY2]](s32), 0, 0, 0
|
||||
%0:_(p1) = COPY $vgpr0_vgpr1
|
||||
%1:_(s32) = COPY $sgpr2
|
||||
%2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.atomic.inc), %0, %1, 0, 0, 0
|
||||
...
|
Loading…
Reference in New Issue