forked from OSchip/llvm-project
AMDGPU/GlobalISel: Select global MUBUF atomicrmw
This commit is contained in:
parent
9c903d0373
commit
49e424e08e
|
@ -100,6 +100,14 @@ def gi_mubuf_offset :
|
|||
GIComplexOperandMatcher<s64, "selectMUBUFOffset">,
|
||||
GIComplexPatternEquiv<MUBUFOffset>;
|
||||
|
||||
// GlobalISel complex operand matcher on s64 whose matcher function is
// selectMUBUFAddr64Atomic; declared as the equivalent of the
// MUBUFAddr64Atomic complex pattern.
def gi_mubuf_addr64_atomic :
|
||||
GIComplexOperandMatcher<s64, "selectMUBUFAddr64Atomic">,
|
||||
GIComplexPatternEquiv<MUBUFAddr64Atomic>;
|
||||
|
||||
// GlobalISel complex operand matcher on s64 whose matcher function is
// selectMUBUFOffsetAtomic; declared as the equivalent of the
// MUBUFOffsetAtomic complex pattern.
def gi_mubuf_offset_atomic :
|
||||
GIComplexOperandMatcher<s64, "selectMUBUFOffsetAtomic">,
|
||||
GIComplexPatternEquiv<MUBUFOffsetAtomic>;
|
||||
|
||||
// Separate load nodes are defined to glue m0 initialization in
|
||||
// SelectionDAG. The GISel selector can just insert m0 initialization
|
||||
// directly before selecting a glue-less load, so hide this
|
||||
|
|
|
@ -2722,6 +2722,62 @@ AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
|
|||
}};
|
||||
}
|
||||
|
||||
/// Complex-pattern matcher for the MUBUF addr64 atomic addressing form.
///
/// Address matching is delegated to selectMUBUFAddr64Impl; if that fails, no
/// renderers are returned. Otherwise the returned renderers append, in order:
/// rsrc, vaddr, soffset (the register when one was produced, else immediate
/// 0), the immediate offset, and the addZeroImm renderer for slc.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFAddr64Atomic(MachineOperand &Root) const {
  Register AddrReg;
  Register ResourceReg;
  Register SOffsetReg;
  int64_t ImmOffset = 0;

  const bool Matched =
      selectMUBUFAddr64Impl(Root, AddrReg, ResourceReg, SOffsetReg, ImmOffset);
  if (!Matched)
    return {};

  auto RenderRSrc = [ResourceReg](MachineInstrBuilder &MIB) {
    MIB.addReg(ResourceReg);
  };

  auto RenderVAddr = [AddrReg](MachineInstrBuilder &MIB) {
    MIB.addReg(AddrReg);
  };

  // With no soffset register, the operand is rendered as immediate 0.
  auto RenderSOffset = [SOffsetReg](MachineInstrBuilder &MIB) {
    if (!SOffsetReg) {
      MIB.addImm(0);
      return;
    }
    MIB.addReg(SOffsetReg);
  };

  auto RenderOffset = [ImmOffset](MachineInstrBuilder &MIB) {
    MIB.addImm(ImmOffset);
  };

  // FIXME: Use defaulted operands for trailing 0s and remove from the complex
  // pattern.
  return {{RenderRSrc, RenderVAddr, RenderSOffset, RenderOffset,
           addZeroImm /* slc */}};
}
|
||||
|
||||
/// Complex-pattern matcher for the MUBUF offset (no vaddr) atomic addressing
/// form.
///
/// Address matching is delegated to selectMUBUFOffsetImpl; if that fails, no
/// renderers are returned. Otherwise the returned renderers append, in order:
/// rsrc, soffset (the register when one was produced, else immediate 0), the
/// immediate offset, and the addZeroImm renderer for slc.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFOffsetAtomic(MachineOperand &Root) const {
  Register ResourceReg;
  Register SOffsetReg;
  int64_t ImmOffset = 0;

  const bool Matched =
      selectMUBUFOffsetImpl(Root, ResourceReg, SOffsetReg, ImmOffset);
  if (!Matched)
    return {};

  auto RenderRSrc = [ResourceReg](MachineInstrBuilder &MIB) {
    MIB.addReg(ResourceReg);
  };

  // With no soffset register, the operand is rendered as immediate 0.
  auto RenderSOffset = [SOffsetReg](MachineInstrBuilder &MIB) {
    if (!SOffsetReg) {
      MIB.addImm(0);
      return;
    }
    MIB.addReg(SOffsetReg);
  };

  auto RenderOffset = [ImmOffset](MachineInstrBuilder &MIB) {
    MIB.addImm(ImmOffset);
  };

  return {{RenderRSrc, RenderSOffset, RenderOffset, addZeroImm /* slc */}};
}
|
||||
|
||||
void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
|
||||
const MachineInstr &MI,
|
||||
int OpIdx) const {
|
||||
|
|
|
@ -214,6 +214,12 @@ private:
|
|||
InstructionSelector::ComplexRendererFns
|
||||
selectMUBUFOffset(MachineOperand &Root) const;
|
||||
|
||||
/// Matcher for the MUBUF offset atomic addressing form. Returns renderers for
/// rsrc, soffset, offset and slc, or nothing when the address does not match.
InstructionSelector::ComplexRendererFns
|
||||
selectMUBUFOffsetAtomic(MachineOperand &Root) const;
|
||||
|
||||
/// Matcher for the MUBUF addr64 atomic addressing form. Returns renderers for
/// rsrc, vaddr, soffset, offset and slc, or nothing when the address does not
/// match.
InstructionSelector::ComplexRendererFns
|
||||
selectMUBUFAddr64Atomic(MachineOperand &Root) const;
|
||||
|
||||
void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
|
||||
int OpIdx = -1) const;
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
|
||||
|
@ -16,10 +16,15 @@ body: |
|
|||
|
||||
; GFX6-LABEL: name: global_atomicrmw_add_s32
|
||||
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; GFX6: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_ADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1)
|
||||
; GFX6: $vgpr0 = COPY [[ATOMICRMW_ADD]](s32)
|
||||
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
|
||||
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
|
||||
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
|
||||
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]]
|
||||
; GFX7-LABEL: name: global_atomicrmw_add_s32
|
||||
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
|
@ -57,9 +62,14 @@ body: |
|
|||
|
||||
; GFX6-LABEL: name: global_atomicrmw_add_s32_nortn
|
||||
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; GFX6: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1)
|
||||
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
|
||||
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
|
||||
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
|
||||
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
|
||||
; GFX7-LABEL: name: global_atomicrmw_add_s32_nortn
|
||||
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
|
@ -93,12 +103,15 @@ body: |
|
|||
|
||||
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset2047
|
||||
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; GFX6: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 2047
|
||||
; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; GFX6: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_ADD [[PTR_ADD]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1)
|
||||
; GFX6: $vgpr0 = COPY [[ATOMICRMW_ADD]](s32)
|
||||
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
|
||||
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
|
||||
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
|
||||
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]]
|
||||
; GFX7-LABEL: name: global_atomicrmw_add_s32_offset2047
|
||||
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
|
@ -148,11 +161,14 @@ body: |
|
|||
|
||||
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn
|
||||
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; GFX6: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 2047
|
||||
; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; GFX6: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[PTR_ADD]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1)
|
||||
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
|
||||
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
|
||||
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
|
||||
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
|
||||
; GFX7-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn
|
||||
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
|
@ -198,12 +214,15 @@ body: |
|
|||
|
||||
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset2048
|
||||
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; GFX6: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 2048
|
||||
; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; GFX6: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_ADD [[PTR_ADD]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1)
|
||||
; GFX6: $vgpr0 = COPY [[ATOMICRMW_ADD]](s32)
|
||||
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
|
||||
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
|
||||
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
|
||||
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]]
|
||||
; GFX7-LABEL: name: global_atomicrmw_add_s32_offset2048
|
||||
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
|
@ -263,11 +282,14 @@ body: |
|
|||
|
||||
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn
|
||||
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; GFX6: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 2048
|
||||
; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; GFX6: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[PTR_ADD]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1)
|
||||
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
|
||||
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
|
||||
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
|
||||
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
|
||||
; GFX7-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn
|
||||
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
|
@ -323,12 +345,15 @@ body: |
|
|||
|
||||
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset4095
|
||||
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; GFX6: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 4095
|
||||
; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; GFX6: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_ADD [[PTR_ADD]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1)
|
||||
; GFX6: $vgpr0 = COPY [[ATOMICRMW_ADD]](s32)
|
||||
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
|
||||
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
|
||||
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
|
||||
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]]
|
||||
; GFX7-LABEL: name: global_atomicrmw_add_s32_offset4095
|
||||
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
|
@ -388,11 +413,14 @@ body: |
|
|||
|
||||
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn
|
||||
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; GFX6: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 4095
|
||||
; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; GFX6: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[PTR_ADD]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1)
|
||||
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
|
||||
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
|
||||
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
|
||||
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
|
||||
; GFX7-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn
|
||||
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
|
@ -448,12 +476,16 @@ body: |
|
|||
|
||||
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset4097
|
||||
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; GFX6: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 4097
|
||||
; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; GFX6: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr_32(s32) = G_ATOMICRMW_ADD [[PTR_ADD]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1)
|
||||
; GFX6: $vgpr0 = COPY [[ATOMICRMW_ADD]](s32)
|
||||
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
|
||||
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
|
||||
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
|
||||
; GFX6: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4097
|
||||
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]]
|
||||
; GFX7-LABEL: name: global_atomicrmw_add_s32_offset4097
|
||||
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
|
@ -523,11 +555,15 @@ body: |
|
|||
|
||||
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn
|
||||
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||
; GFX6: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 4097
|
||||
; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; GFX6: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s32) = G_ATOMICRMW_ADD [[PTR_ADD]](p1), [[COPY1]] :: (load store seq_cst 4, addrspace 1)
|
||||
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
|
||||
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
|
||||
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
|
||||
; GFX6: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4097
|
||||
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
|
||||
; GFX7-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn
|
||||
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
|
||||
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
|
@ -593,10 +629,15 @@ body: |
|
|||
|
||||
; GFX6-LABEL: name: global_atomicrmw_add_s64
|
||||
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
; GFX6: [[ATOMICRMW_ADD:%[0-9]+]]:vreg_64(s64) = G_ATOMICRMW_ADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst 8, addrspace 1)
|
||||
; GFX6: $vgpr0_vgpr1 = COPY [[ATOMICRMW_ADD]](s64)
|
||||
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
|
||||
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
|
||||
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
|
||||
; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
|
||||
; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN]]
|
||||
; GFX7-LABEL: name: global_atomicrmw_add_s64
|
||||
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
|
||||
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
|
@ -634,9 +675,14 @@ body: |
|
|||
|
||||
; GFX6-LABEL: name: global_atomicrmw_add_s64_nortn
|
||||
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
; GFX6: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_ADD [[COPY]](p1), [[COPY1]] :: (load store seq_cst 8, addrspace 1)
|
||||
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
|
||||
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
|
||||
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
|
||||
; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
|
||||
; GFX7-LABEL: name: global_atomicrmw_add_s64_nortn
|
||||
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
|
||||
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
|
@ -670,12 +716,15 @@ body: |
|
|||
|
||||
; GFX6-LABEL: name: global_atomicrmw_add_s64_offset4095
|
||||
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
; GFX6: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 4095
|
||||
; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; GFX6: [[ATOMICRMW_ADD:%[0-9]+]]:vreg_64(s64) = G_ATOMICRMW_ADD [[PTR_ADD]](p1), [[COPY1]] :: (load store seq_cst 8, addrspace 1)
|
||||
; GFX6: $vgpr0_vgpr1 = COPY [[ATOMICRMW_ADD]](s64)
|
||||
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
|
||||
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
|
||||
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
|
||||
; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
|
||||
; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN]]
|
||||
; GFX7-LABEL: name: global_atomicrmw_add_s64_offset4095
|
||||
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
|
||||
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
|
@ -735,11 +784,14 @@ body: |
|
|||
|
||||
; GFX6-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn
|
||||
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
|
||||
; GFX6: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 4095
|
||||
; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; GFX6: [[ATOMICRMW_ADD:%[0-9]+]]:vgpr(s64) = G_ATOMICRMW_ADD [[PTR_ADD]](p1), [[COPY1]] :: (load store seq_cst 8, addrspace 1)
|
||||
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
|
||||
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
|
||||
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
|
||||
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
|
||||
; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
|
||||
; GFX7-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn
|
||||
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
|
||||
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
|
|
|
@ -917,9 +917,185 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095_vgpr_offset(float addrspa
|
|||
ret float %val
|
||||
}
|
||||
|
||||
; define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4095(i32 addrspace(1)* inreg %ptr) {
|
||||
; %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095
|
||||
; %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst
|
||||
; %cast = bitcast i32 %result to float
|
||||
; ret float %cast
|
||||
; }
|
||||
define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4095(i32 addrspace(1)* inreg %ptr) {
|
||||
; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4095:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_mov_b32 s0, s2
|
||||
; GFX6-NEXT: s_mov_b32 s1, s3
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, 2
|
||||
; GFX6-NEXT: s_mov_b32 s2, -1
|
||||
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_add v0, off, s[0:3], s4 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX7-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4095:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_mov_b32 s0, s2
|
||||
; GFX7-NEXT: s_mov_b32 s1, s3
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, 2
|
||||
; GFX7-NEXT: s_mov_b32 s2, -1
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_atomic_add v0, off, s[0:3], s4 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: ; return to shader part epilog
|
||||
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095
|
||||
%result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst
|
||||
%cast = bitcast i32 %result to float
|
||||
ret float %cast
|
||||
}
|
||||
|
||||
define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4294967296(i32 addrspace(1)* inreg %ptr) {
|
||||
; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4294967296:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_mov_b32 s4, 0
|
||||
; GFX6-NEXT: s_mov_b32 s5, 4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX6-NEXT: s_mov_b32 s0, s2
|
||||
; GFX6-NEXT: s_mov_b32 s1, s3
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, 2
|
||||
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX6-NEXT: s_mov_b32 s2, s4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, s5
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_add v0, v[1:2], s[0:3], 0 addr64 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX7-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4294967296:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_mov_b32 s4, 0
|
||||
; GFX7-NEXT: s_mov_b32 s5, 4
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX7-NEXT: s_mov_b32 s0, s2
|
||||
; GFX7-NEXT: s_mov_b32 s1, s3
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, 2
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: s_mov_b32 s2, s4
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, s5
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_atomic_add v0, v[1:2], s[0:3], 0 addr64 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: ; return to shader part epilog
|
||||
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296
|
||||
%result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst
|
||||
%cast = bitcast i32 %result to float
|
||||
ret float %cast
|
||||
}
|
||||
|
||||
define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr) {
|
||||
; GFX6-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4095:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, 2
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0
|
||||
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX6-NEXT: s_mov_b64 s[0:1], 0
|
||||
; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], s4 addr64 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, v2
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX7-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4095:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, 2
|
||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: s_mov_b64 s[0:1], 0
|
||||
; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], s4 addr64 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v2
|
||||
; GFX7-NEXT: ; return to shader part epilog
|
||||
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095
|
||||
%result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst
|
||||
%cast = bitcast i32 %result to float
|
||||
ret float %cast
|
||||
}
|
||||
|
||||
define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4294967296(i32 addrspace(1)* %ptr) {
|
||||
; GFX6-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4294967296:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_mov_b32 s0, 0
|
||||
; GFX6-NEXT: s_mov_b32 s1, 4
|
||||
; GFX6-NEXT: v_mov_b32_e32 v2, 2
|
||||
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX6-NEXT: s_mov_b32 s2, s0
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, v2
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX7-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4294967296:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_mov_b32 s0, 0
|
||||
; GFX7-NEXT: s_mov_b32 s1, 4
|
||||
; GFX7-NEXT: v_mov_b32_e32 v2, 2
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: s_mov_b32 s2, s0
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_atomic_add v2, v[0:1], s[0:3], 0 addr64 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v2
|
||||
; GFX7-NEXT: ; return to shader part epilog
|
||||
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296
|
||||
%result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst
|
||||
%cast = bitcast i32 %result to float
|
||||
ret float %cast
|
||||
}
|
||||
|
||||
define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset) {
|
||||
; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_vgpr_offset:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
|
||||
; GFX6-NEXT: v_lshl_b64 v[1:2], v[0:1], 2
|
||||
; GFX6-NEXT: s_mov_b32 s0, s2
|
||||
; GFX6-NEXT: s_mov_b32 s1, s3
|
||||
; GFX6-NEXT: v_mov_b32_e32 v0, 2
|
||||
; GFX6-NEXT: s_mov_b32 s2, 0
|
||||
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_atomic_add v0, v[1:2], s[0:3], 0 addr64 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX6-NEXT: buffer_wbinvl1
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX7-LABEL: mubuf_atomicrmw_sgpr_ptr_vgpr_offset:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
|
||||
; GFX7-NEXT: v_lshl_b64 v[1:2], v[0:1], 2
|
||||
; GFX7-NEXT: s_mov_b32 s0, s2
|
||||
; GFX7-NEXT: s_mov_b32 s1, s3
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, 2
|
||||
; GFX7-NEXT: s_mov_b32 s2, 0
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_atomic_add v0, v[1:2], s[0:3], 0 addr64 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX7-NEXT: buffer_wbinvl1
|
||||
; GFX7-NEXT: ; return to shader part epilog
|
||||
%gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %voffset
|
||||
%result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst
|
||||
%cast = bitcast i32 %result to float
|
||||
ret float %cast
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue