forked from OSchip/llvm-project
AMDGPU/GlobalISel: Fix selection of private stores
llvm-svn: 366249
This commit is contained in:
parent
7161fb0be5
commit
2d10407719
|
@@ -1568,17 +1568,18 @@ defm : MUBUFStore_Pattern <BUFFER_STORE_SHORT_OFFSET, i16, store_global>;
|
|||
|
||||
multiclass MUBUFScratchStorePat <MUBUF_Pseudo InstrOffen,
|
||||
MUBUF_Pseudo InstrOffset,
|
||||
ValueType vt, PatFrag st> {
|
||||
ValueType vt, PatFrag st,
|
||||
RegisterClass rc = VGPR_32> {
|
||||
def : GCNPat <
|
||||
(st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
|
||||
i32:$soffset, u16imm:$offset)),
|
||||
(InstrOffen $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0)
|
||||
(InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0)
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset,
|
||||
u16imm:$offset)),
|
||||
(InstrOffset $value, $srsrc, $soffset, $offset, 0, 0, 0, 0)
|
||||
(InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0, 0)
|
||||
>;
|
||||
}
|
||||
|
||||
|
@@ -1587,9 +1588,9 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET
|
|||
defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET, i16, truncstorei8_private>;
|
||||
defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET, i16, store_private>;
|
||||
defm : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, BUFFER_STORE_DWORD_OFFSET, i32, store_private>;
|
||||
defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, BUFFER_STORE_DWORDX2_OFFSET, v2i32, store_private>;
|
||||
defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX3_OFFEN, BUFFER_STORE_DWORDX3_OFFSET, v3i32, store_private>;
|
||||
defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OFFSET, v4i32, store_private>;
|
||||
defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, BUFFER_STORE_DWORDX2_OFFSET, v2i32, store_private, VReg_64>;
|
||||
defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX3_OFFEN, BUFFER_STORE_DWORDX3_OFFSET, v3i32, store_private, VReg_96>;
|
||||
defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OFFSET, v4i32, store_private, VReg_128>;
|
||||
|
||||
|
||||
let OtherPredicates = [D16PreservesUnusedBits] in {
|
||||
|
|
|
@@ -0,0 +1,280 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
|
||||
name: store_private_s32_to_4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
scratchWaveOffsetReg: $sgpr4
|
||||
stackPtrOffsetReg: $sgpr32
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
|
||||
; GFX6-LABEL: name: store_private_s32_to_4
|
||||
; GFX6: liveins: $vgpr0, $vgpr1
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
|
||||
; GFX9-LABEL: name: store_private_s32_to_4
|
||||
; GFX9: liveins: $vgpr0, $vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(p5) = COPY $vgpr1
|
||||
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: store_private_s32_to_2
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
scratchWaveOffsetReg: $sgpr4
|
||||
stackPtrOffsetReg: $sgpr32
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
|
||||
; GFX6-LABEL: name: store_private_s32_to_2
|
||||
; GFX6: liveins: $vgpr0, $vgpr1
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX6: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
|
||||
; GFX9-LABEL: name: store_private_s32_to_2
|
||||
; GFX9: liveins: $vgpr0, $vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(p5) = COPY $vgpr1
|
||||
G_STORE %0, %1 :: (store 2, align 2, addrspace 5)
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: store_private_s32_to_1
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
scratchWaveOffsetReg: $sgpr4
|
||||
stackPtrOffsetReg: $sgpr32
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
|
||||
; GFX6-LABEL: name: store_private_s32_to_1
|
||||
; GFX6: liveins: $vgpr0, $vgpr1
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX6: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
|
||||
; GFX9-LABEL: name: store_private_s32_to_1
|
||||
; GFX9: liveins: $vgpr0, $vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(p5) = COPY $vgpr1
|
||||
G_STORE %0, %1 :: (store 1, align 1, addrspace 5)
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: store_private_v2s16
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
scratchWaveOffsetReg: $sgpr4
|
||||
stackPtrOffsetReg: $sgpr32
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
|
||||
; GFX6-LABEL: name: store_private_v2s16
|
||||
; GFX6: liveins: $vgpr0, $vgpr1
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1
|
||||
; GFX6: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p5) :: (store 4, addrspace 5)
|
||||
; GFX9-LABEL: name: store_private_v2s16
|
||||
; GFX9: liveins: $vgpr0, $vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1
|
||||
; GFX9: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p5) :: (store 4, addrspace 5)
|
||||
%0:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
%1:vgpr(p5) = COPY $vgpr1
|
||||
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: store_private_p3
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
scratchWaveOffsetReg: $sgpr4
|
||||
stackPtrOffsetReg: $sgpr32
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
|
||||
; GFX6-LABEL: name: store_private_p3
|
||||
; GFX6: liveins: $vgpr0, $vgpr1
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1
|
||||
; GFX6: G_STORE [[COPY]](p3), [[COPY1]](p5) :: (store 4, addrspace 5)
|
||||
; GFX9-LABEL: name: store_private_p3
|
||||
; GFX9: liveins: $vgpr0, $vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1
|
||||
; GFX9: G_STORE [[COPY]](p3), [[COPY1]](p5) :: (store 4, addrspace 5)
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(p5) = COPY $vgpr1
|
||||
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: store_private_p5
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
scratchWaveOffsetReg: $sgpr4
|
||||
stackPtrOffsetReg: $sgpr32
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
|
||||
; GFX6-LABEL: name: store_private_p5
|
||||
; GFX6: liveins: $vgpr0, $vgpr1
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1
|
||||
; GFX6: G_STORE [[COPY]](p5), [[COPY1]](p5) :: (store 4, addrspace 5)
|
||||
; GFX9-LABEL: name: store_private_p5
|
||||
; GFX9: liveins: $vgpr0, $vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1
|
||||
; GFX9: G_STORE [[COPY]](p5), [[COPY1]](p5) :: (store 4, addrspace 5)
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(p5) = COPY $vgpr1
|
||||
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: store_private_s32_to_1_fi_offset_4095
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
scratchWaveOffsetReg: $sgpr4
|
||||
stackPtrOffsetReg: $sgpr32
|
||||
stack:
|
||||
- { id: 0, size: 4096, alignment: 4 }
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
|
||||
; GFX6-LABEL: name: store_private_s32_to_1_fi_offset_4095
|
||||
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
||||
; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
|
||||
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec
|
||||
; GFX6: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_2]], %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
|
||||
; GFX9-LABEL: name: store_private_s32_to_1_fi_offset_4095
|
||||
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
|
||||
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 4095
|
||||
%2:vgpr(p5) = G_GEP %0, %1
|
||||
%3:vgpr(s32) = G_CONSTANT i32 0
|
||||
G_STORE %3, %2 :: (store 1, align 1, addrspace 5)
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: store_private_s32_to_1_constant_4095
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
scratchWaveOffsetReg: $sgpr4
|
||||
stackPtrOffsetReg: $sgpr32
|
||||
stack:
|
||||
- { id: 0, size: 4096, alignment: 4 }
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
|
||||
; GFX6-LABEL: name: store_private_s32_to_1_constant_4095
|
||||
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX6: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4095, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
|
||||
; GFX9-LABEL: name: store_private_s32_to_1_constant_4095
|
||||
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX9: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4095, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
|
||||
%0:vgpr(p5) = G_CONSTANT i32 4095
|
||||
%1:vgpr(s32) = G_CONSTANT i32 0
|
||||
G_STORE %1, %0 :: (store 1, align 1, addrspace 5)
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
name: store_private_s32_to_1_constant_4096
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
scratchWaveOffsetReg: $sgpr4
|
||||
stackPtrOffsetReg: $sgpr32
|
||||
stack:
|
||||
- { id: 0, size: 4096, alignment: 4 }
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
|
||||
; GFX6-LABEL: name: store_private_s32_to_1_constant_4096
|
||||
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
|
||||
; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
|
||||
; GFX9-LABEL: name: store_private_s32_to_1_constant_4096
|
||||
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
|
||||
; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
|
||||
%0:vgpr(p5) = G_CONSTANT i32 4096
|
||||
%1:vgpr(s32) = G_CONSTANT i32 0
|
||||
G_STORE %1, %0 :: (store 1, align 1, addrspace 5)
|
||||
|
||||
...
|
Loading…
Reference in New Issue