forked from OSchip/llvm-project
AMDGPU/GlobalISel: Legalize llvm.amdgcn.s.buffer.load
The 96-bit results need to be widened. I find the interaction between LegalizerHelper and MIRBuilder somewhat awkward. The custom legalization is called by the LegalizerHelper, but then does not have access to the helper. You have to construct a new helper, which then does not own the MachineIRBuilder, but does modify it. Maybe custom legalization should be passed the helper?
This commit is contained in:
parent
748bb5a0f1
commit
69cc9f3046
|
@ -42,6 +42,20 @@ using namespace LegalizeMutations;
|
|||
using namespace LegalityPredicates;
|
||||
using namespace MIPatternMatch;
|
||||
|
||||
// Round the number of elements to the next power of two elements
|
||||
static LLT getPow2VectorType(LLT Ty) {
|
||||
unsigned NElts = Ty.getNumElements();
|
||||
unsigned Pow2NElts = 1 << Log2_32_Ceil(NElts);
|
||||
return Ty.changeNumElements(Pow2NElts);
|
||||
}
|
||||
|
||||
// Round the number of bits to the next power of two bits
|
||||
static LLT getPow2ScalarType(LLT Ty) {
|
||||
unsigned Bits = Ty.getSizeInBits();
|
||||
unsigned Pow2Bits = 1 << Log2_32_Ceil(Bits);
|
||||
return LLT::scalar(Pow2Bits);
|
||||
}
|
||||
|
||||
static LegalityPredicate isMultiple32(unsigned TypeIdx,
|
||||
unsigned MaxSize = 1024) {
|
||||
return [=](const LegalityQuery &Query) {
|
||||
|
@ -2959,6 +2973,33 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
|
|||
return true;
|
||||
}
|
||||
|
||||
bool AMDGPULegalizerInfo::legalizeSBufferLoad(
|
||||
MachineInstr &MI, MachineIRBuilder &B,
|
||||
GISelChangeObserver &Observer) const {
|
||||
Register Dst = MI.getOperand(0).getReg();
|
||||
LLT Ty = B.getMRI()->getType(Dst);
|
||||
unsigned Size = Ty.getSizeInBits();
|
||||
|
||||
// There are no 96-bit result scalar loads, but widening to 128-bit should
|
||||
// always be legal. We may need to restore this to a 96-bit result if it turns
|
||||
// out this needs to be converted to a vector load during RegBankSelect.
|
||||
if (isPowerOf2_32(Size))
|
||||
return true;
|
||||
|
||||
LegalizerHelper Helper(B.getMF(), *this, Observer, B);
|
||||
B.setInstr(MI);
|
||||
|
||||
Observer.changingInstr(MI);
|
||||
|
||||
if (Ty.isVector())
|
||||
Helper.moreElementsVectorDst(MI, getPow2VectorType(Ty), 0);
|
||||
else
|
||||
Helper.widenScalarDst(MI, getPow2ScalarType(Ty), 0);
|
||||
|
||||
Observer.changedInstr(MI);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
|
||||
MachineIRBuilder &B,
|
||||
GISelChangeObserver &Observer) const {
|
||||
|
@ -3075,6 +3116,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
|
|||
MI.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
case Intrinsic::amdgcn_s_buffer_load:
|
||||
return legalizeSBufferLoad(MI, B, Observer);
|
||||
case Intrinsic::amdgcn_raw_buffer_store:
|
||||
case Intrinsic::amdgcn_struct_buffer_store:
|
||||
return legalizeBufferStore(MI, MRI, B, false, false);
|
||||
|
|
|
@ -138,6 +138,10 @@ public:
|
|||
GISelChangeObserver &Observer,
|
||||
const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr) const;
|
||||
|
||||
bool legalizeSBufferLoad(
|
||||
MachineInstr &MI, MachineIRBuilder &B,
|
||||
GISelChangeObserver &Observer) const;
|
||||
|
||||
bool legalizeAtomicIncDec(MachineInstr &MI, MachineIRBuilder &B,
|
||||
bool IsInc) const;
|
||||
|
||||
|
|
|
@ -0,0 +1,136 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GCN %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -run-pass=legalizer %s -o - | FileCheck -check-prefix=GCN %s
|
||||
|
||||
---
|
||||
name: s_buffer_load_v3s32
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; GCN-LABEL: name: s_buffer_load_v3s32
|
||||
; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GCN: [[INT:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), [[COPY]](<4 x s32>), [[C]](s32), 0
|
||||
; GCN: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[INT]](<4 x s32>), 0
|
||||
; GCN: S_ENDPGM 0, implicit [[EXTRACT]](<3 x s32>)
|
||||
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%1:_(s32) = G_CONSTANT i32 0
|
||||
%2:_(<3 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0
|
||||
S_ENDPGM 0, implicit %2
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: s_buffer_load_v3p3
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; GCN-LABEL: name: s_buffer_load_v3p3
|
||||
; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GCN: [[INT:%[0-9]+]]:_(<4 x p3>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), [[COPY]](<4 x s32>), [[C]](s32), 0
|
||||
; GCN: [[EXTRACT:%[0-9]+]]:_(<3 x p3>) = G_EXTRACT [[INT]](<4 x p3>), 0
|
||||
; GCN: S_ENDPGM 0, implicit [[EXTRACT]](<3 x p3>)
|
||||
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%1:_(s32) = G_CONSTANT i32 0
|
||||
%2:_(<3 x p3>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0
|
||||
S_ENDPGM 0, implicit %2
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: s_buffer_load_v6s16
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; GCN-LABEL: name: s_buffer_load_v6s16
|
||||
; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GCN: [[INT:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), [[COPY]](<4 x s32>), [[C]](s32), 0
|
||||
; GCN: [[EXTRACT:%[0-9]+]]:_(<6 x s16>) = G_EXTRACT [[INT]](<8 x s16>), 0
|
||||
; GCN: S_ENDPGM 0, implicit [[EXTRACT]](<6 x s16>)
|
||||
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%1:_(s32) = G_CONSTANT i32 0
|
||||
%2:_(<6 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0
|
||||
S_ENDPGM 0, implicit %2
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: s_buffer_load_v6s32
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; GCN-LABEL: name: s_buffer_load_v6s32
|
||||
; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GCN: [[INT:%[0-9]+]]:_(<8 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), [[COPY]](<4 x s32>), [[C]](s32), 0
|
||||
; GCN: [[EXTRACT:%[0-9]+]]:_(<6 x s32>) = G_EXTRACT [[INT]](<8 x s32>), 0
|
||||
; GCN: S_ENDPGM 0, implicit [[EXTRACT]](<6 x s32>)
|
||||
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%1:_(s32) = G_CONSTANT i32 0
|
||||
%2:_(<6 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0
|
||||
S_ENDPGM 0, implicit %2
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: s_buffer_load_v3s64
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; GCN-LABEL: name: s_buffer_load_v3s64
|
||||
; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GCN: [[INT:%[0-9]+]]:_(<4 x s64>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), [[COPY]](<4 x s32>), [[C]](s32), 0
|
||||
; GCN: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[INT]](<4 x s64>), 0
|
||||
; GCN: S_ENDPGM 0, implicit [[EXTRACT]](<3 x s64>)
|
||||
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%1:_(s32) = G_CONSTANT i32 0
|
||||
%2:_(<3 x s64>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0
|
||||
S_ENDPGM 0, implicit %2
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: s_buffer_load_v12s8
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; GCN-LABEL: name: s_buffer_load_v12s8
|
||||
; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GCN: [[INT:%[0-9]+]]:_(<16 x s8>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), [[COPY]](<4 x s32>), [[C]](s32), 0
|
||||
; GCN: [[EXTRACT:%[0-9]+]]:_(<12 x s8>) = G_EXTRACT [[INT]](<16 x s8>), 0
|
||||
; GCN: S_ENDPGM 0, implicit [[EXTRACT]](<12 x s8>)
|
||||
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%1:_(s32) = G_CONSTANT i32 0
|
||||
%2:_(<12 x s8>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0
|
||||
S_ENDPGM 0, implicit %2
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: s_buffer_load_s96
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
||||
; GCN-LABEL: name: s_buffer_load_s96
|
||||
; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
|
||||
; GCN: [[INT:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), [[COPY]](<4 x s32>), [[C]](s32), 0
|
||||
; GCN: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[INT]](<4 x s32>), 0
|
||||
; GCN: S_ENDPGM 0, implicit [[EXTRACT]](<3 x s32>)
|
||||
%0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%1:_(s32) = G_CONSTANT i32 0
|
||||
%2:_(<3 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0
|
||||
S_ENDPGM 0, implicit %2
|
||||
|
||||
...
|
Loading…
Reference in New Issue