forked from OSchip/llvm-project
AMDGPU/GlobalISel: Select scalar v2s16 G_BUILD_VECTOR
This commit is contained in:
parent
bc763c42bb
commit
e4464bf3d4
|
@ -100,6 +100,7 @@ def : GINodeEquiv<G_CTTZ_ZERO_UNDEF, cttz_zero_undef>;
|
|||
def : GINodeEquiv<G_CTPOP, ctpop>;
|
||||
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
|
||||
def : GINodeEquiv<G_CONCAT_VECTORS, concat_vectors>;
|
||||
def : GINodeEquiv<G_BUILD_VECTOR, build_vector>;
|
||||
def : GINodeEquiv<G_FCEIL, fceil>;
|
||||
def : GINodeEquiv<G_FCOS, fcos>;
|
||||
def : GINodeEquiv<G_FSIN, fsin>;
|
||||
|
|
|
@ -484,7 +484,7 @@ bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
|
|||
|
||||
const unsigned SrcSize = SrcTy.getSizeInBits();
|
||||
if (SrcSize < 32)
|
||||
return false;
|
||||
return selectImpl(MI, *CoverageInfo);
|
||||
|
||||
const DebugLoc &DL = MI.getDebugLoc();
|
||||
const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
|
||||
|
|
|
@ -158,20 +158,30 @@ def brtarget : Operand<OtherVT>;
|
|||
class HasOneUseUnaryOp<SDPatternOperator op> : PatFrag<
|
||||
(ops node:$src0),
|
||||
(op $src0),
|
||||
[{ return N->hasOneUse(); }]
|
||||
>;
|
||||
[{ return N->hasOneUse(); }]> {
|
||||
|
||||
let GISelPredicateCode = [{
|
||||
return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
|
||||
}];
|
||||
}
|
||||
|
||||
class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
|
||||
(ops node:$src0, node:$src1),
|
||||
(op $src0, $src1),
|
||||
[{ return N->hasOneUse(); }]
|
||||
>;
|
||||
[{ return N->hasOneUse(); }]> {
|
||||
let GISelPredicateCode = [{
|
||||
return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
|
||||
}];
|
||||
}
|
||||
|
||||
class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
|
||||
(ops node:$src0, node:$src1, node:$src2),
|
||||
(op $src0, $src1, $src2),
|
||||
[{ return N->hasOneUse(); }]
|
||||
>;
|
||||
[{ return N->hasOneUse(); }]> {
|
||||
let GISelPredicateCode = [{
|
||||
return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
|
||||
}];
|
||||
}
|
||||
|
||||
let Properties = [SDNPCommutative, SDNPAssociative] in {
|
||||
def smax_oneuse : HasOneUseBinOp<smax>;
|
||||
|
|
|
@ -1793,54 +1793,59 @@ def : ExpPattern<AMDGPUexport_done, i32, EXP_DONE>;
|
|||
// COPY is a workaround for a tablegen bug with multiple outputs
|
||||
// caused by S_LSHL_B32's implicit scc def.
|
||||
def : GCNPat <
|
||||
(v2i16 (build_vector (i16 0), i16:$src1)),
|
||||
(v2i16 (COPY (S_LSHL_B32 i16:$src1, (i16 16))))
|
||||
(v2i16 (build_vector (i16 0), (i16 SReg_32:$src1))),
|
||||
(S_LSHL_B32 SReg_32:$src1, (i16 16))
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(v2i16 (build_vector i16:$src0, (i16 undef))),
|
||||
(v2i16 (COPY $src0))
|
||||
(v2i16 (build_vector (i16 SReg_32:$src0), (i16 undef))),
|
||||
(COPY_TO_REGCLASS SReg_32:$src0, SReg_32)
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(v2i16 (build_vector (i16 VGPR_32:$src0), (i16 undef))),
|
||||
(COPY_TO_REGCLASS VGPR_32:$src0, VGPR_32)
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(v2f16 (build_vector f16:$src0, (f16 undef))),
|
||||
(v2f16 (COPY $src0))
|
||||
(COPY $src0)
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(v2i16 (build_vector (i16 undef), i16:$src1)),
|
||||
(v2i16 (COPY (S_LSHL_B32 $src1, (i32 16))))
|
||||
(v2i16 (build_vector (i16 undef), (i16 SReg_32:$src1))),
|
||||
(S_LSHL_B32 SReg_32:$src1, (i32 16))
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(v2f16 (build_vector (f16 undef), f16:$src1)),
|
||||
(v2f16 (COPY (S_LSHL_B32 $src1, (i32 16))))
|
||||
(v2f16 (build_vector (f16 undef), (f16 SReg_32:$src1))),
|
||||
(S_LSHL_B32 SReg_32:$src1, (i32 16))
|
||||
>;
|
||||
|
||||
let SubtargetPredicate = HasVOP3PInsts in {
|
||||
def : GCNPat <
|
||||
(v2i16 (build_vector i16:$src0, i16:$src1)),
|
||||
(v2i16 (S_PACK_LL_B32_B16 $src0, $src1))
|
||||
(v2i16 (build_vector (i16 SReg_32:$src0), (i16 SReg_32:$src1))),
|
||||
(S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1)
|
||||
>;
|
||||
|
||||
// With multiple uses of the shift, this will duplicate the shift and
|
||||
// increase register pressure.
|
||||
def : GCNPat <
|
||||
(v2i16 (build_vector i16:$src0, (i16 (trunc (srl_oneuse i32:$src1, (i32 16)))))),
|
||||
(v2i16 (S_PACK_LH_B32_B16 i16:$src0, i32:$src1))
|
||||
(v2i16 (build_vector (i16 SReg_32:$src0), (i16 (trunc (srl_oneuse SReg_32:$src1, (i32 16)))))),
|
||||
(v2i16 (S_PACK_LH_B32_B16 SReg_32:$src0, SReg_32:$src1))
|
||||
>;
|
||||
|
||||
|
||||
def : GCNPat <
|
||||
(v2i16 (build_vector (i16 (trunc (srl_oneuse i32:$src0, (i32 16)))),
|
||||
(i16 (trunc (srl_oneuse i32:$src1, (i32 16)))))),
|
||||
(v2i16 (S_PACK_HH_B32_B16 $src0, $src1))
|
||||
(v2i16 (build_vector (i16 (trunc (srl_oneuse SReg_32:$src0, (i32 16)))),
|
||||
(i16 (trunc (srl_oneuse SReg_32:$src1, (i32 16)))))),
|
||||
(S_PACK_HH_B32_B16 SReg_32:$src0, SReg_32:$src1)
|
||||
>;
|
||||
|
||||
// TODO: Should source modifiers be matched to v_pack_b32_f16?
|
||||
def : GCNPat <
|
||||
(v2f16 (build_vector f16:$src0, f16:$src1)),
|
||||
(v2f16 (S_PACK_LL_B32_B16 $src0, $src1))
|
||||
(v2f16 (build_vector (f16 SReg_32:$src0), (f16 SReg_32:$src1))),
|
||||
(S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1)
|
||||
>;
|
||||
|
||||
} // End SubtargetPredicate = HasVOP3PInsts
|
||||
|
|
|
@ -0,0 +1,239 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
name: test_build_vector_s_v2s16_s_s16_s_s16
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1
|
||||
|
||||
; GFX9-LABEL: name: test_build_vector_s_v2s16_s_s16_s_s16
|
||||
; GFX9: liveins: $sgpr0, $sgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
|
||||
; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY1]]
|
||||
; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]]
|
||||
%0:sgpr(s32) = COPY $sgpr0
|
||||
%1:sgpr(s32) = COPY $sgpr1
|
||||
|
||||
%2:sgpr(s16) = G_TRUNC %0
|
||||
%3:sgpr(s16) = G_TRUNC %1
|
||||
|
||||
%4:sgpr(<2 x s16>) = G_BUILD_VECTOR %2, %3
|
||||
S_ENDPGM 0, implicit %4
|
||||
...
|
||||
|
||||
---
|
||||
name: test_build_vector_s_pack_lh
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1
|
||||
|
||||
; GFX9-LABEL: name: test_build_vector_s_pack_lh
|
||||
; GFX9: liveins: $sgpr0, $sgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
|
||||
; GFX9: [[S_PACK_LH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LH_B32_B16 [[COPY]], [[COPY1]]
|
||||
; GFX9: S_ENDPGM 0, implicit [[S_PACK_LH_B32_B16_]]
|
||||
%0:sgpr(s32) = COPY $sgpr0
|
||||
%1:sgpr(s32) = COPY $sgpr1
|
||||
|
||||
%2:sgpr(s32) = G_CONSTANT i32 16
|
||||
%3:sgpr(s32) = G_LSHR %1, %2
|
||||
|
||||
%4:sgpr(s16) = G_TRUNC %0
|
||||
%5:sgpr(s16) = G_TRUNC %3
|
||||
|
||||
%6:sgpr(<2 x s16>) = G_BUILD_VECTOR %4, %5
|
||||
S_ENDPGM 0, implicit %6
|
||||
...
|
||||
|
||||
# There is no s_pack_hl_b32, so the high half is shifted down with s_lshr_b32 and packed with s_pack_ll_b32_b16.
|
||||
---
|
||||
name: test_build_vector_s_pack_lh_swapped
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1
|
||||
|
||||
; GFX9-LABEL: name: test_build_vector_s_pack_lh_swapped
|
||||
; GFX9: liveins: $sgpr0, $sgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
|
||||
; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
|
||||
; GFX9: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
|
||||
; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_LSHR_B32_]], [[COPY]]
|
||||
; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]]
|
||||
%0:sgpr(s32) = COPY $sgpr0
|
||||
%1:sgpr(s32) = COPY $sgpr1
|
||||
|
||||
%2:sgpr(s32) = G_CONSTANT i32 16
|
||||
%3:sgpr(s32) = G_LSHR %1, %2
|
||||
|
||||
%4:sgpr(s16) = G_TRUNC %0
|
||||
%5:sgpr(s16) = G_TRUNC %3
|
||||
|
||||
%6:sgpr(<2 x s16>) = G_BUILD_VECTOR %5, %4
|
||||
S_ENDPGM 0, implicit %6
|
||||
...
|
||||
|
||||
---
|
||||
name: test_build_vector_s_pack_hh
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1
|
||||
|
||||
; GFX9-LABEL: name: test_build_vector_s_pack_hh
|
||||
; GFX9: liveins: $sgpr0, $sgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
|
||||
; GFX9: [[S_PACK_HH_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_HH_B32_B16 [[COPY]], [[COPY1]]
|
||||
; GFX9: S_ENDPGM 0, implicit [[S_PACK_HH_B32_B16_]]
|
||||
%0:sgpr(s32) = COPY $sgpr0
|
||||
%1:sgpr(s32) = COPY $sgpr1
|
||||
|
||||
%2:sgpr(s32) = G_CONSTANT i32 16
|
||||
%3:sgpr(s32) = G_LSHR %0, %2
|
||||
%4:sgpr(s32) = G_LSHR %1, %2
|
||||
|
||||
%5:sgpr(s16) = G_TRUNC %3
|
||||
%6:sgpr(s16) = G_TRUNC %4
|
||||
|
||||
%7:sgpr(<2 x s16>) = G_BUILD_VECTOR %5, %6
|
||||
S_ENDPGM 0, implicit %7
|
||||
...
|
||||
|
||||
# TODO: Should this use an s_and_b32 with 0xffff instead of s_pack_ll_b32_b16 with a zero high half?
|
||||
---
|
||||
name: test_build_vector_s_v2s16_s_s16_s_0_s16
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0
|
||||
|
||||
; GFX9-LABEL: name: test_build_vector_s_v2s16_s_s16_s_0_s16
|
||||
; GFX9: liveins: $sgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
|
||||
; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[S_MOV_B32_]]
|
||||
; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]]
|
||||
%0:sgpr(s32) = COPY $sgpr0
|
||||
|
||||
%1:sgpr(s16) = G_TRUNC %0
|
||||
%2:sgpr(s16) = G_CONSTANT i16 0
|
||||
|
||||
%3:sgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2
|
||||
S_ENDPGM 0, implicit %3
|
||||
...
|
||||
|
||||
---
|
||||
name: test_build_vector_s_v2s16_s_0_s16_s_s16
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0
|
||||
|
||||
; GFX9-LABEL: name: test_build_vector_s_v2s16_s_0_s16_s_s16
|
||||
; GFX9: liveins: $sgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GFX9: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc
|
||||
; GFX9: S_ENDPGM 0, implicit [[S_LSHL_B32_]]
|
||||
%0:sgpr(s32) = COPY $sgpr0
|
||||
|
||||
%1:sgpr(s16) = G_CONSTANT i16 0
|
||||
%2:sgpr(s16) = G_TRUNC %0
|
||||
|
||||
%3:sgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2
|
||||
S_ENDPGM 0, implicit %3
|
||||
...
|
||||
|
||||
---
|
||||
name: test_build_vector_v_v2s16_v_s16_s_undef_s16
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
|
||||
; GFX9-LABEL: name: test_build_vector_v_v2s16_v_s16_s_undef_s16
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: S_ENDPGM 0, implicit [[COPY]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
|
||||
%1:vgpr(s16) = G_TRUNC %0
|
||||
%2:sgpr(s16) = G_IMPLICIT_DEF
|
||||
|
||||
%3:vgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2
|
||||
S_ENDPGM 0, implicit %3
|
||||
...
|
||||
|
||||
---
|
||||
name: test_build_vector_s_v2s16_s_s16_s_undef_s16
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0
|
||||
|
||||
; GFX9-LABEL: name: test_build_vector_s_v2s16_s_s16_s_undef_s16
|
||||
; GFX9: liveins: $sgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GFX9: S_ENDPGM 0, implicit [[COPY]]
|
||||
%0:sgpr(s32) = COPY $sgpr0
|
||||
|
||||
%1:sgpr(s16) = G_TRUNC %0
|
||||
%2:sgpr(s16) = G_IMPLICIT_DEF
|
||||
|
||||
%3:sgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2
|
||||
S_ENDPGM 0, implicit %3
|
||||
...
|
||||
|
||||
---
|
||||
name: test_build_vector_s_v2s16_s_undef_s16_s_s16
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0
|
||||
|
||||
; GFX9-LABEL: name: test_build_vector_s_v2s16_s_undef_s16_s_s16
|
||||
; GFX9: liveins: $sgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GFX9: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc
|
||||
; GFX9: S_ENDPGM 0, implicit [[S_LSHL_B32_]]
|
||||
%0:sgpr(s32) = COPY $sgpr0
|
||||
|
||||
%1:sgpr(s16) = G_IMPLICIT_DEF
|
||||
%2:sgpr(s16) = G_TRUNC %0
|
||||
|
||||
%3:sgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2
|
||||
S_ENDPGM 0, implicit %3
|
||||
...
|
Loading…
Reference in New Issue