[SVE] Add ISEL patterns for predicated shifts by an immediate.

For scalable vector shifts the predicate is typically all active,
which gets selected to an unpredicated shift by immediate. When
generating code for fixed-length vectors the predicate is based on
the vector length, so additional patterns are required to make use
of SVE's predicated shift-by-immediate instructions.

Differential Revision: https://reviews.llvm.org/D86204
commit 0015b8db8e (parent 8206257cb8)
Author: Paul Walker
Date:   2020-08-14 18:28:38 +01:00
3 changed files with 32 additions and 24 deletions
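To make the change concrete, here is a minimal sketch (written for this summary, not taken from the patch; the function name, the 256-bit vector width and the attribute line are assumed). When fixed-length SVE code generation is in use, a shift whose amount is a splat of an in-range constant previously materialised the splat with a mov and used the register form of the shift; with these patterns the predicated shift-by-immediate form is selected directly.

; Sketch only: assumes fixed-length SVE code generation is enabled for
; 256-bit vectors (e.g. -aarch64-sve-vector-bits-min=256 together with +sve).
;   Before:  mov z1.s, #31
;            asr z0.s, p0/m, z0.s, z1.s
;   After:   asr z0.s, p0/m, z0.s, #31
define void @ashr_by_imm(<8 x i32>* %a) #0 {
  %op1 = load <8 x i32>, <8 x i32>* %a
  %res = ashr <8 x i32> %op1, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  store <8 x i32> %res, <8 x i32>* %a
  ret void
}
attributes #0 = { "target-features"="+sve" }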


@@ -1353,6 +1353,10 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left_dup< 0b0011, "lsl", "LSL_ZPZI", int_aarch64_sve_lsl>;
defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right< 0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>;
defm ASR_ZPZI : sve_int_shift_pred_bhsd<AArch64asr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
defm LSR_ZPZI : sve_int_shift_pred_bhsd<AArch64lsr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
defm LSL_ZPZI : sve_int_shift_pred_bhsd<AArch64lsl_p, SVEShiftImmL8, SVEShiftImmL16, SVEShiftImmL32, SVEShiftImmL64>;
let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_asr>;
defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsr>;


@@ -7893,3 +7893,19 @@ multiclass sve_int_bin_pred_sd<SDPatternOperator op> {
  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Pseudo>(NAME # _UNDEF_D)>;
}
// Predicated pseudo integer two operand instructions. Second operand is an
// immediate specified by imm_[bhsd].
multiclass sve_int_shift_pred_bhsd<SDPatternOperator op,
                                   ComplexPattern imm_b, ComplexPattern imm_h,
                                   ComplexPattern imm_s, ComplexPattern imm_d> {
  def _UNDEF_B : PredTwoOpImmPseudo<NAME # _B, ZPR8, Operand<i32>, FalseLanesUndef>;
  def _UNDEF_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, Operand<i32>, FalseLanesUndef>;
  def _UNDEF_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, Operand<i32>, FalseLanesUndef>;
  def _UNDEF_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, Operand<i32>, FalseLanesUndef>;
  def : SVE_Shift_DupImm_Pred_Pat<nxv16i8, op, nxv16i1, i32, imm_b, !cast<Instruction>(NAME # _UNDEF_B)>;
  def : SVE_Shift_DupImm_Pred_Pat<nxv8i16, op, nxv8i1, i32, imm_h, !cast<Instruction>(NAME # _UNDEF_H)>;
  def : SVE_Shift_DupImm_Pred_Pat<nxv4i32, op, nxv4i1, i32, imm_s, !cast<Instruction>(NAME # _UNDEF_S)>;
  def : SVE_Shift_DupImm_Pred_Pat<nxv2i64, op, nxv2i1, i64, imm_d, !cast<Instruction>(NAME # _UNDEF_D)>;
}
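As a usage note on the new multiclass (a hand-written sketch, not part of the patch): the SVE_Shift_DupImm_Pred_Pat patterns only apply when the shift amount is a splat of a constant accepted by the corresponding imm_[bhsd] ComplexPattern, so a shift by a non-constant (or non-splat) amount is unaffected and still selects the existing predicated register form.

; Sketch only: same assumed configuration as the earlier example; a variable
; shift amount is not covered by the immediate patterns and is still expected
; to produce the register form, e.g. asr z0.s, p0/m, z0.s, z1.s.
define void @ashr_by_reg(<8 x i32>* %a, <8 x i32>* %b) #0 {
  %op1 = load <8 x i32>, <8 x i32>* %a
  %op2 = load <8 x i32>, <8 x i32>* %b
  %res = ashr <8 x i32> %op1, %op2
  store <8 x i32> %res, <8 x i32>* %a
  ret void
}
attributes #0 = { "target-features"="+sve" }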


@@ -156,8 +156,7 @@ define void @ashr_v64i8(<64 x i8>* %a) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: mov z1.b, #7 // =0x7
; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: asr z0.b, p0/m, z0.b, #7
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <64 x i8>, <64 x i8>* %a
@@ -173,8 +172,7 @@ define void @ashr_v32i16(<32 x i16>* %a) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: mov z1.h, #15 // =0xf
; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i16>, <32 x i16>* %a
@@ -190,8 +188,7 @@ define void @ashr_v16i32(<16 x i32>* %a) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: mov z1.s, #31 // =0x1f
; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: asr z0.s, p0/m, z0.s, #31
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x i32>, <16 x i32>* %a
@@ -207,8 +204,7 @@ define void @ashr_v8i64(<8 x i64>* %a) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl8
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: mov z1.d, #63 // =0x3f
; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: asr z0.d, p0/m, z0.d, #63
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <8 x i64>, <8 x i64>* %a
@@ -304,8 +300,7 @@ define void @lshr_v64i8(<64 x i8>* %a) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: mov z1.b, #7 // =0x7
; CHECK-NEXT: lsr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: lsr z0.b, p0/m, z0.b, #7
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <64 x i8>, <64 x i8>* %a
@@ -321,8 +316,7 @@ define void @lshr_v32i16(<32 x i16>* %a) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: mov z1.h, #15 // =0xf
; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: lsr z0.h, p0/m, z0.h, #15
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i16>, <32 x i16>* %a
@@ -338,8 +332,7 @@ define void @lshr_v16i32(<16 x i32>* %a) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: mov z1.s, #31 // =0x1f
; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #31
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x i32>, <16 x i32>* %a
@@ -355,8 +348,7 @@ define void @lshr_v8i64(<8 x i64>* %a) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl8
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: mov z1.d, #63 // =0x3f
; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: lsr z0.d, p0/m, z0.d, #63
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <8 x i64>, <8 x i64>* %a
@@ -516,8 +508,7 @@ define void @shl_v64i8(<64 x i8>* %a) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl64
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: mov z1.b, #7 // =0x7
; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <64 x i8>, <64 x i8>* %a
@@ -533,8 +524,7 @@ define void @shl_v32i16(<32 x i16>* %a) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: mov z1.h, #15 // =0xf
; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i16>, <32 x i16>* %a
@@ -550,8 +540,7 @@ define void @shl_v16i32(<16 x i32>* %a) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: mov z1.s, #31 // =0x1f
; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x i32>, <16 x i32>* %a
@@ -567,8 +556,7 @@ define void @shl_v8i64(<8 x i64>* %a) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl8
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: mov z1.d, #63 // =0x3f
; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #63
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
%op1 = load <8 x i64>, <8 x i64>* %a