[AArch64][SVE] Convert SRSHL to LSL when the fed from an ABS intrinsic

Differential Revision: https://reviews.llvm.org/D125233
2022-05-06 14:45:56 +00:00 · 2022-05-06 14:45:56 +00:00 · 5f4541fefb
parent 5bbf6ad5b6
commit 5f4541fefb
2 changed files with 188 additions and 0 deletions
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@ -1229,6 +1229,42 @@ static Optional<Instruction *> instCombineMaxMinNM(InstCombiner &IC,
  return None;
 }
 static Optional<Instruction *> instCombineSVESrshl(InstCombiner &IC,
                                                   IntrinsicInst &II) {
  IRBuilder<> Builder(&II);
  Value *Pred = II.getOperand(0);
  Value *Vec = II.getOperand(1);
  Value *Shift = II.getOperand(2);
  // Convert SRSHL into the simpler LSL intrinsic when fed by an ABS intrinsic.
  Value *AbsPred, *MergedValue;
  if (!match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_sqabs>(
                      m_Value(MergedValue), m_Value(AbsPred), m_Value())) &&
      !match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_abs>(
                      m_Value(MergedValue), m_Value(AbsPred), m_Value())))
    return None;
  // Transform is valid if any of the following are true:
  // * The ABS merge value is an undef or non-negative
  // * The ABS predicate is all active
  // * The ABS predicate and the SRSHL predicates are the same
  if (!isa<UndefValue>(MergedValue) &&
      !match(MergedValue, m_NonNegative()) &&
      AbsPred != Pred && !isAllActivePredicate(AbsPred))
    return None;
  // Only valid when the shift amount is non-negative, otherwise the rounding
  // behaviour of SRSHL cannot be ignored.
  if (!match(Shift, m_NonNegative()))
    return None;
  auto LSL = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_lsl, {II.getType()},
                                     {Pred, Vec, Shift});
  return IC.replaceInstUsesWith(II, LSL);
 }
 Optional<Instruction *>
 AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
                                     IntrinsicInst &II) const {
@ -1296,6 +1332,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
    return instCombineSVESDIV(IC, II);
  case Intrinsic::aarch64_sve_sel:
    return instCombineSVESel(IC, II);
  case Intrinsic::aarch64_sve_srshl:
    return instCombineSVESrshl(IC, II);
  }
  return None;
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll
@ -0,0 +1,150 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -passes=instcombine < %s | FileCheck %s
 target triple = "aarch64-unknown-linux-gnu"
 define <vscale x 8 x i16> @srshl_abs_undef_merge(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
 ; CHECK-LABEL: @srshl_abs_undef_merge(
 ; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 ;
  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %shr
 }
 define <vscale x 8 x i16> @srshl_abs_zero_merge(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
 ; CHECK-LABEL: @srshl_abs_zero_merge(
 ; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 ;
  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %shr
 }
 define <vscale x 8 x i16> @srshl_abs_positive_merge(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
 ; CHECK-LABEL: @srshl_abs_positive_merge(
 ; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 ;
  %absmerge = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %absmerge, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %shr
 }
 define <vscale x 8 x i16> @srshl_abs_all_active_pred(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %pg2) #0 {
 ; CHECK-LABEL: @srshl_abs_all_active_pred(
 ; CHECK-NEXT:    [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
 ; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[A:%.*]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 ;
  %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %shr
 }
 define <vscale x 8 x i16> @srshl_abs_same_pred(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %pg) #0 {
 ; CHECK-LABEL: @srshl_abs_same_pred(
 ; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 ;
  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %shr
 }
 define <vscale x 8 x i16> @srshl_sqabs(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
 ; CHECK-LABEL: @srshl_sqabs(
 ; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 ;
  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %shr
 }
 define <vscale x 8 x i16> @srshl_abs_negative_merge(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
 ; CHECK-LABEL: @srshl_abs_negative_merge(
 ; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 -1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
 ; CHECK-NEXT:    [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[SHR]]
 ;
  %absmerge = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 -1)
  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %absmerge, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %shr
 }
 define <vscale x 8 x i16> @srshl_abs_nonconst_merge(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
 ; CHECK-LABEL: @srshl_abs_nonconst_merge(
 ; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
 ; CHECK-NEXT:    [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[SHR]]
 ;
  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %shr
 }
 define <vscale x 8 x i16> @srshl_abs_not_all_active_pred(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %pg2) #0 {
 ; CHECK-LABEL: @srshl_abs_not_all_active_pred(
 ; CHECK-NEXT:    [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 8)
 ; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[A:%.*]])
 ; CHECK-NEXT:    [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[SHR]]
 ;
  %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 8)
  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %shr
 }
 define <vscale x 8 x i16> @srshl_abs_diff_pred(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
 ; CHECK-LABEL: @srshl_abs_diff_pred(
 ; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
 ; CHECK-NEXT:    [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[SHR]]
 ;
  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %shr
 }
 define <vscale x 8 x i16> @srshl_abs_negative_shift(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
 ; CHECK-LABEL: @srshl_abs_negative_shift(
 ; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
 ; CHECK-NEXT:    [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 -2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[SHR]]
 ;
  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 -2)
  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %shr
 }
 declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 immarg)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 attributes #0 = { "target-features"="+sve,+sve2" }