[SLPVectorizer][SVE] Bail out early for scalable vector.

Summary: SLPVectorizer try to vectorize list of scalar instructions of the same type, instructions already vectorized are rejected through isValidElementType(). Without this patch, tryToVectorizeList() will first try to determine vectorization factor of a list of Instructions before checking whether each instruction has unsupported type or not. For instructions already vectorized for SVE, it will crash at getVectorElementSize(), where it try to return a fixed size. This patch make sure invalid element types are rejected before trying to get vectorization factor. This make sure we are not trying to vectorize instructions already vectorized. Reviewers: sdesmalen, efriedma, spatel, RKSimon, ABataev, apazos, rengolin Reviewed By: efriedma Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D76017
2020-03-13 11:23:01 -07:00 · 2020-03-13 11:23:01 -07:00 · fc1f205745
parent 842ea709e4
commit fc1f205745
3 changed files with 58 additions and 13 deletions
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@ -5899,25 +5899,15 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
  LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize a list of length = "
                    << VL.size() << ".\n");

-  // Check that all of the parts are scalar instructions of the same type,
+  // Check that all of the parts are instructions of the same type,
  // we permit an alternate opcode via InstructionsState.
  InstructionsState S = getSameOpcode(VL);
  if (!S.getOpcode())
    return false;

  Instruction *I0 = cast<Instruction>(S.OpValue);
-  unsigned Sz = R.getVectorElementSize(I0);
-  unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz);
-  unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
-  if (MaxVF < 2) {
-    R.getORE()->emit([&]() {
-      return OptimizationRemarkMissed(SV_NAME, "SmallVF", I0)
-             << "Cannot SLP vectorize list: vectorization factor "
-             << "less than 2 is not supported";
-    });
-    return false;
-  }
-
+  // Make sure invalid types (including vector type) are rejected before
+  // determining vectorization factor for scalar instructions.
  for (Value *V : VL) {
    Type *Ty = V->getType();
    if (!isValidElementType(Ty)) {
@ -5935,6 +5925,18 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
    }
  }

+  unsigned Sz = R.getVectorElementSize(I0);
+  unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz);
+  unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
+  if (MaxVF < 2) {
+    R.getORE()->emit([&]() {
+      return OptimizationRemarkMissed(SV_NAME, "SmallVF", I0)
+             << "Cannot SLP vectorize list: vectorization factor "
+             << "less than 2 is not supported";
+    });
+    return false;
+  }
+
  bool Changed = false;
  bool CandidateFound = false;
  int MinCost = SLPCostThreshold;
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/invalid_type.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/invalid_type.ll
@ -0,0 +1,18 @@
+; RUN: opt < %s -slp-vectorizer -S -pass-remarks-missed=slp-vectorizer 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+; This test check that slp vectorizer is not trying to vectorize instructions already vectorized.
+; CHECK: remark: <unknown>:0:0: Cannot SLP vectorize list: type <16 x i8> is unsupported by vectorizer
+
+define void @vector() {
+  %load0 = tail call <16 x i8> @vector.load(<16 x i8> *undef, i32 1)
+  %load1 = tail call <16 x i8> @vector.load(<16 x i8> *undef, i32 2)
+  %add = add <16 x i8> %load1, %load0
+  tail call void @vector.store(<16 x i8> %add, <16 x i8>* undef, i32 1)
+  ret void
+}
+
+declare <16 x i8> @vector.load(<16 x i8>*, i32)
+declare void @vector.store(<16 x i8>, <16 x i8>*, i32)
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -slp-vectorizer -S | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+; This test check that we are not crashing or changing the code.
+
+define void @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    [[LOAD0:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; CHECK-NEXT:    [[LOAD1:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 16 x i8> [[LOAD1]], [[LOAD0]]
+; CHECK-NEXT:    tail call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> [[ADD]], <vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef)
+; CHECK-NEXT:    ret void
+;
+  %load0 = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8> *undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+  %load1 = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8> *undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+  %add = add <vscale x 16 x i8> %load1, %load0
+  tail call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> %add, <vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef)
+  ret void
+}
+
+declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>)