[SLPVectorizer][SVE] Bail out early for scalable vector.

Summary:
SLPVectorizer try to vectorize list of scalar instructions of the same type,
instructions already vectorized are rejected through isValidElementType().

Without this patch, tryToVectorizeList() will first try to determine vectorization
factor of a list of Instructions before checking whether each instruction has unsupported
type or not. For instructions already vectorized for SVE, it will crash at getVectorElementSize(),
where it try to return a fixed size.

This patch make sure invalid element types are rejected before trying to get vectorization
factor. This make sure we are not trying to vectorize instructions already vectorized.

Reviewers: sdesmalen, efriedma, spatel, RKSimon, ABataev, apazos, rengolin

Reviewed By: efriedma

Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D76017
This commit is contained in:
Huihui Zhang 2020-03-13 11:23:01 -07:00
parent 842ea709e4
commit fc1f205745
3 changed files with 58 additions and 13 deletions

View File

@ -5899,25 +5899,15 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize a list of length = "
<< VL.size() << ".\n");
// Check that all of the parts are scalar instructions of the same type,
// Check that all of the parts are instructions of the same type,
// we permit an alternate opcode via InstructionsState.
InstructionsState S = getSameOpcode(VL);
if (!S.getOpcode())
return false;
Instruction *I0 = cast<Instruction>(S.OpValue);
unsigned Sz = R.getVectorElementSize(I0);
unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz);
unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
if (MaxVF < 2) {
R.getORE()->emit([&]() {
return OptimizationRemarkMissed(SV_NAME, "SmallVF", I0)
<< "Cannot SLP vectorize list: vectorization factor "
<< "less than 2 is not supported";
});
return false;
}
// Make sure invalid types (including vector type) are rejected before
// determining vectorization factor for scalar instructions.
for (Value *V : VL) {
Type *Ty = V->getType();
if (!isValidElementType(Ty)) {
@ -5935,6 +5925,18 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
}
}
unsigned Sz = R.getVectorElementSize(I0);
unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz);
unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
if (MaxVF < 2) {
R.getORE()->emit([&]() {
return OptimizationRemarkMissed(SV_NAME, "SmallVF", I0)
<< "Cannot SLP vectorize list: vectorization factor "
<< "less than 2 is not supported";
});
return false;
}
bool Changed = false;
bool CandidateFound = false;
int MinCost = SLPCostThreshold;

View File

@ -0,0 +1,18 @@
; RUN: opt < %s -slp-vectorizer -S -pass-remarks-missed=slp-vectorizer 2>&1 | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"
; This test check that slp vectorizer is not trying to vectorize instructions already vectorized.
; CHECK: remark: <unknown>:0:0: Cannot SLP vectorize list: type <16 x i8> is unsupported by vectorizer
define void @vector() {
%load0 = tail call <16 x i8> @vector.load(<16 x i8> *undef, i32 1)
%load1 = tail call <16 x i8> @vector.load(<16 x i8> *undef, i32 2)
%add = add <16 x i8> %load1, %load0
tail call void @vector.store(<16 x i8> %add, <16 x i8>* undef, i32 1)
ret void
}
declare <16 x i8> @vector.load(<16 x i8>*, i32)
declare void @vector.store(<16 x i8>, <16 x i8>*, i32)

View File

@ -0,0 +1,25 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -slp-vectorizer -S | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"
; This test check that we are not crashing or changing the code.
define void @test() {
; CHECK-LABEL: @test(
; CHECK-NEXT: [[LOAD0:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
; CHECK-NEXT: [[LOAD1:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
; CHECK-NEXT: [[ADD:%.*]] = add <vscale x 16 x i8> [[LOAD1]], [[LOAD0]]
; CHECK-NEXT: tail call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> [[ADD]], <vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef)
; CHECK-NEXT: ret void
;
%load0 = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8> *undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
%load1 = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8> *undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
%add = add <vscale x 16 x i8> %load1, %load0
tail call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> %add, <vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef)
ret void
}
declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>, <vscale x 16 x i8>)
declare void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>)