[SLP] Fix a crash when the inserted subvector is out of range.

If OffsetBeg + InsertVecSz is greater than VecSz, we need to estimate
the cost as a shuffle of two vectors, not as an insert of a subvector.
Otherwise, the inserted subvector is out of range and the compiler may crash.

Differential Revision: https://reviews.llvm.org/D128071
This commit is contained in:
Alexey Bataev 2022-06-17 10:23:12 -07:00
parent ac4cb1775b
commit f1ee2738b3
2 changed files with 32 additions and 1 deletions

View File

@ -6134,6 +6134,16 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
PowerOf2Ceil(OffsetEnd - OffsetBeg + 1),
((OffsetEnd - OffsetBeg + VecScalarsSz) / VecScalarsSz) *
VecScalarsSz);
bool IsWholeSubvector =
OffsetBeg == Offset && ((OffsetEnd + 1) % VecScalarsSz == 0);
// Check if we can safely insert a subvector. If it is not possible, just
// generate a whole-sized vector and shuffle the source vector and the new
// subvector.
if (OffsetBeg + InsertVecSz > VecSz) {
// Align OffsetBeg to generate correct mask.
OffsetBeg = alignDown(OffsetBeg, VecSz, Offset);
InsertVecSz = VecSz;
}
APInt DemandedElts = APInt::getZero(NumElts);
// TODO: Add support for Instruction::InsertValue.
@ -6177,7 +6187,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
// TODO: Implement the analysis of the FirstInsert->getOperand(0)
// subvector of ActualVecTy.
if (!isUndefVector(FirstInsert->getOperand(0)) && NumScalars != NumElts &&
(Offset != OffsetBeg || (OffsetEnd + 1) % VecScalarsSz != 0)) {
!IsWholeSubvector) {
if (InsertVecSz != VecSz) {
auto *ActualVecTy =
FixedVectorType::get(SrcVecTy->getElementType(), VecSz);

View File

@ -0,0 +1,21 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -S -mtriple=aarch64 < %s | FileCheck %s
; SLP vectorizer input: two i16 values are loaded (from %p + 1 element and
; from %p) and inserted into lanes 5 and 7 of an <8 x i16> zeroinitializer
; vector. The assertions below expect the same scalar instruction sequence in
; the output, i.e. the function is left unvectorized.
; NOTE(review): presumably this is the reproducer for the out-of-range
; subvector insert crash in the cost model (inserts confined to the upper
; lanes of the vector) -- confirm against the accompanying source change.
define void @test(ptr %p) {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[INC:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 1
; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[INC]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[P]], align 2
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i16> zeroinitializer, i16 [[TMP0]], i32 5
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP1]], i32 7
; CHECK-NEXT: ret void
;
entry:
; Load the element at index 1 first, then the element at index 0.
%inc = getelementptr inbounds i16, ptr %p, i64 1
%0 = load i16, ptr %inc, align 4
%1 = load i16, ptr %p, align 2
; Only lanes 5 and 7 of the 8-lane vector are written; the final vector %3
; is not used before the return.
%2 = insertelement <8 x i16> zeroinitializer, i16 %0, i32 5
%3 = insertelement <8 x i16> %2, i16 %1, i32 7
ret void
}