forked from OSchip/llvm-project
[SLP] Fix a crash when the inserted subvector is out of range.
If OffsetBeg + InsertVecSz is greater than VecSz, the cost must be estimated as a shuffle of two vectors, not as an insert of a subvector. Otherwise, the inserted subvector is out of range and the compiler may crash. Differential Revision: https://reviews.llvm.org/D128071
This commit is contained in:
parent
ac4cb1775b
commit
f1ee2738b3
|
@ -6134,6 +6134,16 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
|
|||
PowerOf2Ceil(OffsetEnd - OffsetBeg + 1),
|
||||
((OffsetEnd - OffsetBeg + VecScalarsSz) / VecScalarsSz) *
|
||||
VecScalarsSz);
|
||||
bool IsWholeSubvector =
|
||||
OffsetBeg == Offset && ((OffsetEnd + 1) % VecScalarsSz == 0);
|
||||
// Check if we can safely insert a subvector. If it is not possible, just
|
||||
// generate a whole-sized vector and shuffle the source vector and the new
|
||||
// subvector.
|
||||
if (OffsetBeg + InsertVecSz > VecSz) {
|
||||
// Align OffsetBeg to generate correct mask.
|
||||
OffsetBeg = alignDown(OffsetBeg, VecSz, Offset);
|
||||
InsertVecSz = VecSz;
|
||||
}
|
||||
|
||||
APInt DemandedElts = APInt::getZero(NumElts);
|
||||
// TODO: Add support for Instruction::InsertValue.
|
||||
|
@ -6177,7 +6187,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
|
|||
// TODO: Implement the analysis of the FirstInsert->getOperand(0)
|
||||
// subvector of ActualVecTy.
|
||||
if (!isUndefVector(FirstInsert->getOperand(0)) && NumScalars != NumElts &&
|
||||
(Offset != OffsetBeg || (OffsetEnd + 1) % VecScalarsSz != 0)) {
|
||||
!IsWholeSubvector) {
|
||||
if (InsertVecSz != VecSz) {
|
||||
auto *ActualVecTy =
|
||||
FixedVectorType::get(SrcVecTy->getElementType(), VecSz);
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -slp-vectorizer -S -mtriple=aarch64 < %s | FileCheck %s
|
||||
|
||||
; Test input for the SLP vectorizer (run through opt with an aarch64 triple):
; two i16 values are loaded from %p and %p+1 and inserted into lanes 7 and 5,
; respectively, of an <8 x i16> zeroinitializer base vector. The autogenerated
; FileCheck assertions below expect the same instruction sequence to be
; emitted, i.e. the body is not rewritten by the pass.
define void @test(ptr %p) {
|
||||
; CHECK-LABEL: @test(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[INC:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 1
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[INC]], align 4
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[P]], align 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i16> zeroinitializer, i16 [[TMP0]], i32 5
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP1]], i32 7
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%inc = getelementptr inbounds i16, ptr %p, i64 1
|
||||
%0 = load i16, ptr %inc, align 4
|
||||
%1 = load i16, ptr %p, align 2
|
||||
%2 = insertelement <8 x i16> zeroinitializer, i16 %0, i32 5
|
||||
%3 = insertelement <8 x i16> %2, i16 %1, i32 7
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue