forked from OSchip/llvm-project
[SLP] Fix a crash when the inserted subvector is out of range.
If OffsetBeg + InsertVecSz is greater than VecSz, the cost must be estimated as a shuffle of two vectors rather than as an insertion of a subvector. Otherwise, the inserted subvector is out of range and the compiler may crash. Differential Revision: https://reviews.llvm.org/D128071
This commit is contained in:
parent
ac4cb1775b
commit
f1ee2738b3
|
@ -6134,6 +6134,16 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
|
||||||
PowerOf2Ceil(OffsetEnd - OffsetBeg + 1),
|
PowerOf2Ceil(OffsetEnd - OffsetBeg + 1),
|
||||||
((OffsetEnd - OffsetBeg + VecScalarsSz) / VecScalarsSz) *
|
((OffsetEnd - OffsetBeg + VecScalarsSz) / VecScalarsSz) *
|
||||||
VecScalarsSz);
|
VecScalarsSz);
|
||||||
|
bool IsWholeSubvector =
|
||||||
|
OffsetBeg == Offset && ((OffsetEnd + 1) % VecScalarsSz == 0);
|
||||||
|
// Check if we can safely insert a subvector. If it is not possible, just
|
||||||
|
// generate a whole-sized vector and shuffle the source vector and the new
|
||||||
|
// subvector.
|
||||||
|
if (OffsetBeg + InsertVecSz > VecSz) {
|
||||||
|
// Align OffsetBeg to generate correct mask.
|
||||||
|
OffsetBeg = alignDown(OffsetBeg, VecSz, Offset);
|
||||||
|
InsertVecSz = VecSz;
|
||||||
|
}
|
||||||
|
|
||||||
APInt DemandedElts = APInt::getZero(NumElts);
|
APInt DemandedElts = APInt::getZero(NumElts);
|
||||||
// TODO: Add support for Instruction::InsertValue.
|
// TODO: Add support for Instruction::InsertValue.
|
||||||
|
@ -6177,7 +6187,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
|
||||||
// TODO: Implement the analysis of the FirstInsert->getOperand(0)
|
// TODO: Implement the analysis of the FirstInsert->getOperand(0)
|
||||||
// subvector of ActualVecTy.
|
// subvector of ActualVecTy.
|
||||||
if (!isUndefVector(FirstInsert->getOperand(0)) && NumScalars != NumElts &&
|
if (!isUndefVector(FirstInsert->getOperand(0)) && NumScalars != NumElts &&
|
||||||
(Offset != OffsetBeg || (OffsetEnd + 1) % VecScalarsSz != 0)) {
|
!IsWholeSubvector) {
|
||||||
if (InsertVecSz != VecSz) {
|
if (InsertVecSz != VecSz) {
|
||||||
auto *ActualVecTy =
|
auto *ActualVecTy =
|
||||||
FixedVectorType::get(SrcVecTy->getElementType(), VecSz);
|
FixedVectorType::get(SrcVecTy->getElementType(), VecSz);
|
||||||
|
|
|
@ -0,0 +1,21 @@
|
||||||
|
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||||
|
; RUN: opt -slp-vectorizer -S -mtriple=aarch64 < %s | FileCheck %s
|
||||||
|
|
||||||
|
define void @test(ptr %p) {
|
||||||
|
; CHECK-LABEL: @test(
|
||||||
|
; CHECK-NEXT: entry:
|
||||||
|
; CHECK-NEXT: [[INC:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 1
|
||||||
|
; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[INC]], align 4
|
||||||
|
; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[P]], align 2
|
||||||
|
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i16> zeroinitializer, i16 [[TMP0]], i32 5
|
||||||
|
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP1]], i32 7
|
||||||
|
; CHECK-NEXT: ret void
|
||||||
|
;
|
||||||
|
entry:
|
||||||
|
%inc = getelementptr inbounds i16, ptr %p, i64 1
|
||||||
|
%0 = load i16, ptr %inc, align 4
|
||||||
|
%1 = load i16, ptr %p, align 2
|
||||||
|
%2 = insertelement <8 x i16> zeroinitializer, i16 %0, i32 5
|
||||||
|
%3 = insertelement <8 x i16> %2, i16 %1, i32 7
|
||||||
|
ret void
|
||||||
|
}
|
Loading…
Reference in New Issue