forked from OSchip/llvm-project
[COST]Fix crash for non-power-2 vector shuffle mask.
Need to normalize the mask to avoid possible crashes when attempting to estimate the cost of very long shuffles whose masks have a non-power-of-2 number of elements.
This commit is contained in:
parent
a80081763c
commit
371412e065
|
@ -1238,15 +1238,18 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
|
|||
// copy of the previous destination register (the cost is
|
||||
// TTI::TCC_Basic). If the source register is just reused, the cost for
|
||||
// this operation is 0.
|
||||
unsigned NormalizedVF = LT.second.getVectorNumElements() * NumOfSrcs;
|
||||
unsigned E = *NumOfDests.getValue();
|
||||
unsigned NormalizedVF =
|
||||
LegalVT.getVectorNumElements() * std::max(NumOfSrcs, E);
|
||||
unsigned NumOfSrcRegs = NormalizedVF / LegalVT.getVectorNumElements();
|
||||
unsigned NumOfDestRegs = NormalizedVF / LegalVT.getVectorNumElements();
|
||||
SmallVector<int> NormalizedMask(NormalizedVF, UndefMaskElem);
|
||||
copy(Mask, NormalizedMask.begin());
|
||||
unsigned E = *NumOfDests.getValue();
|
||||
unsigned PrevSrcReg = 0;
|
||||
ArrayRef<int> PrevRegMask;
|
||||
InstructionCost Cost = 0;
|
||||
processShuffleMasks(
|
||||
NormalizedMask, NumOfSrcs, E, E, []() {},
|
||||
NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
|
||||
[this, SingleOpTy, &PrevSrcReg, &PrevRegMask,
|
||||
&Cost](ArrayRef<int> RegMask, unsigned SrcReg, unsigned DestReg) {
|
||||
if (!ShuffleVectorInst::isIdentityMask(RegMask)) {
|
||||
|
|
|
@ -1,13 +1,31 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
|
||||
; RUN: opt < %s -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse2 | FileCheck %s
|
||||
; RUN: opt < %s -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse2 | FileCheck %s -check-prefixes=SSE
|
||||
; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s -check-prefixes=AVX
|
||||
|
||||
define void @test() {
|
||||
; CHECK-LABEL: 'test'
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %matins.2.2 = shufflevector <9 x double> undef, <9 x double> undef, <9 x i32> <i32 0, i32 3, i32 6, i32 1, i32 4, i32 7, i32 2, i32 5, i32 8>
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
; SSE-LABEL: 'test'
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %matins.2.2 = shufflevector <9 x double> undef, <9 x double> undef, <9 x i32> <i32 0, i32 3, i32 6, i32 1, i32 4, i32 7, i32 2, i32 5, i32 8>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
;
|
||||
; AVX-LABEL: 'test'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %matins.2.2 = shufflevector <9 x double> undef, <9 x double> undef, <9 x i32> <i32 0, i32 3, i32 6, i32 1, i32 4, i32 7, i32 2, i32 5, i32 8>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
;
|
||||
entry:
|
||||
%matins.2.2 = shufflevector <9 x double> undef, <9 x double> undef, <9 x i32> <i32 0, i32 3, i32 6, i32 1, i32 4, i32 7, i32 2, i32 5, i32 8>
|
||||
ret void
|
||||
}
|
||||
|
||||
define <12 x i64> @foo(<12 x i64> noundef %src) {
|
||||
; SSE-LABEL: 'foo'
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shuffle = shufflevector <12 x i64> %src, <12 x i64> poison, <12 x i32> <i32 0, i32 3, i32 6, i32 9, i32 1, i32 4, i32 7, i32 10, i32 2, i32 5, i32 8, i32 11>
|
||||
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <12 x i64> %shuffle
|
||||
;
|
||||
; AVX-LABEL: 'foo'
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %shuffle = shufflevector <12 x i64> %src, <12 x i64> poison, <12 x i32> <i32 0, i32 3, i32 6, i32 9, i32 1, i32 4, i32 7, i32 10, i32 2, i32 5, i32 8, i32 11>
|
||||
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <12 x i64> %shuffle
|
||||
;
|
||||
entry:
|
||||
%shuffle = shufflevector <12 x i64> %src, <12 x i64> poison, <12 x i32> <i32 0, i32 3, i32 6, i32 9, i32 1, i32 4, i32 7, i32 10, i32 2, i32 5, i32 8, i32 11>
|
||||
ret <12 x i64> %shuffle
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue