[SLP] Fix vector element size for the store chains

The vector element size can differ between store chains. This patch
prevents the maximum number of vector elements from being computed
from the wrong chain's element size in that case.
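
To make the arithmetic concrete, here is a small standalone sketch. It is
illustrative only: the helper name maxLanes and the 128-bit register width are
assumptions for the example, not code or values taken from SLPVectorizer.cpp.

    // Hedged sketch, not LLVM code: the lane count for a store chain depends
    // on that chain's own element size, not on the first store overall.
    #include <cstdio>

    // Hypothetical helper mirroring MaxVecRegSize / EltSize from the patch.
    static unsigned maxLanes(unsigned MaxVecRegSizeBits, unsigned EltSizeBits) {
      if (MaxVecRegSizeBits % EltSizeBits != 0)
        return 1; // the patched loop skips such chains ("continue")
      return MaxVecRegSizeBits / EltSizeBits;
    }

    int main() {
      // Assuming a 128-bit vector register: an i64 chain gets 2 lanes, an i32
      // chain gets 4. Taking the element size from the first store in the whole
      // list (the old behavior) would wrongly cap the i32 chain at 2 lanes.
      printf("i64 chain: %u lanes\n", maxLanes(128, 64)); // 2
      printf("i32 chain: %u lanes\n", maxLanes(128, 32)); // 4
      return 0;
    }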

Differential Revision: https://reviews.llvm.org/D93192
Anton Afanasyev 2020-12-13 14:42:25 +03:00
parent 6c8ded0d8c
commit fac7c7ec3c
2 changed files with 15 additions and 22 deletions

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

@@ -6076,7 +6076,7 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
 // If a vector register can't hold 1 element, we are done.
 unsigned MaxVecRegSize = R.getMaxVecRegSize();
-unsigned EltSize = R.getVectorElementSize(Stores[0]);
+unsigned EltSize = R.getVectorElementSize(Operands[0]);
 if (MaxVecRegSize % EltSize != 0)
   continue;


@@ -23,28 +23,21 @@ define void @foo(i8* %v0, i8* readonly %v1) {
 ; CHECK-NEXT: [[T252:%.*]] = getelementptr inbounds i64, i64* [[T02]], i64 9
 ; CHECK-NEXT: [[T292:%.*]] = getelementptr inbounds i64, i64* [[T02]], i64 10
 ; CHECK-NEXT: [[T322:%.*]] = getelementptr inbounds i64, i64* [[T02]], i64 11
-; CHECK-NEXT: [[T19:%.*]] = load i32, i32* [[T14]], align 4
-; CHECK-NEXT: [[T23:%.*]] = load i32, i32* [[T18]], align 4
-; CHECK-NEXT: [[T27:%.*]] = load i32, i32* [[T22]], align 4
-; CHECK-NEXT: [[T30:%.*]] = load i32, i32* [[T26]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[T142]] to <2 x i64>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[T222]] to <2 x i64>*
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[T14]] to <4 x i32>*
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[T142]] to <2 x i64>*
 ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 8
-; CHECK-NEXT: [[T20:%.*]] = add nsw i32 [[T19]], 4
-; CHECK-NEXT: [[T24:%.*]] = add nsw i32 [[T23]], 4
-; CHECK-NEXT: [[T28:%.*]] = add nsw i32 [[T27]], 6
-; CHECK-NEXT: [[T31:%.*]] = add nsw i32 [[T30]], 7
-; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[TMP2]], <i64 4, i64 4>
-; CHECK-NEXT: [[TMP6:%.*]] = add nsw <2 x i64> [[TMP4]], <i64 6, i64 7>
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64* [[T212]] to <2 x i64>*
-; CHECK-NEXT: store <2 x i64> [[TMP5]], <2 x i64>* [[TMP7]], align 8
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64* [[T292]] to <2 x i64>*
-; CHECK-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* [[TMP8]], align 8
-; CHECK-NEXT: store i32 [[T20]], i32* [[T21]], align 4
-; CHECK-NEXT: store i32 [[T24]], i32* [[T25]], align 4
-; CHECK-NEXT: store i32 [[T28]], i32* [[T29]], align 4
-; CHECK-NEXT: store i32 [[T31]], i32* [[T32]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64* [[T222]] to <2 x i64>*
+; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP5]], align 8
+; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP2]], <i32 4, i32 4, i32 6, i32 7>
+; CHECK-NEXT: [[TMP8:%.*]] = add nsw <2 x i64> [[TMP4]], <i64 4, i64 4>
+; CHECK-NEXT: [[TMP9:%.*]] = add nsw <2 x i64> [[TMP6]], <i64 6, i64 7>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64* [[T212]] to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> [[TMP8]], <2 x i64>* [[TMP10]], align 8
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64* [[T292]] to <2 x i64>*
+; CHECK-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP11]], align 8
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[T21]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP12]], align 4
 ; CHECK-NEXT: ret void
 ;
 %t0 = bitcast i8* %v0 to i32*