forked from OSchip/llvm-project
[SLP] Fix vector element size for the store chains
The vector element size can be different for different store chains. This patch prevents incorrect computation of the maximum number of elements in that case. Differential Revision: https://reviews.llvm.org/D93192
This commit is contained in:
parent
6c8ded0d8c
commit
fac7c7ec3c
|
@ -6076,7 +6076,7 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
|
|||
|
||||
// If a vector register can't hold 1 element, we are done.
|
||||
unsigned MaxVecRegSize = R.getMaxVecRegSize();
|
||||
unsigned EltSize = R.getVectorElementSize(Stores[0]);
|
||||
unsigned EltSize = R.getVectorElementSize(Operands[0]);
|
||||
if (MaxVecRegSize % EltSize != 0)
|
||||
continue;
|
||||
|
||||
|
|
|
@ -23,28 +23,21 @@ define void @foo(i8* %v0, i8* readonly %v1) {
|
|||
; CHECK-NEXT: [[T252:%.*]] = getelementptr inbounds i64, i64* [[T02]], i64 9
|
||||
; CHECK-NEXT: [[T292:%.*]] = getelementptr inbounds i64, i64* [[T02]], i64 10
|
||||
; CHECK-NEXT: [[T322:%.*]] = getelementptr inbounds i64, i64* [[T02]], i64 11
|
||||
; CHECK-NEXT: [[T19:%.*]] = load i32, i32* [[T14]], align 4
|
||||
; CHECK-NEXT: [[T23:%.*]] = load i32, i32* [[T18]], align 4
|
||||
; CHECK-NEXT: [[T27:%.*]] = load i32, i32* [[T22]], align 4
|
||||
; CHECK-NEXT: [[T30:%.*]] = load i32, i32* [[T26]], align 4
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[T142]] to <2 x i64>*
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[T222]] to <2 x i64>*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[T14]] to <4 x i32>*
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[T142]] to <2 x i64>*
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 8
|
||||
; CHECK-NEXT: [[T20:%.*]] = add nsw i32 [[T19]], 4
|
||||
; CHECK-NEXT: [[T24:%.*]] = add nsw i32 [[T23]], 4
|
||||
; CHECK-NEXT: [[T28:%.*]] = add nsw i32 [[T27]], 6
|
||||
; CHECK-NEXT: [[T31:%.*]] = add nsw i32 [[T30]], 7
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[TMP2]], <i64 4, i64 4>
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add nsw <2 x i64> [[TMP4]], <i64 6, i64 7>
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64* [[T212]] to <2 x i64>*
|
||||
; CHECK-NEXT: store <2 x i64> [[TMP5]], <2 x i64>* [[TMP7]], align 8
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64* [[T292]] to <2 x i64>*
|
||||
; CHECK-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* [[TMP8]], align 8
|
||||
; CHECK-NEXT: store i32 [[T20]], i32* [[T21]], align 4
|
||||
; CHECK-NEXT: store i32 [[T24]], i32* [[T25]], align 4
|
||||
; CHECK-NEXT: store i32 [[T28]], i32* [[T29]], align 4
|
||||
; CHECK-NEXT: store i32 [[T31]], i32* [[T32]], align 4
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64* [[T222]] to <2 x i64>*
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP5]], align 8
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP2]], <i32 4, i32 4, i32 6, i32 7>
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <2 x i64> [[TMP4]], <i64 4, i64 4>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add nsw <2 x i64> [[TMP6]], <i64 6, i64 7>
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64* [[T212]] to <2 x i64>*
|
||||
; CHECK-NEXT: store <2 x i64> [[TMP8]], <2 x i64>* [[TMP10]], align 8
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64* [[T292]] to <2 x i64>*
|
||||
; CHECK-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP11]], align 8
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[T21]] to <4 x i32>*
|
||||
; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP12]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%t0 = bitcast i8* %v0 to i32*
|
||||
|
|
Loading…
Reference in New Issue