[VectorCombine] Fix alignment in single element store

This fixes the concern in single-element store scalarization that the
alignment of the new store may be larger than is actually valid. The
alignment is now computed from the original alignment and the element's
byte offset: the largest provable value when the index is a constant,
and a conservatively safe one when it is not (see the sketch below).

Reviewed By: lebedev.ri, spatel

Differential Revision: https://reviews.llvm.org/D103419
Qiu Chaofan 2021-06-11 10:28:15 +08:00
parent 420bd5ee8e
commit 2670c7dd5b
2 changed files with 13 additions and 5 deletions
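
Before the diff, a minimal standalone sketch of the new clamping rule.
This is illustrative only: minAlign and newStoreAlign are hypothetical
stand-ins mirroring llvm::MinAlign/commonAlignment and the logic added
below, not the actual LLVM API.

// Minimal sketch (illustrative only): minAlign mirrors llvm::MinAlign,
// the lowest set bit of (A | B), i.e. the largest power of two that
// divides both values.
#include <cassert>
#include <cstdint>

static uint64_t minAlign(uint64_t A, uint64_t B) {
  uint64_t V = A | B;
  return V & (~V + 1); // isolate the lowest set bit
}

// VecAlign: max of the original vector load/store alignments.
// ElemSize: store size of the scalar element in bytes.
// HasConstIdx/Idx: whether the insert index is a constant, and its value.
static uint64_t newStoreAlign(uint64_t VecAlign, uint64_t ElemSize,
                              bool HasConstIdx, uint64_t Idx) {
  if (HasConstIdx)
    return minAlign(VecAlign, Idx * ElemSize); // exact byte offset known
  // Unknown index: the offset is still a multiple of ElemSize, so
  // clamping to the element size is always safe.
  return minAlign(VecAlign, ElemSize);
}

int main() {
  // Element 3 of a <8 x i16> whose vector access is 16-byte aligned:
  // byte offset 3 * 2 = 6, so the scalar store may only claim align 2.
  assert(newStoreAlign(16, 2, /*HasConstIdx=*/true, 3) == 2);
  // Unknown index into a vector of i64 with 16-byte access alignment:
  // clamp to the 8-byte element size.
  assert(newStoreAlign(16, 8, /*HasConstIdx=*/false, 0) == 8);
  return 0;
}

With these illustrative numbers, the constant-index case matches the new
align 2 expected in insert_store_i16_align1 below, and the unknown-index
clamp matches the new align 8 in insert_store_nonconst_align_maximum_8
(assuming that test's vector access carries an alignment divisible by 8).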


@@ -831,8 +831,15 @@ bool VectorCombine::foldSingleElementStore(Instruction &I) {
     Builder.Insert(GEP);
     StoreInst *NSI = Builder.CreateStore(NewElement, GEP);
     NSI->copyMetadata(*SI);
-    if (SI->getAlign() < NSI->getAlign())
-      NSI->setAlignment(SI->getAlign());
+    Align NewAlignment = std::max(SI->getAlign(), Load->getAlign());
+    if (auto *C = dyn_cast<ConstantInt>(Idx))
+      NewAlignment = commonAlignment(
+          NewAlignment,
+          C->getZExtValue() * DL.getTypeStoreSize(NewElement->getType()));
+    else
+      NewAlignment = commonAlignment(
+          NewAlignment, DL.getTypeStoreSize(NewElement->getType()));
+    NSI->setAlignment(NewAlignment);
     replaceValue(I, *NSI);
     // Need to erase the store manually.
     I.eraseFromParent();


@@ -20,7 +20,7 @@ define void @insert_store_i16_align1(<8 x i16>* %q, i16 zeroext %s) {
 ; CHECK-LABEL: @insert_store_i16_align1(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[Q:%.*]], i32 0, i32 3
-; CHECK-NEXT: store i16 [[S:%.*]], i16* [[TMP0]], align 1
+; CHECK-NEXT: store i16 [[S:%.*]], i16* [[TMP0]], align 2
 ; CHECK-NEXT: ret void
 ;
 entry:
@@ -125,6 +125,7 @@ entry:
   ret void
 }
 
+; Verify that the alignment here is narrowed to the scalar store size.
 define void @insert_store_nonconst_large_alignment(<4 x i32>* %q, i32 zeroext %s, i32 %idx) {
 ; CHECK-LABEL: @insert_store_nonconst_large_alignment(
 ; CHECK-NEXT: entry:
@@ -148,7 +148,7 @@ define void @insert_store_nonconst_align_maximum_8(<8 x i64>* %q, i64 %s, i32 %i
 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 2
 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, <8 x i64>* [[Q:%.*]], i32 0, i32 [[IDX]]
-; CHECK-NEXT: store i64 [[S:%.*]], i64* [[TMP1]], align 4
+; CHECK-NEXT: store i64 [[S:%.*]], i64* [[TMP1]], align 8
 ; CHECK-NEXT: ret void
 ;
   %cmp = icmp ult i32 %idx, 2
@@ -180,7 +181,7 @@ define void @insert_store_nonconst_align_larger(<8 x i64>* %q, i64 %s, i32 %idx)
 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 2
 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, <8 x i64>* [[Q:%.*]], i32 0, i32 [[IDX]]
-; CHECK-NEXT: store i64 [[S:%.*]], i64* [[TMP1]], align 2
+; CHECK-NEXT: store i64 [[S:%.*]], i64* [[TMP1]], align 4
 ; CHECK-NEXT: ret void
 ;
   %cmp = icmp ult i32 %idx, 2