[VectorCombine] Fix alignment in single element store

This fixes the concern in single-element store scalarization that the
alignment of the new store may be larger than is actually valid. The
alignment is now computed from the original alignment and the element's
byte offset: the largest provable value when the index is a constant,
and a conservatively safe one when it is not (see the sketch below).

Reviewed By: lebedev.ri, spatel

Differential Revision: https://reviews.llvm.org/D103419
Qiu Chaofan 2021-06-11 10:28:15 +08:00
parent 420bd5ee8e
commit 2670c7dd5b
2 changed files with 13 additions and 5 deletions
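
Before the diff, a minimal standalone sketch of the new clamping rule.
This is illustrative only: minAlign and newStoreAlign are hypothetical
stand-ins mirroring llvm::MinAlign/commonAlignment and the logic added
below, not the actual LLVM API.

// Minimal sketch (illustrative only): minAlign mirrors llvm::MinAlign,
// the lowest set bit of (A | B), i.e. the largest power of two that
// divides both values.
#include <cassert>
#include <cstdint>

static uint64_t minAlign(uint64_t A, uint64_t B) {
  uint64_t V = A | B;
  return V & (~V + 1); // isolate the lowest set bit
}

// VecAlign: max of the original vector load/store alignments.
// ElemSize: store size of the scalar element in bytes.
// HasConstIdx/Idx: whether the insert index is a constant, and its value.
static uint64_t newStoreAlign(uint64_t VecAlign, uint64_t ElemSize,
                              bool HasConstIdx, uint64_t Idx) {
  if (HasConstIdx)
    return minAlign(VecAlign, Idx * ElemSize); // exact byte offset known
  // Unknown index: the offset is still a multiple of ElemSize, so
  // clamping to the element size is always safe.
  return minAlign(VecAlign, ElemSize);
}

int main() {
  // Element 3 of a <8 x i16> whose vector access is 16-byte aligned:
  // byte offset 3 * 2 = 6, so the scalar store may only claim align 2.
  assert(newStoreAlign(16, 2, /*HasConstIdx=*/true, 3) == 2);
  // Unknown index into a vector of i64 with 16-byte access alignment:
  // clamp to the 8-byte element size.
  assert(newStoreAlign(16, 8, /*HasConstIdx=*/false, 0) == 8);
  return 0;
}

With these illustrative numbers, the constant-index case matches the new
align 2 expected in insert_store_i16_align1 below, and the unknown-index
clamp matches the new align 8 in insert_store_nonconst_align_maximum_8
(assuming that test's vector access carries an alignment divisible by 8).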


@@ -831,8 +831,15 @@ bool VectorCombine::foldSingleElementStore(Instruction &I) {
     Builder.Insert(GEP);
     StoreInst *NSI = Builder.CreateStore(NewElement, GEP);
     NSI->copyMetadata(*SI);
-    if (SI->getAlign() < NSI->getAlign())
-      NSI->setAlignment(SI->getAlign());
+    Align NewAlignment = std::max(SI->getAlign(), Load->getAlign());
+    if (auto *C = dyn_cast<ConstantInt>(Idx))
+      NewAlignment = commonAlignment(
+          NewAlignment,
+          C->getZExtValue() * DL.getTypeStoreSize(NewElement->getType()));
+    else
+      NewAlignment = commonAlignment(
+          NewAlignment, DL.getTypeStoreSize(NewElement->getType()));
+    NSI->setAlignment(NewAlignment);
     replaceValue(I, *NSI);
     // Need to erase the store manually.
     I.eraseFromParent();


@@ -20,7 +20,7 @@ define void @insert_store_i16_align1(<8 x i16>* %q, i16 zeroext %s) {
 ; CHECK-LABEL: @insert_store_i16_align1(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[Q:%.*]], i32 0, i32 3
-; CHECK-NEXT: store i16 [[S:%.*]], i16* [[TMP0]], align 1
+; CHECK-NEXT: store i16 [[S:%.*]], i16* [[TMP0]], align 2
 ; CHECK-NEXT: ret void
 ;
 entry:
@@ -125,6 +125,7 @@ entry:
   ret void
 }
 
+; Verify that the alignment here is narrowed to the scalar store size.
 define void @insert_store_nonconst_large_alignment(<4 x i32>* %q, i32 zeroext %s, i32 %idx) {
 ; CHECK-LABEL: @insert_store_nonconst_large_alignment(
 ; CHECK-NEXT: entry:
@@ -148,7 +148,7 @@ define void @insert_store_nonconst_align_maximum_8(<8 x i64>* %q, i64 %s, i32 %i
 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 2
 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, <8 x i64>* [[Q:%.*]], i32 0, i32 [[IDX]]
-; CHECK-NEXT: store i64 [[S:%.*]], i64* [[TMP1]], align 4
+; CHECK-NEXT: store i64 [[S:%.*]], i64* [[TMP1]], align 8
 ; CHECK-NEXT: ret void
 ;
   %cmp = icmp ult i32 %idx, 2
@@ -180,7 +181,7 @@ define void @insert_store_nonconst_align_larger(<8 x i64>* %q, i64 %s, i32 %idx)
 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 2
 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <8 x i64>, <8 x i64>* [[Q:%.*]], i32 0, i32 [[IDX]]
-; CHECK-NEXT: store i64 [[S:%.*]], i64* [[TMP1]], align 2
+; CHECK-NEXT: store i64 [[S:%.*]], i64* [[TMP1]], align 4
 ; CHECK-NEXT: ret void
 ;
   %cmp = icmp ult i32 %idx, 2