[GlobalOpt] recompute alignments for loads and stores of updated globals

GlobalOpt can slice structs/arrays and change GEPs in the process,
but it was not updating alignments for load/store users. This
eventually causes the crashing seen in:
https://llvm.org/PR49661
https://llvm.org/PR50253

On x86, this required SLP+codegen to create an aligned vector
store on an invalid address. The bugs would be easier to
demonstrate on a target with stricter alignment requirements.

I'm not sure if this is a complete solution. The alignment
updating code is adapted from InstCombine, so I assume that
part is tested and good.

Differential Revision: https://reviews.llvm.org/D102552
This commit is contained in:
Sanjay Patel 2021-05-20 11:32:03 -04:00
parent ee4055cf23
commit f34311c402
3 changed files with 36 additions and 14 deletions

View File

@ -630,6 +630,23 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
} }
GEP->replaceAllUsesWith(NewPtr); GEP->replaceAllUsesWith(NewPtr);
// We changed the pointer of any memory access user. Recalculate alignments.
for (User *U : NewPtr->users()) {
if (auto *Load = dyn_cast<LoadInst>(U)) {
Align PrefAlign = DL.getPrefTypeAlign(Load->getType());
Align NewAlign = getOrEnforceKnownAlignment(Load->getPointerOperand(),
PrefAlign, DL, Load);
Load->setAlignment(NewAlign);
}
if (auto *Store = dyn_cast<StoreInst>(U)) {
Align PrefAlign =
DL.getPrefTypeAlign(Store->getValueOperand()->getType());
Align NewAlign = getOrEnforceKnownAlignment(Store->getPointerOperand(),
PrefAlign, DL, Store);
Store->setAlignment(NewAlign);
}
}
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP)) if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP))
GEPI->eraseFromParent(); GEPI->eraseFromParent();
else else

View File

@ -17,7 +17,9 @@ target datalayout = "p:16:32:128"
@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__I_a, i8* null }] @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__I_a, i8* null }]
@llvm.used = appending global [2 x i8*] [i8* getelementptr inbounds ([7 x i8], [7 x i8]* @"\01L_OBJC_METH_VAR_NAME_40", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_41" to i8*)] @llvm.used = appending global [2 x i8*] [i8* getelementptr inbounds ([7 x i8], [7 x i8]* @"\01L_OBJC_METH_VAR_NAME_40", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_41" to i8*)]
; CHECK: @[[_ZL14BUTTONINITDATA_0_0:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr global i8* null, align 4 ; Choose the preferred alignment.
; CHECK: @[[_ZL14BUTTONINITDATA_0_0:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr global i8* null, align 16
;. ;.
define internal void @__cxx_global_var_init() section "__TEXT,__StaticInit,regular,pure_instructions" { define internal void @__cxx_global_var_init() section "__TEXT,__StaticInit,regular,pure_instructions" {
%1 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_41", !invariant.load !2009 %1 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_41", !invariant.load !2009
@ -32,9 +34,11 @@ define internal void @_GLOBAL__I_a() section "__TEXT,__StaticInit,regular,pure_i
declare void @test(i8*) declare void @test(i8*)
; The preferred alignment is available.
define void @print() { define void @print() {
; CHECK-LABEL: @print( ; CHECK-LABEL: @print(
; CHECK-NEXT: [[TMP1:%.*]] = load i8*, i8** @_ZL14buttonInitData.0.0, align 4 ; CHECK-NEXT: [[TMP1:%.*]] = load i8*, i8** @_ZL14buttonInitData.0.0, align 16
; CHECK-NEXT: call void @test(i8* [[TMP1]]) ; CHECK-NEXT: call void @test(i8* [[TMP1]])
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
; ;

View File

@ -5,12 +5,13 @@ target datalayout = "p:16:32:64" ; 16-bit pointers with 32-bit ABI alignment and
@a = internal externally_initialized global [3 x [7 x i32*]] zeroinitializer, align 16 @a = internal externally_initialized global [3 x [7 x i32*]] zeroinitializer, align 16
; FIXME:
; PR50253 ; PR50253
; The store alignments are correct initially, but they should be updated ; The alignments are correct initially, but they should be updated
; after transforming the global. The global pointer retains its original ; after transforming the global. The stored global pointer array retains
; "align 16", so access to element N into the new array should be offset ; its original "align 16", so access to element N into the new array
; by the ABI alignment of N pointers. ; should be offset by the ABI alignment of N pointers.
; Loaded globals are split into individual pointers and use the
; preferred alignment from the datalayout.
;. ;.
; CHECK: @[[A_1:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr externally_initialized global [7 x i32*] zeroinitializer, align 16 ; CHECK: @[[A_1:[a-zA-Z0-9_$"\\.-]+]] = internal unnamed_addr externally_initialized global [7 x i32*] zeroinitializer, align 16
@ -22,7 +23,7 @@ target datalayout = "p:16:32:64" ; 16-bit pointers with 32-bit ABI alignment and
define i32* @reduce_align_0() { define i32* @reduce_align_0() {
; CHECK-LABEL: @reduce_align_0( ; CHECK-LABEL: @reduce_align_0(
; CHECK-NEXT: [[X:%.*]] = load i32*, i32** @a.2.0, align 8 ; CHECK-NEXT: [[X:%.*]] = load i32*, i32** @a.2.0, align 8
; CHECK-NEXT: store i32* null, i32** getelementptr inbounds ([7 x i32*], [7 x i32*]* @a.1, i32 0, i64 0), align 4 ; CHECK-NEXT: store i32* null, i32** getelementptr inbounds ([7 x i32*], [7 x i32*]* @a.1, i32 0, i64 0), align 16
; CHECK-NEXT: ret i32* [[X]] ; CHECK-NEXT: ret i32* [[X]]
; ;
%x = load i32*, i32** getelementptr inbounds ([3 x [7 x i32*]], [3 x [7 x i32*]]* @a, i64 0, i64 2, i64 0), align 8 %x = load i32*, i32** getelementptr inbounds ([3 x [7 x i32*]], [3 x [7 x i32*]]* @a, i64 0, i64 2, i64 0), align 8
@ -32,8 +33,8 @@ define i32* @reduce_align_0() {
define i32* @reduce_align_1() { define i32* @reduce_align_1() {
; CHECK-LABEL: @reduce_align_1( ; CHECK-LABEL: @reduce_align_1(
; CHECK-NEXT: [[X:%.*]] = load i32*, i32** @a.2.1, align 4 ; CHECK-NEXT: [[X:%.*]] = load i32*, i32** @a.2.1, align 8
; CHECK-NEXT: store i32* null, i32** getelementptr inbounds ([7 x i32*], [7 x i32*]* @a.1, i32 0, i64 1), align 16 ; CHECK-NEXT: store i32* null, i32** getelementptr inbounds ([7 x i32*], [7 x i32*]* @a.1, i32 0, i64 1), align 4
; CHECK-NEXT: ret i32* [[X]] ; CHECK-NEXT: ret i32* [[X]]
; ;
%x = load i32*, i32** getelementptr inbounds ([3 x [7 x i32*]], [3 x [7 x i32*]]* @a, i64 0, i64 2, i64 1), align 4 %x = load i32*, i32** getelementptr inbounds ([3 x [7 x i32*]], [3 x [7 x i32*]]* @a, i64 0, i64 2, i64 1), align 4
@ -43,8 +44,8 @@ define i32* @reduce_align_1() {
define i32* @reduce_align_2() { define i32* @reduce_align_2() {
; CHECK-LABEL: @reduce_align_2( ; CHECK-LABEL: @reduce_align_2(
; CHECK-NEXT: [[X:%.*]] = load i32*, i32** @a.2.2, align 16 ; CHECK-NEXT: [[X:%.*]] = load i32*, i32** @a.2.2, align 8
; CHECK-NEXT: store i32* null, i32** getelementptr inbounds ([7 x i32*], [7 x i32*]* @a.1, i32 0, i64 2), align 4 ; CHECK-NEXT: store i32* null, i32** getelementptr inbounds ([7 x i32*], [7 x i32*]* @a.1, i32 0, i64 2), align 8
; CHECK-NEXT: ret i32* [[X]] ; CHECK-NEXT: ret i32* [[X]]
; ;
%x = load i32*, i32** getelementptr inbounds ([3 x [7 x i32*]], [3 x [7 x i32*]]* @a, i64 0, i64 2, i64 2), align 16 %x = load i32*, i32** getelementptr inbounds ([3 x [7 x i32*]], [3 x [7 x i32*]]* @a, i64 0, i64 2, i64 2), align 16
@ -54,8 +55,8 @@ define i32* @reduce_align_2() {
define i32* @reduce_align_3() { define i32* @reduce_align_3() {
; CHECK-LABEL: @reduce_align_3( ; CHECK-LABEL: @reduce_align_3(
; CHECK-NEXT: [[X:%.*]] = load i32*, i32** @a.2.3, align 4 ; CHECK-NEXT: [[X:%.*]] = load i32*, i32** @a.2.3, align 8
; CHECK-NEXT: store i32* null, i32** getelementptr inbounds ([7 x i32*], [7 x i32*]* @a.1, i32 0, i64 3), align 8 ; CHECK-NEXT: store i32* null, i32** getelementptr inbounds ([7 x i32*], [7 x i32*]* @a.1, i32 0, i64 3), align 4
; CHECK-NEXT: ret i32* [[X]] ; CHECK-NEXT: ret i32* [[X]]
; ;
%x = load i32*, i32** getelementptr inbounds ([3 x [7 x i32*]], [3 x [7 x i32*]]* @a, i64 0, i64 2, i64 3), align 4 %x = load i32*, i32** getelementptr inbounds ([3 x [7 x i32*]], [3 x [7 x i32*]]* @a, i64 0, i64 2, i64 3), align 4