Preserve more MD_mem_parallel_loop_access and MD_access_group in SROA
SROA sometimes preserves MD_mem_parallel_loop_access and MD_access_group metadata on loads and stores, and sometimes fails to do so. This change copies the metadata after the remaining CreateAlignedLoad/CreateAlignedStore calls, and fixes one case where the metadata was copied from a load rather than from the store being rewritten. A LIT test was added to catch one case.

Patch by Mark Mendell

Differential Revision: https://reviews.llvm.org/D103254
commit 41555eaf65
parent 462f8f0611
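Every hunk below applies the same idiom: right after SROA materializes a replacement load or store, copy the loop-parallelism metadata from the instruction being rewritten. A minimal sketch of that idiom using the LLVM C++ API (IRB, OldLoad, and NewPtr are illustrative placeholders, not names from the patch):

    // Sketch only: recreate the access, then carry over the two
    // loop-parallelism metadata kinds. AA metadata (e.g. !tbaa) is
    // handled separately via setAAMetadata in the surrounding code.
    LoadInst *NewLoad = IRB.CreateAlignedLoad(OldLoad.getType(), NewPtr,
                                              OldLoad.getAlign(), "load");
    NewLoad->copyMetadata(OldLoad, {LLVMContext::MD_mem_parallel_loop_access,
                                    LLVMContext::MD_access_group});

Without the copyMetadata call, the new access silently loses !llvm.access.group / !llvm.mem.parallel_loop_access, which can later prevent loop transformations that rely on the loop being known parallel.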
llvm/lib/Transforms/Scalar/SROA.cpp

@@ -2468,14 +2468,17 @@ private:
     Pass.DeadInsts.push_back(I);
   }

-  Value *rewriteVectorizedLoadInst() {
+  Value *rewriteVectorizedLoadInst(LoadInst &LI) {
     unsigned BeginIndex = getIndex(NewBeginOffset);
     unsigned EndIndex = getIndex(NewEndOffset);
     assert(EndIndex > BeginIndex && "Empty vector!");

-    Value *V = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
-                                     NewAI.getAlign(), "load");
-    return extractVector(IRB, V, BeginIndex, EndIndex, "vec");
+    LoadInst *Load = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
+                                           NewAI.getAlign(), "load");
+
+    Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
+                            LLVMContext::MD_access_group});
+    return extractVector(IRB, Load, BeginIndex, EndIndex, "vec");
   }

   Value *rewriteIntegerLoad(LoadInst &LI) {

@@ -2519,7 +2522,7 @@ private:
     bool IsPtrAdjusted = false;
     Value *V;
     if (VecTy) {
-      V = rewriteVectorizedLoadInst();
+      V = rewriteVectorizedLoadInst(LI);
     } else if (IntTy && LI.getType()->isIntegerTy()) {
       V = rewriteIntegerLoad(LI);
     } else if (NewBeginOffset == NewAllocaBeginOffset &&

@@ -2573,6 +2576,8 @@ private:
         NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
       if (LI.isVolatile())
         NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
+      NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
+                               LLVMContext::MD_access_group});

       V = NewLI;
       IsPtrAdjusted = true;

@@ -2632,6 +2637,8 @@ private:
       V = insertVector(IRB, Old, V, BeginIndex, "vec");
     }
     StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
+    Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
+                             LLVMContext::MD_access_group});
     if (AATags)
       Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
     Pass.DeadInsts.push_back(&SI);

@@ -2891,6 +2898,8 @@ private:

     StoreInst *New =
         IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign(), II.isVolatile());
+    New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
+                           LLVMContext::MD_access_group});
     if (AATags)
       New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
     LLVM_DEBUG(dbgs() << "          to: " << *New << "\n");

@@ -3066,6 +3075,8 @@ private:
     } else {
       LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
                                              II.isVolatile(), "copyload");
+      Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
+                              LLVMContext::MD_access_group});
       if (AATags)
         Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
       Src = Load;

@@ -3086,6 +3097,8 @@ private:

     StoreInst *Store = cast<StoreInst>(
         IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
+    Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
+                             LLVMContext::MD_access_group});
     if (AATags)
       Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
     LLVM_DEBUG(dbgs() << "          to: " << *Store << "\n");

@@ -4077,7 +4090,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
                          PartPtrTy, StoreBasePtr->getName() + "."),
           getAdjustedAlignment(SI, PartOffset),
           /*IsVolatile*/ false);
-      PStore->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
+      PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
                                  LLVMContext::MD_access_group});
       LLVM_DEBUG(dbgs() << "      +" << PartOffset << ":" << *PStore << "\n");
     }

@@ -4163,6 +4176,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
                            LoadPartPtrTy, LoadBasePtr->getName() + "."),
             getAdjustedAlignment(LI, PartOffset),
             /*IsVolatile*/ false, LI->getName());
+        PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
+                                  LLVMContext::MD_access_group});
       }

       // And store this partition.

@@ -4175,6 +4190,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
                          StorePartPtrTy, StoreBasePtr->getName() + "."),
           getAdjustedAlignment(SI, PartOffset),
           /*IsVolatile*/ false);
+      PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
+                                 LLVMContext::MD_access_group});

       // Now build a new slice for the alloca.
       NewSlices.push_back(
New LIT test:

@@ -0,0 +1,32 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+;
+; Make sure the llvm.access.group meta-data is preserved
+; when a load/store is replaced with another load/store by sroa
+; Ensure this is done for casting too.
+;
+; CHECK: entry:
+; CHECK: load i32, i32* {{.*}}, !llvm.access.group [[DISTINCT:![0-9]*]]
+; CHECK: load i32, i32* {{.*}}, !llvm.access.group [[DISTINCT]]
+; CHECK: ret void
+; CHECK: [[DISTINCT]] = distinct !{}
+
+%CMPLX = type { float, float }
+
+define dso_local void @test() {
+entry:
+  %PART = alloca %CMPLX, align 8
+  %PREV = alloca %CMPLX, align 8
+  %r2 = getelementptr %CMPLX, %CMPLX* %PREV, i32 0, i32 0
+  store float 0.000000e+00, float* %r2, align 4
+  %i2 = getelementptr %CMPLX, %CMPLX* %PREV, i32 0, i32 1
+  store float 0.000000e+00, float* %i2, align 4
+  %dummy = sext i16 0 to i64
+  %T = getelementptr %CMPLX, %CMPLX* %PART, i64 %dummy
+  %X35 = bitcast %CMPLX* %T to i64*
+  %X36 = bitcast %CMPLX* %PREV to i64*
+  %X37 = load i64, i64* %X35, align 8, !llvm.access.group !0
+  store i64 %X37, i64* %X36, align 8
+  ret void
+}
+
+!0 = distinct !{}