forked from OSchip/llvm-project
Do not eliminate allocas whose alignment exceeds that of the
copied-in constant, as a subsequent user may rely on over-alignment. Fixes PR12885. llvm-svn: 157134
This commit is contained in:
parent
66b0c93553
commit
97b1076435
|
@ -29,6 +29,7 @@
|
|||
#include "llvm/IntrinsicInst.h"
|
||||
#include "llvm/LLVMContext.h"
|
||||
#include "llvm/Module.h"
|
||||
#include "llvm/Operator.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Analysis/DebugInfo.h"
|
||||
#include "llvm/Analysis/DIBuilder.h"
|
||||
|
@ -1346,6 +1347,25 @@ static bool ShouldAttemptScalarRepl(AllocaInst *AI) {
|
|||
return false;
|
||||
}
|
||||
|
||||
/// getPointeeAlignment - Compute the minimum alignment of the value pointed
|
||||
/// to by the given pointer.
|
||||
static unsigned getPointeeAlignment(Value *V, const TargetData &TD) {
|
||||
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
|
||||
if (CE->getOpcode() == Instruction::BitCast ||
|
||||
(CE->getOpcode() == Instruction::GetElementPtr &&
|
||||
cast<GEPOperator>(CE)->hasAllZeroIndices()))
|
||||
return getPointeeAlignment(CE->getOperand(0), TD);
|
||||
|
||||
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
|
||||
if (!GV->isDeclaration())
|
||||
return TD.getPreferredAlignment(GV);
|
||||
|
||||
if (PointerType *PT = dyn_cast<PointerType>(V->getType()))
|
||||
return TD.getABITypeAlignment(PT->getElementType());
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// performScalarRepl - This algorithm is a simple worklist driven algorithm,
|
||||
// which runs on all of the alloca instructions in the function, removing them
|
||||
|
@ -1379,23 +1399,26 @@ bool SROA::performScalarRepl(Function &F) {
|
|||
continue;
|
||||
|
||||
// Check to see if this allocation is only modified by a memcpy/memmove from
|
||||
// a constant global. If this is the case, we can change all users to use
|
||||
// a constant global whose alignment is equal to or exceeds that of the
|
||||
// allocation. If this is the case, we can change all users to use
|
||||
// the constant global instead. This is commonly produced by the CFE by
|
||||
// constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
|
||||
// is only subsequently read.
|
||||
SmallVector<Instruction *, 4> ToDelete;
|
||||
if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(AI, ToDelete)) {
|
||||
DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
|
||||
DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
|
||||
for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
|
||||
ToDelete[i]->eraseFromParent();
|
||||
Constant *TheSrc = cast<Constant>(Copy->getSource());
|
||||
AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
|
||||
Copy->eraseFromParent(); // Don't mutate the global.
|
||||
AI->eraseFromParent();
|
||||
++NumGlobals;
|
||||
Changed = true;
|
||||
continue;
|
||||
if (AI->getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) {
|
||||
DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
|
||||
DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
|
||||
for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
|
||||
ToDelete[i]->eraseFromParent();
|
||||
Constant *TheSrc = cast<Constant>(Copy->getSource());
|
||||
AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
|
||||
Copy->eraseFromParent(); // Don't mutate the global.
|
||||
AI->eraseFromParent();
|
||||
++NumGlobals;
|
||||
Changed = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Check to see if we can perform the core SROA transformation. We cannot
|
||||
|
|
|
@ -45,8 +45,10 @@ declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
|
|||
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
|
||||
|
||||
%T = type { i8, [123 x i8] }
|
||||
%U = type { i32, i32, i32, i32, i32 }
|
||||
|
||||
@G = constant %T {i8 1, [123 x i8] zeroinitializer }
|
||||
@H = constant [2 x %U] zeroinitializer, align 16
|
||||
|
||||
define void @test2() {
|
||||
%A = alloca %T
|
||||
|
@ -108,3 +110,37 @@ define void @test5() {
|
|||
|
||||
|
||||
declare void @baz(i8* byval)
|
||||
|
||||
|
||||
; test6: the alloca is 'align 16' and is filled by a memcpy whose source is a
; bitcast of @H. The FileCheck lines below require the bitcast to come right
; after the function header and the call to @bar right after the bitcast, so
; neither the alloca nor the memcpy may remain in the output.
define void @test6() {
|
||||
%A = alloca %U, align 16
|
||||
%a = bitcast %U* %A to i8*
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast ([2 x %U]* @H to i8*), i64 20, i32 16, i1 false)
|
||||
call void @bar(i8* %a) readonly
|
||||
; CHECK: @test6
|
||||
|
||||
; CHECK-NEXT: %a = bitcast
|
||||
; CHECK-NEXT: call void @bar(i8* %a)
|
||||
ret void
|
||||
}
|
||||
|
||||
; test7: same shape as a 16-byte-aligned alloca filled by memcpy, but the
; source is a getelementptr into @H whose indices are all zero, and the memcpy
; itself only states alignment 4. The FileCheck lines below still require the
; bitcast to be followed directly by the call to @bar, i.e. the memcpy must be
; gone.
define void @test7() {
|
||||
%A = alloca %U, align 16
|
||||
%a = bitcast %U* %A to i8*
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U]* @H, i64 0, i32 0) to i8*), i64 20, i32 4, i1 false)
|
||||
call void @bar(i8* %a) readonly
|
||||
; CHECK: @test7
|
||||
|
||||
; CHECK-NEXT: %a = bitcast
|
||||
; CHECK-NEXT: call void @bar(i8* %a)
|
||||
ret void
|
||||
}
|
||||
|
||||
; test8: the memcpy source is a getelementptr into @H with a non-zero final
; index (i32 1), so it does not address the start of @H. The FileCheck lines
; below require the llvm.memcpy call to still be present before the call to
; bar, i.e. the 16-byte-aligned alloca must be kept.
; NOTE(review): presumably the source is treated as only ABI-aligned for %U
; here, which is below 16 on the tested target -- confirm against the
; datalayout used by the RUN line.
define void @test8() {
|
||||
%A = alloca %U, align 16
|
||||
%a = bitcast %U* %A to i8*
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U]* @H, i64 0, i32 1) to i8*), i64 20, i32 4, i1 false)
|
||||
call void @bar(i8* %a) readonly
|
||||
; CHECK: @test8
|
||||
|
||||
; CHECK: llvm.memcpy
|
||||
; CHECK: bar
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue