forked from OSchip/llvm-project
[GlobalMerge] Take into account minsize on Global users' parents.
Now that we can look at users, we can trivially do this: when we would have otherwise disabled GlobalMerge (currently -O<3), we can just run it for minsize functions, as it's usually a codesize win. Differential Revision: http://reviews.llvm.org/D10054 llvm-svn: 239087
This commit is contained in:
parent
a1c4da99f6
commit
8207641251
|
@ -152,7 +152,14 @@ Pass *createLoopInterchangePass();
|
|||
//
|
||||
Pass *createLoopStrengthReducePass();
|
||||
|
||||
Pass *createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset);
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// GlobalMerge - This pass merges internal (by default) globals into structs
|
||||
// to enable reuse of a base pointer by indexed addressing modes.
|
||||
// It can also be configured to focus on size optimizations only.
|
||||
//
|
||||
Pass *createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset,
|
||||
bool OnlyOptimizeForSize = false);
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
|
|
@ -124,6 +124,12 @@ namespace {
|
|||
// for more information.
|
||||
unsigned MaxOffset;
|
||||
|
||||
/// Whether we should try to optimize for size only.
|
||||
/// Currently, this applies a dead simple heuristic: only consider globals
|
||||
/// used in minsize functions for merging.
|
||||
/// FIXME: This could learn about optsize, and be used in the cost model.
|
||||
bool OnlyOptimizeForSize;
|
||||
|
||||
bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
|
||||
Module &M, bool isConst, unsigned AddrSpace) const;
|
||||
/// \brief Merge everything in \p Globals for which the corresponding bit
|
||||
|
@ -152,9 +158,10 @@ namespace {
|
|||
public:
|
||||
static char ID; // Pass identification, replacement for typeid.
|
||||
explicit GlobalMerge(const TargetMachine *TM = nullptr,
|
||||
unsigned MaximalOffset = 0)
|
||||
unsigned MaximalOffset = 0,
|
||||
bool OnlyOptimizeForSize = false)
|
||||
: FunctionPass(ID), TM(TM), DL(TM->getDataLayout()),
|
||||
MaxOffset(MaximalOffset) {
|
||||
MaxOffset(MaximalOffset), OnlyOptimizeForSize(OnlyOptimizeForSize) {
|
||||
initializeGlobalMergePass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
|
@ -290,6 +297,12 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
|
|||
continue;
|
||||
|
||||
Function *ParentFn = I->getParent()->getParent();
|
||||
|
||||
// If we're only optimizing for size, ignore non-minsize functions.
|
||||
if (OnlyOptimizeForSize &&
|
||||
!ParentFn->hasFnAttribute(Attribute::MinSize))
|
||||
continue;
|
||||
|
||||
size_t UGSIdx = GlobalUsesByFunction[ParentFn];
|
||||
|
||||
// If this is the first global the function uses, map it to the set
|
||||
|
@ -585,6 +598,7 @@ bool GlobalMerge::doFinalization(Module &M) {
|
|||
return false;
|
||||
}
|
||||
|
||||
Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset) {
|
||||
return new GlobalMerge(TM, Offset);
|
||||
Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset,
|
||||
bool OnlyOptimizeForSize) {
|
||||
return new GlobalMerge(TM, Offset, OnlyOptimizeForSize);
|
||||
}
|
||||
|
|
|
@ -250,10 +250,14 @@ bool AArch64PassConfig::addPreISel() {
|
|||
// FIXME: On AArch64, this depends on the type.
|
||||
// Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes(),
|
||||
// and the offset has to be a multiple of the related size in bytes.
|
||||
if ((TM->getOptLevel() == CodeGenOpt::Aggressive &&
|
||||
if ((TM->getOptLevel() != CodeGenOpt::None &&
|
||||
EnableGlobalMerge == cl::BOU_UNSET) ||
|
||||
EnableGlobalMerge == cl::BOU_TRUE)
|
||||
addPass(createGlobalMergePass(TM, 4095));
|
||||
EnableGlobalMerge == cl::BOU_TRUE) {
|
||||
bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
|
||||
(EnableGlobalMerge == cl::BOU_UNSET);
|
||||
addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize));
|
||||
}
|
||||
|
||||
if (TM->getOptLevel() != CodeGenOpt::None)
|
||||
addPass(createAArch64AddressTypePromotionPass());
|
||||
|
||||
|
|
|
@ -339,15 +339,18 @@ void ARMPassConfig::addIRPasses() {
|
|||
}
|
||||
|
||||
bool ARMPassConfig::addPreISel() {
|
||||
if ((TM->getOptLevel() == CodeGenOpt::Aggressive &&
|
||||
if ((TM->getOptLevel() != CodeGenOpt::None &&
|
||||
EnableGlobalMerge == cl::BOU_UNSET) ||
|
||||
EnableGlobalMerge == cl::BOU_TRUE)
|
||||
EnableGlobalMerge == cl::BOU_TRUE) {
|
||||
// FIXME: This is using the thumb1 only constant value for
|
||||
// maximal global offset for merging globals. We may want
|
||||
// to look into using the old value for non-thumb1 code of
|
||||
// 4095 based on the TargetMachine, but this starts to become
|
||||
// tricky when doing code gen per function.
|
||||
addPass(createGlobalMergePass(TM, 127));
|
||||
bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
|
||||
(EnableGlobalMerge == cl::BOU_UNSET);
|
||||
addPass(createGlobalMergePass(TM, 127, OnlyOptimizeForSize));
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,74 @@
|
|||
; RUN: llc -mtriple=aarch64-apple-ios -asm-verbose=false -aarch64-collect-loh=false \
|
||||
; RUN: -O1 -global-merge-group-by-use -global-merge-ignore-single-use \
|
||||
; RUN: %s -o - | FileCheck %s
|
||||
|
||||
; Check that, at -O1, we only merge globals used in minsize functions.
|
||||
; We assume that globals of the same size aren't reordered inside a set.
|
||||
; We use -global-merge-ignore-single-use, and thus only expect one merged set.
|
||||
|
||||
@m1 = internal global i32 0, align 4
|
||||
@n1 = internal global i32 0, align 4
|
||||
|
||||
; CHECK-LABEL: f1:
|
||||
define void @f1(i32 %a1, i32 %a2) minsize nounwind {
|
||||
; CHECK-NEXT: adrp x8, [[SET:__MergedGlobals]]@PAGE
|
||||
; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
|
||||
; CHECK-NEXT: stp w0, w1, [x8]
|
||||
; CHECK-NEXT: ret
|
||||
store i32 %a1, i32* @m1, align 4
|
||||
store i32 %a2, i32* @n1, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
@m2 = internal global i32 0, align 4
|
||||
@n2 = internal global i32 0, align 4
|
||||
|
||||
; CHECK-LABEL: f2:
|
||||
define void @f2(i32 %a1, i32 %a2) nounwind {
|
||||
; CHECK-NEXT: adrp x8, _m2@PAGE
|
||||
; CHECK-NEXT: adrp x9, _n2@PAGE
|
||||
; CHECK-NEXT: str w0, [x8, _m2@PAGEOFF]
|
||||
; CHECK-NEXT: str w1, [x9, _n2@PAGEOFF]
|
||||
; CHECK-NEXT: ret
|
||||
store i32 %a1, i32* @m2, align 4
|
||||
store i32 %a2, i32* @n2, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; If we have use sets partially overlapping between a minsize and a non-minsize
|
||||
; function, explicitly check that we only consider the globals used in the
|
||||
; minsize function for merging.
|
||||
|
||||
@m3 = internal global i32 0, align 4
|
||||
@n3 = internal global i32 0, align 4
|
||||
|
||||
; CHECK-LABEL: f3:
|
||||
define void @f3(i32 %a1, i32 %a2) minsize nounwind {
|
||||
; CHECK-NEXT: adrp x8, [[SET]]@PAGE
|
||||
; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
|
||||
; CHECK-NEXT: stp w0, w1, [x8, #8]
|
||||
; CHECK-NEXT: ret
|
||||
store i32 %a1, i32* @m3, align 4
|
||||
store i32 %a2, i32* @n3, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
@n4 = internal global i32 0, align 4
|
||||
|
||||
; CHECK-LABEL: f4:
|
||||
define void @f4(i32 %a1, i32 %a2) nounwind {
|
||||
; CHECK-NEXT: adrp x8, [[SET]]@PAGE
|
||||
; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
|
||||
; CHECK-NEXT: adrp x9, _n4@PAGE
|
||||
; CHECK-NEXT: str w0, [x8, #8]
|
||||
; CHECK-NEXT: str w1, [x9, _n4@PAGEOFF]
|
||||
; CHECK-NEXT: ret
|
||||
store i32 %a1, i32* @m3, align 4
|
||||
store i32 %a2, i32* @n4, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-DAG: .zerofill __DATA,__bss,[[SET]],16,3
|
||||
; CHECK-DAG: .zerofill __DATA,__bss,_m2,4,2
|
||||
; CHECK-DAG: .zerofill __DATA,__bss,_n2,4,2
|
||||
; CHECK-DAG: .zerofill __DATA,__bss,_n4,4,2
|
Loading…
Reference in New Issue