diff --git a/bolt/lib/Passes/TailDuplication.cpp b/bolt/lib/Passes/TailDuplication.cpp index 75b4f3d32dba..8dbe390e6052 100644 --- a/bolt/lib/Passes/TailDuplication.cpp +++ b/bolt/lib/Passes/TailDuplication.cpp @@ -12,6 +12,8 @@ #include +#define DEBUG_TYPE "taildup" + using namespace llvm; namespace opts { @@ -37,6 +39,11 @@ static cl::opt TailDuplicationMaximumDuplication( cl::desc("maximum size of duplicated blocks (in bytes)"), cl::ZeroOrMore, cl::ReallyHidden, cl::init(64), cl::cat(BoltOptCategory)); +static cl::opt TailDuplicationConstCopyPropagation( + "tail-duplication-const-copy-propagation", + cl::desc("enable const and copy propagation after tail duplication"), + cl::ReallyHidden, cl::init(false), cl::cat(BoltOptCategory)); + } // namespace opts namespace llvm { @@ -254,9 +261,14 @@ TailDuplication::aggressiveCodeToDuplicate(BinaryBasicBlock &BB) const { std::vector BlocksToDuplicate; BinaryBasicBlock *CurrBB = &BB; while (CurrBB) { + LLVM_DEBUG(dbgs() << "Aggressive tail duplication: adding " + << CurrBB->getName() << " to duplication list\n";); BlocksToDuplicate.push_back(CurrBB); if (CurrBB->hasJumpTable()) { + LLVM_DEBUG(dbgs() << "Aggressive tail duplication: clearing duplication " + "list due to a JT in " + << CurrBB->getName() << '\n';); BlocksToDuplicate.clear(); break; } @@ -273,8 +285,12 @@ TailDuplication::aggressiveCodeToDuplicate(BinaryBasicBlock &BB) const { if (CurrBB->getConditionalSuccessor(false)->getLayoutIndex() == CurrBB->getLayoutIndex() + 1 || CurrBB->getConditionalSuccessor(true)->getLayoutIndex() == - CurrBB->getLayoutIndex() + 1) + CurrBB->getLayoutIndex() + 1) { + LLVM_DEBUG(dbgs() << "Aggressive tail duplication: clearing " + "duplication list, can't find a simple stream at " + << CurrBB->getName() << '\n';); BlocksToDuplicate.clear(); + } break; } @@ -291,8 +307,14 @@ TailDuplication::aggressiveCodeToDuplicate(BinaryBasicBlock &BB) const { [](int value, BinaryBasicBlock *p) { return value + p->getOriginalSize(); }); - if (DuplicationByteCount > opts::TailDuplicationMaximumDuplication) + if (DuplicationByteCount > opts::TailDuplicationMaximumDuplication) { + LLVM_DEBUG(dbgs() << "Aggressive tail duplication: duplication byte count (" + << DuplicationByteCount << ") exceeds maximum " + << opts::TailDuplicationMaximumDuplication << '\n';); BlocksToDuplicate.clear(); + } + LLVM_DEBUG(dbgs() << "Aggressive tail duplication: found " + << BlocksToDuplicate.size() << " blocks to duplicate\n";); return BlocksToDuplicate; } @@ -401,18 +423,22 @@ void TailDuplication::runOnFunction(BinaryFunction &Function) { BlocksToDuplicate = aggressiveCodeToDuplicate(*Succ); else BlocksToDuplicate = moderateCodeToDuplicate(*Succ); - if (BlocksToDuplicate.size() > 0) { - PossibleDuplications++; - PossibleDuplicationsDynamicCount += BB->getExecutionCount(); - std::vector DuplicatedBlocks = - tailDuplicate(*BB, BlocksToDuplicate); - constantAndCopyPropagate(*BB, DuplicatedBlocks); - BinaryBasicBlock *FirstBB = BlocksToDuplicate[0]; - if (FirstBB->pred_size() == 1) { - BinaryBasicBlock *PredBB = *FirstBB->pred_begin(); - if (PredBB->succ_size() == 1) - constantAndCopyPropagate(*PredBB, BlocksToDuplicate); - } + + if (BlocksToDuplicate.size() == 0) + continue; + PossibleDuplications++; + PossibleDuplicationsDynamicCount += BB->getExecutionCount(); + std::vector DuplicatedBlocks = + tailDuplicate(*BB, BlocksToDuplicate); + if (!opts::TailDuplicationConstCopyPropagation) + continue; + + constantAndCopyPropagate(*BB, DuplicatedBlocks); + BinaryBasicBlock *FirstBB = BlocksToDuplicate[0]; + if (FirstBB->pred_size() == 1) { + BinaryBasicBlock *PredBB = *FirstBB->pred_begin(); + if (PredBB->succ_size() == 1) + constantAndCopyPropagate(*PredBB, BlocksToDuplicate); } } } diff --git a/bolt/test/X86/tail-duplication-prop-bug.s b/bolt/test/X86/tail-duplication-prop-bug.s new file mode 100644 index 000000000000..b6dcbe70a35b --- /dev/null +++ b/bolt/test/X86/tail-duplication-prop-bug.s @@ -0,0 +1,40 @@ +# This reproduces a bug in aggressive tail duplication/copy propagation. +# XFAIL: * + +# REQUIRES: system-linux +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o +# RUN: link_fdata %s %t.o %t.fdata +# RUN: llvm-strip --strip-unneeded %t.o +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib +# RUN: llvm-bolt %t.exe -o %t.out -data %t.fdata -relocs \ +# RUN: -tail-duplication=1 -tail-duplication-aggressive=1 \ +# RUN: -tail-duplication-const-copy-propagation=1 + + .globl a +a: + .cfi_startproc + jmpq *JT(,%rcx,8) +b: + jb d +# FDATA: 1 a #b# 1 a #d# 6 60 +e: + cmpl %eax, %ebx +f: + jmp g +# FDATA: 1 a #f# 1 a #g# 0 8 +d: + movl $0x1, %ebx + jmp e + jmp g +h: + jmp h +i: + jne j +g: + jmp g +j: + .cfi_endproc +.rodata +JT: + .quad b + .quad i