[BOLT] Tail duplication: disable const/copy propagation by default as a workaround

Summary:
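Disable const/copy propagation after tail duplication by default as a workaround for a bug; it can be re-enabled with -tail-duplication-const-copy-propagation.
Also add debug logging to aggressive tail duplication.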

(cherry picked from FBD32774744)
This commit is contained in:
Amir Ayupov 2021-12-01 14:05:05 -08:00 committed by Maksim Panchenko
parent 4f91538f57
commit 02145d20ab
2 changed files with 80 additions and 14 deletions

View File

@ -12,6 +12,8 @@
#include <numeric>
#define DEBUG_TYPE "taildup"
using namespace llvm;
namespace opts {
@ -37,6 +39,11 @@ static cl::opt<unsigned> TailDuplicationMaximumDuplication(
cl::desc("maximum size of duplicated blocks (in bytes)"), cl::ZeroOrMore,
cl::ReallyHidden, cl::init(64), cl::cat(BoltOptCategory));
// Controls whether constant and copy propagation runs after tail duplication.
// Off by default (cl::init(false)) as a bug workaround; pass
// -tail-duplication-const-copy-propagation to turn it back on.
static cl::opt<bool> TailDuplicationConstCopyPropagation(
"tail-duplication-const-copy-propagation",
cl::desc("enable const and copy propagation after tail duplication"),
cl::ReallyHidden, cl::init(false), cl::cat(BoltOptCategory));
} // namespace opts
namespace llvm {
@ -254,9 +261,14 @@ TailDuplication::aggressiveCodeToDuplicate(BinaryBasicBlock &BB) const {
std::vector<BinaryBasicBlock *> BlocksToDuplicate;
BinaryBasicBlock *CurrBB = &BB;
while (CurrBB) {
LLVM_DEBUG(dbgs() << "Aggressive tail duplication: adding "
<< CurrBB->getName() << " to duplication list\n";);
BlocksToDuplicate.push_back(CurrBB);
if (CurrBB->hasJumpTable()) {
LLVM_DEBUG(dbgs() << "Aggressive tail duplication: clearing duplication "
"list due to a JT in "
<< CurrBB->getName() << '\n';);
BlocksToDuplicate.clear();
break;
}
@ -273,8 +285,12 @@ TailDuplication::aggressiveCodeToDuplicate(BinaryBasicBlock &BB) const {
if (CurrBB->getConditionalSuccessor(false)->getLayoutIndex() ==
CurrBB->getLayoutIndex() + 1 ||
CurrBB->getConditionalSuccessor(true)->getLayoutIndex() ==
CurrBB->getLayoutIndex() + 1)
CurrBB->getLayoutIndex() + 1) {
LLVM_DEBUG(dbgs() << "Aggressive tail duplication: clearing "
"duplication list, can't find a simple stream at "
<< CurrBB->getName() << '\n';);
BlocksToDuplicate.clear();
}
break;
}
@ -291,8 +307,14 @@ TailDuplication::aggressiveCodeToDuplicate(BinaryBasicBlock &BB) const {
[](int value, BinaryBasicBlock *p) {
return value + p->getOriginalSize();
});
if (DuplicationByteCount > opts::TailDuplicationMaximumDuplication)
if (DuplicationByteCount > opts::TailDuplicationMaximumDuplication) {
LLVM_DEBUG(dbgs() << "Aggressive tail duplication: duplication byte count ("
<< DuplicationByteCount << ") exceeds maximum "
<< opts::TailDuplicationMaximumDuplication << '\n';);
BlocksToDuplicate.clear();
}
LLVM_DEBUG(dbgs() << "Aggressive tail duplication: found "
<< BlocksToDuplicate.size() << " blocks to duplicate\n";);
return BlocksToDuplicate;
}
@ -401,18 +423,22 @@ void TailDuplication::runOnFunction(BinaryFunction &Function) {
BlocksToDuplicate = aggressiveCodeToDuplicate(*Succ);
else
BlocksToDuplicate = moderateCodeToDuplicate(*Succ);
if (BlocksToDuplicate.size() > 0) {
PossibleDuplications++;
PossibleDuplicationsDynamicCount += BB->getExecutionCount();
std::vector<BinaryBasicBlock *> DuplicatedBlocks =
tailDuplicate(*BB, BlocksToDuplicate);
constantAndCopyPropagate(*BB, DuplicatedBlocks);
BinaryBasicBlock *FirstBB = BlocksToDuplicate[0];
if (FirstBB->pred_size() == 1) {
BinaryBasicBlock *PredBB = *FirstBB->pred_begin();
if (PredBB->succ_size() == 1)
constantAndCopyPropagate(*PredBB, BlocksToDuplicate);
}
if (BlocksToDuplicate.size() == 0)
continue;
PossibleDuplications++;
PossibleDuplicationsDynamicCount += BB->getExecutionCount();
std::vector<BinaryBasicBlock *> DuplicatedBlocks =
tailDuplicate(*BB, BlocksToDuplicate);
if (!opts::TailDuplicationConstCopyPropagation)
continue;
constantAndCopyPropagate(*BB, DuplicatedBlocks);
BinaryBasicBlock *FirstBB = BlocksToDuplicate[0];
if (FirstBB->pred_size() == 1) {
BinaryBasicBlock *PredBB = *FirstBB->pred_begin();
if (PredBB->succ_size() == 1)
constantAndCopyPropagate(*PredBB, BlocksToDuplicate);
}
}
}

View File

@ -0,0 +1,40 @@
# Reproduces a bug in aggressive tail duplication when const/copy propagation is enabled.
# XFAIL: *
# REQUIRES: system-linux
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
# RUN: link_fdata %s %t.o %t.fdata
# RUN: llvm-strip --strip-unneeded %t.o
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
# RUN: llvm-bolt %t.exe -o %t.out -data %t.fdata -relocs \
# RUN: -tail-duplication=1 -tail-duplication-aggressive=1 \
# RUN: -tail-duplication-const-copy-propagation=1
# Function a: starts with an indirect jump through the jump table JT
# (defined in .rodata at the end of this file).
.globl a
a:
.cfi_startproc
jmpq *JT(,%rcx,8)
# b is the first jump-table entry; it conditionally branches to d.
b:
jb d
# FDATA: 1 a #b# 1 a #d# 6 60
# e compares %eax with %ebx and falls through to f.
e:
cmpl %eax, %ebx
f:
jmp g
# FDATA: 1 a #f# 1 a #g# 0 8
# d writes 1 into %ebx and jumps back to e, so the compare at e reads the
# value set here. The second jmp follows an unconditional jump and is not
# reachable by fall-through.
d:
movl $0x1, %ebx
jmp e
jmp g
# h is a self-loop.
h:
jmp h
# i is the second jump-table entry; it branches to j or falls through to g.
i:
jne j
# g is a self-loop.
g:
jmp g
j:
.cfi_endproc
# Jump table used by the indirect jump at the start of a; entries are b and i.
.rodata
JT:
.quad b
.quad i