forked from OSchip/llvm-project
[CodeGen][AArch64] Add TargetInstrInfo hook to modify the TailDuplicateSize default threshold
Different targets might handle branch performance differently, so this patch allows targets to specify the TailDuplicateSize threshold. This threshold defines how small a block must be for it to still be duplicated into its predecessors, generating straight-line code instead of a branch. This patch also specifies the override values for the AArch64 target. Differential Revision: https://reviews.llvm.org/D95631
This commit is contained in:
parent
88d5c4c2ee
commit
cd880442ae
|
@ -1937,6 +1937,13 @@ public:
|
|||
return Formatter.get();
|
||||
}
|
||||
|
||||
/// Returns the target-specific default value for tail duplication.
|
||||
/// This value will be used if the tail-dup-placement-threshold argument is
|
||||
/// not provided.
|
||||
virtual unsigned getTailDuplicateSize(CodeGenOpt::Level OptLevel) const {
|
||||
// Generic fallback for targets that do not override this hook: the
// threshold is 4 when OptLevel is CodeGenOpt::Aggressive or higher, and 2
// at every lower optimization level.
return OptLevel >= CodeGenOpt::Aggressive ? 4 : 2;
|
||||
}
|
||||
|
||||
private:
|
||||
mutable std::unique_ptr<MIRFormatter> Formatter;
|
||||
unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;
|
||||
|
|
|
@ -3337,6 +3337,13 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
|
|||
TailDupSize = TailDupPlacementAggressiveThreshold;
|
||||
}
|
||||
|
||||
// If there's no threshold provided through options, query the target
|
||||
// information for a threshold instead.
|
||||
if (TailDupPlacementThreshold.getNumOccurrences() == 0 &&
|
||||
(PassConfig->getOptLevel() < CodeGenOpt::Aggressive ||
|
||||
TailDupPlacementAggressiveThreshold.getNumOccurrences() == 0))
|
||||
TailDupSize = TII->getTailDuplicateSize(PassConfig->getOptLevel());
|
||||
|
||||
if (allowTailDupPlacement()) {
|
||||
MPDT = &getAnalysis<MachinePostDominatorTree>();
|
||||
bool OptForSize = MF.getFunction().hasOptSize() ||
|
||||
|
|
|
@ -7183,6 +7183,11 @@ bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
|
|||
return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
|
||||
}
|
||||
|
||||
/// AArch64 override of the tail-duplication threshold hook.
/// Returns 6 when \p OptLevel is CodeGenOpt::Aggressive or higher, and 2 for
/// every lower optimization level.
unsigned int
|
||||
AArch64InstrInfo::getTailDuplicateSize(CodeGenOpt::Level OptLevel) const {
|
||||
return OptLevel >= CodeGenOpt::Aggressive ? 6 : 2;
|
||||
}
|
||||
|
||||
unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
|
||||
if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
|
||||
return AArch64::BLRNoIP;
|
||||
|
|
|
@ -299,6 +299,8 @@ public:
|
|||
Optional<ParamLoadedValue> describeLoadedValue(const MachineInstr &MI,
|
||||
Register Reg) const override;
|
||||
|
||||
unsigned int getTailDuplicateSize(CodeGenOpt::Level OptLevel) const override;
|
||||
|
||||
static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset,
|
||||
int64_t &NumBytes,
|
||||
int64_t &NumPredicateVectors,
|
||||
|
|
|
@ -0,0 +1,77 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=aarch64-none-linux -O2 < %s | FileCheck %s --check-prefix=CHECK-O2
|
||||
; RUN: llc -mtriple=aarch64-none-linux -O3 < %s | FileCheck %s --check-prefix=CHECK-O3
|
||||
|
||||
; RUN: llc -mtriple=aarch64-none-linux -tail-dup-size=4 < %s | FileCheck %s --check-prefix=CHECK-O2
|
||||
; RUN: llc -mtriple=aarch64-none-linux -tail-dup-placement-threshold=4 < %s | FileCheck %s --check-prefix=CHECK-O2
|
||||
; RUN: llc -mtriple=aarch64-none-linux -tail-dup-placement-threshold=6 < %s | FileCheck %s --check-prefix=CHECK-O3
|
||||
|
||||
%a = type { %a*, i32, %b }
|
||||
%b = type { %c }
|
||||
%c = type { i32, i32, [31 x i8] }
|
||||
|
||||
@global_ptr = dso_local local_unnamed_addr global %a* null, align 8
|
||||
@global_int = dso_local local_unnamed_addr global i32 0, align 4
|
||||
|
||||
; Exercises tail duplication of the %if.end block on AArch64.
; In the first set of expectations below (used by the -O2 RUN line), %if.end
; is emitted once at .LBB0_3 and %if.then reaches it with a branch.
; In the second set (used by the -O3 RUN line), the %if.end instruction
; sequence ending in the tail call is emitted twice, once after %if.then and
; once after .LBB0_2, so the branch to a shared tail disappears.
define dso_local void @testcase(%a** nocapture %arg){
|
||||
; CHECK-O2-LABEL: testcase:
|
||||
; CHECK-O2: // %bb.0: // %entry
|
||||
; CHECK-O2-NEXT: adrp x8, global_ptr
|
||||
; CHECK-O2-NEXT: ldr x9, [x8, :lo12:global_ptr]
|
||||
; CHECK-O2-NEXT: cbz x9, .LBB0_2
|
||||
; CHECK-O2-NEXT: // %bb.1: // %if.then
|
||||
; CHECK-O2-NEXT: ldr x9, [x9]
|
||||
; CHECK-O2-NEXT: str x9, [x0]
|
||||
; CHECK-O2-NEXT: ldr x8, [x8, :lo12:global_ptr]
|
||||
; CHECK-O2-NEXT: b .LBB0_3
|
||||
; CHECK-O2-NEXT: .LBB0_2:
|
||||
; CHECK-O2-NEXT: mov x8, xzr
|
||||
; CHECK-O2-NEXT: .LBB0_3: // %if.end
|
||||
; CHECK-O2-NEXT: adrp x9, global_int
|
||||
; CHECK-O2-NEXT: ldr w1, [x9, :lo12:global_int]
|
||||
; CHECK-O2-NEXT: add x2, x8, #16 // =16
|
||||
; CHECK-O2-NEXT: mov w0, #10
|
||||
; CHECK-O2-NEXT: b externalfunc
|
||||
;
|
||||
; CHECK-O3-LABEL: testcase:
|
||||
; CHECK-O3: // %bb.0: // %entry
|
||||
; CHECK-O3-NEXT: adrp x8, global_ptr
|
||||
; CHECK-O3-NEXT: ldr x9, [x8, :lo12:global_ptr]
|
||||
; CHECK-O3-NEXT: cbz x9, .LBB0_2
|
||||
; CHECK-O3-NEXT: // %bb.1: // %if.then
|
||||
; CHECK-O3-NEXT: ldr x9, [x9]
|
||||
; CHECK-O3-NEXT: str x9, [x0]
|
||||
; CHECK-O3-NEXT: ldr x8, [x8, :lo12:global_ptr]
|
||||
; CHECK-O3-NEXT: adrp x9, global_int
|
||||
; CHECK-O3-NEXT: ldr w1, [x9, :lo12:global_int]
|
||||
; CHECK-O3-NEXT: add x2, x8, #16 // =16
|
||||
; CHECK-O3-NEXT: mov w0, #10
|
||||
; CHECK-O3-NEXT: b externalfunc
|
||||
; CHECK-O3-NEXT: .LBB0_2:
|
||||
; CHECK-O3-NEXT: mov x8, xzr
|
||||
; CHECK-O3-NEXT: adrp x9, global_int
|
||||
; CHECK-O3-NEXT: ldr w1, [x9, :lo12:global_int]
|
||||
; CHECK-O3-NEXT: add x2, x8, #16 // =16
|
||||
; CHECK-O3-NEXT: mov w0, #10
|
||||
; CHECK-O3-NEXT: b externalfunc
|
||||
entry:
|
||||
%0 = load %a*, %a** @global_ptr, align 8
|
||||
%cmp.not = icmp eq %a* %0, null
|
||||
br i1 %cmp.not, label %if.end, label %if.then
|
||||
|
||||
if.then: ; preds = %entry
|
||||
%1 = getelementptr inbounds %a, %a* %0, i64 0, i32 0
|
||||
%2 = load %a*, %a** %1, align 8
|
||||
store %a* %2, %a** %arg, align 8
|
||||
%.pre = load %a*, %a** @global_ptr, align 8
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %if.then, %entry
|
||||
%3 = phi %a* [ %.pre, %if.then ], [ null, %entry ]
|
||||
%4 = load i32, i32* @global_int, align 4
|
||||
%5 = getelementptr inbounds %a, %a* %3, i64 0, i32 2, i32 0, i32 1
|
||||
tail call void @externalfunc(i32 10, i32 %4, i32* nonnull %5)
|
||||
ret void
|
||||
|
||||
}
|
||||
|
||||
declare dso_local void @externalfunc(i32, i32, i32*)
|
Loading…
Reference in New Issue