[X86] Add experimental option to separately tune alignment of innermost loops

We already have an experimental option to tune loop alignment. Its impact
is very wide (and there is a suspicion that it's not always profitable). We want
to have something narrower to play with. This patch adds a similar option that
overrides the preferred alignment for innermost loops only. This is for
experimental purposes; the default values do not change the existing behavior.

Differential Revision: https://reviews.llvm.org/D94895
Reviewed By: pengfei
This commit is contained in:
Max Kazantsev 2021-01-21 11:15:16 +07:00
parent a8b96eadfd
commit d6bb96e677
3 changed files with 77 additions and 0 deletions

View File

@ -35,6 +35,7 @@
#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetLowering.h"
@ -76,6 +77,14 @@ static cl::opt<int> ExperimentalPrefLoopAlignment(
" of the loop header PC will be 0)."), " of the loop header PC will be 0)."),
cl::Hidden); cl::Hidden);
// Experimental knob: preferred alignment (as log2 bytes) for innermost loops.
// X86TargetLowering::getPrefLoopAlignment only consults this value when the
// flag actually occurs on the command line, so the cl::init(4) default by
// itself does not alter the alignment that gets chosen.
static cl::opt<int> ExperimentalPrefInnermostLoopAlignment(
"x86-experimental-pref-innermost-loop-alignment", cl::init(4),
cl::desc(
"Sets the preferable loop alignment for experiments (as log2 bytes) "
"for innermost loops only. If specified, this option overrides "
"alignment set by x86-experimental-pref-loop-alignment."),
cl::Hidden);
static cl::opt<bool> MulConstantOptimization( static cl::opt<bool> MulConstantOptimization(
"mul-constant-optimization", cl::init(true), "mul-constant-optimization", cl::init(true),
cl::desc("Replace 'mul x, Const' with more effective instructions like " cl::desc("Replace 'mul x, Const' with more effective instructions like "
@ -51696,3 +51705,10 @@ X86TargetLowering::getStackProbeSize(MachineFunction &MF) const {
.getAsInteger(0, StackProbeSize); .getAsInteger(0, StackProbeSize);
return StackProbeSize; return StackProbeSize;
} }
/// Return the preferred alignment for loop \p ML.
///
/// If \p ML is an innermost loop and
/// -x86-experimental-pref-innermost-loop-alignment was given explicitly on the
/// command line, that option's value (interpreted as log2 bytes) wins.
/// In every other case the generic preferred loop alignment is returned.
///
/// \param ML the loop being queried; may be null when the caller has no loop
///           at hand, in which case the generic alignment is returned.
Align X86TargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
  // This hook is callable without a loop argument (see the argument-less call
  // to the base implementation below), so guard against a null ML before
  // asking whether it is innermost.
  if (ML && ML->isInnermost() &&
      ExperimentalPrefInnermostLoopAlignment.getNumOccurrences())
    return Align(1ULL << ExperimentalPrefInnermostLoopAlignment);
  return TargetLowering::getPrefLoopAlignment();
}

View File

@ -1408,6 +1408,8 @@ namespace llvm {
SDValue Addr, SelectionDAG &DAG) SDValue Addr, SelectionDAG &DAG)
const override; const override;
/// Preferred alignment for loop \p ML. The X86 override lets an experimental
/// command-line option pick a separate alignment for innermost loops.
Align getPrefLoopAlignment(MachineLoop *ML) const override;
protected: protected:
std::pair<const TargetRegisterClass *, uint8_t> std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI, findRepresentativeClass(const TargetRegisterInfo *TRI,

View File

@ -0,0 +1,59 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-pc-linux-gnu | FileCheck %s -check-prefix=DEFAULT
; RUN: llc < %s -mtriple=i686-pc-linux-gnu -x86-experimental-pref-innermost-loop-alignment=5 | FileCheck %s -check-prefix=ALIGN32
; RUN: llc < %s -mtriple=i686-pc-linux-gnu -x86-experimental-pref-loop-alignment=5 -x86-experimental-pref-innermost-loop-alignment=6 | FileCheck %s -check-prefix=ALIGN64
; External callee invoked from the body of the inner loop in @test.
declare void @foo()
; Two-level loop nest where %outer encloses %inner and %inner calls @foo on
; every iteration. The assertions below verify the .p2align directive emitted
; in front of each loop header for the three RUN configurations above.
define void @test(i32 %n, i32 %m) {
; DEFAULT-LABEL: test:
; DEFAULT: .p2align 4, 0x90
; DEFAULT-NEXT: .LBB0_1: # %outer
; DEFAULT-NEXT: # =>This Loop Header: Depth=1
; DEFAULT-NEXT: # Child Loop BB0_2 Depth 2
; DEFAULT: .p2align 4, 0x90
; DEFAULT-NEXT: .LBB0_2: # %inner
; DEFAULT-NEXT: # Parent Loop BB0_1 Depth=1
; ALIGN32-LABEL: test:
; ALIGN32: .p2align 4, 0x90
; ALIGN32-NEXT: .LBB0_1: # %outer
; ALIGN32-NEXT: # =>This Loop Header: Depth=1
; ALIGN32-NEXT: # Child Loop BB0_2 Depth 2
; ALIGN32: .p2align 5, 0x90
; ALIGN32-NEXT: .LBB0_2: # %inner
; ALIGN32-NEXT: # Parent Loop BB0_1 Depth=1
; ALIGN32-NEXT: # => This Inner Loop Header: Depth=2
; ALIGN64-LABEL: test:
; ALIGN64: .p2align 5, 0x90
; ALIGN64-NEXT: .LBB0_1: # %outer
; ALIGN64-NEXT: # =>This Loop Header: Depth=1
; ALIGN64-NEXT: # Child Loop BB0_2 Depth 2
; ALIGN64: .p2align 6, 0x90
; ALIGN64-NEXT: .LBB0_2: # %inner
; ALIGN64-NEXT: # Parent Loop BB0_1 Depth=1
; ALIGN64-NEXT: # => This Inner Loop Header: Depth=2
entry:
br label %outer
outer:
%outer.iv = phi i32 [0, %entry], [%outer.iv.next, %outer_bb]
br label %inner
inner:
%inner.iv = phi i32 [0, %outer], [%inner.iv.next, %inner]
call void @foo()
%inner.iv.next = add i32 %inner.iv, 1
%inner.cond = icmp ne i32 %inner.iv.next, %m
br i1 %inner.cond, label %inner, label %outer_bb
outer_bb:
%outer.iv.next = add i32 %outer.iv, 1
%outer.cond = icmp ne i32 %outer.iv.next, %n
br i1 %outer.cond, label %outer, label %exit
exit:
ret void
}