[CodeGen] Add -align-loops

Add the `-align-loops` option to `lib/CodeGen/CommandFlags.cpp`. It can replace `-x86-experimental-pref-loop-alignment=`. The loop alignment is only used by MachineBlockPlacement. The implementation uses a new `llvm::TargetOptions` field (`LoopAlignment`) for now, as an IR function attribute or module flags metadata may be overkill. This is the LLVM part of D106701.
2021-08-04 12:45:17 -07:00 · 2021-08-04 12:45:17 -07:00 · a194438615
parent 5edc886e90
commit a194438615
7 changed files with 64 additions and 4 deletions
--- a/llvm/include/llvm/CodeGen/CommandFlags.h
+++ b/llvm/include/llvm/CodeGen/CommandFlags.h
@ -138,6 +138,8 @@ bool getXRayOmitFunctionIndex();

 bool getDebugStrictDwarf();

+unsigned getAlignLoops();
+
 /// Create this object with static storage to register codegen-related command
 /// line options.
 struct RegisterCodeGenFlags {
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@ -1768,9 +1768,7 @@ public:
  Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }

  /// Return the preferred loop alignment.
-  virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const {
-    return PrefLoopAlignment;
-  }
+  virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const;

  /// Should loops be aligned even when the function is marked OptSize (but not
  /// MinSize).
--- a/llvm/include/llvm/Target/TargetOptions.h
+++ b/llvm/include/llvm/Target/TargetOptions.h
@ -328,6 +328,9 @@ namespace llvm {
    /// passed on the command line.
    std::string StackUsageOutput;

+    /// If greater than 0, override TargetLoweringBase::PrefLoopAlignment.
+    unsigned LoopAlignment = 0;
+
    /// FloatABIType - This setting is set when the -float-abi=xxx option is specified
    /// on the command line. This setting may either be Default, Soft, or Hard.
    /// Default selects the target's default behavior. Soft selects the ABI for
--- a/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/llvm/lib/CodeGen/CommandFlags.cpp
@ -94,6 +94,7 @@ CGOPT(bool, ValueTrackingVariableLocations)
 CGOPT(bool, ForceDwarfFrameSection)
 CGOPT(bool, XRayOmitFunctionIndex)
 CGOPT(bool, DebugStrictDwarf)
+CGOPT(unsigned, AlignLoops)

 codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
 #define CGBINDOPT(NAME)                                                        \
@ -452,6 +453,10 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
      "strict-dwarf", cl::desc("use strict dwarf"), cl::init(false));
  CGBINDOPT(DebugStrictDwarf);

+  static cl::opt<unsigned> AlignLoops("align-loops",
+                                      cl::desc("Default alignment for loops"));
+  CGBINDOPT(AlignLoops);
+
 #undef CGBINDOPT

  mc::RegisterMCTargetOptionsFlags();
@ -527,6 +532,7 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
  Options.ForceDwarfFrameSection = getForceDwarfFrameSection();
  Options.XRayOmitFunctionIndex = getXRayOmitFunctionIndex();
  Options.DebugStrictDwarf = getDebugStrictDwarf();
+  Options.LoopAlignment = getAlignLoops();

  Options.MCOptions = mc::InitMCTargetOptionsFromFlags();

--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@ -52,6 +52,7 @@
 #include "llvm/Support/MachineValueType.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
 #include "llvm/Transforms/Utils/SizeOpts.h"
 #include <algorithm>
 #include <cassert>
@ -2018,6 +2019,12 @@ bool TargetLoweringBase::isJumpTableRelative() const {
  return getTargetMachine().isPositionIndependent();
 }

+Align TargetLoweringBase::getPrefLoopAlignment(MachineLoop *ML) const {
+  if (TM.Options.LoopAlignment) // Nonzero means an explicit override is set.
+    return Align(TM.Options.LoopAlignment);
+  return PrefLoopAlignment; // No override: fall back to PrefLoopAlignment.
+}
+
 //===----------------------------------------------------------------------===//
 //  Reciprocal Estimates
 //===----------------------------------------------------------------------===//
--- a/llvm/test/CodeGen/RISCV/align-loops.ll
+++ b/llvm/test/CodeGen/RISCV/align-loops.ll
@ -0,0 +1,44 @@
+; RUN: llc < %s -mtriple=riscv64 | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -align-loops=16 | FileCheck %s -check-prefix=ALIGN_16
+; RUN: llc < %s -mtriple=riscv64 -align-loops=32 | FileCheck %s -check-prefix=ALIGN_32
+
+declare void @foo()
+
+define void @test(i32 %n, i32 %m) nounwind {
+; CHECK-LABEL:    test:
+; CHECK-NOT:        .p2align
+; CHECK:            ret
+
+; ALIGN_16-LABEL: test:
+; ALIGN_16:         .p2align 4{{$}}
+; ALIGN_16-NEXT:  .LBB0_1: # %outer
+; ALIGN_16:         .p2align 4{{$}}
+; ALIGN_16-NEXT:  .LBB0_2: # %inner
+
+; ALIGN_32-LABEL: test:
+; ALIGN_32:         .p2align 5{{$}}
+; ALIGN_32-NEXT:  .LBB0_1: # %outer
+; ALIGN_32:         .p2align 5{{$}}
+; ALIGN_32-NEXT:  .LBB0_2: # %inner
+entry:
+  br label %outer
+
+outer:
+  %outer.iv = phi i32 [0, %entry], [%outer.iv.next, %outer_bb]
+  br label %inner
+
+inner:
+  %inner.iv = phi i32 [0, %outer], [%inner.iv.next, %inner]
+  call void @foo()
+  %inner.iv.next = add i32 %inner.iv, 1
+  %inner.cond = icmp ne i32 %inner.iv.next, %m
+  br i1 %inner.cond, label %inner, label %outer_bb
+
+outer_bb:
+  %outer.iv.next = add i32 %outer.iv, 1
+  %outer.cond = icmp ne i32 %outer.iv.next, %n
+  br i1 %outer.cond, label %outer, label %exit
+
+exit:
+  ret void
+}
--- a/llvm/test/CodeGen/X86/innermost-loop-alignment.ll
+++ b/llvm/test/CodeGen/X86/innermost-loop-alignment.ll
@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i686-pc-linux-gnu | FileCheck %s -check-prefix=DEFAULT
 ; RUN: llc < %s -mtriple=i686-pc-linux-gnu -x86-experimental-pref-innermost-loop-alignment=5 | FileCheck %s -check-prefix=ALIGN32
-; RUN: llc < %s -mtriple=i686-pc-linux-gnu -x86-experimental-pref-loop-alignment=5 -x86-experimental-pref-innermost-loop-alignment=6 | FileCheck %s -check-prefix=ALIGN64
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -align-loops=32 -x86-experimental-pref-innermost-loop-alignment=6 | FileCheck %s -check-prefix=ALIGN64

 declare void @foo()