[ARM64][fast-isel] Disable target-specific optimizations at -O0.

Functionally, this patch disables the dead register elimination pass and the load/store pair optimization pass at -O0. The ILP optimizations don't require the optimization level to be checked because the call to addILPOpts is already predicated on the necessary check. The AdvSIMDScalar pass is disabled by default at all optimization levels, and this patch leaves it disabled by default. Also, move the command-line options into ARM64TargetMachine.cpp and add a few additional flags to aid in debugging. This fixes an issue with the -debug-pass=Structure flag where passes were printed but not actually run (i.e., the AdvSIMDScalar pass). llvm-svn: 208223
2014-05-07 16:41:55 +00:00 · 2014-05-07 16:41:55 +00:00 · 788e5e3d7c
parent 3bf77c5d30
commit 788e5e3d7c
5 changed files with 36 additions and 35 deletions
--- a/llvm/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp
+++ b/llvm/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp
@ -49,10 +49,6 @@ using namespace llvm;

 #define DEBUG_TYPE "arm64-simd-scalar"

-static cl::opt<bool>
-AdvSIMDScalar("arm64-simd-scalar",
-              cl::desc("enable use of AdvSIMD scalar integer instructions"),
-              cl::init(false), cl::Hidden);
 // Allow forcing all i64 operations with equivalent SIMD instructions to use
 // them. For stress-testing the transformation function.
 static cl::opt<bool>
@ -368,10 +364,6 @@ bool ARM64AdvSIMDScalar::processMachineBasicBlock(MachineBasicBlock *MBB) {

 // runOnMachineFunction - Pass entry point from PassManager.
 bool ARM64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) {
-  // Early exit if pass disabled.
-  if (!AdvSIMDScalar)
-    return false;
-
  bool Changed = false;
  DEBUG(dbgs() << "***** ARM64AdvSIMDScalar *****\n");

--- a/llvm/lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp
@ -40,8 +40,6 @@ STATISTIC(NumPreFolded, "Number of pre-index updates folded");
 STATISTIC(NumUnscaledPairCreated,
          "Number of load/store from unscaled generated");

-static cl::opt<bool> DoLoadStoreOpt("arm64-load-store-opt", cl::init(true),
-                                    cl::Hidden);
 static cl::opt<unsigned> ScanLimit("arm64-load-store-scan-limit", cl::init(20),
                                   cl::Hidden);

@ -923,10 +921,6 @@ bool ARM64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
 }

 bool ARM64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
-  // Early exit if pass disabled.
-  if (!DoLoadStoreOpt)
-    return false;
-
  const TargetMachine &TM = Fn.getTarget();
  TII = static_cast<const ARM64InstrInfo *>(TM.getInstrInfo());
  TRI = TM.getRegisterInfo();
--- a/llvm/lib/Target/ARM64/ARM64TargetMachine.cpp
+++ b/llvm/lib/Target/ARM64/ARM64TargetMachine.cpp
@ -20,24 +20,30 @@
 #include "llvm/Transforms/Scalar.h"
 using namespace llvm;

-static cl::opt<bool> EnableCCMP("arm64-ccmp",
-                                cl::desc("Enable the CCMP formation pass"),
-                                cl::init(true));
-
-static cl::opt<bool> EnableStPairSuppress("arm64-stp-suppress", cl::Hidden,
-                                          cl::desc("Suppress STP for ARM64"),
-                                          cl::init(true));
+static cl::opt<bool>
+EnableCCMP("arm64-ccmp", cl::desc("Enable the CCMP formation pass"),
+           cl::init(true), cl::Hidden);

 static cl::opt<bool>
-EnablePromoteConstant("arm64-promote-const", cl::Hidden,
-                      cl::desc("Enable the promote constant pass"),
-                      cl::init(true));
+EnableEarlyIfConvert("arm64-early-ifcvt", cl::desc("Enable the early if "
+                     "converter pass"), cl::init(true), cl::Hidden);

 static cl::opt<bool>
-EnableCollectLOH("arm64-collect-loh", cl::Hidden,
-                 cl::desc("Enable the pass that emits the linker"
-                          " optimization hints (LOH)"),
-                 cl::init(true));
+EnableStPairSuppress("arm64-stp-suppress", cl::desc("Suppress STP for ARM64"),
+                     cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+EnableAdvSIMDScalar("arm64-simd-scalar", cl::desc("Enable use of AdvSIMD scalar"
+                    " integer instructions"), cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+EnablePromoteConstant("arm64-promote-const", cl::desc("Enable the promote "
+                      "constant pass"), cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+EnableCollectLOH("arm64-collect-loh", cl::desc("Enable the pass that emits the"
+                 " linker optimization hints (LOH)"), cl::init(true),
+                 cl::Hidden);

 static cl::opt<bool>
 EnableDeadRegisterElimination("arm64-dead-def-elimination", cl::Hidden,
@ -47,6 +53,10 @@ EnableDeadRegisterElimination("arm64-dead-def-elimination", cl::Hidden,
                                       " register"),
                              cl::init(true));

+static cl::opt<bool>
+EnableLoadStoreOpt("arm64-load-store-opt", cl::desc("Enable the load/store pair"
+                   " optimization pass"), cl::init(true), cl::Hidden);
+
 extern "C" void LLVMInitializeARM64Target() {
  // Register the target.
  RegisterTargetMachine<ARM64leTargetMachine> X(TheARM64leTarget);
@ -159,6 +169,7 @@ bool ARM64PassConfig::addInstSelector() {
 bool ARM64PassConfig::addILPOpts() {
  if (EnableCCMP)
    addPass(createARM64ConditionalCompares());
+  if (EnableEarlyIfConvert)
    addPass(&EarlyIfConverterID);
  if (EnableStPairSuppress)
    addPass(createARM64StorePairSuppressPass());
@ -167,13 +178,14 @@ bool ARM64PassConfig::addILPOpts() {

 bool ARM64PassConfig::addPreRegAlloc() {
  // Use AdvSIMD scalar instructions whenever profitable.
+  if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar)
    addPass(createARM64AdvSIMDScalar());
  return true;
 }

 bool ARM64PassConfig::addPostRegAlloc() {
  // Change dead register definitions to refer to the zero register.
-  if (EnableDeadRegisterElimination)
+  if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
    addPass(createARM64DeadRegisterDefinitions());
  return true;
 }
@ -182,6 +194,7 @@ bool ARM64PassConfig::addPreSched2() {
  // Expand some pseudo instructions to allow proper scheduling.
  addPass(createARM64ExpandPseudoPass());
  // Use load/store pair instructions when possible.
+  if (TM->getOptLevel() != CodeGenOpt::None && EnableLoadStoreOpt)
    addPass(createARM64LoadStoreOptimizationPass());
  return true;
 }
--- a/llvm/test/CodeGen/ARM64/abi_align.ll
+++ b/llvm/test/CodeGen/ARM64/abi_align.ll
@ -490,7 +490,9 @@ entry:
 ; FAST: sub sp, sp, #48
 ; FAST: mov x[[ADDR:[0-9]+]], sp
 ; FAST: str {{w[0-9]+}}, [x[[ADDR]], #16]
-; FAST: stp {{x[0-9]+}}, {{x[0-9]+}}, [x[[ADDR]]]
+; Load/Store opt is disabled with -O0, so the i128 is split.
+; FAST: str {{x[0-9]+}}, [x[[ADDR]], #8]
+; FAST: str {{x[0-9]+}}, [x[[ADDR]]]
  %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
  %call = tail call i32 @callee_i128_split(i32 1, i32 2, i32 3, i32 4, i32 5,
                                           i32 6, i32 7, i128 %0, i32 8) #5
--- a/llvm/test/CodeGen/ARM64/fast-isel-conversion.ll
+++ b/llvm/test/CodeGen/ARM64/fast-isel-conversion.ll
@ -60,7 +60,7 @@ entry:
 ; CHECK: mov x3, x0
 ; CHECK: ubfx x3, x3, #0, #32
 ; CHECK: str x3, [sp]
-; CHECK: ldr x0, [sp], #16
+; CHECK: ldr x0, [sp]
 ; CHECK: ret
  %a.addr = alloca i8, align 1
  %b.addr = alloca i16, align 2
@ -117,7 +117,7 @@ entry:
 ; CHECK: mov x3, x0
 ; CHECK: sxtw x3, w3
 ; CHECK: str x3, [sp]
-; CHECK: ldr x0, [sp], #16
+; CHECK: ldr x0, [sp]
 ; CHECK: ret
  %a.addr = alloca i8, align 1
  %b.addr = alloca i16, align 2