forked from OSchip/llvm-project
[ARM64][fast-isel] Disable target-specific optimizations at -O0.
Functionally, this patch disables the dead register elimination pass and the load/store pair optimization pass at -O0. The ILP optimizations don't require the optimization level to be checked because the call to addILPOpts is already predicated on the necessary check. The AdvSIMDScalar pass is disabled by default at all optimization levels, and this patch leaves that pass disabled by default. Also, move the command-line options into ARM64TargetMachine.cpp and add a few additional flags to aid in debugging. This fixes an issue with the -debug-pass=Structure flag where passes were printed but not actually run (i.e., the AdvSIMDScalar pass). llvm-svn: 208223
This commit is contained in:
parent
3bf77c5d30
commit
788e5e3d7c
|
@ -49,10 +49,6 @@ using namespace llvm;
|
||||||
|
|
||||||
#define DEBUG_TYPE "arm64-simd-scalar"
|
#define DEBUG_TYPE "arm64-simd-scalar"
|
||||||
|
|
||||||
static cl::opt<bool>
|
|
||||||
AdvSIMDScalar("arm64-simd-scalar",
|
|
||||||
cl::desc("enable use of AdvSIMD scalar integer instructions"),
|
|
||||||
cl::init(false), cl::Hidden);
|
|
||||||
// Allow forcing all i64 operations with equivalent SIMD instructions to use
|
// Allow forcing all i64 operations with equivalent SIMD instructions to use
|
||||||
// them. For stress-testing the transformation function.
|
// them. For stress-testing the transformation function.
|
||||||
static cl::opt<bool>
|
static cl::opt<bool>
|
||||||
|
@ -368,10 +364,6 @@ bool ARM64AdvSIMDScalar::processMachineBasicBlock(MachineBasicBlock *MBB) {
|
||||||
|
|
||||||
// runOnMachineFunction - Pass entry point from PassManager.
|
// runOnMachineFunction - Pass entry point from PassManager.
|
||||||
bool ARM64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) {
|
bool ARM64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) {
|
||||||
// Early exit if pass disabled.
|
|
||||||
if (!AdvSIMDScalar)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
bool Changed = false;
|
bool Changed = false;
|
||||||
DEBUG(dbgs() << "***** ARM64AdvSIMDScalar *****\n");
|
DEBUG(dbgs() << "***** ARM64AdvSIMDScalar *****\n");
|
||||||
|
|
||||||
|
|
|
@ -40,8 +40,6 @@ STATISTIC(NumPreFolded, "Number of pre-index updates folded");
|
||||||
STATISTIC(NumUnscaledPairCreated,
|
STATISTIC(NumUnscaledPairCreated,
|
||||||
"Number of load/store from unscaled generated");
|
"Number of load/store from unscaled generated");
|
||||||
|
|
||||||
static cl::opt<bool> DoLoadStoreOpt("arm64-load-store-opt", cl::init(true),
|
|
||||||
cl::Hidden);
|
|
||||||
static cl::opt<unsigned> ScanLimit("arm64-load-store-scan-limit", cl::init(20),
|
static cl::opt<unsigned> ScanLimit("arm64-load-store-scan-limit", cl::init(20),
|
||||||
cl::Hidden);
|
cl::Hidden);
|
||||||
|
|
||||||
|
@ -923,10 +921,6 @@ bool ARM64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ARM64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
|
bool ARM64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
|
||||||
// Early exit if pass disabled.
|
|
||||||
if (!DoLoadStoreOpt)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
const TargetMachine &TM = Fn.getTarget();
|
const TargetMachine &TM = Fn.getTarget();
|
||||||
TII = static_cast<const ARM64InstrInfo *>(TM.getInstrInfo());
|
TII = static_cast<const ARM64InstrInfo *>(TM.getInstrInfo());
|
||||||
TRI = TM.getRegisterInfo();
|
TRI = TM.getRegisterInfo();
|
||||||
|
|
|
@ -20,24 +20,30 @@
|
||||||
#include "llvm/Transforms/Scalar.h"
|
#include "llvm/Transforms/Scalar.h"
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
static cl::opt<bool> EnableCCMP("arm64-ccmp",
|
static cl::opt<bool>
|
||||||
cl::desc("Enable the CCMP formation pass"),
|
EnableCCMP("arm64-ccmp", cl::desc("Enable the CCMP formation pass"),
|
||||||
cl::init(true));
|
cl::init(true), cl::Hidden);
|
||||||
|
|
||||||
static cl::opt<bool> EnableStPairSuppress("arm64-stp-suppress", cl::Hidden,
|
|
||||||
cl::desc("Suppress STP for ARM64"),
|
|
||||||
cl::init(true));
|
|
||||||
|
|
||||||
static cl::opt<bool>
|
static cl::opt<bool>
|
||||||
EnablePromoteConstant("arm64-promote-const", cl::Hidden,
|
EnableEarlyIfConvert("arm64-early-ifcvt", cl::desc("Enable the early if "
|
||||||
cl::desc("Enable the promote constant pass"),
|
"converter pass"), cl::init(true), cl::Hidden);
|
||||||
cl::init(true));
|
|
||||||
|
|
||||||
static cl::opt<bool>
|
static cl::opt<bool>
|
||||||
EnableCollectLOH("arm64-collect-loh", cl::Hidden,
|
EnableStPairSuppress("arm64-stp-suppress", cl::desc("Suppress STP for ARM64"),
|
||||||
cl::desc("Enable the pass that emits the linker"
|
cl::init(true), cl::Hidden);
|
||||||
" optimization hints (LOH)"),
|
|
||||||
cl::init(true));
|
static cl::opt<bool>
|
||||||
|
EnableAdvSIMDScalar("arm64-simd-scalar", cl::desc("Enable use of AdvSIMD scalar"
|
||||||
|
" integer instructions"), cl::init(false), cl::Hidden);
|
||||||
|
|
||||||
|
static cl::opt<bool>
|
||||||
|
EnablePromoteConstant("arm64-promote-const", cl::desc("Enable the promote "
|
||||||
|
"constant pass"), cl::init(true), cl::Hidden);
|
||||||
|
|
||||||
|
static cl::opt<bool>
|
||||||
|
EnableCollectLOH("arm64-collect-loh", cl::desc("Enable the pass that emits the"
|
||||||
|
" linker optimization hints (LOH)"), cl::init(true),
|
||||||
|
cl::Hidden);
|
||||||
|
|
||||||
static cl::opt<bool>
|
static cl::opt<bool>
|
||||||
EnableDeadRegisterElimination("arm64-dead-def-elimination", cl::Hidden,
|
EnableDeadRegisterElimination("arm64-dead-def-elimination", cl::Hidden,
|
||||||
|
@ -47,6 +53,10 @@ EnableDeadRegisterElimination("arm64-dead-def-elimination", cl::Hidden,
|
||||||
" register"),
|
" register"),
|
||||||
cl::init(true));
|
cl::init(true));
|
||||||
|
|
||||||
|
static cl::opt<bool>
|
||||||
|
EnableLoadStoreOpt("arm64-load-store-opt", cl::desc("Enable the load/store pair"
|
||||||
|
" optimization pass"), cl::init(true), cl::Hidden);
|
||||||
|
|
||||||
extern "C" void LLVMInitializeARM64Target() {
|
extern "C" void LLVMInitializeARM64Target() {
|
||||||
// Register the target.
|
// Register the target.
|
||||||
RegisterTargetMachine<ARM64leTargetMachine> X(TheARM64leTarget);
|
RegisterTargetMachine<ARM64leTargetMachine> X(TheARM64leTarget);
|
||||||
|
@ -159,7 +169,8 @@ bool ARM64PassConfig::addInstSelector() {
|
||||||
bool ARM64PassConfig::addILPOpts() {
|
bool ARM64PassConfig::addILPOpts() {
|
||||||
if (EnableCCMP)
|
if (EnableCCMP)
|
||||||
addPass(createARM64ConditionalCompares());
|
addPass(createARM64ConditionalCompares());
|
||||||
addPass(&EarlyIfConverterID);
|
if (EnableEarlyIfConvert)
|
||||||
|
addPass(&EarlyIfConverterID);
|
||||||
if (EnableStPairSuppress)
|
if (EnableStPairSuppress)
|
||||||
addPass(createARM64StorePairSuppressPass());
|
addPass(createARM64StorePairSuppressPass());
|
||||||
return true;
|
return true;
|
||||||
|
@ -167,13 +178,14 @@ bool ARM64PassConfig::addILPOpts() {
|
||||||
|
|
||||||
bool ARM64PassConfig::addPreRegAlloc() {
|
bool ARM64PassConfig::addPreRegAlloc() {
|
||||||
// Use AdvSIMD scalar instructions whenever profitable.
|
// Use AdvSIMD scalar instructions whenever profitable.
|
||||||
addPass(createARM64AdvSIMDScalar());
|
if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar)
|
||||||
|
addPass(createARM64AdvSIMDScalar());
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ARM64PassConfig::addPostRegAlloc() {
|
bool ARM64PassConfig::addPostRegAlloc() {
|
||||||
// Change dead register definitions to refer to the zero register.
|
// Change dead register definitions to refer to the zero register.
|
||||||
if (EnableDeadRegisterElimination)
|
if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
|
||||||
addPass(createARM64DeadRegisterDefinitions());
|
addPass(createARM64DeadRegisterDefinitions());
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -182,7 +194,8 @@ bool ARM64PassConfig::addPreSched2() {
|
||||||
// Expand some pseudo instructions to allow proper scheduling.
|
// Expand some pseudo instructions to allow proper scheduling.
|
||||||
addPass(createARM64ExpandPseudoPass());
|
addPass(createARM64ExpandPseudoPass());
|
||||||
// Use load/store pair instructions when possible.
|
// Use load/store pair instructions when possible.
|
||||||
addPass(createARM64LoadStoreOptimizationPass());
|
if (TM->getOptLevel() != CodeGenOpt::None && EnableLoadStoreOpt)
|
||||||
|
addPass(createARM64LoadStoreOptimizationPass());
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -490,7 +490,9 @@ entry:
|
||||||
; FAST: sub sp, sp, #48
|
; FAST: sub sp, sp, #48
|
||||||
; FAST: mov x[[ADDR:[0-9]+]], sp
|
; FAST: mov x[[ADDR:[0-9]+]], sp
|
||||||
; FAST: str {{w[0-9]+}}, [x[[ADDR]], #16]
|
; FAST: str {{w[0-9]+}}, [x[[ADDR]], #16]
|
||||||
; FAST: stp {{x[0-9]+}}, {{x[0-9]+}}, [x[[ADDR]]]
|
; Load/Store opt is disabled with -O0, so the i128 is split.
|
||||||
|
; FAST: str {{x[0-9]+}}, [x[[ADDR]], #8]
|
||||||
|
; FAST: str {{x[0-9]+}}, [x[[ADDR]]]
|
||||||
%0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
|
%0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
|
||||||
%call = tail call i32 @callee_i128_split(i32 1, i32 2, i32 3, i32 4, i32 5,
|
%call = tail call i32 @callee_i128_split(i32 1, i32 2, i32 3, i32 4, i32 5,
|
||||||
i32 6, i32 7, i128 %0, i32 8) #5
|
i32 6, i32 7, i128 %0, i32 8) #5
|
||||||
|
|
|
@ -60,7 +60,7 @@ entry:
|
||||||
; CHECK: mov x3, x0
|
; CHECK: mov x3, x0
|
||||||
; CHECK: ubfx x3, x3, #0, #32
|
; CHECK: ubfx x3, x3, #0, #32
|
||||||
; CHECK: str x3, [sp]
|
; CHECK: str x3, [sp]
|
||||||
; CHECK: ldr x0, [sp], #16
|
; CHECK: ldr x0, [sp]
|
||||||
; CHECK: ret
|
; CHECK: ret
|
||||||
%a.addr = alloca i8, align 1
|
%a.addr = alloca i8, align 1
|
||||||
%b.addr = alloca i16, align 2
|
%b.addr = alloca i16, align 2
|
||||||
|
@ -117,7 +117,7 @@ entry:
|
||||||
; CHECK: mov x3, x0
|
; CHECK: mov x3, x0
|
||||||
; CHECK: sxtw x3, w3
|
; CHECK: sxtw x3, w3
|
||||||
; CHECK: str x3, [sp]
|
; CHECK: str x3, [sp]
|
||||||
; CHECK: ldr x0, [sp], #16
|
; CHECK: ldr x0, [sp]
|
||||||
; CHECK: ret
|
; CHECK: ret
|
||||||
%a.addr = alloca i8, align 1
|
%a.addr = alloca i8, align 1
|
||||||
%b.addr = alloca i16, align 2
|
%b.addr = alloca i16, align 2
|
||||||
|
|
Loading…
Reference in New Issue