forked from OSchip/llvm-project
[ARM] Improve detection of fallthrough when aligning blocks
We align non-fallthrough branches under Cortex-M at O3 to lead to fewer instruction fetches. This improves that for the block after a LE or LETP. These blocks will still have terminating branches until the LowOverheadLoops pass is run (as they are not handled by analyzeBranch, the branch is not removed until later), so canFallThrough will return false. These extra branches will eventually be removed, leaving a fallthrough, so treat them as such and don't add unnecessary alignments. Differential Revision: https://reviews.llvm.org/D107810
This commit is contained in:
parent
1b49a72de9
commit
bb2d23dcd4
|
@ -18,6 +18,7 @@
|
||||||
#include "ARMMachineFunctionInfo.h"
|
#include "ARMMachineFunctionInfo.h"
|
||||||
#include "ARMSubtarget.h"
|
#include "ARMSubtarget.h"
|
||||||
#include "MCTargetDesc/ARMBaseInfo.h"
|
#include "MCTargetDesc/ARMBaseInfo.h"
|
||||||
|
#include "MVETailPredUtils.h"
|
||||||
#include "Thumb2InstrInfo.h"
|
#include "Thumb2InstrInfo.h"
|
||||||
#include "Utils/ARMBaseInfo.h"
|
#include "Utils/ARMBaseInfo.h"
|
||||||
#include "llvm/ADT/DenseMap.h"
|
#include "llvm/ADT/DenseMap.h"
|
||||||
|
@ -340,12 +341,12 @@ LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() {
|
||||||
// Align blocks where the previous block does not fall through. This may add
|
// Align blocks where the previous block does not fall through. This may add
|
||||||
// extra NOP's but they will not be executed. It uses the PrefLoopAlignment as a
|
// extra NOP's but they will not be executed. It uses the PrefLoopAlignment as a
|
||||||
// measure of how much to align, and only runs at CodeGenOpt::Aggressive.
|
// measure of how much to align, and only runs at CodeGenOpt::Aggressive.
|
||||||
static bool AlignBlocks(MachineFunction *MF) {
|
static bool AlignBlocks(MachineFunction *MF, const ARMSubtarget *STI) {
|
||||||
if (MF->getTarget().getOptLevel() != CodeGenOpt::Aggressive ||
|
if (MF->getTarget().getOptLevel() != CodeGenOpt::Aggressive ||
|
||||||
MF->getFunction().hasOptSize())
|
MF->getFunction().hasOptSize())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
auto *TLI = MF->getSubtarget().getTargetLowering();
|
auto *TLI = STI->getTargetLowering();
|
||||||
const Align Alignment = TLI->getPrefLoopAlignment();
|
const Align Alignment = TLI->getPrefLoopAlignment();
|
||||||
if (Alignment < 4)
|
if (Alignment < 4)
|
||||||
return false;
|
return false;
|
||||||
|
@ -357,7 +358,25 @@ static bool AlignBlocks(MachineFunction *MF) {
|
||||||
Changed = true;
|
Changed = true;
|
||||||
MBB.setAlignment(Alignment);
|
MBB.setAlignment(Alignment);
|
||||||
}
|
}
|
||||||
|
|
||||||
PrevCanFallthough = MBB.canFallThrough();
|
PrevCanFallthough = MBB.canFallThrough();
|
||||||
|
|
||||||
|
// For LOB's, the ARMLowOverheadLoops pass may remove the unconditional
|
||||||
|
// branch later in the pipeline.
|
||||||
|
if (STI->hasLOB()) {
|
||||||
|
for (const auto &MI : reverse(MBB.terminators())) {
|
||||||
|
if (MI.getOpcode() == ARM::t2B &&
|
||||||
|
MI.getOperand(0).getMBB() == MBB.getNextNode())
|
||||||
|
continue;
|
||||||
|
if (isLoopStart(MI) || MI.getOpcode() == ARM::t2LoopEnd ||
|
||||||
|
MI.getOpcode() == ARM::t2LoopEndDec) {
|
||||||
|
PrevCanFallthough = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// Any other terminator - nothing to do
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return Changed;
|
return Changed;
|
||||||
|
@ -406,7 +425,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Align any non-fallthrough blocks
|
// Align any non-fallthrough blocks
|
||||||
MadeChange |= AlignBlocks(MF);
|
MadeChange |= AlignBlocks(MF, STI);
|
||||||
|
|
||||||
// Perform the initial placement of the constant pool entries. To start with,
|
// Perform the initial placement of the constant pool entries. To start with,
|
||||||
// we put them all at the end of the function.
|
// we put them all at the end of the function.
|
||||||
|
|
|
@ -14,7 +14,6 @@ define i32 @loop(i32* nocapture readonly %x) {
|
||||||
; CHECK-NEXT: ldr r2, [r0], #4
|
; CHECK-NEXT: ldr r2, [r0], #4
|
||||||
; CHECK-NEXT: add r1, r2
|
; CHECK-NEXT: add r1, r2
|
||||||
; CHECK-NEXT: le lr, .LBB0_1
|
; CHECK-NEXT: le lr, .LBB0_1
|
||||||
; CHECK-NEXT: .p2align 2
|
|
||||||
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
|
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
|
||||||
; CHECK-NEXT: mov r0, r1
|
; CHECK-NEXT: mov r0, r1
|
||||||
; CHECK-NEXT: pop {r7, pc}
|
; CHECK-NEXT: pop {r7, pc}
|
||||||
|
@ -54,7 +53,6 @@ define i64 @loopif(i32* nocapture readonly %x, i32 %y, i32 %n) {
|
||||||
; CHECK-NEXT: ldr r2, [r12], #4
|
; CHECK-NEXT: ldr r2, [r12], #4
|
||||||
; CHECK-NEXT: smlal r0, r3, r2, r1
|
; CHECK-NEXT: smlal r0, r3, r2, r1
|
||||||
; CHECK-NEXT: le lr, .LBB1_2
|
; CHECK-NEXT: le lr, .LBB1_2
|
||||||
; CHECK-NEXT: .p2align 2
|
|
||||||
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
|
; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
|
||||||
; CHECK-NEXT: mov r1, r3
|
; CHECK-NEXT: mov r1, r3
|
||||||
; CHECK-NEXT: pop {r7, pc}
|
; CHECK-NEXT: pop {r7, pc}
|
||||||
|
|
Loading…
Reference in New Issue