[ARM] Revert low overhead loops with calls before register allocation.

This adds code to revert low overhead loops with calls in them before
register allocation. Ideally we would not create low overhead loops with
calls in them to begin with, but that can be difficult to always get
correct. If we want to try to glue together t2LoopDec and t2LoopEnd
into a single instruction, we need to ensure that no instructions use LR
in the loop. (Technically the final code can be better too, as it
doesn't need to use the same registers, but that has not been optimized
for here, as reverting loops with calls is expected to be very rare.)
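
In outline, the new pre-register-allocation check (RevertLoopWithCall in
MVEVPTOptimisations.cpp, shown in full in the diff below) scans the loop
body and falls back to a plain mov/sub/cmp+branch loop as soon as it sees
a call, since the call clobbers LR:

    for (MachineBasicBlock *MBB : ML->blocks())
      for (MachineInstr &MI : *MBB)
        if (MI.isCall()) {
          RevertDoLoopStart(LoopStart, TII); // t2DoLoopStart -> tMOVr
          RevertLoopDec(LoopDec, TII);       // t2LoopDec -> t2SUBri
          RevertLoopEnd(LoopEnd, TII);       // t2LoopEnd -> t2CMPri + t2Bcc
          return true;
        }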

It also adds an MVETailPredUtils.h header to share the revert code
between different passes and to provide a place to expand upon, with
RevertLoopWithCall becoming a natural spot for other low overhead loop
alterations, such as removing copies or combining LoopDec and End into a
single instruction.

Differential Revision: https://reviews.llvm.org/D91273
David Green, 2020-12-07 15:44:40 +00:00
commit d9bf6245bf (parent 71f4693020)
11 changed files with 344 additions and 109 deletions

@@ -19,6 +19,7 @@
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MVETailPredUtils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"

@@ -614,56 +614,6 @@ unsigned VCMPOpcodeToVPT(unsigned Opcode) {
}
}
static inline
unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop) {
switch (Opcode) {
default:
llvm_unreachable("unhandled vctp opcode");
break;
case ARM::MVE_VCTP8:
return IsDoLoop ? ARM::MVE_DLSTP_8 : ARM::MVE_WLSTP_8;
case ARM::MVE_VCTP16:
return IsDoLoop ? ARM::MVE_DLSTP_16 : ARM::MVE_WLSTP_16;
case ARM::MVE_VCTP32:
return IsDoLoop ? ARM::MVE_DLSTP_32 : ARM::MVE_WLSTP_32;
case ARM::MVE_VCTP64:
return IsDoLoop ? ARM::MVE_DLSTP_64 : ARM::MVE_WLSTP_64;
}
return 0;
}
static inline unsigned getTailPredVectorWidth(unsigned Opcode) {
switch (Opcode) {
default:
llvm_unreachable("unhandled vctp opcode");
case ARM::MVE_VCTP8: return 16;
case ARM::MVE_VCTP16: return 8;
case ARM::MVE_VCTP32: return 4;
case ARM::MVE_VCTP64: return 2;
}
return 0;
}
static inline bool isVCTP(const MachineInstr *MI) {
switch (MI->getOpcode()) {
default:
break;
case ARM::MVE_VCTP8:
case ARM::MVE_VCTP16:
case ARM::MVE_VCTP32:
case ARM::MVE_VCTP64:
return true;
}
return false;
}
static inline
bool isLoopStart(MachineInstr &MI) {
return MI.getOpcode() == ARM::t2DoLoopStart ||
MI.getOpcode() == ARM::t2DoLoopStartTP ||
MI.getOpcode() == ARM::t2WhileLoopStart;
}
static inline
bool isCondBranchOpcode(int Opc) {
return Opc == ARM::Bcc || Opc == ARM::tBcc || Opc == ARM::t2Bcc;

@@ -56,6 +56,7 @@
#include "ARMBaseRegisterInfo.h"
#include "ARMBasicBlockInfo.h"
#include "ARMSubtarget.h"
#include "MVETailPredUtils.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallSet.h"
@@ -1310,33 +1311,16 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
// another low register.
void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const {
LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI);
MachineBasicBlock *MBB = MI->getParent();
MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
TII->get(ARM::t2CMPri));
MIB.add(MI->getOperand(0));
MIB.addImm(0);
MIB.addImm(ARMCC::AL);
MIB.addReg(ARM::NoRegister);
MachineBasicBlock *DestBB = MI->getOperand(1).getMBB();
unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ?
ARM::tBcc : ARM::t2Bcc;
MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
MIB.add(MI->getOperand(1)); // branch target
MIB.addImm(ARMCC::EQ); // condition code
MIB.addReg(ARM::CPSR);
MI->eraseFromParent();
RevertWhileLoopStart(MI, TII, BrOpc);
}
void ARMLowOverheadLoops::RevertDo(MachineInstr *MI) const {
LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to mov: " << *MI);
MachineBasicBlock *MBB = MI->getParent();
BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::tMOVr))
.add(MI->getOperand(0))
.add(MI->getOperand(1))
.add(predOps(ARMCC::AL));
MI->eraseFromParent();
RevertDoLoopStart(MI, TII);
}
bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const {
@@ -1354,21 +1338,7 @@ bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const {
bool SetFlags =
RDA->isSafeToDefRegAt(MI, MCRegister::from(ARM::CPSR), Ignore);
MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
TII->get(ARM::t2SUBri));
MIB.addDef(ARM::LR);
MIB.add(MI->getOperand(1));
MIB.add(MI->getOperand(2));
MIB.addImm(ARMCC::AL);
MIB.addReg(0);
if (SetFlags) {
MIB.addReg(ARM::CPSR);
MIB->getOperand(5).setIsDef(true);
} else
MIB.addReg(0);
MI->eraseFromParent();
llvm::RevertLoopDec(MI, TII, SetFlags);
return SetFlags;
}
@@ -1376,28 +1346,11 @@ bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const {
void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI, bool SkipCmp) const {
LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI);
MachineBasicBlock *MBB = MI->getParent();
// Create cmp
if (!SkipCmp) {
MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
TII->get(ARM::t2CMPri));
MIB.addReg(ARM::LR);
MIB.addImm(0);
MIB.addImm(ARMCC::AL);
MIB.addReg(ARM::NoRegister);
}
MachineBasicBlock *DestBB = MI->getOperand(1).getMBB();
unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ?
ARM::tBcc : ARM::t2Bcc;
// Create bne
MachineInstrBuilder MIB =
BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
MIB.add(MI->getOperand(1)); // branch target
MIB.addImm(ARMCC::NE); // condition code
MIB.addReg(ARM::CPSR);
MI->eraseFromParent();
llvm::RevertLoopEnd(MI, TII, BrOpc, SkipCmp);
}
// Perform dead code elimination on the loop iteration count setup expression.

@@ -0,0 +1,157 @@
//===-- MVETailPredUtils.h - Tail predication utility functions -*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains utility functions for low overhead and tail predicated
// loops, shared between the ARMLowOverheadLoops pass and anywhere else that
// needs them.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_ARM_MVETAILPREDUTILS_H
#define LLVM_LIB_TARGET_ARM_MVETAILPREDUTILS_H
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
namespace llvm {
static inline unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop) {
switch (Opcode) {
default:
llvm_unreachable("unhandled vctp opcode");
break;
case ARM::MVE_VCTP8:
return IsDoLoop ? ARM::MVE_DLSTP_8 : ARM::MVE_WLSTP_8;
case ARM::MVE_VCTP16:
return IsDoLoop ? ARM::MVE_DLSTP_16 : ARM::MVE_WLSTP_16;
case ARM::MVE_VCTP32:
return IsDoLoop ? ARM::MVE_DLSTP_32 : ARM::MVE_WLSTP_32;
case ARM::MVE_VCTP64:
return IsDoLoop ? ARM::MVE_DLSTP_64 : ARM::MVE_WLSTP_64;
}
return 0;
}
static inline unsigned getTailPredVectorWidth(unsigned Opcode) {
switch (Opcode) {
default:
llvm_unreachable("unhandled vctp opcode");
case ARM::MVE_VCTP8:
return 16;
case ARM::MVE_VCTP16:
return 8;
case ARM::MVE_VCTP32:
return 4;
case ARM::MVE_VCTP64:
return 2;
}
return 0;
}
static inline bool isVCTP(const MachineInstr *MI) {
switch (MI->getOpcode()) {
default:
break;
case ARM::MVE_VCTP8:
case ARM::MVE_VCTP16:
case ARM::MVE_VCTP32:
case ARM::MVE_VCTP64:
return true;
}
return false;
}
static inline bool isLoopStart(MachineInstr &MI) {
return MI.getOpcode() == ARM::t2DoLoopStart ||
MI.getOpcode() == ARM::t2DoLoopStartTP ||
MI.getOpcode() == ARM::t2WhileLoopStart;
}
// WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a
// beq that branches to the exit branch.
inline void RevertWhileLoopStart(MachineInstr *MI, const TargetInstrInfo *TII,
unsigned BrOpc = ARM::t2Bcc) {
MachineBasicBlock *MBB = MI->getParent();
// Cmp
MachineInstrBuilder MIB =
BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2CMPri));
MIB.add(MI->getOperand(0));
MIB.addImm(0);
MIB.addImm(ARMCC::AL);
MIB.addReg(ARM::NoRegister);
// Branch
MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
MIB.add(MI->getOperand(1)); // branch target
MIB.addImm(ARMCC::EQ); // condition code
MIB.addReg(ARM::CPSR);
MI->eraseFromParent();
}
inline void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII) {
MachineBasicBlock *MBB = MI->getParent();
BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::tMOVr))
.add(MI->getOperand(0))
.add(MI->getOperand(1))
.add(predOps(ARMCC::AL));
MI->eraseFromParent();
}
inline void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII,
bool SetFlags = false) {
MachineBasicBlock *MBB = MI->getParent();
MachineInstrBuilder MIB =
BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
MIB.add(MI->getOperand(0));
MIB.add(MI->getOperand(1));
MIB.add(MI->getOperand(2));
MIB.addImm(ARMCC::AL);
MIB.addReg(0);
if (SetFlags) {
MIB.addReg(ARM::CPSR);
MIB->getOperand(5).setIsDef(true);
} else
MIB.addReg(0);
MI->eraseFromParent();
}
// Generate a subs, or sub and cmp, and a branch instead of an LE.
inline void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII,
unsigned BrOpc = ARM::t2Bcc, bool SkipCmp = false) {
MachineBasicBlock *MBB = MI->getParent();
// Create cmp
if (!SkipCmp) {
MachineInstrBuilder MIB =
BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2CMPri));
MIB.add(MI->getOperand(0));
MIB.addImm(0);
MIB.addImm(ARMCC::AL);
MIB.addReg(ARM::NoRegister);
}
// Create bne
MachineInstrBuilder MIB =
BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
MIB.add(MI->getOperand(1)); // branch target
MIB.addImm(ARMCC::NE); // condition code
MIB.addReg(ARM::CPSR);
MI->eraseFromParent();
}
} // end namespace llvm
#endif // LLVM_LIB_TARGET_ARM_MVETAILPREDUTILS_H

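A caller that knows branch distances can pick the narrower Thumb branch
encoding before delegating to these helpers; ARMLowOverheadLoops::RevertWhile
(above) now does exactly this, along the lines of:

    MachineBasicBlock *DestBB = MI->getOperand(1).getMBB();
    unsigned BrOpc =
        BBUtils->isBBInRange(MI, DestBB, 254) ? ARM::tBcc : ARM::t2Bcc;
    RevertWhileLoopStart(MI, TII, BrOpc);

Here BBUtils is the ARMBasicBlockUtils instance that pass already keeps;
callers without block layout information can simply take the t2Bcc default.
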
@@ -18,6 +18,7 @@
#include "ARM.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MVETailPredUtils.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -58,6 +59,7 @@ public:
}
private:
bool RevertLoopWithCall(MachineLoop *ML);
bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT);
MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB,
MachineInstr &Instr,
@@ -156,6 +158,31 @@ static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI,
return true;
}
bool MVEVPTOptimisations::RevertLoopWithCall(MachineLoop *ML) {
LLVM_DEBUG(dbgs() << "RevertLoopWithCall on loop " << ML->getHeader()->getName()
<< "\n");
MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
return false;
// Check if there is an illegal instruction (a call) in the low overhead loop
// and if so revert it now before we get any further.
for (MachineBasicBlock *MBB : ML->blocks()) {
for (MachineInstr &MI : *MBB) {
if (MI.isCall()) {
LLVM_DEBUG(dbgs() << "Found call in loop, reverting: " << MI);
RevertDoLoopStart(LoopStart, TII);
RevertLoopDec(LoopDec, TII);
RevertLoopEnd(LoopEnd, TII);
return true;
}
}
}
return false;
}
// Convert t2DoLoopStart to t2DoLoopStartTP if the loop contains VCTP
// instructions. This keeps the VCTP count reg operand on the t2DoLoopStartTP
instruction, making the backend ARMLowOverheadLoops pass's job of finding the
@@ -662,7 +689,7 @@ bool MVEVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
const ARMSubtarget &STI =
static_cast<const ARMSubtarget &>(Fn.getSubtarget());
if (!STI.isThumb2() || !STI.hasMVEIntegerOps())
if (!STI.isThumb2() || !STI.hasLOB())
return false;
TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
@@ -674,8 +701,10 @@ bool MVEVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
<< "********** Function: " << Fn.getName() << '\n');
bool Modified = false;
for (MachineLoop *ML : MLI->getBase().getLoopsInPreorder())
for (MachineLoop *ML : MLI->getBase().getLoopsInPreorder()) {
Modified |= RevertLoopWithCall(ML);
Modified |= ConvertTailPredLoop(ML, DT);
}
for (MachineBasicBlock &MBB : Fn) {
Modified |= ReplaceVCMPsByVPNOTs(MBB);

@@ -272,7 +272,7 @@ body: |
; CHECK: renamable $r6, renamable $r11 = t2SMLAL renamable $r9, killed renamable $r0, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg
; CHECK: early-clobber renamable $r6, dead early-clobber renamable $r11 = MVE_ASRLr killed renamable $r6, killed renamable $r11, renamable $r2, 14 /* CC::al */, $noreg
; CHECK: early-clobber renamable $r12 = t2STR_POST renamable $r6, killed renamable $r12, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.i39)
; CHECK: dead $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, def $cpsr
; CHECK: dead renamable $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, def $cpsr
; CHECK: renamable $r8 = t2SUBri killed renamable $r8, 1, 14 /* CC::al */, $noreg, $noreg
; CHECK: $r0 = tMOVr $r7, 14 /* CC::al */, $noreg
; CHECK: $r4 = tMOVr $r5, 14 /* CC::al */, $noreg

@@ -258,7 +258,7 @@ body: |
; CHECK: renamable $r4 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load 4 from %stack.2)
; CHECK: renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg
; CHECK: renamable $r9, renamable $r1 = t2LDR_POST killed renamable $r1, 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.i38)
; CHECK: dead $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, def $cpsr
; CHECK: dead renamable $lr = t2SUBri killed renamable $lr, 1, 14 /* CC::al */, $noreg, def $cpsr
; CHECK: renamable $r6, renamable $r11 = t2SMLAL killed renamable $r8, killed renamable $r4, killed renamable $r6, killed renamable $r11, 14 /* CC::al */, $noreg
; CHECK: renamable $r4 = tLDRspi $sp, 6, 14 /* CC::al */, $noreg :: (load 4 from %stack.3)
; CHECK: $r8 = tMOVr $r5, 14 /* CC::al */, $noreg

@@ -296,7 +296,7 @@ body: |
; CHECK: renamable $r4, dead $cpsr = tEOR killed renamable $r4, killed renamable $r5, 14 /* CC::al */, $noreg
; CHECK: renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r11, 14 /* CC::al */, $noreg
; CHECK: tSTRi killed renamable $r4, killed renamable $r6, 3, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep1)
; CHECK: t2CMPri killed $lr, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2CMPri killed renamable $lr, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: tBcc %bb.4, 1 /* CC::ne */, killed $cpsr
; CHECK: tB %bb.5, 14 /* CC::al */, $noreg
; CHECK: bb.5.bb13:

@@ -8,7 +8,7 @@
# CHECK: tBcc %bb.4, 0
# CHECK: tB %bb.2
# CHECK: bb.3.while.body:
# CHECK: t2CMPri $lr, 0, 14
# CHECK: t2CMPri renamable $lr, 0, 14
# CHECK: tBcc %bb.3, 1
# CHECK: tB %bb.4
# CHECK: bb.4.while.end:

@@ -0,0 +1,145 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+lob -run-pass=arm-mve-vpt-opts --verify-machineinstrs %s -o - | FileCheck %s
--- |
@d = local_unnamed_addr global i32 0, align 4
@c = local_unnamed_addr global [1 x i32] zeroinitializer, align 4
define i32 @e() optsize {
entry:
%.pr = load i32, i32* @d, align 4
%cmp13 = icmp sgt i32 %.pr, -1
br i1 %cmp13, label %for.cond1.preheader.preheader, label %for.end9
for.cond1.preheader.preheader: ; preds = %entry
%0 = add i32 %.pr, 1
%1 = call i32 @llvm.start.loop.iterations.i32(i32 %0)
br label %for.cond1.preheader
for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.cond1.preheader
%2 = phi i32 [ %1, %for.cond1.preheader.preheader ], [ %3, %for.cond1.preheader ]
call void @llvm.memset.p0i8.i32(i8* nonnull align 4 dereferenceable(24) bitcast ([1 x i32]* @c to i8*), i8 0, i32 24, i1 false)
%3 = call i32 @llvm.loop.decrement.reg.i32(i32 %2, i32 1)
%4 = icmp ne i32 %3, 0
br i1 %4, label %for.cond1.preheader, label %for.cond.for.end9_crit_edge
for.cond.for.end9_crit_edge: ; preds = %for.cond1.preheader
store i32 -1, i32* @d, align 4
br label %for.end9
for.end9: ; preds = %for.cond.for.end9_crit_edge, %entry
ret i32 undef
}
declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1 immarg)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32(i32, i32)
...
---
name: e
alignment: 2
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers:
- { id: 0, class: gprnopc, preferred-register: '' }
- { id: 1, class: gpr, preferred-register: '' }
- { id: 2, class: gprlr, preferred-register: '' }
- { id: 3, class: gpr, preferred-register: '' }
- { id: 4, class: rgpr, preferred-register: '' }
- { id: 5, class: rgpr, preferred-register: '' }
- { id: 6, class: gprlr, preferred-register: '' }
- { id: 7, class: rgpr, preferred-register: '' }
- { id: 8, class: rgpr, preferred-register: '' }
- { id: 9, class: gprlr, preferred-register: '' }
- { id: 10, class: gprlr, preferred-register: '' }
- { id: 11, class: rgpr, preferred-register: '' }
- { id: 12, class: rgpr, preferred-register: '' }
- { id: 13, class: gpr, preferred-register: '' }
liveins: []
body: |
; CHECK-LABEL: name: e
; CHECK: bb.0.entry:
; CHECK: successors: %bb.1(0x50000000), %bb.4(0x30000000)
; CHECK: [[t2MOVi32imm:%[0-9]+]]:rgpr = t2MOVi32imm @d
; CHECK: [[t2LDRi12_:%[0-9]+]]:gprnopc = t2LDRi12 [[t2MOVi32imm]], 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from @d)
; CHECK: t2CMPri [[t2LDRi12_]], 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2Bcc %bb.4, 4 /* CC::mi */, $cpsr
; CHECK: t2B %bb.1, 14 /* CC::al */, $noreg
; CHECK: bb.1.for.cond1.preheader.preheader:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[t2LDRi12_]], 1, 14 /* CC::al */, $noreg, $noreg
; CHECK: [[tMOVr:%[0-9]+]]:gprlr = tMOVr killed [[t2ADDri]], 14 /* CC::al */, $noreg
; CHECK: [[COPY:%[0-9]+]]:gpr = COPY [[tMOVr]]
; CHECK: [[t2MOVi32imm1:%[0-9]+]]:rgpr = t2MOVi32imm @c
; CHECK: [[t2MOVi:%[0-9]+]]:rgpr = t2MOVi 24, 14 /* CC::al */, $noreg, $noreg
; CHECK: bb.2.for.cond1.preheader:
; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK: [[PHI:%[0-9]+]]:gprlr = PHI [[COPY]], %bb.1, %3, %bb.2
; CHECK: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
; CHECK: $r0 = COPY [[t2MOVi32imm1]]
; CHECK: $r1 = COPY [[t2MOVi]]
; CHECK: tBL 14 /* CC::al */, $noreg, &__aeabi_memclr4, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit $r1, implicit-def $sp
; CHECK: ADJCALLSTACKUP 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
; CHECK: [[t2SUBri:%[0-9]+]]:gprlr = t2SUBri [[PHI]], 1, 14 /* CC::al */, $noreg, $noreg
; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY [[t2SUBri]]
; CHECK: t2CMPri [[t2SUBri]], 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2Bcc %bb.2, 1 /* CC::ne */, $cpsr
; CHECK: t2B %bb.3, 14 /* CC::al */, $noreg
; CHECK: bb.3.for.cond.for.end9_crit_edge:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: [[t2MOVi1:%[0-9]+]]:rgpr = t2MOVi -1, 14 /* CC::al */, $noreg, $noreg
; CHECK: t2STRi12 killed [[t2MOVi1]], [[t2MOVi32imm]], 0, 14 /* CC::al */, $noreg :: (store 4 into @d)
; CHECK: bb.4.for.end9:
; CHECK: [[DEF:%[0-9]+]]:gpr = IMPLICIT_DEF
; CHECK: $r0 = COPY [[DEF]]
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
bb.0.entry:
successors: %bb.1(0x50000000), %bb.4(0x30000000)
%4:rgpr = t2MOVi32imm @d
%0:gprnopc = t2LDRi12 %4, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from @d)
t2CMPri %0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
t2Bcc %bb.4, 4 /* CC::mi */, $cpsr
t2B %bb.1, 14 /* CC::al */, $noreg
bb.1.for.cond1.preheader.preheader:
successors: %bb.2(0x80000000)
%5:rgpr = t2ADDri %0, 1, 14 /* CC::al */, $noreg, $noreg
%6:gprlr = t2DoLoopStart killed %5
%1:gpr = COPY %6
%7:rgpr = t2MOVi32imm @c
%8:rgpr = t2MOVi 24, 14 /* CC::al */, $noreg, $noreg
bb.2.for.cond1.preheader:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
%2:gprlr = PHI %1, %bb.1, %3, %bb.2
ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
$r0 = COPY %7
$r1 = COPY %8
tBL 14 /* CC::al */, $noreg, &__aeabi_memclr4, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit $r1, implicit-def $sp
ADJCALLSTACKUP 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
%9:gprlr = t2LoopDec %2, 1
%3:gpr = COPY %9
t2LoopEnd %9, %bb.2, implicit-def dead $cpsr
t2B %bb.3, 14 /* CC::al */, $noreg
bb.3.for.cond.for.end9_crit_edge:
successors: %bb.4(0x80000000)
%12:rgpr = t2MOVi -1, 14 /* CC::al */, $noreg, $noreg
t2STRi12 killed %12, %4, 0, 14 /* CC::al */, $noreg :: (store 4 into @d)
bb.4.for.end9:
%13:gpr = IMPLICIT_DEF
$r0 = COPY %13
tBX_RET 14 /* CC::al */, $noreg, implicit $r0
...

@@ -113,7 +113,7 @@ body: |
; CHECK: renamable $r3 = tLSRri $noreg, killed renamable $r3, 1, 2 /* CC::hs */, killed $cpsr, implicit killed renamable $r3, implicit killed $itstate
; CHECK: early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14 /* CC::al */, $noreg :: (store 4 into %ir.scevgep4)
; CHECK: renamable $lr = tMOVr killed $lr, 14 /* CC::al */, $noreg
; CHECK: t2CMPri $lr, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2CMPri renamable $lr, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: tBcc %bb.1, 1 /* CC::ne */, killed $cpsr
; CHECK: tB %bb.2, 14 /* CC::al */, $noreg
; CHECK: bb.2.while.end: