forked from OSchip/llvm-project
[ARM] Distribute post-inc for Thumb2 sign/zero extending loads/stores
This adds sign/zero extending scalar loads/stores to the MVE instructions added in D77813, allowing us to create more post-inc instructions. These are comparatively simple next to LDR/STR (which may be better turned into an LDRD/LDM), but still require some additions over the MVE instructions. Because there are i12 and i8 variants of the offset loads/stores dealing with different signs, we may need to convert an i12 address to an i8 negative instruction. t2LDRBi12 can also be shrunk to a tLDRBi under the right conditions, so we need to be careful with codesize too. Differential Revision: https://reviews.llvm.org/D78625
This commit is contained in:
parent
eb41f9edde
commit
fd69df62ed
|
@ -829,6 +829,10 @@ inline bool isLegalAddressImm(unsigned Opcode, int Imm,
|
|||
return std::abs(Imm) < (((1 << 7) * 2) - 1) && Imm % 2 == 0;
|
||||
case ARMII::AddrModeT2_i7s4:
|
||||
return std::abs(Imm) < (((1 << 7) * 4) - 1) && Imm % 4 == 0;
|
||||
case ARMII::AddrModeT2_i8:
|
||||
return std::abs(Imm) < (((1 << 8) * 1) - 1);
|
||||
case ARMII::AddrModeT2_i12:
|
||||
return Imm >= 0 && Imm < (((1 << 12) * 1) - 1);
|
||||
default:
|
||||
llvm_unreachable("Unhandled Addressing mode");
|
||||
}
|
||||
|
|
|
@ -1382,9 +1382,27 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
|
|||
case ARM::t2LDRi8:
|
||||
case ARM::t2LDRi12:
|
||||
return ARM::t2LDR_POST;
|
||||
case ARM::t2LDRBi8:
|
||||
case ARM::t2LDRBi12:
|
||||
return ARM::t2LDRB_POST;
|
||||
case ARM::t2LDRSBi8:
|
||||
case ARM::t2LDRSBi12:
|
||||
return ARM::t2LDRSB_POST;
|
||||
case ARM::t2LDRHi8:
|
||||
case ARM::t2LDRHi12:
|
||||
return ARM::t2LDRH_POST;
|
||||
case ARM::t2LDRSHi8:
|
||||
case ARM::t2LDRSHi12:
|
||||
return ARM::t2LDRSH_POST;
|
||||
case ARM::t2STRi8:
|
||||
case ARM::t2STRi12:
|
||||
return ARM::t2STR_POST;
|
||||
case ARM::t2STRBi8:
|
||||
case ARM::t2STRBi12:
|
||||
return ARM::t2STRB_POST;
|
||||
case ARM::t2STRHi8:
|
||||
case ARM::t2STRHi12:
|
||||
return ARM::t2STRH_POST;
|
||||
|
||||
case ARM::MVE_VLDRBS16:
|
||||
return ARM::MVE_VLDRBS16_post;
|
||||
|
@ -2539,11 +2557,94 @@ static int getBaseOperandIndex(MachineInstr &MI) {
|
|||
case ARM::MVE_VSTRBU8:
|
||||
case ARM::MVE_VSTRHU16:
|
||||
case ARM::MVE_VSTRWU32:
|
||||
case ARM::t2LDRHi8:
|
||||
case ARM::t2LDRHi12:
|
||||
case ARM::t2LDRSHi8:
|
||||
case ARM::t2LDRSHi12:
|
||||
case ARM::t2LDRBi8:
|
||||
case ARM::t2LDRBi12:
|
||||
case ARM::t2LDRSBi8:
|
||||
case ARM::t2LDRSBi12:
|
||||
case ARM::t2STRBi8:
|
||||
case ARM::t2STRBi12:
|
||||
case ARM::t2STRHi8:
|
||||
case ARM::t2STRHi12:
|
||||
return 1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Given a memory access Opcode, check that the give Imm would be a valid Offset
|
||||
// for this instruction (same as isLegalAddressImm), Or if the instruction
|
||||
// could be easily converted to one where that was valid. For example converting
|
||||
// t2LDRi12 to t2LDRi8 for negative offsets. Works in conjunction with
|
||||
// AdjustBaseAndOffset below.
|
||||
static bool isLegalOrConvertableAddressImm(unsigned Opcode, int Imm,
|
||||
const TargetInstrInfo *TII,
|
||||
int &CodesizeEstimate) {
|
||||
if (isLegalAddressImm(Opcode, Imm, TII))
|
||||
return true;
|
||||
|
||||
// We can convert AddrModeT2_i12 to AddrModeT2_i8.
|
||||
const MCInstrDesc &Desc = TII->get(Opcode);
|
||||
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
|
||||
switch (AddrMode) {
|
||||
case ARMII::AddrModeT2_i12:
|
||||
CodesizeEstimate += 1;
|
||||
return std::abs(Imm) < (((1 << 8) * 1) - 1);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Given an MI adjust its address BaseReg to use NewBaseReg and address offset
|
||||
// by -Offset. This can either happen in-place or be a replacement as MI is
|
||||
// converted to another instruction type.
|
||||
static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg,
|
||||
int Offset, const TargetInstrInfo *TII) {
|
||||
unsigned BaseOp = getBaseOperandIndex(*MI);
|
||||
MI->getOperand(BaseOp).setReg(NewBaseReg);
|
||||
int OldOffset = MI->getOperand(BaseOp + 1).getImm();
|
||||
if (isLegalAddressImm(MI->getOpcode(), OldOffset - Offset, TII))
|
||||
MI->getOperand(BaseOp + 1).setImm(OldOffset - Offset);
|
||||
else {
|
||||
unsigned ConvOpcode;
|
||||
switch (MI->getOpcode()) {
|
||||
case ARM::t2LDRHi12:
|
||||
ConvOpcode = ARM::t2LDRHi8;
|
||||
break;
|
||||
case ARM::t2LDRSHi12:
|
||||
ConvOpcode = ARM::t2LDRSHi8;
|
||||
break;
|
||||
case ARM::t2LDRBi12:
|
||||
ConvOpcode = ARM::t2LDRBi8;
|
||||
break;
|
||||
case ARM::t2LDRSBi12:
|
||||
ConvOpcode = ARM::t2LDRSBi8;
|
||||
break;
|
||||
case ARM::t2STRHi12:
|
||||
ConvOpcode = ARM::t2STRHi8;
|
||||
break;
|
||||
case ARM::t2STRBi12:
|
||||
ConvOpcode = ARM::t2STRBi8;
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("Unhandled convertable opcode");
|
||||
}
|
||||
assert(isLegalAddressImm(ConvOpcode, OldOffset - Offset, TII) &&
|
||||
"Illegal Address Immediate after convert!");
|
||||
|
||||
const MCInstrDesc &MCID = TII->get(ConvOpcode);
|
||||
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
|
||||
.add(MI->getOperand(0))
|
||||
.add(MI->getOperand(1))
|
||||
.addImm(OldOffset - Offset)
|
||||
.add(MI->getOperand(3))
|
||||
.add(MI->getOperand(4))
|
||||
.cloneMemRefs(*MI);
|
||||
MI->eraseFromParent();
|
||||
}
|
||||
}
|
||||
|
||||
static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset,
|
||||
Register NewReg,
|
||||
const TargetInstrInfo *TII,
|
||||
|
@ -2562,14 +2663,43 @@ static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset,
|
|||
TRC = TII->getRegClass(MCID, 2, TRI, *MF);
|
||||
MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC);
|
||||
|
||||
return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
|
||||
.addReg(NewReg, RegState::Define)
|
||||
.add(MI->getOperand(0))
|
||||
.add(MI->getOperand(1))
|
||||
.addImm(Offset)
|
||||
.add(MI->getOperand(3))
|
||||
.add(MI->getOperand(4))
|
||||
.cloneMemRefs(*MI);
|
||||
unsigned AddrMode = (MCID.TSFlags & ARMII::AddrModeMask);
|
||||
switch (AddrMode) {
|
||||
case ARMII::AddrModeT2_i7:
|
||||
case ARMII::AddrModeT2_i7s2:
|
||||
case ARMII::AddrModeT2_i7s4:
|
||||
// Any MVE load/store
|
||||
return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
|
||||
.addReg(NewReg, RegState::Define)
|
||||
.add(MI->getOperand(0))
|
||||
.add(MI->getOperand(1))
|
||||
.addImm(Offset)
|
||||
.add(MI->getOperand(3))
|
||||
.add(MI->getOperand(4))
|
||||
.cloneMemRefs(*MI);
|
||||
case ARMII::AddrModeT2_i8:
|
||||
if (MI->mayLoad()) {
|
||||
return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
|
||||
.add(MI->getOperand(0))
|
||||
.addReg(NewReg, RegState::Define)
|
||||
.add(MI->getOperand(1))
|
||||
.addImm(Offset)
|
||||
.add(MI->getOperand(3))
|
||||
.add(MI->getOperand(4))
|
||||
.cloneMemRefs(*MI);
|
||||
} else {
|
||||
return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
|
||||
.addReg(NewReg, RegState::Define)
|
||||
.add(MI->getOperand(0))
|
||||
.add(MI->getOperand(1))
|
||||
.addImm(Offset)
|
||||
.add(MI->getOperand(3))
|
||||
.add(MI->getOperand(4))
|
||||
.cloneMemRefs(*MI);
|
||||
}
|
||||
default:
|
||||
llvm_unreachable("Unhandled createPostIncLoadStore");
|
||||
}
|
||||
}
|
||||
|
||||
// Given a Base Register, optimise the load/store uses to attempt to create more
|
||||
|
@ -2589,7 +2719,7 @@ bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
|
|||
// An increment that can be folded in
|
||||
MachineInstr *Increment = nullptr;
|
||||
// Other accesses after BaseAccess that will need to be updated to use the
|
||||
// postinc value
|
||||
// postinc value.
|
||||
SmallPtrSet<MachineInstr *, 8> OtherAccesses;
|
||||
for (auto &Use : MRI->use_nodbg_instructions(Base)) {
|
||||
if (!Increment && getAddSubImmediate(Use) != 0) {
|
||||
|
@ -2643,14 +2773,20 @@ bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
|
|||
// other offsets after the BaseAccess. We rely on either
|
||||
// dominates(BaseAccess, OtherAccess) or dominates(OtherAccess, BaseAccess)
|
||||
// to keep things simple.
|
||||
// This also adds a simple codesize metric, to detect if an instruction (like
|
||||
// t2LDRBi12) which can often be shrunk to a thumb1 instruction (tLDRBi)
|
||||
// cannot because it is converted to something else (t2LDRBi8). We start this
|
||||
// at -1 for the gain from removing the increment.
|
||||
SmallPtrSet<MachineInstr *, 4> SuccessorAccesses;
|
||||
int CodesizeEstimate = -1;
|
||||
for (auto *Use : OtherAccesses) {
|
||||
if (DT->dominates(BaseAccess, Use)) {
|
||||
SuccessorAccesses.insert(Use);
|
||||
unsigned BaseOp = getBaseOperandIndex(*Use);
|
||||
if (!isLegalAddressImm(
|
||||
Use->getOpcode(),
|
||||
Use->getOperand(BaseOp + 1).getImm() - IncrementOffset, TII)) {
|
||||
if (!isLegalOrConvertableAddressImm(Use->getOpcode(),
|
||||
Use->getOperand(BaseOp + 1).getImm() -
|
||||
IncrementOffset,
|
||||
TII, CodesizeEstimate)) {
|
||||
LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on use\n");
|
||||
return false;
|
||||
}
|
||||
|
@ -2660,6 +2796,10 @@ bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
|
|||
return false;
|
||||
}
|
||||
}
|
||||
if (STI->hasMinSize() && CodesizeEstimate > 0) {
|
||||
LLVM_DEBUG(dbgs() << " Expected to grow instructions under minsize\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Replace BaseAccess with a post inc
|
||||
LLVM_DEBUG(dbgs() << "Changing: "; BaseAccess->dump());
|
||||
|
@ -2674,10 +2814,7 @@ bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
|
|||
|
||||
for (auto *Use : SuccessorAccesses) {
|
||||
LLVM_DEBUG(dbgs() << "Changing: "; Use->dump());
|
||||
unsigned BaseOp = getBaseOperandIndex(*Use);
|
||||
Use->getOperand(BaseOp).setReg(NewBaseReg);
|
||||
int OldOffset = Use->getOperand(BaseOp + 1).getImm();
|
||||
Use->getOperand(BaseOp + 1).setImm(OldOffset - IncrementOffset);
|
||||
AdjustBaseAndOffset(Use, NewBaseReg, IncrementOffset, TII);
|
||||
LLVM_DEBUG(dbgs() << " To : "; Use->dump());
|
||||
}
|
||||
|
||||
|
|
|
@ -1798,20 +1798,20 @@ define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* n
|
|||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: ldrsh.w r4, [r3, #2]
|
||||
; CHECK-NEXT: vldr.16 s2, [r2, #2]
|
||||
; CHECK-NEXT: ldrsh r5, [r3, #-2]
|
||||
; CHECK-NEXT: add.w r12, r12, #4
|
||||
; CHECK-NEXT: vmov s4, r4
|
||||
; CHECK-NEXT: ldrsh.w r4, [r3]
|
||||
; CHECK-NEXT: ldrsh r4, [r3], #8
|
||||
; CHECK-NEXT: vcvt.f16.s32 s4, s4
|
||||
; CHECK-NEXT: vmov s8, r5
|
||||
; CHECK-NEXT: ldrsh r5, [r3, #-10]
|
||||
; CHECK-NEXT: vmul.f16 s2, s2, s4
|
||||
; CHECK-NEXT: vldr.16 s4, [r2]
|
||||
; CHECK-NEXT: vmov s6, r4
|
||||
; CHECK-NEXT: ldrsh r4, [r3, #-4]
|
||||
; CHECK-NEXT: vldr.16 s4, [r2]
|
||||
; CHECK-NEXT: vcvt.f16.s32 s6, s6
|
||||
; CHECK-NEXT: vcvt.f16.s32 s8, s8
|
||||
; CHECK-NEXT: ldrsh r4, [r3, #-12]
|
||||
; CHECK-NEXT: vmul.f16 s4, s4, s6
|
||||
; CHECK-NEXT: vmov s8, r5
|
||||
; CHECK-NEXT: vldr.16 s6, [r2, #-2]
|
||||
; CHECK-NEXT: vcvt.f16.s32 s8, s8
|
||||
; CHECK-NEXT: vmov s10, r4
|
||||
; CHECK-NEXT: vcvtb.f32.f16 s4, s4
|
||||
; CHECK-NEXT: vmul.f16 s6, s6, s8
|
||||
|
@ -1821,9 +1821,8 @@ define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* n
|
|||
; CHECK-NEXT: vmul.f16 s8, s8, s10
|
||||
; CHECK-NEXT: vcvtb.f32.f16 s2, s2
|
||||
; CHECK-NEXT: vcvtb.f32.f16 s8, s8
|
||||
; CHECK-NEXT: adds r3, #8
|
||||
; CHECK-NEXT: vadd.f32 s0, s0, s8
|
||||
; CHECK-NEXT: adds r2, #8
|
||||
; CHECK-NEXT: vadd.f32 s0, s0, s8
|
||||
; CHECK-NEXT: vadd.f32 s0, s0, s6
|
||||
; CHECK-NEXT: vadd.f32 s0, s0, s4
|
||||
; CHECK-NEXT: vadd.f32 s0, s0, s2
|
||||
|
|
|
@ -437,17 +437,15 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(i8* nocapture readonly
|
|||
; CHECK-NEXT: smlabb r7, r7, r8, r2
|
||||
; CHECK-NEXT: str r7, [r4, #-8]
|
||||
; CHECK-NEXT: ldrb r8, [r5, #-2]
|
||||
; CHECK-NEXT: ldrb r7, [r6]
|
||||
; CHECK-NEXT: ldrb r7, [r6], #4
|
||||
; CHECK-NEXT: smlabb r7, r7, r8, r2
|
||||
; CHECK-NEXT: str r7, [r4, #-4]
|
||||
; CHECK-NEXT: ldrb r8, [r5, #-1]
|
||||
; CHECK-NEXT: ldrb r7, [r6, #1]
|
||||
; CHECK-NEXT: ldrb r7, [r6, #-3]
|
||||
; CHECK-NEXT: smlabb r7, r7, r8, r2
|
||||
; CHECK-NEXT: str r7, [r4]
|
||||
; CHECK-NEXT: ldrb.w r8, [r5]
|
||||
; CHECK-NEXT: adds r5, #4
|
||||
; CHECK-NEXT: ldrb r7, [r6, #2]
|
||||
; CHECK-NEXT: adds r6, #4
|
||||
; CHECK-NEXT: ldrb r8, [r5], #4
|
||||
; CHECK-NEXT: ldrb r7, [r6, #-2]
|
||||
; CHECK-NEXT: smlabb r7, r7, r8, r2
|
||||
; CHECK-NEXT: str r7, [r4, #4]
|
||||
; CHECK-NEXT: adds r4, #16
|
||||
|
@ -740,17 +738,15 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(i8* nocapture readonl
|
|||
; CHECK-NEXT: smlabb r7, r7, r8, r2
|
||||
; CHECK-NEXT: str r7, [r4, #-8]
|
||||
; CHECK-NEXT: ldrb r8, [r5, #-2]
|
||||
; CHECK-NEXT: ldrb r7, [r6]
|
||||
; CHECK-NEXT: ldrb r7, [r6], #4
|
||||
; CHECK-NEXT: smlabb r7, r7, r8, r2
|
||||
; CHECK-NEXT: str r7, [r4, #-4]
|
||||
; CHECK-NEXT: ldrb r8, [r5, #-1]
|
||||
; CHECK-NEXT: ldrb r7, [r6, #1]
|
||||
; CHECK-NEXT: ldrb r7, [r6, #-3]
|
||||
; CHECK-NEXT: smlabb r7, r7, r8, r2
|
||||
; CHECK-NEXT: str r7, [r4]
|
||||
; CHECK-NEXT: ldrb.w r8, [r5]
|
||||
; CHECK-NEXT: adds r5, #4
|
||||
; CHECK-NEXT: ldrb r7, [r6, #2]
|
||||
; CHECK-NEXT: adds r6, #4
|
||||
; CHECK-NEXT: ldrb r8, [r5], #4
|
||||
; CHECK-NEXT: ldrb r7, [r6, #-2]
|
||||
; CHECK-NEXT: smlabb r7, r7, r8, r2
|
||||
; CHECK-NEXT: str r7, [r4, #4]
|
||||
; CHECK-NEXT: adds r4, #16
|
||||
|
|
|
@ -1147,31 +1147,30 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, half* noca
|
|||
; CHECK-NEXT: .LBB16_6: @ %for.body
|
||||
; CHECK-NEXT: @ Parent Loop BB16_4 Depth=1
|
||||
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
|
||||
; CHECK-NEXT: ldrh r0, [r6]
|
||||
; CHECK-NEXT: ldrh r0, [r6], #16
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r5]
|
||||
; CHECK-NEXT: adds r1, r5, #2
|
||||
; CHECK-NEXT: vfma.f16 q0, q1, r0
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r1]
|
||||
; CHECK-NEXT: ldrh r0, [r6, #2]
|
||||
; CHECK-NEXT: ldrh r0, [r6, #-14]
|
||||
; CHECK-NEXT: adds r1, r5, #6
|
||||
; CHECK-NEXT: vfma.f16 q0, q1, r0
|
||||
; CHECK-NEXT: ldrh r0, [r6, #4]
|
||||
; CHECK-NEXT: ldrh r0, [r6, #-12]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r5, #4]
|
||||
; CHECK-NEXT: vfma.f16 q0, q1, r0
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r1]
|
||||
; CHECK-NEXT: ldrh r0, [r6, #6]
|
||||
; CHECK-NEXT: ldrh r0, [r6, #-10]
|
||||
; CHECK-NEXT: add.w r1, r5, #10
|
||||
; CHECK-NEXT: vfma.f16 q0, q1, r0
|
||||
; CHECK-NEXT: ldrh r0, [r6, #8]
|
||||
; CHECK-NEXT: ldrh r0, [r6, #-8]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r5, #8]
|
||||
; CHECK-NEXT: vfma.f16 q0, q1, r0
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r1]
|
||||
; CHECK-NEXT: ldrh r0, [r6, #10]
|
||||
; CHECK-NEXT: ldrh r1, [r6, #14]
|
||||
; CHECK-NEXT: ldrh r0, [r6, #-6]
|
||||
; CHECK-NEXT: ldrh r1, [r6, #-2]
|
||||
; CHECK-NEXT: vfma.f16 q0, q1, r0
|
||||
; CHECK-NEXT: ldrh r0, [r6, #12]
|
||||
; CHECK-NEXT: ldrh r0, [r6, #-4]
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r5, #12]
|
||||
; CHECK-NEXT: adds r6, #16
|
||||
; CHECK-NEXT: vfma.f16 q0, q1, r0
|
||||
; CHECK-NEXT: add.w r0, r5, #14
|
||||
; CHECK-NEXT: vldrw.u32 q1, [r0]
|
||||
|
|
|
@ -106,14 +106,12 @@ define void @arm_cmplx_dot_prod_q15(i16* nocapture readonly %pSrcA, i16* nocaptu
|
|||
; CHECK-NEXT: wls lr, lr, .LBB1_7
|
||||
; CHECK-NEXT: .LBB1_5: @ %while.body11
|
||||
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: ldrsh.w r5, [r0, #2]
|
||||
; CHECK-NEXT: ldrsh.w r6, [r1]
|
||||
; CHECK-NEXT: ldrsh.w r9, [r0]
|
||||
; CHECK-NEXT: adds r0, #4
|
||||
; CHECK-NEXT: ldrsh.w r2, [r1, #2]
|
||||
; CHECK-NEXT: adds r1, #4
|
||||
; CHECK-NEXT: smlalbb r4, r11, r6, r5
|
||||
; CHECK-NEXT: ldrsh r9, [r0], #4
|
||||
; CHECK-NEXT: ldrsh r6, [r1], #4
|
||||
; CHECK-NEXT: ldrsh r5, [r0, #-2]
|
||||
; CHECK-NEXT: ldrsh r2, [r1, #-2]
|
||||
; CHECK-NEXT: smlalbb r12, r7, r6, r9
|
||||
; CHECK-NEXT: smlalbb r4, r11, r6, r5
|
||||
; CHECK-NEXT: muls r5, r2, r5
|
||||
; CHECK-NEXT: smlalbb r4, r11, r2, r9
|
||||
; CHECK-NEXT: subs.w r12, r12, r5
|
||||
|
|
|
@ -12,6 +12,8 @@
|
|||
define i32* @t2STRBi12(i32* %x, i32 %y) { unreachable }
|
||||
|
||||
define i32* @storedadd(i32* %x, i32 %y) { unreachable }
|
||||
define i32* @minsize2(i32* %x, i32 %y) minsize optsize { unreachable }
|
||||
define i32* @minsize3(i32* %x, i32 %y) minsize optsize { unreachable }
|
||||
|
||||
...
|
||||
---
|
||||
|
@ -57,9 +59,8 @@ body: |
|
|||
; CHECK-LABEL: name: t2LDRHi12
|
||||
; CHECK: liveins: $r0
|
||||
; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
|
||||
; CHECK: [[t2LDRHi12_:%[0-9]+]]:rgpr = t2LDRHi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 4)
|
||||
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: $r0 = COPY [[t2ADDri]]
|
||||
; CHECK: [[t2LDRH_POST:%[0-9]+]]:rgpr, [[t2LDRH_POST1:%[0-9]+]]:rgpr = t2LDRH_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4)
|
||||
; CHECK: $r0 = COPY [[t2LDRH_POST1]]
|
||||
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
|
||||
%0:gprnopc = COPY $r0
|
||||
%1:rgpr = t2LDRHi12 %0, 0, 14, $noreg :: (load 4, align 4)
|
||||
|
@ -84,9 +85,8 @@ body: |
|
|||
; CHECK-LABEL: name: t2LDRSHi12
|
||||
; CHECK: liveins: $r0
|
||||
; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
|
||||
; CHECK: [[t2LDRSHi12_:%[0-9]+]]:rgpr = t2LDRSHi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 4)
|
||||
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: $r0 = COPY [[t2ADDri]]
|
||||
; CHECK: [[t2LDRSH_POST:%[0-9]+]]:rgpr, [[t2LDRSH_POST1:%[0-9]+]]:rgpr = t2LDRSH_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4)
|
||||
; CHECK: $r0 = COPY [[t2LDRSH_POST1]]
|
||||
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
|
||||
%0:gprnopc = COPY $r0
|
||||
%1:rgpr = t2LDRSHi12 %0, 0, 14, $noreg :: (load 4, align 4)
|
||||
|
@ -111,9 +111,8 @@ body: |
|
|||
; CHECK-LABEL: name: t2LDRBi12
|
||||
; CHECK: liveins: $r0
|
||||
; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
|
||||
; CHECK: [[t2LDRBi12_:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 4)
|
||||
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: $r0 = COPY [[t2ADDri]]
|
||||
; CHECK: [[t2LDRB_POST:%[0-9]+]]:rgpr, [[t2LDRB_POST1:%[0-9]+]]:rgpr = t2LDRB_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4)
|
||||
; CHECK: $r0 = COPY [[t2LDRB_POST1]]
|
||||
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
|
||||
%0:gprnopc = COPY $r0
|
||||
%1:rgpr = t2LDRBi12 %0, 0, 14, $noreg :: (load 4, align 4)
|
||||
|
@ -138,9 +137,8 @@ body: |
|
|||
; CHECK-LABEL: name: t2LDRSBi12
|
||||
; CHECK: liveins: $r0
|
||||
; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
|
||||
; CHECK: [[t2LDRSBi12_:%[0-9]+]]:rgpr = t2LDRSBi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 4)
|
||||
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: $r0 = COPY [[t2ADDri]]
|
||||
; CHECK: [[t2LDRSB_POST:%[0-9]+]]:rgpr, [[t2LDRSB_POST1:%[0-9]+]]:rgpr = t2LDRSB_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4)
|
||||
; CHECK: $r0 = COPY [[t2LDRSB_POST1]]
|
||||
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
|
||||
%0:gprnopc = COPY $r0
|
||||
%1:rgpr = t2LDRSBi12 %0, 0, 14, $noreg :: (load 4, align 4)
|
||||
|
@ -197,9 +195,8 @@ body: |
|
|||
; CHECK: liveins: $r0, $r1
|
||||
; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r1
|
||||
; CHECK: t2STRHi12 [[COPY1]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 4)
|
||||
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: $r0 = COPY [[t2ADDri]]
|
||||
; CHECK: early-clobber %2:rgpr = t2STRH_POST [[COPY1]], [[COPY]], 32, 14 /* CC::al */, $noreg :: (store 4)
|
||||
; CHECK: $r0 = COPY %2
|
||||
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
|
||||
%0:gprnopc = COPY $r0
|
||||
%1:rgpr = COPY $r1
|
||||
|
@ -227,9 +224,8 @@ body: |
|
|||
; CHECK: liveins: $r0, $r1
|
||||
; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r1
|
||||
; CHECK: t2STRBi12 [[COPY1]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 4)
|
||||
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: $r0 = COPY [[t2ADDri]]
|
||||
; CHECK: early-clobber %2:rgpr = t2STRB_POST [[COPY1]], [[COPY]], 32, 14 /* CC::al */, $noreg :: (store 4)
|
||||
; CHECK: $r0 = COPY %2
|
||||
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
|
||||
%0:gprnopc = COPY $r0
|
||||
%1:rgpr = COPY $r1
|
||||
|
@ -265,3 +261,65 @@ body: |
|
|||
tBX_RET 14, $noreg, implicit $r0
|
||||
|
||||
...
|
||||
---
|
||||
name: minsize2
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: gprnopc, preferred-register: '' }
|
||||
- { id: 1, class: rgpr, preferred-register: '' }
|
||||
- { id: 2, class: rgpr, preferred-register: '' }
|
||||
- { id: 3, class: rgpr, preferred-register: '' }
|
||||
liveins:
|
||||
- { reg: '$r0', virtual-reg: '%0' }
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $r0
|
||||
|
||||
; CHECK-LABEL: name: minsize2
|
||||
; CHECK: liveins: $r0
|
||||
; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
|
||||
; CHECK: [[t2LDRB_POST:%[0-9]+]]:rgpr, [[t2LDRB_POST1:%[0-9]+]]:rgpr = t2LDRB_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4)
|
||||
; CHECK: [[t2LDRBi8_:%[0-9]+]]:rgpr = t2LDRBi8 [[t2LDRB_POST1]], -30, 14 /* CC::al */, $noreg :: (load 4)
|
||||
; CHECK: $r0 = COPY [[t2LDRB_POST1]]
|
||||
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
|
||||
%0:gprnopc = COPY $r0
|
||||
%1:rgpr = t2LDRBi12 %0, 0, 14, $noreg :: (load 4, align 4)
|
||||
%3:rgpr = t2LDRBi12 %0, 2, 14, $noreg :: (load 4, align 4)
|
||||
%2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg
|
||||
$r0 = COPY %2
|
||||
tBX_RET 14, $noreg, implicit $r0
|
||||
|
||||
...
|
||||
---
|
||||
name: minsize3
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: gprnopc, preferred-register: '' }
|
||||
- { id: 1, class: rgpr, preferred-register: '' }
|
||||
- { id: 2, class: rgpr, preferred-register: '' }
|
||||
- { id: 3, class: rgpr, preferred-register: '' }
|
||||
- { id: 4, class: rgpr, preferred-register: '' }
|
||||
liveins:
|
||||
- { reg: '$r0', virtual-reg: '%0' }
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $r0
|
||||
|
||||
; CHECK-LABEL: name: minsize3
|
||||
; CHECK: liveins: $r0
|
||||
; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
|
||||
; CHECK: [[t2LDRBi12_:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 4)
|
||||
; CHECK: [[t2LDRBi12_1:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 2, 14 /* CC::al */, $noreg :: (load 4)
|
||||
; CHECK: [[t2LDRBi12_2:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 4, 14 /* CC::al */, $noreg :: (load 4)
|
||||
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: $r0 = COPY [[t2ADDri]]
|
||||
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
|
||||
%0:gprnopc = COPY $r0
|
||||
%1:rgpr = t2LDRBi12 %0, 0, 14, $noreg :: (load 4, align 4)
|
||||
%3:rgpr = t2LDRBi12 %0, 2, 14, $noreg :: (load 4, align 4)
|
||||
%4:rgpr = t2LDRBi12 %0, 4, 14, $noreg :: (load 4, align 4)
|
||||
%2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg
|
||||
$r0 = COPY %2
|
||||
tBX_RET 14, $noreg, implicit $r0
|
||||
|
||||
...
|
||||
|
|
Loading…
Reference in New Issue