[ARM] Distribute post-inc for Thumb2 sign/zero extending loads/stores

This adds sign/zero extending scalar loads/stores to the MVE
instructions added in D77813, allowing us to create more post-inc
instructions. These are comparatively simple, compared to LDR/STR (which
may be better turned into an LDRD/LDM), but still require some additions
over MVE instructions. Because there are i12 and i8 variants of the
offset loads/stores dealing with different signs, we may need to convert
an i12 address to an i8 negative-offset instruction. t2LDRBi12 can also be
shrunk to a tLDRBi under the right conditions, so we need to be careful
with codesize too.

Differential Revision: https://reviews.llvm.org/D78625
This commit is contained in:
David Green 2020-08-01 14:01:18 +01:00
parent eb41f9edde
commit fd69df62ed
7 changed files with 261 additions and 70 deletions

View File

@ -829,6 +829,10 @@ inline bool isLegalAddressImm(unsigned Opcode, int Imm,
return std::abs(Imm) < (((1 << 7) * 2) - 1) && Imm % 2 == 0; return std::abs(Imm) < (((1 << 7) * 2) - 1) && Imm % 2 == 0;
case ARMII::AddrModeT2_i7s4: case ARMII::AddrModeT2_i7s4:
return std::abs(Imm) < (((1 << 7) * 4) - 1) && Imm % 4 == 0; return std::abs(Imm) < (((1 << 7) * 4) - 1) && Imm % 4 == 0;
case ARMII::AddrModeT2_i8:
return std::abs(Imm) < (((1 << 8) * 1) - 1);
case ARMII::AddrModeT2_i12:
return Imm >= 0 && Imm < (((1 << 12) * 1) - 1);
default: default:
llvm_unreachable("Unhandled Addressing mode"); llvm_unreachable("Unhandled Addressing mode");
} }

View File

@ -1382,9 +1382,27 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
case ARM::t2LDRi8: case ARM::t2LDRi8:
case ARM::t2LDRi12: case ARM::t2LDRi12:
return ARM::t2LDR_POST; return ARM::t2LDR_POST;
case ARM::t2LDRBi8:
case ARM::t2LDRBi12:
return ARM::t2LDRB_POST;
case ARM::t2LDRSBi8:
case ARM::t2LDRSBi12:
return ARM::t2LDRSB_POST;
case ARM::t2LDRHi8:
case ARM::t2LDRHi12:
return ARM::t2LDRH_POST;
case ARM::t2LDRSHi8:
case ARM::t2LDRSHi12:
return ARM::t2LDRSH_POST;
case ARM::t2STRi8: case ARM::t2STRi8:
case ARM::t2STRi12: case ARM::t2STRi12:
return ARM::t2STR_POST; return ARM::t2STR_POST;
case ARM::t2STRBi8:
case ARM::t2STRBi12:
return ARM::t2STRB_POST;
case ARM::t2STRHi8:
case ARM::t2STRHi12:
return ARM::t2STRH_POST;
case ARM::MVE_VLDRBS16: case ARM::MVE_VLDRBS16:
return ARM::MVE_VLDRBS16_post; return ARM::MVE_VLDRBS16_post;
@ -2539,11 +2557,94 @@ static int getBaseOperandIndex(MachineInstr &MI) {
case ARM::MVE_VSTRBU8: case ARM::MVE_VSTRBU8:
case ARM::MVE_VSTRHU16: case ARM::MVE_VSTRHU16:
case ARM::MVE_VSTRWU32: case ARM::MVE_VSTRWU32:
case ARM::t2LDRHi8:
case ARM::t2LDRHi12:
case ARM::t2LDRSHi8:
case ARM::t2LDRSHi12:
case ARM::t2LDRBi8:
case ARM::t2LDRBi12:
case ARM::t2LDRSBi8:
case ARM::t2LDRSBi12:
case ARM::t2STRBi8:
case ARM::t2STRBi12:
case ARM::t2STRHi8:
case ARM::t2STRHi12:
return 1; return 1;
} }
return -1; return -1;
} }
// Given a memory access Opcode, check that the give Imm would be a valid Offset
// for this instruction (same as isLegalAddressImm), Or if the instruction
// could be easily converted to one where that was valid. For example converting
// t2LDRi12 to t2LDRi8 for negative offsets. Works in conjunction with
// AdjustBaseAndOffset below.
static bool isLegalOrConvertableAddressImm(unsigned Opcode, int Imm,
const TargetInstrInfo *TII,
int &CodesizeEstimate) {
if (isLegalAddressImm(Opcode, Imm, TII))
return true;
// We can convert AddrModeT2_i12 to AddrModeT2_i8.
const MCInstrDesc &Desc = TII->get(Opcode);
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
switch (AddrMode) {
case ARMII::AddrModeT2_i12:
CodesizeEstimate += 1;
return std::abs(Imm) < (((1 << 8) * 1) - 1);
}
return false;
}
// Given an MI, adjust its address BaseReg to use NewBaseReg and its address
// offset by -Offset. The update always happens in place. If the new offset is
// not legal for the current opcode, MI is switched to the corresponding i8
// opcode (e.g. t2LDRBi12 -> t2LDRBi8) by replacing its instruction descriptor.
// MI is deliberately never erased: the caller keeps using the pointer after
// this call (it dumps the instruction for debug output), so building a
// replacement and deleting MI would leave it with a dangling pointer.
//
// Only opcodes accepted by isLegalOrConvertableAddressImm may reach the
// conversion path; anything else hits the llvm_unreachable below.
static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg,
                                int Offset, const TargetInstrInfo *TII) {
  unsigned BaseOp = getBaseOperandIndex(*MI);
  MI->getOperand(BaseOp).setReg(NewBaseReg);
  int OldOffset = MI->getOperand(BaseOp + 1).getImm();
  int NewOffset = OldOffset - Offset;

  // Common case: the adjusted offset still fits the existing opcode.
  if (isLegalAddressImm(MI->getOpcode(), NewOffset, TII)) {
    MI->getOperand(BaseOp + 1).setImm(NewOffset);
    return;
  }

  // Otherwise pick the i8 variant of the same load/store.
  unsigned ConvOpcode;
  switch (MI->getOpcode()) {
  case ARM::t2LDRHi12:
    ConvOpcode = ARM::t2LDRHi8;
    break;
  case ARM::t2LDRSHi12:
    ConvOpcode = ARM::t2LDRSHi8;
    break;
  case ARM::t2LDRBi12:
    ConvOpcode = ARM::t2LDRBi8;
    break;
  case ARM::t2LDRSBi12:
    ConvOpcode = ARM::t2LDRSBi8;
    break;
  case ARM::t2STRHi12:
    ConvOpcode = ARM::t2STRHi8;
    break;
  case ARM::t2STRBi12:
    ConvOpcode = ARM::t2STRBi8;
    break;
  default:
    llvm_unreachable("Unhandled convertable opcode");
  }
  assert(isLegalAddressImm(ConvOpcode, NewOffset, TII) &&
         "Illegal Address Immediate after convert!");

  // The i12 and i8 forms are expected to have the same operand list (value
  // register, base register, immediate, predicate operands), so only the
  // descriptor and the immediate need to change. Memory operands and the
  // debug location stay attached to MI as they are.
  MI->setDesc(TII->get(ConvOpcode));
  MI->getOperand(BaseOp + 1).setImm(NewOffset);
}
static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset, static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset,
Register NewReg, Register NewReg,
const TargetInstrInfo *TII, const TargetInstrInfo *TII,
@ -2562,14 +2663,43 @@ static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset,
TRC = TII->getRegClass(MCID, 2, TRI, *MF); TRC = TII->getRegClass(MCID, 2, TRI, *MF);
MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC); MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC);
return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID) unsigned AddrMode = (MCID.TSFlags & ARMII::AddrModeMask);
.addReg(NewReg, RegState::Define) switch (AddrMode) {
.add(MI->getOperand(0)) case ARMII::AddrModeT2_i7:
.add(MI->getOperand(1)) case ARMII::AddrModeT2_i7s2:
.addImm(Offset) case ARMII::AddrModeT2_i7s4:
.add(MI->getOperand(3)) // Any MVE load/store
.add(MI->getOperand(4)) return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
.cloneMemRefs(*MI); .addReg(NewReg, RegState::Define)
.add(MI->getOperand(0))
.add(MI->getOperand(1))
.addImm(Offset)
.add(MI->getOperand(3))
.add(MI->getOperand(4))
.cloneMemRefs(*MI);
case ARMII::AddrModeT2_i8:
if (MI->mayLoad()) {
return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
.add(MI->getOperand(0))
.addReg(NewReg, RegState::Define)
.add(MI->getOperand(1))
.addImm(Offset)
.add(MI->getOperand(3))
.add(MI->getOperand(4))
.cloneMemRefs(*MI);
} else {
return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
.addReg(NewReg, RegState::Define)
.add(MI->getOperand(0))
.add(MI->getOperand(1))
.addImm(Offset)
.add(MI->getOperand(3))
.add(MI->getOperand(4))
.cloneMemRefs(*MI);
}
default:
llvm_unreachable("Unhandled createPostIncLoadStore");
}
} }
// Given a Base Register, optimise the load/store uses to attempt to create more // Given a Base Register, optimise the load/store uses to attempt to create more
@ -2589,7 +2719,7 @@ bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
// An increment that can be folded in // An increment that can be folded in
MachineInstr *Increment = nullptr; MachineInstr *Increment = nullptr;
// Other accesses after BaseAccess that will need to be updated to use the // Other accesses after BaseAccess that will need to be updated to use the
// postinc value // postinc value.
SmallPtrSet<MachineInstr *, 8> OtherAccesses; SmallPtrSet<MachineInstr *, 8> OtherAccesses;
for (auto &Use : MRI->use_nodbg_instructions(Base)) { for (auto &Use : MRI->use_nodbg_instructions(Base)) {
if (!Increment && getAddSubImmediate(Use) != 0) { if (!Increment && getAddSubImmediate(Use) != 0) {
@ -2643,14 +2773,20 @@ bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
// other offsets after the BaseAccess. We rely on either // other offsets after the BaseAccess. We rely on either
// dominates(BaseAccess, OtherAccess) or dominates(OtherAccess, BaseAccess) // dominates(BaseAccess, OtherAccess) or dominates(OtherAccess, BaseAccess)
// to keep things simple. // to keep things simple.
// This also adds a simple codesize metric, to detect if an instruction (like
// t2LDRBi12) which can often be shrunk to a thumb1 instruction (tLDRBi)
// cannot because it is converted to something else (t2LDRBi8). We start this
// at -1 for the gain from removing the increment.
SmallPtrSet<MachineInstr *, 4> SuccessorAccesses; SmallPtrSet<MachineInstr *, 4> SuccessorAccesses;
int CodesizeEstimate = -1;
for (auto *Use : OtherAccesses) { for (auto *Use : OtherAccesses) {
if (DT->dominates(BaseAccess, Use)) { if (DT->dominates(BaseAccess, Use)) {
SuccessorAccesses.insert(Use); SuccessorAccesses.insert(Use);
unsigned BaseOp = getBaseOperandIndex(*Use); unsigned BaseOp = getBaseOperandIndex(*Use);
if (!isLegalAddressImm( if (!isLegalOrConvertableAddressImm(Use->getOpcode(),
Use->getOpcode(), Use->getOperand(BaseOp + 1).getImm() -
Use->getOperand(BaseOp + 1).getImm() - IncrementOffset, TII)) { IncrementOffset,
TII, CodesizeEstimate)) {
LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on use\n"); LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on use\n");
return false; return false;
} }
@ -2660,6 +2796,10 @@ bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
return false; return false;
} }
} }
if (STI->hasMinSize() && CodesizeEstimate > 0) {
LLVM_DEBUG(dbgs() << " Expected to grow instructions under minsize\n");
return false;
}
// Replace BaseAccess with a post inc // Replace BaseAccess with a post inc
LLVM_DEBUG(dbgs() << "Changing: "; BaseAccess->dump()); LLVM_DEBUG(dbgs() << "Changing: "; BaseAccess->dump());
@ -2674,10 +2814,7 @@ bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
for (auto *Use : SuccessorAccesses) { for (auto *Use : SuccessorAccesses) {
LLVM_DEBUG(dbgs() << "Changing: "; Use->dump()); LLVM_DEBUG(dbgs() << "Changing: "; Use->dump());
unsigned BaseOp = getBaseOperandIndex(*Use); AdjustBaseAndOffset(Use, NewBaseReg, IncrementOffset, TII);
Use->getOperand(BaseOp).setReg(NewBaseReg);
int OldOffset = Use->getOperand(BaseOp + 1).getImm();
Use->getOperand(BaseOp + 1).setImm(OldOffset - IncrementOffset);
LLVM_DEBUG(dbgs() << " To : "; Use->dump()); LLVM_DEBUG(dbgs() << " To : "; Use->dump());
} }

View File

@ -1798,20 +1798,20 @@ define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* n
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrsh.w r4, [r3, #2] ; CHECK-NEXT: ldrsh.w r4, [r3, #2]
; CHECK-NEXT: vldr.16 s2, [r2, #2] ; CHECK-NEXT: vldr.16 s2, [r2, #2]
; CHECK-NEXT: ldrsh r5, [r3, #-2]
; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: add.w r12, r12, #4
; CHECK-NEXT: vmov s4, r4 ; CHECK-NEXT: vmov s4, r4
; CHECK-NEXT: ldrsh.w r4, [r3] ; CHECK-NEXT: ldrsh r4, [r3], #8
; CHECK-NEXT: vcvt.f16.s32 s4, s4 ; CHECK-NEXT: vcvt.f16.s32 s4, s4
; CHECK-NEXT: vmov s8, r5 ; CHECK-NEXT: ldrsh r5, [r3, #-10]
; CHECK-NEXT: vmul.f16 s2, s2, s4 ; CHECK-NEXT: vmul.f16 s2, s2, s4
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vmov s6, r4 ; CHECK-NEXT: vmov s6, r4
; CHECK-NEXT: ldrsh r4, [r3, #-4] ; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vcvt.f16.s32 s6, s6 ; CHECK-NEXT: vcvt.f16.s32 s6, s6
; CHECK-NEXT: vcvt.f16.s32 s8, s8 ; CHECK-NEXT: ldrsh r4, [r3, #-12]
; CHECK-NEXT: vmul.f16 s4, s4, s6 ; CHECK-NEXT: vmul.f16 s4, s4, s6
; CHECK-NEXT: vmov s8, r5
; CHECK-NEXT: vldr.16 s6, [r2, #-2] ; CHECK-NEXT: vldr.16 s6, [r2, #-2]
; CHECK-NEXT: vcvt.f16.s32 s8, s8
; CHECK-NEXT: vmov s10, r4 ; CHECK-NEXT: vmov s10, r4
; CHECK-NEXT: vcvtb.f32.f16 s4, s4 ; CHECK-NEXT: vcvtb.f32.f16 s4, s4
; CHECK-NEXT: vmul.f16 s6, s6, s8 ; CHECK-NEXT: vmul.f16 s6, s6, s8
@ -1821,9 +1821,8 @@ define arm_aapcs_vfpcc float @half_short_mac(half* nocapture readonly %a, i16* n
; CHECK-NEXT: vmul.f16 s8, s8, s10 ; CHECK-NEXT: vmul.f16 s8, s8, s10
; CHECK-NEXT: vcvtb.f32.f16 s2, s2 ; CHECK-NEXT: vcvtb.f32.f16 s2, s2
; CHECK-NEXT: vcvtb.f32.f16 s8, s8 ; CHECK-NEXT: vcvtb.f32.f16 s8, s8
; CHECK-NEXT: adds r3, #8
; CHECK-NEXT: vadd.f32 s0, s0, s8
; CHECK-NEXT: adds r2, #8 ; CHECK-NEXT: adds r2, #8
; CHECK-NEXT: vadd.f32 s0, s0, s8
; CHECK-NEXT: vadd.f32 s0, s0, s6 ; CHECK-NEXT: vadd.f32 s0, s0, s6
; CHECK-NEXT: vadd.f32 s0, s0, s4 ; CHECK-NEXT: vadd.f32 s0, s0, s4
; CHECK-NEXT: vadd.f32 s0, s0, s2 ; CHECK-NEXT: vadd.f32 s0, s0, s2

View File

@ -437,17 +437,15 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(i8* nocapture readonly
; CHECK-NEXT: smlabb r7, r7, r8, r2 ; CHECK-NEXT: smlabb r7, r7, r8, r2
; CHECK-NEXT: str r7, [r4, #-8] ; CHECK-NEXT: str r7, [r4, #-8]
; CHECK-NEXT: ldrb r8, [r5, #-2] ; CHECK-NEXT: ldrb r8, [r5, #-2]
; CHECK-NEXT: ldrb r7, [r6] ; CHECK-NEXT: ldrb r7, [r6], #4
; CHECK-NEXT: smlabb r7, r7, r8, r2 ; CHECK-NEXT: smlabb r7, r7, r8, r2
; CHECK-NEXT: str r7, [r4, #-4] ; CHECK-NEXT: str r7, [r4, #-4]
; CHECK-NEXT: ldrb r8, [r5, #-1] ; CHECK-NEXT: ldrb r8, [r5, #-1]
; CHECK-NEXT: ldrb r7, [r6, #1] ; CHECK-NEXT: ldrb r7, [r6, #-3]
; CHECK-NEXT: smlabb r7, r7, r8, r2 ; CHECK-NEXT: smlabb r7, r7, r8, r2
; CHECK-NEXT: str r7, [r4] ; CHECK-NEXT: str r7, [r4]
; CHECK-NEXT: ldrb.w r8, [r5] ; CHECK-NEXT: ldrb r8, [r5], #4
; CHECK-NEXT: adds r5, #4 ; CHECK-NEXT: ldrb r7, [r6, #-2]
; CHECK-NEXT: ldrb r7, [r6, #2]
; CHECK-NEXT: adds r6, #4
; CHECK-NEXT: smlabb r7, r7, r8, r2 ; CHECK-NEXT: smlabb r7, r7, r8, r2
; CHECK-NEXT: str r7, [r4, #4] ; CHECK-NEXT: str r7, [r4, #4]
; CHECK-NEXT: adds r4, #16 ; CHECK-NEXT: adds r4, #16
@ -740,17 +738,15 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(i8* nocapture readonl
; CHECK-NEXT: smlabb r7, r7, r8, r2 ; CHECK-NEXT: smlabb r7, r7, r8, r2
; CHECK-NEXT: str r7, [r4, #-8] ; CHECK-NEXT: str r7, [r4, #-8]
; CHECK-NEXT: ldrb r8, [r5, #-2] ; CHECK-NEXT: ldrb r8, [r5, #-2]
; CHECK-NEXT: ldrb r7, [r6] ; CHECK-NEXT: ldrb r7, [r6], #4
; CHECK-NEXT: smlabb r7, r7, r8, r2 ; CHECK-NEXT: smlabb r7, r7, r8, r2
; CHECK-NEXT: str r7, [r4, #-4] ; CHECK-NEXT: str r7, [r4, #-4]
; CHECK-NEXT: ldrb r8, [r5, #-1] ; CHECK-NEXT: ldrb r8, [r5, #-1]
; CHECK-NEXT: ldrb r7, [r6, #1] ; CHECK-NEXT: ldrb r7, [r6, #-3]
; CHECK-NEXT: smlabb r7, r7, r8, r2 ; CHECK-NEXT: smlabb r7, r7, r8, r2
; CHECK-NEXT: str r7, [r4] ; CHECK-NEXT: str r7, [r4]
; CHECK-NEXT: ldrb.w r8, [r5] ; CHECK-NEXT: ldrb r8, [r5], #4
; CHECK-NEXT: adds r5, #4 ; CHECK-NEXT: ldrb r7, [r6, #-2]
; CHECK-NEXT: ldrb r7, [r6, #2]
; CHECK-NEXT: adds r6, #4
; CHECK-NEXT: smlabb r7, r7, r8, r2 ; CHECK-NEXT: smlabb r7, r7, r8, r2
; CHECK-NEXT: str r7, [r4, #4] ; CHECK-NEXT: str r7, [r4, #4]
; CHECK-NEXT: adds r4, #16 ; CHECK-NEXT: adds r4, #16

View File

@ -1147,31 +1147,30 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, half* noca
; CHECK-NEXT: .LBB16_6: @ %for.body ; CHECK-NEXT: .LBB16_6: @ %for.body
; CHECK-NEXT: @ Parent Loop BB16_4 Depth=1 ; CHECK-NEXT: @ Parent Loop BB16_4 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldrh r0, [r6] ; CHECK-NEXT: ldrh r0, [r6], #16
; CHECK-NEXT: vldrw.u32 q1, [r5] ; CHECK-NEXT: vldrw.u32 q1, [r5]
; CHECK-NEXT: adds r1, r5, #2 ; CHECK-NEXT: adds r1, r5, #2
; CHECK-NEXT: vfma.f16 q0, q1, r0 ; CHECK-NEXT: vfma.f16 q0, q1, r0
; CHECK-NEXT: vldrw.u32 q1, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r1]
; CHECK-NEXT: ldrh r0, [r6, #2] ; CHECK-NEXT: ldrh r0, [r6, #-14]
; CHECK-NEXT: adds r1, r5, #6 ; CHECK-NEXT: adds r1, r5, #6
; CHECK-NEXT: vfma.f16 q0, q1, r0 ; CHECK-NEXT: vfma.f16 q0, q1, r0
; CHECK-NEXT: ldrh r0, [r6, #4] ; CHECK-NEXT: ldrh r0, [r6, #-12]
; CHECK-NEXT: vldrw.u32 q1, [r5, #4] ; CHECK-NEXT: vldrw.u32 q1, [r5, #4]
; CHECK-NEXT: vfma.f16 q0, q1, r0 ; CHECK-NEXT: vfma.f16 q0, q1, r0
; CHECK-NEXT: vldrw.u32 q1, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r1]
; CHECK-NEXT: ldrh r0, [r6, #6] ; CHECK-NEXT: ldrh r0, [r6, #-10]
; CHECK-NEXT: add.w r1, r5, #10 ; CHECK-NEXT: add.w r1, r5, #10
; CHECK-NEXT: vfma.f16 q0, q1, r0 ; CHECK-NEXT: vfma.f16 q0, q1, r0
; CHECK-NEXT: ldrh r0, [r6, #8] ; CHECK-NEXT: ldrh r0, [r6, #-8]
; CHECK-NEXT: vldrw.u32 q1, [r5, #8] ; CHECK-NEXT: vldrw.u32 q1, [r5, #8]
; CHECK-NEXT: vfma.f16 q0, q1, r0 ; CHECK-NEXT: vfma.f16 q0, q1, r0
; CHECK-NEXT: vldrw.u32 q1, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r1]
; CHECK-NEXT: ldrh r0, [r6, #10] ; CHECK-NEXT: ldrh r0, [r6, #-6]
; CHECK-NEXT: ldrh r1, [r6, #14] ; CHECK-NEXT: ldrh r1, [r6, #-2]
; CHECK-NEXT: vfma.f16 q0, q1, r0 ; CHECK-NEXT: vfma.f16 q0, q1, r0
; CHECK-NEXT: ldrh r0, [r6, #12] ; CHECK-NEXT: ldrh r0, [r6, #-4]
; CHECK-NEXT: vldrw.u32 q1, [r5, #12] ; CHECK-NEXT: vldrw.u32 q1, [r5, #12]
; CHECK-NEXT: adds r6, #16
; CHECK-NEXT: vfma.f16 q0, q1, r0 ; CHECK-NEXT: vfma.f16 q0, q1, r0
; CHECK-NEXT: add.w r0, r5, #14 ; CHECK-NEXT: add.w r0, r5, #14
; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vldrw.u32 q1, [r0]

View File

@ -106,14 +106,12 @@ define void @arm_cmplx_dot_prod_q15(i16* nocapture readonly %pSrcA, i16* nocaptu
; CHECK-NEXT: wls lr, lr, .LBB1_7 ; CHECK-NEXT: wls lr, lr, .LBB1_7
; CHECK-NEXT: .LBB1_5: @ %while.body11 ; CHECK-NEXT: .LBB1_5: @ %while.body11
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrsh.w r5, [r0, #2] ; CHECK-NEXT: ldrsh r9, [r0], #4
; CHECK-NEXT: ldrsh.w r6, [r1] ; CHECK-NEXT: ldrsh r6, [r1], #4
; CHECK-NEXT: ldrsh.w r9, [r0] ; CHECK-NEXT: ldrsh r5, [r0, #-2]
; CHECK-NEXT: adds r0, #4 ; CHECK-NEXT: ldrsh r2, [r1, #-2]
; CHECK-NEXT: ldrsh.w r2, [r1, #2]
; CHECK-NEXT: adds r1, #4
; CHECK-NEXT: smlalbb r4, r11, r6, r5
; CHECK-NEXT: smlalbb r12, r7, r6, r9 ; CHECK-NEXT: smlalbb r12, r7, r6, r9
; CHECK-NEXT: smlalbb r4, r11, r6, r5
; CHECK-NEXT: muls r5, r2, r5 ; CHECK-NEXT: muls r5, r2, r5
; CHECK-NEXT: smlalbb r4, r11, r2, r9 ; CHECK-NEXT: smlalbb r4, r11, r2, r9
; CHECK-NEXT: subs.w r12, r12, r5 ; CHECK-NEXT: subs.w r12, r12, r5

View File

@ -12,6 +12,8 @@
define i32* @t2STRBi12(i32* %x, i32 %y) { unreachable } define i32* @t2STRBi12(i32* %x, i32 %y) { unreachable }
define i32* @storedadd(i32* %x, i32 %y) { unreachable } define i32* @storedadd(i32* %x, i32 %y) { unreachable }
define i32* @minsize2(i32* %x, i32 %y) minsize optsize { unreachable }
define i32* @minsize3(i32* %x, i32 %y) minsize optsize { unreachable }
... ...
--- ---
@ -57,9 +59,8 @@ body: |
; CHECK-LABEL: name: t2LDRHi12 ; CHECK-LABEL: name: t2LDRHi12
; CHECK: liveins: $r0 ; CHECK: liveins: $r0
; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
; CHECK: [[t2LDRHi12_:%[0-9]+]]:rgpr = t2LDRHi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 4) ; CHECK: [[t2LDRH_POST:%[0-9]+]]:rgpr, [[t2LDRH_POST1:%[0-9]+]]:rgpr = t2LDRH_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4)
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r0 = COPY [[t2LDRH_POST1]]
; CHECK: $r0 = COPY [[t2ADDri]]
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
%0:gprnopc = COPY $r0 %0:gprnopc = COPY $r0
%1:rgpr = t2LDRHi12 %0, 0, 14, $noreg :: (load 4, align 4) %1:rgpr = t2LDRHi12 %0, 0, 14, $noreg :: (load 4, align 4)
@ -84,9 +85,8 @@ body: |
; CHECK-LABEL: name: t2LDRSHi12 ; CHECK-LABEL: name: t2LDRSHi12
; CHECK: liveins: $r0 ; CHECK: liveins: $r0
; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
; CHECK: [[t2LDRSHi12_:%[0-9]+]]:rgpr = t2LDRSHi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 4) ; CHECK: [[t2LDRSH_POST:%[0-9]+]]:rgpr, [[t2LDRSH_POST1:%[0-9]+]]:rgpr = t2LDRSH_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4)
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r0 = COPY [[t2LDRSH_POST1]]
; CHECK: $r0 = COPY [[t2ADDri]]
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
%0:gprnopc = COPY $r0 %0:gprnopc = COPY $r0
%1:rgpr = t2LDRSHi12 %0, 0, 14, $noreg :: (load 4, align 4) %1:rgpr = t2LDRSHi12 %0, 0, 14, $noreg :: (load 4, align 4)
@ -111,9 +111,8 @@ body: |
; CHECK-LABEL: name: t2LDRBi12 ; CHECK-LABEL: name: t2LDRBi12
; CHECK: liveins: $r0 ; CHECK: liveins: $r0
; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
; CHECK: [[t2LDRBi12_:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 4) ; CHECK: [[t2LDRB_POST:%[0-9]+]]:rgpr, [[t2LDRB_POST1:%[0-9]+]]:rgpr = t2LDRB_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4)
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r0 = COPY [[t2LDRB_POST1]]
; CHECK: $r0 = COPY [[t2ADDri]]
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
%0:gprnopc = COPY $r0 %0:gprnopc = COPY $r0
%1:rgpr = t2LDRBi12 %0, 0, 14, $noreg :: (load 4, align 4) %1:rgpr = t2LDRBi12 %0, 0, 14, $noreg :: (load 4, align 4)
@ -138,9 +137,8 @@ body: |
; CHECK-LABEL: name: t2LDRSBi12 ; CHECK-LABEL: name: t2LDRSBi12
; CHECK: liveins: $r0 ; CHECK: liveins: $r0
; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
; CHECK: [[t2LDRSBi12_:%[0-9]+]]:rgpr = t2LDRSBi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 4) ; CHECK: [[t2LDRSB_POST:%[0-9]+]]:rgpr, [[t2LDRSB_POST1:%[0-9]+]]:rgpr = t2LDRSB_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4)
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r0 = COPY [[t2LDRSB_POST1]]
; CHECK: $r0 = COPY [[t2ADDri]]
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
%0:gprnopc = COPY $r0 %0:gprnopc = COPY $r0
%1:rgpr = t2LDRSBi12 %0, 0, 14, $noreg :: (load 4, align 4) %1:rgpr = t2LDRSBi12 %0, 0, 14, $noreg :: (load 4, align 4)
@ -197,9 +195,8 @@ body: |
; CHECK: liveins: $r0, $r1 ; CHECK: liveins: $r0, $r1
; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r1 ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r1
; CHECK: t2STRHi12 [[COPY1]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 4) ; CHECK: early-clobber %2:rgpr = t2STRH_POST [[COPY1]], [[COPY]], 32, 14 /* CC::al */, $noreg :: (store 4)
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r0 = COPY %2
; CHECK: $r0 = COPY [[t2ADDri]]
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
%0:gprnopc = COPY $r0 %0:gprnopc = COPY $r0
%1:rgpr = COPY $r1 %1:rgpr = COPY $r1
@ -227,9 +224,8 @@ body: |
; CHECK: liveins: $r0, $r1 ; CHECK: liveins: $r0, $r1
; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0 ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r1 ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r1
; CHECK: t2STRBi12 [[COPY1]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store 4) ; CHECK: early-clobber %2:rgpr = t2STRB_POST [[COPY1]], [[COPY]], 32, 14 /* CC::al */, $noreg :: (store 4)
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r0 = COPY %2
; CHECK: $r0 = COPY [[t2ADDri]]
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
%0:gprnopc = COPY $r0 %0:gprnopc = COPY $r0
%1:rgpr = COPY $r1 %1:rgpr = COPY $r1
@ -265,3 +261,65 @@ body: |
tBX_RET 14, $noreg, implicit $r0 tBX_RET 14, $noreg, implicit $r0
... ...
---
name: minsize2
tracksRegLiveness: true
registers:
- { id: 0, class: gprnopc, preferred-register: '' }
- { id: 1, class: rgpr, preferred-register: '' }
- { id: 2, class: rgpr, preferred-register: '' }
- { id: 3, class: rgpr, preferred-register: '' }
liveins:
- { reg: '$r0', virtual-reg: '%0' }
body: |
bb.0:
liveins: $r0
; CHECK-LABEL: name: minsize2
; CHECK: liveins: $r0
; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
; CHECK: [[t2LDRB_POST:%[0-9]+]]:rgpr, [[t2LDRB_POST1:%[0-9]+]]:rgpr = t2LDRB_POST [[COPY]], 32, 14 /* CC::al */, $noreg :: (load 4)
; CHECK: [[t2LDRBi8_:%[0-9]+]]:rgpr = t2LDRBi8 [[t2LDRB_POST1]], -30, 14 /* CC::al */, $noreg :: (load 4)
; CHECK: $r0 = COPY [[t2LDRB_POST1]]
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
%0:gprnopc = COPY $r0
%1:rgpr = t2LDRBi12 %0, 0, 14, $noreg :: (load 4, align 4)
%3:rgpr = t2LDRBi12 %0, 2, 14, $noreg :: (load 4, align 4)
%2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg
$r0 = COPY %2
tBX_RET 14, $noreg, implicit $r0
...
---
name: minsize3
tracksRegLiveness: true
registers:
- { id: 0, class: gprnopc, preferred-register: '' }
- { id: 1, class: rgpr, preferred-register: '' }
- { id: 2, class: rgpr, preferred-register: '' }
- { id: 3, class: rgpr, preferred-register: '' }
- { id: 4, class: rgpr, preferred-register: '' }
liveins:
- { reg: '$r0', virtual-reg: '%0' }
body: |
bb.0:
liveins: $r0
; CHECK-LABEL: name: minsize3
; CHECK: liveins: $r0
; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
; CHECK: [[t2LDRBi12_:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load 4)
; CHECK: [[t2LDRBi12_1:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 2, 14 /* CC::al */, $noreg :: (load 4)
; CHECK: [[t2LDRBi12_2:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 4, 14 /* CC::al */, $noreg :: (load 4)
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg
; CHECK: $r0 = COPY [[t2ADDri]]
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
%0:gprnopc = COPY $r0
%1:rgpr = t2LDRBi12 %0, 0, 14, $noreg :: (load 4, align 4)
%3:rgpr = t2LDRBi12 %0, 2, 14, $noreg :: (load 4, align 4)
%4:rgpr = t2LDRBi12 %0, 4, 14, $noreg :: (load 4, align 4)
%2:rgpr = nuw t2ADDri %0, 32, 14, $noreg, $noreg
$r0 = COPY %2
tBX_RET 14, $noreg, implicit $r0
...