[ARM] Ensure CountReg definition dominates InsertPt when creating t2DoLoopStartTP

Of course there was something missing: in this case, a check that the def
of the count register we are adding to a t2DoLoopStartTP dominates the
insertion point.

In the future, when we remove some of the intervening COPYs, the
t2DoLoopStartTP will always become the last instruction in the block,
preventing this from happening. In the meantime we need to check that the
instructions are created in a sensible order.

Differential Revision: https://reviews.llvm.org/D91287
David Green 2020-11-12 13:47:46 +00:00
parent ec63dfe368
commit 11dee2eae2
2 changed files with 219 additions and 0 deletions

@@ -230,6 +230,11 @@ bool MVEVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
     if ((InsertPt != MBB->end() && !DT->dominates(&*InsertPt, &Use)) ||
         !DT->dominates(ML->getHeader(), Use.getParent()))
       InsertPt = &Use;
 
+  if (InsertPt != MBB->end() &&
+      !DT->dominates(MRI->getVRegDef(CountReg), &*InsertPt)) {
+    LLVM_DEBUG(dbgs() << " InsertPt does not dominate CountReg!\n");
+    return false;
+  }
   MachineInstrBuilder MI = BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(),
                                    TII->get(ARM::t2DoLoopStartTP))
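For reference, here is a minimal, self-contained sketch of the same dominance guard in isolation, using LLVM's MachineDominatorTree and MachineRegisterInfo APIs. The free-standing helper and its name are illustrative only and not part of the patch; the variable names follow the hunk above.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"

using namespace llvm;

// Hypothetical helper (not in-tree): returns true if the vreg that will feed
// the t2DoLoopStartTP's element-count operand is defined at a point that
// dominates the chosen insertion point, mirroring the guard added above.
static bool countRegDefDominates(Register CountReg, MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator InsertPt,
                                 const MachineRegisterInfo &MRI,
                                 const MachineDominatorTree &DT) {
  // Inserting at end() means nothing in this block follows the def, so the
  // ordering is trivially fine; this matches the InsertPt != end()
  // precondition on the check in the patch.
  if (InsertPt == MBB.end())
    return true;
  const MachineInstr *Def = MRI.getVRegDef(CountReg);
  // If the def does not dominate the insertion point, building the
  // t2DoLoopStartTP there would use CountReg before it is defined.
  return Def && DT.dominates(Def, &*InsertPt);
}

When this returns false, the conversion simply bails out, as in the hunk above; once the intervening COPYs are removed and the t2DoLoopStartTP is always created last in the block, the check should never fire.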

@@ -0,0 +1,214 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+lob -run-pass=arm-mve-vpt-opts %s -verify-machineinstrs -o - | FileCheck %s
--- |
define i32 @test(i16* nocapture readonly %x, i16* nocapture readonly %y, i32 %n) {
entry:
%cmp10 = icmp sgt i32 %n, 0
%0 = add i32 %n, 7
%1 = lshr i32 %0, 3
%2 = shl nuw i32 %1, 3
%3 = add i32 %2, -8
%4 = lshr i32 %3, 3
%5 = add nuw nsw i32 %4, 1
br i1 %cmp10, label %vector.ph, label %for.cond.cleanup
vector.ph: ; preds = %entry
%6 = call i32 @llvm.start.loop.iterations.i32(i32 %5)
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%lsr.iv3 = phi i16* [ %scevgep4, %vector.body ], [ %x, %vector.ph ]
%lsr.iv1 = phi i16* [ %scevgep, %vector.body ], [ %y, %vector.ph ]
%vec.phi = phi i32 [ 0, %vector.ph ], [ %16, %vector.body ]
%7 = phi i32 [ %6, %vector.ph ], [ %17, %vector.body ]
%8 = phi i32 [ %n, %vector.ph ], [ %10, %vector.body ]
%lsr.iv12 = bitcast i16* %lsr.iv1 to <8 x i16>*
%lsr.iv35 = bitcast i16* %lsr.iv3 to <8 x i16>*
%9 = call <8 x i1> @llvm.arm.mve.vctp16(i32 %8)
%10 = sub i32 %8, 8
%wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv35, i32 2, <8 x i1> %9, <8 x i16> undef)
%11 = sext <8 x i16> %wide.masked.load to <8 x i32>
%wide.masked.load13 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %lsr.iv12, i32 2, <8 x i1> %9, <8 x i16> undef)
%12 = sext <8 x i16> %wide.masked.load13 to <8 x i32>
%13 = mul nsw <8 x i32> %12, %11
%14 = select <8 x i1> %9, <8 x i32> %13, <8 x i32> zeroinitializer
%15 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %14)
%16 = add i32 %15, %vec.phi
%scevgep = getelementptr i16, i16* %lsr.iv1, i32 8
%scevgep4 = getelementptr i16, i16* %lsr.iv3, i32 8
%17 = call i32 @llvm.loop.decrement.reg.i32(i32 %7, i32 1)
%18 = icmp ne i32 %17, 0
br i1 %18, label %vector.body, label %for.cond.cleanup
for.cond.cleanup: ; preds = %vector.body, %entry
%s.0.lcssa = phi i32 [ 0, %entry ], [ %16, %vector.body ]
ret i32 %s.0.lcssa
}
declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>)
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
declare i32 @llvm.start.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32(i32, i32)
declare <8 x i1> @llvm.arm.mve.vctp16(i32)
...
---
name: test
alignment: 2
tracksRegLiveness: true
registers:
- { id: 0, class: rgpr, preferred-register: '' }
- { id: 1, class: gpr, preferred-register: '' }
- { id: 2, class: gprnopc, preferred-register: '' }
- { id: 3, class: gprnopc, preferred-register: '' }
- { id: 4, class: tgpreven, preferred-register: '' }
- { id: 5, class: gprlr, preferred-register: '' }
- { id: 6, class: rgpr, preferred-register: '' }
- { id: 7, class: gpr, preferred-register: '' }
- { id: 8, class: gpr, preferred-register: '' }
- { id: 9, class: gpr, preferred-register: '' }
- { id: 10, class: gpr, preferred-register: '' }
- { id: 11, class: gpr, preferred-register: '' }
- { id: 12, class: gpr, preferred-register: '' }
- { id: 13, class: gpr, preferred-register: '' }
- { id: 14, class: gpr, preferred-register: '' }
- { id: 15, class: gprnopc, preferred-register: '' }
- { id: 16, class: gpr, preferred-register: '' }
- { id: 17, class: rgpr, preferred-register: '' }
- { id: 18, class: rgpr, preferred-register: '' }
- { id: 19, class: rgpr, preferred-register: '' }
- { id: 20, class: rgpr, preferred-register: '' }
- { id: 21, class: gprnopc, preferred-register: '' }
- { id: 22, class: rgpr, preferred-register: '' }
- { id: 23, class: gpr, preferred-register: '' }
- { id: 24, class: gprlr, preferred-register: '' }
- { id: 25, class: rgpr, preferred-register: '' }
- { id: 26, class: vccr, preferred-register: '' }
- { id: 27, class: rgpr, preferred-register: '' }
- { id: 28, class: rgpr, preferred-register: '' }
- { id: 29, class: mqpr, preferred-register: '' }
- { id: 30, class: rgpr, preferred-register: '' }
- { id: 31, class: mqpr, preferred-register: '' }
- { id: 32, class: tgpreven, preferred-register: '' }
- { id: 33, class: gprlr, preferred-register: '' }
- { id: 34, class: gprlr, preferred-register: '' }
- { id: 35, class: gprnopc, preferred-register: '' }
liveins:
- { reg: '$r0', virtual-reg: '%13' }
- { reg: '$r1', virtual-reg: '%14' }
- { reg: '$r2', virtual-reg: '%15' }
body: |
; CHECK-LABEL: name: test
; CHECK: bb.0.entry:
; CHECK: successors: %bb.2(0x50000000), %bb.1(0x30000000)
; CHECK: liveins: $r0, $r1, $r2
; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r2
; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $r1
; CHECK: [[COPY2:%[0-9]+]]:gpr = COPY $r0
; CHECK: t2CMPri [[COPY]], 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
; CHECK: t2Bcc %bb.2, 10 /* CC::ge */, $cpsr
; CHECK: bb.1:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: [[t2MOVi:%[0-9]+]]:rgpr = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
; CHECK: [[COPY3:%[0-9]+]]:gpr = COPY [[t2MOVi]]
; CHECK: t2B %bb.4, 14 /* CC::al */, $noreg
; CHECK: bb.2.vector.ph:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[COPY]], 7, 14 /* CC::al */, $noreg, $noreg
; CHECK: [[t2BICri:%[0-9]+]]:rgpr = t2BICri [[t2ADDri]], 7, 14 /* CC::al */, $noreg, $noreg
; CHECK: [[t2SUBri:%[0-9]+]]:rgpr = t2SUBri [[t2BICri]], 8, 14 /* CC::al */, $noreg, $noreg
; CHECK: [[t2MOVi1:%[0-9]+]]:rgpr = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg
; CHECK: [[t2ADDrs:%[0-9]+]]:gprnopc = nuw nsw t2ADDrs [[t2MOVi1]], [[t2SUBri]], 27, 14 /* CC::al */, $noreg, $noreg
; CHECK: [[COPY4:%[0-9]+]]:rgpr = COPY [[t2ADDrs]]
; CHECK: [[t2DoLoopStart:%[0-9]+]]:gprlr = t2DoLoopStart [[COPY4]]
; CHECK: [[t2MOVi2:%[0-9]+]]:rgpr = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
; CHECK: [[COPY5:%[0-9]+]]:gpr = COPY [[t2MOVi2]]
; CHECK: [[COPY6:%[0-9]+]]:gpr = COPY [[t2DoLoopStart]]
; CHECK: [[COPY7:%[0-9]+]]:gprnopc = COPY [[COPY]]
; CHECK: bb.3.vector.body:
; CHECK: successors: %bb.3(0x7c000000), %bb.4(0x04000000)
; CHECK: [[PHI:%[0-9]+]]:gprnopc = PHI [[COPY2]], %bb.2, %10, %bb.3
; CHECK: [[PHI1:%[0-9]+]]:gprnopc = PHI [[COPY1]], %bb.2, %9, %bb.3
; CHECK: [[PHI2:%[0-9]+]]:tgpreven = PHI [[COPY5]], %bb.2, %8, %bb.3
; CHECK: [[PHI3:%[0-9]+]]:gprlr = PHI [[COPY6]], %bb.2, %11, %bb.3
; CHECK: [[PHI4:%[0-9]+]]:rgpr = PHI [[COPY7]], %bb.2, %7, %bb.3
; CHECK: [[MVE_VCTP16_:%[0-9]+]]:vccr = MVE_VCTP16 [[PHI4]], 0, $noreg
; CHECK: [[t2SUBri1:%[0-9]+]]:rgpr = t2SUBri [[PHI4]], 8, 14 /* CC::al */, $noreg, $noreg
; CHECK: [[COPY8:%[0-9]+]]:gpr = COPY [[t2SUBri1]]
; CHECK: [[MVE_VLDRHU16_post:%[0-9]+]]:rgpr, [[MVE_VLDRHU16_post1:%[0-9]+]]:mqpr = MVE_VLDRHU16_post [[PHI]], 16, 1, [[MVE_VCTP16_]] :: (load 16 from %ir.lsr.iv35, align 2)
; CHECK: [[MVE_VLDRHU16_post2:%[0-9]+]]:rgpr, [[MVE_VLDRHU16_post3:%[0-9]+]]:mqpr = MVE_VLDRHU16_post [[PHI1]], 16, 1, [[MVE_VCTP16_]] :: (load 16 from %ir.lsr.iv12, align 2)
; CHECK: [[MVE_VMLADAVas16_:%[0-9]+]]:tgpreven = MVE_VMLADAVas16 [[PHI2]], killed [[MVE_VLDRHU16_post3]], killed [[MVE_VLDRHU16_post1]], 1, [[MVE_VCTP16_]]
; CHECK: [[COPY9:%[0-9]+]]:gpr = COPY [[MVE_VMLADAVas16_]]
; CHECK: [[COPY10:%[0-9]+]]:gpr = COPY [[MVE_VLDRHU16_post2]]
; CHECK: [[COPY11:%[0-9]+]]:gpr = COPY [[MVE_VLDRHU16_post]]
; CHECK: [[t2LoopDec:%[0-9]+]]:gprlr = t2LoopDec [[PHI3]], 1
; CHECK: [[COPY12:%[0-9]+]]:gpr = COPY [[t2LoopDec]]
; CHECK: t2LoopEnd [[t2LoopDec]], %bb.3, implicit-def dead $cpsr
; CHECK: t2B %bb.4, 14 /* CC::al */, $noreg
; CHECK: bb.4.for.cond.cleanup:
; CHECK: [[PHI5:%[0-9]+]]:gpr = PHI [[COPY3]], %bb.1, [[COPY9]], %bb.3
; CHECK: $r0 = COPY [[PHI5]]
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
bb.0.entry:
successors: %bb.1(0x50000000), %bb.4(0x30000000)
liveins: $r0, $r1, $r2
%15:gprnopc = COPY $r2
%14:gpr = COPY $r1
%13:gpr = COPY $r0
t2CMPri %15, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
t2Bcc %bb.1, 10 /* CC::ge */, $cpsr
bb.4:
successors: %bb.3(0x80000000)
%22:rgpr = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
%16:gpr = COPY %22
t2B %bb.3, 14 /* CC::al */, $noreg
bb.1.vector.ph:
successors: %bb.2(0x80000000)
%17:rgpr = t2ADDri %15, 7, 14 /* CC::al */, $noreg, $noreg
%18:rgpr = t2BICri %17, 7, 14 /* CC::al */, $noreg, $noreg
%19:rgpr = t2SUBri %18, 8, 14 /* CC::al */, $noreg, $noreg
%20:rgpr = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg
%21:gprnopc = nuw nsw t2ADDrs %20, %19, 27, 14 /* CC::al */, $noreg, $noreg
%0:rgpr = COPY %21
%24:gprlr = t2DoLoopStart %0
%25:rgpr = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
%23:gpr = COPY %25
%1:gpr = COPY %24
%35:gprnopc = COPY %15
bb.2.vector.body:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
%2:gprnopc = PHI %13, %bb.1, %10, %bb.2
%3:gprnopc = PHI %14, %bb.1, %9, %bb.2
%4:tgpreven = PHI %23, %bb.1, %8, %bb.2
%5:gprlr = PHI %1, %bb.1, %11, %bb.2
%6:rgpr = PHI %35, %bb.1, %7, %bb.2
%26:vccr = MVE_VCTP16 %6, 0, $noreg
%27:rgpr = t2SUBri %6, 8, 14 /* CC::al */, $noreg, $noreg
%7:gpr = COPY %27
%28:rgpr, %29:mqpr = MVE_VLDRHU16_post %2, 16, 1, %26 :: (load 16 from %ir.lsr.iv35, align 2)
%30:rgpr, %31:mqpr = MVE_VLDRHU16_post %3, 16, 1, %26 :: (load 16 from %ir.lsr.iv12, align 2)
%32:tgpreven = MVE_VMLADAVas16 %4, killed %31, killed %29, 1, %26
%8:gpr = COPY %32
%9:gpr = COPY %30
%10:gpr = COPY %28
%33:gprlr = t2LoopDec %5, 1
%11:gpr = COPY %33
t2LoopEnd %33, %bb.2, implicit-def dead $cpsr
t2B %bb.3, 14 /* CC::al */, $noreg
bb.3.for.cond.cleanup:
%12:gpr = PHI %16, %bb.4, %8, %bb.2
$r0 = COPY %12
tBX_RET 14 /* CC::al */, $noreg, implicit $r0
...