llvm-project/llvm/test/CodeGen/Thumb2/mve-vpt-2-blocks.mir

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -run-pass arm-mve-vpt %s -o - | FileCheck %s

--- |
  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
  target triple = "thumbv8.1m.main-none-none-eabi"

  ; IR counterpart of the machine function further down in this file: a chain
  ; of five calls to the predicated vminnm intrinsic. Each call consumes the
  ; previous call's result, and all five share the same predicate %conv.i.
  define hidden arm_aapcs_vfpcc <4 x float> @vpt_2_blocks(<4 x float> %inactive1, <4 x float> %inactive2, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #0 {
  entry:
    ; The i16 predicate argument, widened to i32, is the last operand of every
    ; call below.
    %conv.i = zext i16 %p to i32
    ; The first operand is undef except in %2 and %4, which pass %inactive1 and
    ; %inactive2 (presumably the value used for lanes the predicate disables;
    ; confirm against the intrinsic's definition).
    %0 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> undef, <4 x float> %a, <4 x float> %b, i32 %conv.i) #2
    %1 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> undef, <4 x float> %0, <4 x float> %0, i32 %conv.i) #2
    %2 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> %inactive1, <4 x float> %1, <4 x float> %b, i32 %conv.i) #2
    %3 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> undef, <4 x float> %2, <4 x float> %b, i32 %conv.i) #2
    %4 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> %inactive2, <4 x float> %3, <4 x float> %b, i32 %conv.i) #2
    ret <4 x float> %4
  }

  ; Declaration of the intrinsic called five times above.
  declare <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float>, <4 x float>, <4 x float>, i32) #1

  ; Attribute groups: #0 is attached to the function definition, #1 to the
  ; intrinsic declaration, and #2 to each call site.
  attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="128" "frame-pointer"="none" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+mve.fp,+ras,+thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" }
  attributes #1 = { nounwind readnone }
  attributes #2 = { nounwind }

...
---
# Machine function given to the arm-mve-vpt pass. Its body holds five
# back-to-back MVE_VMINNMf32 instructions that all name $vpr as an operand.
# The autogenerated assertions expect the pass to produce two bundles: one
# headed by "MVE_VPST 1" that wraps the first four instructions, and one headed
# by "MVE_VPST 8" that wraps the fifth (consistent with a VPT block covering at
# most four instructions).
name:            vpt_2_blocks
alignment:       4
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
failedISel:      false
tracksRegLiveness: true
hasWinCFI:       false
registers:       []
liveins:
  - { reg: '$q0', virtual-reg: '' }
  - { reg: '$q1', virtual-reg: '' }
  - { reg: '$q2', virtual-reg: '' }
  - { reg: '$q3', virtual-reg: '' }
  - { reg: '$r0', virtual-reg: '' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  stackProtector:  ''
  maxCallFrameSize: 0
  cvBytesOfCalleeSavedRegisters: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
  localFrameSize:  0
  savePoint:       ''
  restorePoint:    ''
fixedStack:      []
stack:           []
constants:       []
body:             |
  bb.0.entry:
    liveins: $q0, $q1, $q2, $q3, $r0

    ; CHECK-LABEL: name: vpt_2_blocks
    ; CHECK: liveins: $q0, $q1, $q2, $q3, $r0
    ; CHECK: $vpr = VMSR_P0 killed $r0, 14 /* CC::al */, $noreg
    ; CHECK: BUNDLE implicit-def dead $q2, implicit-def $d4, implicit-def $s8, implicit-def $s9, implicit-def $d5, implicit-def $s10, implicit-def $s11, implicit-def $q0, implicit-def $d0, implicit-def $s0, implicit-def $s1, implicit-def $d1, implicit-def $s2, implicit-def $s3, implicit $vpr, implicit killed $q2, implicit $q3, implicit killed $q0 {
    ; CHECK:   MVE_VPST 1, implicit $vpr
    ; CHECK:   renamable $q2 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q2, renamable $q3, 1, renamable $vpr, $noreg, undef renamable $q2
    ; CHECK:   renamable $q2 = nnan ninf nsz MVE_VMINNMf32 internal killed renamable $q2, internal renamable $q2, 1, renamable $vpr, $noreg, internal undef renamable $q2
    ; CHECK:   renamable $q0 = nnan ninf nsz MVE_VMINNMf32 internal killed renamable $q2, renamable $q3, 1, renamable $vpr, $noreg, killed renamable $q0
    ; CHECK:   renamable $q0 = nnan ninf nsz MVE_VMINNMf32 internal killed renamable $q0, renamable $q3, 1, renamable $vpr, $noreg, internal undef renamable $q0
    ; CHECK: }
    ; CHECK: BUNDLE implicit-def $q1, implicit-def $d2, implicit-def $s4, implicit-def $s5, implicit-def $d3, implicit-def $s6, implicit-def $s7, implicit killed $vpr, implicit killed $q0, implicit killed $q3, implicit killed $q1 {
    ; CHECK:   MVE_VPST 8, implicit $vpr
    ; CHECK:   renamable $q1 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q0, killed renamable $q3, 1, killed renamable $vpr, $noreg, killed renamable $q1
    ; CHECK: }
    ; CHECK: $q0 = MVE_VORR killed $q1, killed $q1, 0, $noreg, $noreg, undef $q0
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $q0
    ; Pass input starts here. $vpr is set from $r0, then five VMINNM
    ; instructions in a row use $vpr; the last one also kills it.
    $vpr = VMSR_P0 killed $r0, 14, $noreg
    renamable $q2 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q2, renamable $q3, 1, renamable $vpr, $noreg, undef renamable $q2
    renamable $q2 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q2, renamable $q2, 1, renamable $vpr, $noreg, undef renamable $q2
    renamable $q0 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q2, renamable $q3, 1, renamable $vpr, $noreg, killed renamable $q0
    renamable $q0 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q0, renamable $q3, 1, renamable $vpr, $noreg, undef renamable $q0
    renamable $q1 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q0, killed renamable $q3, 1, killed renamable $vpr, $noreg, killed renamable $q1
    ; Copy the final result from $q1 into $q0, which the return below uses.
    $q0 = MVE_VORR killed $q1, killed $q1, 0, $noreg, $noreg, undef $q0
    tBX_RET 14, $noreg, implicit $q0

...
[ARM] Masked load and store and predicate tests. NFC llvm-svn: 370325 2019-08-29 18:32:12 +08:00			`# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py`
[ARM] MVE VPT Blocks A minor iteration on the MVE VPT Block pass to enable more efficient VPT Block code generation: consecutive VPT predicated statements, predicated on the same condition, will be placed within the same VPT Block. This essentially is also an exercise to write some more tests for the next step, which should be more generic also merging instructions when they are not consecutive. Differential Revision: https://reviews.llvm.org/D63711 llvm-svn: 364298 2019-06-25 20:04:31 +08:00			`# RUN: llc -run-pass arm-mve-vpt %s -o - \| FileCheck %s`

			`--- \|`
			`target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"`
[ARM] Replace arm vendor with none. NFC 2020-04-22 23:33:11 +08:00			`target triple = "thumbv8.1m.main-none-none-eabi"`
[ARM] MVE VPT Blocks A minor iteration on the MVE VPT Block pass to enable more efficient VPT Block code generation: consecutive VPT predicated statements, predicated on the same condition, will be placed within the same VPT Block. This essentially is also an exercise to write some more tests for the next step, which should be more generic also merging instructions when they are not consecutive. Differential Revision: https://reviews.llvm.org/D63711 llvm-svn: 364298 2019-06-25 20:04:31 +08:00
[ARM][MVE] Renamed VPT Block tests and files to something more informative. NFC 2020-01-07 23:57:19 +08:00			`define hidden arm_aapcs_vfpcc <4 x float> @vpt_2_blocks(<4 x float> %inactive1, <4 x float> %inactive2, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #0 {`
[ARM] MVE VPT Blocks A minor iteration on the MVE VPT Block pass to enable more efficient VPT Block code generation: consecutive VPT predicated statements, predicated on the same condition, will be placed within the same VPT Block. This essentially is also an exercise to write some more tests for the next step, which should be more generic also merging instructions when they are not consecutive. Differential Revision: https://reviews.llvm.org/D63711 llvm-svn: 364298 2019-06-25 20:04:31 +08:00			`entry:`
			`%conv.i = zext i16 %p to i32`
			`%0 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> undef, <4 x float> %a, <4 x float> %b, i32 %conv.i) #2`
			`%1 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> undef, <4 x float> %0, <4 x float> %0, i32 %conv.i) #2`
			`%2 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> %inactive1, <4 x float> %1, <4 x float> %b, i32 %conv.i) #2`
			`%3 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> undef, <4 x float> %2, <4 x float> %b, i32 %conv.i) #2`
			`%4 = tail call nnan ninf nsz <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float> %inactive2, <4 x float> %3, <4 x float> %b, i32 %conv.i) #2`
			`ret <4 x float> %4`
			`}`

			`declare <4 x float> @llvm.arm.mve.vminnm.m.v4f32.v4f32.v4f32.v4f32.i32(<4 x float>, <4 x float>, <4 x float>, i32) #1`

Revert "[NFC] remove explicit default value for strboolattr attribute in tests" This reverts commit bda6e5bee04c75b1f1332b4fd1ac4e8ef6c3c247. See https://lab.llvm.org/buildbot/#/builders/109/builds/15424 for instance 2021-05-25 01:43:40 +08:00			attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="128" "frame-pointer"="none" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+mve.fp,+ras,+thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" }
[ARM] MVE VPT Blocks A minor iteration on the MVE VPT Block pass to enable more efficient VPT Block code generation: consecutive VPT predicated statements, predicated on the same condition, will be placed within the same VPT Block. This essentially is also an exercise to write some more tests for the next step, which should be more generic also merging instructions when they are not consecutive. Differential Revision: https://reviews.llvm.org/D63711 llvm-svn: 364298 2019-06-25 20:04:31 +08:00			`attributes #1 = { nounwind readnone }`
			`attributes #2 = { nounwind }`

			`...`
			`---`
[ARM][MVE] Renamed VPT Block tests and files to something more informative. NFC 2020-01-07 23:57:19 +08:00			`name: vpt_2_blocks`
[Alignment] Use llvm::Align in MachineFunction and TargetLowering - fixes mir parsing Summary: This catches malformed mir files which specify alignment as log2 instead of pow2. See https://reviews.llvm.org/D65945 for reference. This patch is part of a series to introduce an Alignment type. See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html See this patch for the introduction of the type: https://reviews.llvm.org/D64790 Reviewers: courbet Subscribers: MatzeB, qcolombet, dschuff, arsenm, sdardis, nemanjai, jvesely, nhaehnle, hiraditya, kbarton, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, s.egerton, pzheng, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D67433 llvm-svn: 371608 2019-09-11 19:16:48 +08:00			`alignment: 4`
[ARM] MVE VPT Blocks A minor iteration on the MVE VPT Block pass to enable more efficient VPT Block code generation: consecutive VPT predicated statements, predicated on the same condition, will be placed within the same VPT Block. This essentially is also an exercise to write some more tests for the next step, which should be more generic also merging instructions when they are not consecutive. Differential Revision: https://reviews.llvm.org/D63711 llvm-svn: 364298 2019-06-25 20:04:31 +08:00			`exposesReturnsTwice: false`
			`legalized: false`
			`regBankSelected: false`
			`selected: false`
			`failedISel: false`
			`tracksRegLiveness: true`
			`hasWinCFI: false`
			`registers: []`
			`liveins:`
			`- { reg: '$q0', virtual-reg: '' }`
			`- { reg: '$q1', virtual-reg: '' }`
			`- { reg: '$q2', virtual-reg: '' }`
			`- { reg: '$q3', virtual-reg: '' }`
			`- { reg: '$r0', virtual-reg: '' }`
			`frameInfo:`
			`isFrameAddressTaken: false`
			`isReturnAddressTaken: false`
			`hasStackMap: false`
			`hasPatchPoint: false`
			`stackSize: 0`
			`offsetAdjustment: 0`
			`maxAlignment: 0`
			`adjustsStack: false`
			`hasCalls: false`
			`stackProtector: ''`
			`maxCallFrameSize: 0`
			`cvBytesOfCalleeSavedRegisters: 0`
			`hasOpaqueSPAdjustment: false`
			`hasVAStart: false`
			`hasMustTailInVarArgFunc: false`
			`localFrameSize: 0`
			`savePoint: ''`
			`restorePoint: ''`
			`fixedStack: []`
			`stack: []`
			`constants: []`
			`body: \|`
			`bb.0.entry:`
			`liveins: $q0, $q1, $q2, $q3, $r0`

[ARM][MVE] Renamed VPT Block tests and files to something more informative. NFC 2020-01-07 23:57:19 +08:00			`; CHECK-LABEL: name: vpt_2_blocks`
[ARM] Masked load and store and predicate tests. NFC llvm-svn: 370325 2019-08-29 18:32:12 +08:00			`; CHECK: liveins: $q0, $q1, $q2, $q3, $r0`
[MIR][ARM] MachineOperand comments This adds infrastructure to print and parse MIR MachineOperand comments. The motivation for the ARM backend is to print condition code names instead of magic constants that are difficult to read (for human beings). For example, instead of this: dead renamable $r2, $cpsr = tEOR killed renamable $r2, renamable $r1, 14, $noreg t2Bcc %bb.4, 0, killed $cpsr we now print this: dead renamable $r2, $cpsr = tEOR killed renamable $r2, renamable $r1, 14 /* CC::always */, $noreg t2Bcc %bb.4, 0 /* CC:eq */, killed $cpsr This shows that MachineOperand comments are enclosed between /* and */. In this example, the EOR instruction is not conditionally executed (i.e. it is "always executed"), which is encoded by the 14 immediate machine operand. Thus, now this machine operand has /* CC::always */ as a comment. The 0 on the next conditional branch instruction represents the equal condition code, thus now this operand has /* CC:eq */ as a comment. As it is a comment, the MI lexer/parser completely ignores it. The benefit is that this keeps the change in the lexer extremely minimal and no target specific parsing needs to be done. The changes on the MIPrinter side are also minimal, as there is only one target hook that is used to create the machine operand comments. Differential Revision: https://reviews.llvm.org/D74306 2020-02-24 22:19:21 +08:00			`; CHECK: $vpr = VMSR_P0 killed $r0, 14 /* CC::al */, $noreg`
[ARM][MVE] Change VPST to use, not def, VPR Unlike VPT, VPST just uses the current value of VPR.P0. Differential Revision: https://reviews.llvm.org/D69037 llvm-svn: 375087 2019-10-17 16:46:31 +08:00			`; CHECK: BUNDLE implicit-def dead $q2, implicit-def $d4, implicit-def $s8, implicit-def $s9, implicit-def $d5, implicit-def $s10, implicit-def $s11, implicit-def $q0, implicit-def $d0, implicit-def $s0, implicit-def $s1, implicit-def $d1, implicit-def $s2, implicit-def $s3, implicit $vpr, implicit killed $q2, implicit $q3, implicit killed $q0 {`
			`; CHECK: MVE_VPST 1, implicit $vpr`
[ARM] Add a tail-predication loop predicate register The semantics of tail predication loops means that the value of LR as an instruction is executed determines the predicate. In other words: mov r3, #3 DLSTP lr, r3 // Start tail predication, lr==3 VADD.s32 q0, q1, q2 // Lanes 0,1 and 2 are updated in q0. mov lr, #1 VADD.s32 q0, q1, q2 // Only first lane is updated. This means that the value of lr cannot be spilled and re-used in tail predication regions without potentially altering the behaviour of the program. More lanes than required could be stored, for example, and in the case of a gather those lanes might not have been setup, leading to alignment exceptions. This patch adds a new lr predicate operand to MVE instructions in order to keep a reference to the lr that they use as a tail predicate. It will usually hold the zeroreg meaning not predicated, being set to the LR phi value in the MVETPAndVPTOptimisationsPass. This will prevent it from being spilled anywhere that it needs to be used. A lot of tests needed updating. Differential Revision: https://reviews.llvm.org/D107638 2021-09-02 20:42:58 +08:00			`; CHECK: renamable $q2 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q2, renamable $q3, 1, renamable $vpr, $noreg, undef renamable $q2`
			`; CHECK: renamable $q2 = nnan ninf nsz MVE_VMINNMf32 internal killed renamable $q2, internal renamable $q2, 1, renamable $vpr, $noreg, internal undef renamable $q2`
			`; CHECK: renamable $q0 = nnan ninf nsz MVE_VMINNMf32 internal killed renamable $q2, renamable $q3, 1, renamable $vpr, $noreg, killed renamable $q0`
			`; CHECK: renamable $q0 = nnan ninf nsz MVE_VMINNMf32 internal killed renamable $q0, renamable $q3, 1, renamable $vpr, $noreg, internal undef renamable $q0`
[ARM] Masked load and store and predicate tests. NFC llvm-svn: 370325 2019-08-29 18:32:12 +08:00			`; CHECK: }`
[ARM][MVE] Change VPST to use, not def, VPR Unlike VPT, VPST just uses the current value of VPR.P0. Differential Revision: https://reviews.llvm.org/D69037 llvm-svn: 375087 2019-10-17 16:46:31 +08:00			`; CHECK: BUNDLE implicit-def $q1, implicit-def $d2, implicit-def $s4, implicit-def $s5, implicit-def $d3, implicit-def $s6, implicit-def $s7, implicit killed $vpr, implicit killed $q0, implicit killed $q3, implicit killed $q1 {`
			`; CHECK: MVE_VPST 8, implicit $vpr`
[ARM] Add a tail-predication loop predicate register The semantics of tail predication loops means that the value of LR as an instruction is executed determines the predicate. In other words: mov r3, #3 DLSTP lr, r3 // Start tail predication, lr==3 VADD.s32 q0, q1, q2 // Lanes 0,1 and 2 are updated in q0. mov lr, #1 VADD.s32 q0, q1, q2 // Only first lane is updated. This means that the value of lr cannot be spilled and re-used in tail predication regions without potentially altering the behaviour of the program. More lanes than required could be stored, for example, and in the case of a gather those lanes might not have been setup, leading to alignment exceptions. This patch adds a new lr predicate operand to MVE instructions in order to keep a reference to the lr that they use as a tail predicate. It will usually hold the zeroreg meaning not predicated, being set to the LR phi value in the MVETPAndVPTOptimisationsPass. This will prevent it from being spilled anywhere that it needs to be used. A lot of tests needed updating. Differential Revision: https://reviews.llvm.org/D107638 2021-09-02 20:42:58 +08:00			`; CHECK: renamable $q1 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q0, killed renamable $q3, 1, killed renamable $vpr, $noreg, killed renamable $q1`
[ARM] Masked load and store and predicate tests. NFC llvm-svn: 370325 2019-08-29 18:32:12 +08:00			`; CHECK: }`
[ARM] Add a tail-predication loop predicate register The semantics of tail predication loops means that the value of LR as an instruction is executed determines the predicate. In other words: mov r3, #3 DLSTP lr, r3 // Start tail predication, lr==3 VADD.s32 q0, q1, q2 // Lanes 0,1 and 2 are updated in q0. mov lr, #1 VADD.s32 q0, q1, q2 // Only first lane is updated. This means that the value of lr cannot be spilled and re-used in tail predication regions without potentially altering the behaviour of the program. More lanes than required could be stored, for example, and in the case of a gather those lanes might not have been setup, leading to alignment exceptions. This patch adds a new lr predicate operand to MVE instructions in order to keep a reference to the lr that they use as a tail predicate. It will usually hold the zeroreg meaning not predicated, being set to the LR phi value in the MVETPAndVPTOptimisationsPass. This will prevent it from being spilled anywhere that it needs to be used. A lot of tests needed updating. Differential Revision: https://reviews.llvm.org/D107638 2021-09-02 20:42:58 +08:00			`; CHECK: $q0 = MVE_VORR killed $q1, killed $q1, 0, $noreg, $noreg, undef $q0`
[MIR][ARM] MachineOperand comments This adds infrastructure to print and parse MIR MachineOperand comments. The motivation for the ARM backend is to print condition code names instead of magic constants that are difficult to read (for human beings). For example, instead of this: dead renamable $r2, $cpsr = tEOR killed renamable $r2, renamable $r1, 14, $noreg t2Bcc %bb.4, 0, killed $cpsr we now print this: dead renamable $r2, $cpsr = tEOR killed renamable $r2, renamable $r1, 14 /* CC::always */, $noreg t2Bcc %bb.4, 0 /* CC:eq */, killed $cpsr This shows that MachineOperand comments are enclosed between /* and */. In this example, the EOR instruction is not conditionally executed (i.e. it is "always executed"), which is encoded by the 14 immediate machine operand. Thus, now this machine operand has /* CC::always */ as a comment. The 0 on the next conditional branch instruction represents the equal condition code, thus now this operand has /* CC:eq */ as a comment. As it is a comment, the MI lexer/parser completely ignores it. The benefit is that this keeps the change in the lexer extremely minimal and no target specific parsing needs to be done. The changes on the MIPrinter side are also minimal, as there is only one target hook that is used to create the machine operand comments. Differential Revision: https://reviews.llvm.org/D74306 2020-02-24 22:19:21 +08:00			`; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $q0`
[ARM] MVE VPT Blocks A minor iteration on the MVE VPT Block pass to enable more efficient VPT Block code generation: consecutive VPT predicated statements, predicated on the same condition, will be placed within the same VPT Block. This essentially is also an exercise to write some more tests for the next step, which should be more generic also merging instructions when they are not consecutive. Differential Revision: https://reviews.llvm.org/D63711 llvm-svn: 364298 2019-06-25 20:04:31 +08:00			`$vpr = VMSR_P0 killed $r0, 14, $noreg`
[ARM] Add a tail-predication loop predicate register The semantics of tail predication loops means that the value of LR as an instruction is executed determines the predicate. In other words: mov r3, #3 DLSTP lr, r3 // Start tail predication, lr==3 VADD.s32 q0, q1, q2 // Lanes 0,1 and 2 are updated in q0. mov lr, #1 VADD.s32 q0, q1, q2 // Only first lane is updated. This means that the value of lr cannot be spilled and re-used in tail predication regions without potentially altering the behaviour of the program. More lanes than required could be stored, for example, and in the case of a gather those lanes might not have been setup, leading to alignment exceptions. This patch adds a new lr predicate operand to MVE instructions in order to keep a reference to the lr that they use as a tail predicate. It will usually hold the zeroreg meaning not predicated, being set to the LR phi value in the MVETPAndVPTOptimisationsPass. This will prevent it from being spilled anywhere that it needs to be used. A lot of tests needed updating. Differential Revision: https://reviews.llvm.org/D107638 2021-09-02 20:42:58 +08:00			`renamable $q2 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q2, renamable $q3, 1, renamable $vpr, $noreg, undef renamable $q2`
			`renamable $q2 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q2, renamable $q2, 1, renamable $vpr, $noreg, undef renamable $q2`
			`renamable $q0 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q2, renamable $q3, 1, renamable $vpr, $noreg, killed renamable $q0`
			`renamable $q0 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q0, renamable $q3, 1, renamable $vpr, $noreg, undef renamable $q0`
			`renamable $q1 = nnan ninf nsz MVE_VMINNMf32 killed renamable $q0, killed renamable $q3, 1, killed renamable $vpr, $noreg, killed renamable $q1`
			`$q0 = MVE_VORR killed $q1, killed $q1, 0, $noreg, $noreg, undef $q0`
[ARM] MVE VPT Blocks A minor iteration on the MVE VPT Block pass to enable more efficient VPT Block code generation: consecutive VPT predicated statements, predicated on the same condition, will be placed within the same VPT Block. This essentially is also an exercise to write some more tests for the next step, which should be more generic also merging instructions when they are not consecutive. Differential Revision: https://reviews.llvm.org/D63711 llvm-svn: 364298 2019-06-25 20:04:31 +08:00			`tBX_RET 14, $noreg, implicit $q0`

			`...`