; llvm-project/llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll

; Four llc invocations: Apple and generic NEON assembly syntax, each with
; -disable-adv-copy-opt set to true and to false. Both settings of that flag
; are held to the same expectations, so one FileCheck prefix per syntax is
; enough. The former per-setting prefixes (the -NOOPT / -OPT variants) were not
; referenced by any directive in this file and are dropped, because FileCheck
; reports an error for a prefix that matches no directive unless
; --allow-unused-prefixes is passed.
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=apple -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=CHECK
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=apple -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=CHECK
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=generic -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=GENERIC
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=generic -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=GENERIC

; bar: vector add of %a and %b; lane 0 of that sum is then combined with lane 0
; of %b by a scalar add (returned in lane 0) and a scalar sub (returned in
; lane 1).
; Expected code, per the directives below: the vector add, then both scalar
; operations on D registers, then one fmov carrying the sub result to an X
; register (the negative checks forbid any other fmov between the sub and the
; lane insert), and finally the lane-1 insert directly followed by ret.
; Note: the REG3 capture (destination of the scalar add) is never referenced
; again by a later directive.
define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: bar:
; CHECK: add.2d	v[[REG:[0-9]+]], v0, v1
; CHECK: add	d[[REG3:[0-9]+]], d[[REG]], d1
; CHECK: sub	d[[REG2:[0-9]+]], d[[REG]], d1
; CHECK-NOT: fmov
; CHECK: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]
; CHECK-NOT: fmov
; CHECK: mov.d v0[1], [[COPY_REG2]]
; CHECK-NEXT: ret
;
; GENERIC-LABEL: bar:
; GENERIC: add	v[[REG:[0-9]+]].2d, v0.2d, v1.2d
; GENERIC: add	d[[REG3:[0-9]+]], d[[REG]], d1
; GENERIC: sub	d[[REG2:[0-9]+]], d[[REG]], d1
; GENERIC-NOT: fmov
; GENERIC: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]
; GENERIC-NOT: fmov
; GENERIC: mov v0.d[1], [[COPY_REG2]]
; GENERIC-NEXT: ret
  ; Full-width vector sum of the two arguments.
  %add = add <2 x i64> %a, %b
  ; Lane 0 of the sum and lane 0 of %b are the operands of both scalar ops.
  %vgetq_lane = extractelement <2 x i64> %add, i32 0
  %vgetq_lane2 = extractelement <2 x i64> %b, i32 0
  %add3 = add i64 %vgetq_lane, %vgetq_lane2
  %sub = sub i64 %vgetq_lane, %vgetq_lane2
  ; Rebuild the result vector: scalar add in lane 0, scalar sub in lane 1.
  %vecinit = insertelement <2 x i64> undef, i64 %add3, i32 0
  %vecinit8 = insertelement <2 x i64> %vecinit, i64 %sub, i32 1
  ret <2 x i64> %vecinit8
}

; subdd_su64: i64 subtraction of lane 0 of %a from lane 0 of %b (nsw), with
; the 64 result bits returned as a double.
; Expected in both assembly syntaxes: one D-register sub immediately followed
; by ret.
define double @subdd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: subdd_su64:
; CHECK: sub d0, d1, d0
; CHECK-NEXT: ret
; GENERIC-LABEL: subdd_su64:
; GENERIC: sub d0, d1, d0
; GENERIC-NEXT: ret
  %a.lane0 = extractelement <2 x i64> %a, i32 0
  %b.lane0 = extractelement <2 x i64> %b, i32 0
  ; Operand order is b - a, matching the expected "d1, d0" source order.
  %diff = sub nsw i64 %b.lane0, %a.lane0
  %diff.bits = bitcast i64 %diff to double
  ret double %diff.bits
}

; vaddd_su64: i64 addition (nsw) of lane 0 of %b and lane 0 of %a, with the
; 64 result bits returned as a double.
; Expected in both assembly syntaxes: one D-register add immediately followed
; by ret.
define double @vaddd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: vaddd_su64:
; CHECK: add d0, d1, d0
; CHECK-NEXT: ret
; GENERIC-LABEL: vaddd_su64:
; GENERIC: add d0, d1, d0
; GENERIC-NEXT: ret
  %a.lane0 = extractelement <2 x i64> %a, i32 0
  %b.lane0 = extractelement <2 x i64> %b, i32 0
  %sum = add nsw i64 %b.lane0, %a.lane0
  %sum.bits = bitcast i64 %sum to double
  ret double %sum.bits
}

; add_sub_su64: covers the case where the sub MI does not access the dsub
; register.
; Adds lane 0 of %b and lane 0 of %a, negates the sum (0 - sum), and returns
; the 64 result bits as a double.
; Expected in both assembly syntaxes: a D-register add, then a D-register sub
; whose first source may be any D register, immediately followed by ret.
define double @add_sub_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: add_sub_su64:
; CHECK: add d0, d1, d0
; CHECK: sub d0, {{d[0-9]+}}, d0
; CHECK-NEXT: ret
; GENERIC-LABEL: add_sub_su64:
; GENERIC: add d0, d1, d0
; GENERIC: sub d0, {{d[0-9]+}}, d0
; GENERIC-NEXT: ret
  %a.lane0 = extractelement <2 x i64> %a, i32 0
  %b.lane0 = extractelement <2 x i64> %b, i32 0
  %sum = add i64 %b.lane0, %a.lane0
  ; Negation written as a subtraction from the constant zero.
  %neg = sub i64 0, %sum
  %neg.bits = bitcast i64 %neg to double
  ret double %neg.bits
}
; and_su64: bitwise AND of lane 0 of %b and lane 0 of %a, with the 64 result
; bits returned as a double.
; Expected in both assembly syntaxes: the 8-byte vector form of and,
; immediately followed by ret.
define double @and_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: and_su64:
; CHECK: and.8b v0, v1, v0
; CHECK-NEXT: ret
; GENERIC-LABEL: and_su64:
; GENERIC: and v0.8b, v1.8b, v0.8b
; GENERIC-NEXT: ret
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  ; Result is named after the operation, like %add.i / %sub.i / %xor.i in the
  ; sibling tests (it was previously called %or.i).
  %and.i = and i64 %vecext1, %vecext
  %retval = bitcast i64 %and.i to double
  ret double %retval
}

; orr_su64: bitwise OR of lane 0 of %b and lane 0 of %a, with the 64 result
; bits returned as a double.
; Expected in both assembly syntaxes: the 8-byte vector form of orr,
; immediately followed by ret.
define double @orr_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: orr_su64:
; CHECK: orr.8b v0, v1, v0
; CHECK-NEXT: ret
; GENERIC-LABEL: orr_su64:
; GENERIC: orr v0.8b, v1.8b, v0.8b
; GENERIC-NEXT: ret
  %a.lane0 = extractelement <2 x i64> %a, i32 0
  %b.lane0 = extractelement <2 x i64> %b, i32 0
  %merged = or i64 %b.lane0, %a.lane0
  %merged.bits = bitcast i64 %merged to double
  ret double %merged.bits
}

; xorr_su64: bitwise XOR of lane 0 of %b and lane 0 of %a, with the 64 result
; bits returned as a double.
; Expected in both assembly syntaxes: the 8-byte vector form of eor,
; immediately followed by ret.
define double @xorr_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: xorr_su64:
; CHECK: eor.8b v0, v1, v0
; CHECK-NEXT: ret
; GENERIC-LABEL: xorr_su64:
; GENERIC: eor v0.8b, v1.8b, v0.8b
; GENERIC-NEXT: ret
  %a.lane0 = extractelement <2 x i64> %a, i32 0
  %b.lane0 = extractelement <2 x i64> %b, i32 0
  %toggled = xor i64 %b.lane0, %a.lane0
  %toggled.bits = bitcast i64 %toggled to double
  ret double %toggled.bits
}
[AArch64] Register passes so they can be run by llc Initialize all AArch64-specific passes in the TargetMachine so they can be run by llc. This can lead to conflicts in opt with some command line options that share the same name as the pass, so I took this opportunity to do some cleanups: * rename all relevant command line options from "aarch64-blah" to "aarch64-enable-blah" and update the tests accordingly * run clang-format on their declarations * move all these declarations to a common place (the TargetMachine) as opposed to having them scattered around (AArch64BranchRelaxation and AArch64AddressTypePromotion were the only offenders) llvm-svn: 277322 2016-08-01 13:56:57 +08:00			`; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=apple -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true \| FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-NOOPT`
			`; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=apple -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false \| FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-OPT`
			`; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=generic -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true \| FileCheck %s -check-prefix=GENERIC -check-prefix=GENERIC-NOOPT`
			`; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=generic -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false \| FileCheck %s -check-prefix=GENERIC -check-prefix=GENERIC-OPT`
[ARM64] Fix an issue where we were always assuming a copy was coming from a D subregister. llvm-svn: 207423 2014-04-29 00:21:50 +08:00
ARM64: initial backend import This adds a second implementation of the AArch64 architecture to LLVM, accessible in parallel via the "arm64" triple. The plan over the coming weeks & months is to merge the two into a single backend, during which time thorough code review should naturally occur. Everything will be easier with the target in-tree though, hence this commit. llvm-svn: 205090 2014-03-29 18:18:08 +08:00			`define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone {`
			`; CHECK-LABEL: bar:`
			`; CHECK: add.2d v[[REG:[0-9]+]], v0, v1`
			`; CHECK: add d[[REG3:[0-9]+]], d[[REG]], d1`
Complete the MachineScheduler fix made way back in r210390. "Fix the MachineScheduler's logic for updating ready times for in-order. Now the scheduler updates a node's ready time as soon as it is scheduled, before releasing dependent nodes." This fix was only made in one variant of the ScheduleDAGMI driver. Francois de Ferriere reported the issue in the other bit of code where it was also needed. I never got around to coming up with a test case, but it's an obvious fix that shouldn't be delayed any longer. I'll try to refactor this code a little better. I did verify performance on a wide variety of targets and saw no negative impact with this fix. llvm-svn: 233366 2015-03-27 14:10:13 +08:00			`; CHECK: sub d[[REG2:[0-9]+]], d[[REG]], d1`
Re-enable "[MachineCopyPropagation] Extend pass to do COPY source forwarding" Re-enable commit r323991 now that r325931 has been committed to make MachineOperand::isRenamable() check more conservative w.r.t. code changes and opt-in on a per-target basis. llvm-svn: 326208 2018-02-28 00:59:10 +08:00			`; CHECK-NOT: fmov`
[AArch64] Run a peephole pass right after AdvSIMD pass. The AdvSIMD pass may produce copies that are not coalescer-friendly. The peephole optimizer knows how to fix that as demonstrated in the test case. <rdar://problem/12702965> llvm-svn: 216200 2014-08-22 02:10:07 +08:00			`; CHECK: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]`
Re-enable "[MachineCopyPropagation] Extend pass to do COPY source forwarding" Re-enable commit r323991 now that r325931 has been committed to make MachineOperand::isRenamable() check more conservative w.r.t. code changes and opt-in on a per-target basis. llvm-svn: 326208 2018-02-28 00:59:10 +08:00			`; CHECK-NOT: fmov`
[AArch64][TableGen] Skip tied result operands for InstAlias Summary: This patch fixes an issue so that the right alias is printed when the instruction has tied operands. It checks the number of operands in the resulting instruction as opposed to the alias, and then skips over tied operands that should not be printed in the alias. This allows to generate the preferred assembly syntax for the AArch64 'ins' instruction, which should always be displayed as 'mov' according to the ARM Architecture Reference Manual. Several unit tests have changed as a result, but only to reflect the preferred disassembly. Some other InstAlias patterns (movk/bic/orr) needed a slight adjustment to stop them becoming the default and breaking other unit tests. Please note that the patch is mostly the same as https://reviews.llvm.org/D29219 which was reverted because of an issue found when running TableGen with the Address Sanitizer. That issue has been addressed in this iteration of the patch. Reviewers: rengolin, stoklund, huntergr, SjoerdMeijer, rovka Reviewed By: rengolin, SjoerdMeijer Subscribers: fhahn, aemerson, javed.absar, kristof.beyls, llvm-commits Differential Revision: https://reviews.llvm.org/D40030 llvm-svn: 318650 2017-11-20 22:36:40 +08:00			`; CHECK: mov.d v0[1], [[COPY_REG2]]`
[AArch64] Run a peephole pass right after AdvSIMD pass. The AdvSIMD pass may produce copies that are not coalescer-friendly. The peephole optimizer knows how to fix that as demonstrated in the test case. <rdar://problem/12702965> llvm-svn: 216200 2014-08-22 02:10:07 +08:00			`; CHECK-NEXT: ret`
			`;`
[ARM64] Fix an issue where we were always assuming a copy was coming from a D subregister. llvm-svn: 207423 2014-04-29 00:21:50 +08:00			`; GENERIC-LABEL: bar:`
			`; GENERIC: add v[[REG:[0-9]+]].2d, v0.2d, v1.2d`
			`; GENERIC: add d[[REG3:[0-9]+]], d[[REG]], d1`
Complete the MachineScheduler fix made way back in r210390. "Fix the MachineScheduler's logic for updating ready times for in-order. Now the scheduler updates a node's ready time as soon as it is scheduled, before releasing dependent nodes." This fix was only made in one variant of the ScheduleDAGMI driver. Francois de Ferriere reported the issue in the other bit of code where it was also needed. I never got around to coming up with a test case, but it's an obvious fix that shouldn't be delayed any longer. I'll try to refactor this code a little better. I did verify performance on a wide variety of targets and saw no negative impact with this fix. llvm-svn: 233366 2015-03-27 14:10:13 +08:00			`; GENERIC: sub d[[REG2:[0-9]+]], d[[REG]], d1`
Re-enable "[MachineCopyPropagation] Extend pass to do COPY source forwarding" Re-enable commit r323991 now that r325931 has been committed to make MachineOperand::isRenamable() check more conservative w.r.t. code changes and opt-in on a per-target basis. llvm-svn: 326208 2018-02-28 00:59:10 +08:00			`; GENERIC-NOT: fmov`
[AArch64] Run a peephole pass right after AdvSIMD pass. The AdvSIMD pass may produce copies that are not coalescer-friendly. The peephole optimizer knows how to fix that as demonstrated in the test case. <rdar://problem/12702965> llvm-svn: 216200 2014-08-22 02:10:07 +08:00			`; GENERIC: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]`
Re-enable "[MachineCopyPropagation] Extend pass to do COPY source forwarding" Re-enable commit r323991 now that r325931 has been committed to make MachineOperand::isRenamable() check more conservative w.r.t. code changes and opt-in on a per-target basis. llvm-svn: 326208 2018-02-28 00:59:10 +08:00			`; GENERIC-NOT: fmov`
[AArch64][TableGen] Skip tied result operands for InstAlias Summary: This patch fixes an issue so that the right alias is printed when the instruction has tied operands. It checks the number of operands in the resulting instruction as opposed to the alias, and then skips over tied operands that should not be printed in the alias. This allows to generate the preferred assembly syntax for the AArch64 'ins' instruction, which should always be displayed as 'mov' according to the ARM Architecture Reference Manual. Several unit tests have changed as a result, but only to reflect the preferred disassembly. Some other InstAlias patterns (movk/bic/orr) needed a slight adjustment to stop them becoming the default and breaking other unit tests. Please note that the patch is mostly the same as https://reviews.llvm.org/D29219 which was reverted because of an issue found when running TableGen with the Address Sanitizer. That issue has been addressed in this iteration of the patch. Reviewers: rengolin, stoklund, huntergr, SjoerdMeijer, rovka Reviewed By: rengolin, SjoerdMeijer Subscribers: fhahn, aemerson, javed.absar, kristof.beyls, llvm-commits Differential Revision: https://reviews.llvm.org/D40030 llvm-svn: 318650 2017-11-20 22:36:40 +08:00			`; GENERIC: mov v0.d[1], [[COPY_REG2]]`
[AArch64] Run a peephole pass right after AdvSIMD pass. The AdvSIMD pass may produce copies that are not coalescer-friendly. The peephole optimizer knows how to fix that as demonstrated in the test case. <rdar://problem/12702965> llvm-svn: 216200 2014-08-22 02:10:07 +08:00			`; GENERIC-NEXT: ret`
ARM64: initial backend import This adds a second implementation of the AArch64 architecture to LLVM, accessible in parallel via the "arm64" triple. The plan over the coming weeks & months is to merge the two into a single backend, during which time thorough code review should naturally occur. Everything will be easier with the target in-tree though, hence this commit. llvm-svn: 205090 2014-03-29 18:18:08 +08:00			`%add = add <2 x i64> %a, %b`
			`%vgetq_lane = extractelement <2 x i64> %add, i32 0`
			`%vgetq_lane2 = extractelement <2 x i64> %b, i32 0`
			`%add3 = add i64 %vgetq_lane, %vgetq_lane2`
			`%sub = sub i64 %vgetq_lane, %vgetq_lane2`
			`%vecinit = insertelement <2 x i64> undef, i64 %add3, i32 0`
			`%vecinit8 = insertelement <2 x i64> %vecinit, i64 %sub, i32 1`
			`ret <2 x i64> %vecinit8`
			`}`

			`define double @subdd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {`
			`; CHECK-LABEL: subdd_su64:`
			`; CHECK: sub d0, d1, d0`
			`; CHECK-NEXT: ret`
[ARM64] Fix an issue where we were always assuming a copy was coming from a D subregister. llvm-svn: 207423 2014-04-29 00:21:50 +08:00			`; GENERIC-LABEL: subdd_su64:`
			`; GENERIC: sub d0, d1, d0`
			`; GENERIC-NEXT: ret`
ARM64: initial backend import This adds a second implementation of the AArch64 architecture to LLVM, accessible in parallel via the "arm64" triple. The plan over the coming weeks & months is to merge the two into a single backend, during which time thorough code review should naturally occur. Everything will be easier with the target in-tree though, hence this commit. llvm-svn: 205090 2014-03-29 18:18:08 +08:00			`%vecext = extractelement <2 x i64> %a, i32 0`
			`%vecext1 = extractelement <2 x i64> %b, i32 0`
			`%sub.i = sub nsw i64 %vecext1, %vecext`
			`%retval = bitcast i64 %sub.i to double`
			`ret double %retval`
			`}`

			`define double @vaddd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {`
			`; CHECK-LABEL: vaddd_su64:`
			`; CHECK: add d0, d1, d0`
			`; CHECK-NEXT: ret`
[ARM64] Fix an issue where we were always assuming a copy was coming from a D subregister. llvm-svn: 207423 2014-04-29 00:21:50 +08:00			`; GENERIC-LABEL: vaddd_su64:`
			`; GENERIC: add d0, d1, d0`
			`; GENERIC-NEXT: ret`
ARM64: initial backend import This adds a second implementation of the AArch64 architecture to LLVM, accessible in parallel via the "arm64" triple. The plan over the coming weeks & months is to merge the two into a single backend, during which time thorough code review should naturally occur. Everything will be easier with the target in-tree though, hence this commit. llvm-svn: 205090 2014-03-29 18:18:08 +08:00			`%vecext = extractelement <2 x i64> %a, i32 0`
			`%vecext1 = extractelement <2 x i64> %b, i32 0`
			`%add.i = add nsw i64 %vecext1, %vecext`
			`%retval = bitcast i64 %add.i to double`
			`ret double %retval`
			`}`
[ARM64] Fix an issue where we were always assuming a copy was coming from a D subregister. llvm-svn: 207423 2014-04-29 00:21:50 +08:00
			`; sub MI doesn't access dsub register.`
			`define double @add_sub_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {`
			`; CHECK-LABEL: add_sub_su64:`
			`; CHECK: add d0, d1, d0`
			`; CHECK: sub d0, {{d[0-9]+}}, d0`
			`; CHECK-NEXT: ret`
			`; GENERIC-LABEL: add_sub_su64:`
			`; GENERIC: add d0, d1, d0`
			`; GENERIC: sub d0, {{d[0-9]+}}, d0`
			`; GENERIC-NEXT: ret`
			`%vecext = extractelement <2 x i64> %a, i32 0`
			`%vecext1 = extractelement <2 x i64> %b, i32 0`
			`%add.i = add i64 %vecext1, %vecext`
			`%sub.i = sub i64 0, %add.i`
			`%retval = bitcast i64 %sub.i to double`
			`ret double %retval`
			`}`
[AArch64] Extend the number of scalar instructions supported in the AdvSIMD scalar integer instruction pass. This is a patch I had lying around from a few months ago. The pass is currently disabled by default, so nothing to interesting. llvm-svn: 214779 2014-08-05 05:20:25 +08:00			`define double @and_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {`
			`; CHECK-LABEL: and_su64:`
			`; CHECK: and.8b v0, v1, v0`
			`; CHECK-NEXT: ret`
			`; GENERIC-LABEL: and_su64:`
			`; GENERIC: and v0.8b, v1.8b, v0.8b`
			`; GENERIC-NEXT: ret`
			`%vecext = extractelement <2 x i64> %a, i32 0`
			`%vecext1 = extractelement <2 x i64> %b, i32 0`
			`%or.i = and i64 %vecext1, %vecext`
			`%retval = bitcast i64 %or.i to double`
			`ret double %retval`
			`}`

			`define double @orr_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {`
			`; CHECK-LABEL: orr_su64:`
			`; CHECK: orr.8b v0, v1, v0`
			`; CHECK-NEXT: ret`
			`; GENERIC-LABEL: orr_su64:`
			`; GENERIC: orr v0.8b, v1.8b, v0.8b`
			`; GENERIC-NEXT: ret`
			`%vecext = extractelement <2 x i64> %a, i32 0`
			`%vecext1 = extractelement <2 x i64> %b, i32 0`
			`%or.i = or i64 %vecext1, %vecext`
			`%retval = bitcast i64 %or.i to double`
			`ret double %retval`
			`}`

			`define double @xorr_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {`
			`; CHECK-LABEL: xorr_su64:`
			`; CHECK: eor.8b v0, v1, v0`
			`; CHECK-NEXT: ret`
			`; GENERIC-LABEL: xorr_su64:`
			`; GENERIC: eor v0.8b, v1.8b, v0.8b`
			`; GENERIC-NEXT: ret`
			`%vecext = extractelement <2 x i64> %a, i32 0`
			`%vecext1 = extractelement <2 x i64> %b, i32 0`
			`%xor.i = xor i64 %vecext1, %vecext`
			`%retval = bitcast i64 %xor.i to double`
			`ret double %retval`
			`}`