; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=apple -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-NOOPT
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=apple -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-OPT
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=generic -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=GENERIC -check-prefix=GENERIC-NOOPT
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-eabi -aarch64-neon-syntax=generic -aarch64-enable-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=GENERIC -check-prefix=GENERIC-OPT

; Test: a <2 x i64> vector add whose lane 0 feeds scalar i64 add and sub
; operations; the two scalar results are then packed back into lanes 0 and 1
; of the returned vector.
;
; The expectations below require the scalar add/sub to be emitted on d
; registers. With advanced copy optimization disabled, an extra pair of fmov
; copies (d -> x, then x -> d0) is expected; with it enabled, only the single
; fmov that feeds the lane-1 insert may appear.
define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; Expectations for the apple NEON syntax.
; CHECK-LABEL: bar:
; CHECK: add.2d v[[REG:[0-9]+]], v0, v1
; CHECK: add d[[REG3:[0-9]+]], d[[REG]], d1
; CHECK: sub d[[REG2:[0-9]+]], d[[REG]], d1
; Without advanced copy optimization, we end up with cross-register-bank
; copies that cannot be coalesced.
; CHECK-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]]
; With advanced copy optimization, we end up with just one copy,
; which inserts the computed high part into the V register.
; CHECK-OPT-NOT: fmov
; CHECK: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]
; CHECK-NOOPT: fmov d0, [[COPY_REG3]]
; CHECK-OPT-NOT: fmov
; CHECK: mov.d v0[1], [[COPY_REG2]]
; CHECK-NEXT: ret
;
; Same expectations, spelled in the generic NEON syntax.
; GENERIC-LABEL: bar:
; GENERIC: add v[[REG:[0-9]+]].2d, v0.2d, v1.2d
; GENERIC: add d[[REG3:[0-9]+]], d[[REG]], d1
; GENERIC: sub d[[REG2:[0-9]+]], d[[REG]], d1
; GENERIC-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]]
; GENERIC-OPT-NOT: fmov
; GENERIC: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]
; GENERIC-NOOPT: fmov d0, [[COPY_REG3]]
; GENERIC-OPT-NOT: fmov
; GENERIC: mov v0.d[1], [[COPY_REG2]]
; GENERIC-NEXT: ret
  ; Vector add, then pull lane 0 out of both the sum and %b.
  %add = add <2 x i64> %a, %b
  %vgetq_lane = extractelement <2 x i64> %add, i32 0
  %vgetq_lane2 = extractelement <2 x i64> %b, i32 0
  ; Scalar i64 add and sub on the extracted lanes.
  %add3 = add i64 %vgetq_lane, %vgetq_lane2
  %sub = sub i64 %vgetq_lane, %vgetq_lane2
  ; Re-pack: the sum goes to lane 0, the difference to lane 1.
  %vecinit = insertelement <2 x i64> undef, i64 %add3, i32 0
  %vecinit8 = insertelement <2 x i64> %vecinit, i64 %sub, i32 1
  ret <2 x i64> %vecinit8
}
|
|
|
|
|
|
|
|
; Test: i64 subtraction of lane 0 of %a from lane 0 of %b, with the result
; bitcast to double. Both syntaxes expect one scalar sub on d registers
; followed immediately by the return.
define double @subdd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: subdd_su64:
; CHECK: sub d0, d1, d0
; CHECK-NEXT: ret
; GENERIC-LABEL: subdd_su64:
; GENERIC: sub d0, d1, d0
; GENERIC-NEXT: ret
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  ; Operand order matters: this is b[0] - a[0], hence "d1, d0" above.
  %sub.i = sub nsw i64 %vecext1, %vecext
  %retval = bitcast i64 %sub.i to double
  ret double %retval
}
|
|
|
|
|
|
|
|
; Test: i64 addition of lane 0 of %b and lane 0 of %a, with the result
; bitcast to double. Both syntaxes expect one scalar add on d registers
; followed immediately by the return.
define double @vaddd_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: vaddd_su64:
; CHECK: add d0, d1, d0
; CHECK-NEXT: ret
; GENERIC-LABEL: vaddd_su64:
; GENERIC: add d0, d1, d0
; GENERIC-NEXT: ret
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  %add.i = add nsw i64 %vecext1, %vecext
  %retval = bitcast i64 %add.i to double
  ret double %retval
}
|
2014-04-29 00:21:50 +08:00
|
|
|
|
|
|
|
; sub MI doesn't access dsub register.
;
; Test: lane 0 of %b plus lane 0 of %a, then negated (written as 0 - sum),
; with the result bitcast to double. Both syntaxes expect a scalar add and a
; scalar sub on d registers; the first source operand of the sub may be any
; d register.
define double @add_sub_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: add_sub_su64:
; CHECK: add d0, d1, d0
; CHECK: sub d0, {{d[0-9]+}}, d0
; CHECK-NEXT: ret
; GENERIC-LABEL: add_sub_su64:
; GENERIC: add d0, d1, d0
; GENERIC: sub d0, {{d[0-9]+}}, d0
; GENERIC-NEXT: ret
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  %add.i = add i64 %vecext1, %vecext
  ; Negation of the sum, expressed as a subtraction from the constant 0.
  %sub.i = sub i64 0, %add.i
  %retval = bitcast i64 %sub.i to double
  ret double %retval
}
|
2014-08-05 05:20:25 +08:00
|
|
|
; Test: bitwise AND of lane 0 of %b and lane 0 of %a, with the result bitcast
; to double. Both syntaxes expect a single 8-byte vector AND followed
; immediately by the return.
define double @and_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: and_su64:
; CHECK: and.8b v0, v1, v0
; CHECK-NEXT: ret
; GENERIC-LABEL: and_su64:
; GENERIC: and v0.8b, v1.8b, v0.8b
; GENERIC-NEXT: ret
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  %and.i = and i64 %vecext1, %vecext
  %retval = bitcast i64 %and.i to double
  ret double %retval
}
|
|
|
|
|
|
|
|
; Test: bitwise OR of lane 0 of %b and lane 0 of %a, with the result bitcast
; to double. Both syntaxes expect a single 8-byte vector ORR followed
; immediately by the return.
define double @orr_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: orr_su64:
; CHECK: orr.8b v0, v1, v0
; CHECK-NEXT: ret
; GENERIC-LABEL: orr_su64:
; GENERIC: orr v0.8b, v1.8b, v0.8b
; GENERIC-NEXT: ret
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  %or.i = or i64 %vecext1, %vecext
  %retval = bitcast i64 %or.i to double
  ret double %retval
}
|
|
|
|
|
|
|
|
; Test: bitwise XOR of lane 0 of %b and lane 0 of %a, with the result bitcast
; to double. Both syntaxes expect a single 8-byte vector EOR followed
; immediately by the return.
define double @xorr_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: xorr_su64:
; CHECK: eor.8b v0, v1, v0
; CHECK-NEXT: ret
; GENERIC-LABEL: xorr_su64:
; GENERIC: eor v0.8b, v1.8b, v0.8b
; GENERIC-NEXT: ret
  %vecext = extractelement <2 x i64> %a, i32 0
  %vecext1 = extractelement <2 x i64> %b, i32 0
  %xor.i = xor i64 %vecext1, %vecext
  %retval = bitcast i64 %xor.i to double
  ret double %retval
}
|