llvm-project/llvm/test/CodeGen/X86/vec_fabs.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X64

; Vector fabs of a 128-bit <2 x double> value.
; On both the i686 and x86_64 runs the call is expected to lower to a single
; vandpd on %xmm0 with a memory operand: the label .LCPI0_0 on i686 and a
; RIP-relative address on x86_64.
; NOTE(review): the memory operand is presumably the sign-bit-clearing mask;
; this file does not check the mask contents.
define <2 x double> @fabs_v2f64(<2 x double> %p) {
; X32-LABEL: fabs_v2f64:
; X32:       # BB#0:
; X32-NEXT:    vandpd .LCPI0_0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: fabs_v2f64:
; X64:       # BB#0:
; X64-NEXT:    vandpd {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    retq
  %t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
; Declaration of the fabs intrinsic overload called by @fabs_v2f64.
declare <2 x double> @llvm.fabs.v2f64(<2 x double> %p)

; Vector fabs of a 128-bit <4 x float> value.
; Expected lowering on both targets is a single vandps on %xmm0 with a memory
; operand: the label .LCPI1_0 on i686 and a RIP-relative address on x86_64.
; NOTE(review): the memory operand is presumably the sign-bit-clearing mask;
; this file does not check the mask contents.
define <4 x float> @fabs_v4f32(<4 x float> %p) {
; X32-LABEL: fabs_v4f32:
; X32:       # BB#0:
; X32-NEXT:    vandps .LCPI1_0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: fabs_v4f32:
; X64:       # BB#0:
; X64-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    retq
  %t = call <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
; Declaration of the fabs intrinsic overload called by @fabs_v4f32.
declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)

; Vector fabs of a 256-bit <4 x double> value.
; Expected lowering on both targets is a single vandpd on the full %ymm0
; register with a memory operand: the label .LCPI2_0 on i686 and a
; RIP-relative address on x86_64.
; NOTE(review): the memory operand is presumably the sign-bit-clearing mask;
; this file does not check the mask contents.
define <4 x double> @fabs_v4f64(<4 x double> %p) {
; X32-LABEL: fabs_v4f64:
; X32:       # BB#0:
; X32-NEXT:    vandpd .LCPI2_0, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: fabs_v4f64:
; X64:       # BB#0:
; X64-NEXT:    vandpd {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
  %t = call <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
; Declaration of the fabs intrinsic overload called by @fabs_v4f64.
declare <4 x double> @llvm.fabs.v4f64(<4 x double> %p)

; Vector fabs of a 256-bit <8 x float> value.
; Expected lowering on both targets is a single vandps on the full %ymm0
; register with a memory operand: the label .LCPI3_0 on i686 and a
; RIP-relative address on x86_64.
; NOTE(review): the memory operand is presumably the sign-bit-clearing mask;
; this file does not check the mask contents.
define <8 x float> @fabs_v8f32(<8 x float> %p) {
; X32-LABEL: fabs_v8f32:
; X32:       # BB#0:
; X32-NEXT:    vandps .LCPI3_0, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: fabs_v8f32:
; X64:       # BB#0:
; X64-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
  %t = call <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
; Declaration of the fabs intrinsic overload called by @fabs_v8f32.
declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p)

; PR20354: when generating code for a vector fabs op,
; make sure that we're only turning off the sign bit of each float value.
; No constant pool loads or vector ops are needed for the fabs of a
; bitcasted integer constant; we should just return an integer constant
; that has the sign bits turned off.
;
; So instead of something like this:
;    movabsq (materialize the 64-bit constant in an integer register)
;    vmovq   (move from integer register to vector/fp register)
;    vandps  (mask off sign bits using a constant pool load of the mask)
;    vmovq   (move vector/fp register back to integer return register)
;
; We should generate:
;    mov     (put constant value in return register)

; fabs of a constant i64 bitcast to <2 x float>, returned as i64.
; The input is 0xFFFFFFFF00000000: upper 32 bits all ones, lower 32 bits zero.
; The expected result is the integer constant 0x7FFFFFFF00000000, produced with
; no vector instructions:
;   - i686:   %eax is zeroed and %edx is set to 0x7FFFFFFF.
;   - x86_64: a single movabsq of 0x7FFFFFFF00000000 into %rax.
define i64 @fabs_v2f32_1() {
; X32-LABEL: fabs_v2f32_1:
; X32:       # BB#0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    movl $2147483647, %edx # imm = 0x7FFFFFFF
; X32-NEXT:    retl
;
; X64-LABEL: fabs_v2f32_1:
; X64:       # BB#0:
; X64-NEXT:    movabsq $9223372032559808512, %rax # imm = 0x7FFFFFFF00000000
; X64-NEXT:    retq
 ; Reinterpret the integer constant as two floats, take fabs of each, and
 ; reinterpret the result back as a single i64.
 %bitcast = bitcast i64 18446744069414584320 to <2 x float> ; 0xFFFF_FFFF_0000_0000
 %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
 %ret = bitcast <2 x float> %fabs to i64
 ret i64 %ret
}

; fabs of a constant i64 bitcast to <2 x float>, returned as i64.
; Mirror image of @fabs_v2f32_1: the input is 0x00000000FFFFFFFF, with lower
; 32 bits all ones and upper 32 bits zero.
; The expected result is the integer constant 0x7FFFFFFF, produced with no
; vector instructions:
;   - i686:   %eax is set to 0x7FFFFFFF and %edx is zeroed.
;   - x86_64: a single 32-bit movl of 0x7FFFFFFF into %eax.
define i64 @fabs_v2f32_2() {
; X32-LABEL: fabs_v2f32_2:
; X32:       # BB#0:
; X32-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    retl
;
; X64-LABEL: fabs_v2f32_2:
; X64:       # BB#0:
; X64-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
; X64-NEXT:    retq
 ; Reinterpret the integer constant as two floats, take fabs of each, and
 ; reinterpret the result back as a single i64.
 %bitcast = bitcast i64 4294967295 to <2 x float> ; 0x0000_0000_FFFF_FFFF
 %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
 %ret = bitcast <2 x float> %fabs to i64
 ret i64 %ret
}

; Declaration of the <2 x float> fabs intrinsic overload used by both
; @fabs_v2f32_1 and @fabs_v2f32_2.
declare <2 x float> @llvm.fabs.v2f32(<2 x float> %p)
[X86][SSE] Regenerated vector float tests - fabs / floor(etc.) / fneg / float2double llvm-svn: 265186 2016-04-02 05:30:48 +08:00			`; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py`
			`; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx \| FileCheck %s --check-prefix=X32`
			`; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f \| FileCheck %s --check-prefix=X32`
			`; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx \| FileCheck %s --check-prefix=X64`
			`; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f \| FileCheck %s --check-prefix=X64`
Add support for lowering FABS of vector types. llvm-svn: 163461 2012-09-08 15:31:51 +08:00
[X86][SSE] Regenerated vector float tests - fabs / floor(etc.) / fneg / float2double llvm-svn: 265186 2016-04-02 05:30:48 +08:00			`define <2 x double> @fabs_v2f64(<2 x double> %p) {`
			`; X32-LABEL: fabs_v2f64:`
			`; X32: # BB#0:`
			`; X32-NEXT: vandpd .LCPI0_0, %xmm0, %xmm0`
			`; X32-NEXT: retl`
			`;`
			`; X64-LABEL: fabs_v2f64:`
			`; X64: # BB#0:`
			`; X64-NEXT: vandpd {{.*}}(%rip), %xmm0, %xmm0`
			`; X64-NEXT: retq`
Add support for lowering FABS of vector types. llvm-svn: 163461 2012-09-08 15:31:51 +08:00			`%t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p)`
			`ret <2 x double> %t`
			`}`
			`declare <2 x double> @llvm.fabs.v2f64(<2 x double> %p)`

[X86][SSE] Regenerated vector float tests - fabs / floor(etc.) / fneg / float2double llvm-svn: 265186 2016-04-02 05:30:48 +08:00			`define <4 x float> @fabs_v4f32(<4 x float> %p) {`
			`; X32-LABEL: fabs_v4f32:`
			`; X32: # BB#0:`
			`; X32-NEXT: vandps .LCPI1_0, %xmm0, %xmm0`
			`; X32-NEXT: retl`
			`;`
			`; X64-LABEL: fabs_v4f32:`
			`; X64: # BB#0:`
			`; X64-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0`
			`; X64-NEXT: retq`
Add support for lowering FABS of vector types. llvm-svn: 163461 2012-09-08 15:31:51 +08:00			`%t = call <4 x float> @llvm.fabs.v4f32(<4 x float> %p)`
			`ret <4 x float> %t`
			`}`
			`declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)`

[X86][SSE] Regenerated vector float tests - fabs / floor(etc.) / fneg / float2double llvm-svn: 265186 2016-04-02 05:30:48 +08:00			`define <4 x double> @fabs_v4f64(<4 x double> %p) {`
			`; X32-LABEL: fabs_v4f64:`
			`; X32: # BB#0:`
			`; X32-NEXT: vandpd .LCPI2_0, %ymm0, %ymm0`
			`; X32-NEXT: retl`
			`;`
			`; X64-LABEL: fabs_v4f64:`
			`; X64: # BB#0:`
			`; X64-NEXT: vandpd {{.*}}(%rip), %ymm0, %ymm0`
			`; X64-NEXT: retq`
Add support for lowering FABS of vector types. llvm-svn: 163461 2012-09-08 15:31:51 +08:00			`%t = call <4 x double> @llvm.fabs.v4f64(<4 x double> %p)`
			`ret <4 x double> %t`
			`}`
			`declare <4 x double> @llvm.fabs.v4f64(<4 x double> %p)`

[X86][SSE] Regenerated vector float tests - fabs / floor(etc.) / fneg / float2double llvm-svn: 265186 2016-04-02 05:30:48 +08:00			`define <8 x float> @fabs_v8f32(<8 x float> %p) {`
			`; X32-LABEL: fabs_v8f32:`
			`; X32: # BB#0:`
			`; X32-NEXT: vandps .LCPI3_0, %ymm0, %ymm0`
			`; X32-NEXT: retl`
			`;`
			`; X64-LABEL: fabs_v8f32:`
			`; X64: # BB#0:`
			`; X64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0`
			`; X64-NEXT: retq`
Add support for lowering FABS of vector types. llvm-svn: 163461 2012-09-08 15:31:51 +08:00			`%t = call <8 x float> @llvm.fabs.v8f32(<8 x float> %p)`
			`ret <8 x float> %t`
			`}`
			`declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p)`
fix for PR20354 - Miscompile of fabs due to vectorization This is intended to be the minimal change needed to fix PR20354 ( http://llvm.org/bugs/show_bug.cgi?id=20354 ). The check for a vector operation was wrong; we need to check that the fabs itself is not a vector operation. This patch will not generate the optimal code. A constant pool load and 'and' op will be generated instead of just returning a value that we can calculate in advance (as we do for the scalar case). I've put a 'TODO' comment for that here and expect to have that patch ready soon. There is a very similar optimization that we can do in visitFNEG, so I've put another 'TODO' there and expect to have another patch for that too. llvm-svn: 214670 2014-08-04 06:48:23 +08:00
			`; PR20354: when generating code for a vector fabs op,`
Optimize vector fabs of bitcasted constant integer values. Allow vector fabs operations on bitcasted constant integer values to be optimized in the same way that we already optimize scalar fabs. So for code like this: %bitcast = bitcast i64 18446744069414584320 to <2 x float> ; 0xFFFF_FFFF_0000_0000 %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast) %ret = bitcast <2 x float> %fabs to i64 Instead of generating something like this: movabsq (constant pool load of mask for sign bits) vmovq (move from integer register to vector/fp register) vandps (mask off sign bits) vmovq (move vector/fp register back to integer return register) We should generate: mov (put constant value in return register) I have also removed a redundant clause in the first 'if' statement: N0.getOperand(0).getValueType().isInteger() is the same thing as: IntVT.isInteger() Testcases for x86 and ARM added to existing files that deal with vector fabs. One existing testcase for x86 removed because it is no longer ideal. For more background, please see: http://reviews.llvm.org/D4770 And: http://llvm.org/bugs/show_bug.cgi?id=20354 Differential Revision: http://reviews.llvm.org/D4785 llvm-svn: 214892 2014-08-06 01:35:22 +08:00			`; make sure that we're only turning off the sign bit of each float value.`
			`; No constant pool loads or vector ops are needed for the fabs of a`
			`; bitcasted integer constant; we should just return an integer constant`
			`; that has the sign bits turned off.`
			`;`
			`; So instead of something like this:`
[X86][SSE] Regenerated vector float tests - fabs / floor(etc.) / fneg / float2double llvm-svn: 265186 2016-04-02 05:30:48 +08:00			`; movabsq (constant pool load of mask for sign bits)`
Optimize vector fabs of bitcasted constant integer values. Allow vector fabs operations on bitcasted constant integer values to be optimized in the same way that we already optimize scalar fabs. So for code like this: %bitcast = bitcast i64 18446744069414584320 to <2 x float> ; 0xFFFF_FFFF_0000_0000 %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast) %ret = bitcast <2 x float> %fabs to i64 Instead of generating something like this: movabsq (constant pool load of mask for sign bits) vmovq (move from integer register to vector/fp register) vandps (mask off sign bits) vmovq (move vector/fp register back to integer return register) We should generate: mov (put constant value in return register) I have also removed a redundant clause in the first 'if' statement: N0.getOperand(0).getValueType().isInteger() is the same thing as: IntVT.isInteger() Testcases for x86 and ARM added to existing files that deal with vector fabs. One existing testcase for x86 removed because it is no longer ideal. For more background, please see: http://reviews.llvm.org/D4770 And: http://llvm.org/bugs/show_bug.cgi?id=20354 Differential Revision: http://reviews.llvm.org/D4785 llvm-svn: 214892 2014-08-06 01:35:22 +08:00			`; vmovq (move from integer register to vector/fp register)`
			`; vandps (mask off sign bits)`
			`; vmovq (move vector/fp register back to integer return register)`
			`;`
			`; We should generate:`
			`; mov (put constant value in return register)`

			`define i64 @fabs_v2f32_1() {`
[X86][SSE] Regenerated vector float tests - fabs / floor(etc.) / fneg / float2double llvm-svn: 265186 2016-04-02 05:30:48 +08:00			`; X32-LABEL: fabs_v2f32_1:`
			`; X32: # BB#0:`
			`; X32-NEXT: xorl %eax, %eax`
			`; X32-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF`
			`; X32-NEXT: retl`
			`;`
			`; X64-LABEL: fabs_v2f32_1:`
			`; X64: # BB#0:`
			`; X64-NEXT: movabsq $9223372032559808512, %rax # imm = 0x7FFFFFFF00000000`
			`; X64-NEXT: retq`
Optimize vector fabs of bitcasted constant integer values. Allow vector fabs operations on bitcasted constant integer values to be optimized in the same way that we already optimize scalar fabs. So for code like this: %bitcast = bitcast i64 18446744069414584320 to <2 x float> ; 0xFFFF_FFFF_0000_0000 %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast) %ret = bitcast <2 x float> %fabs to i64 Instead of generating something like this: movabsq (constant pool load of mask for sign bits) vmovq (move from integer register to vector/fp register) vandps (mask off sign bits) vmovq (move vector/fp register back to integer return register) We should generate: mov (put constant value in return register) I have also removed a redundant clause in the first 'if' statement: N0.getOperand(0).getValueType().isInteger() is the same thing as: IntVT.isInteger() Testcases for x86 and ARM added to existing files that deal with vector fabs. One existing testcase for x86 removed because it is no longer ideal. For more background, please see: http://reviews.llvm.org/D4770 And: http://llvm.org/bugs/show_bug.cgi?id=20354 Differential Revision: http://reviews.llvm.org/D4785 llvm-svn: 214892 2014-08-06 01:35:22 +08:00			`%bitcast = bitcast i64 18446744069414584320 to <2 x float> ; 0xFFFF_FFFF_0000_0000`
			`%fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)`
			`%ret = bitcast <2 x float> %fabs to i64`
			`ret i64 %ret`
			`}`

			`define i64 @fabs_v2f32_2() {`
[X86][SSE] Regenerated vector float tests - fabs / floor(etc.) / fneg / float2double llvm-svn: 265186 2016-04-02 05:30:48 +08:00			`; X32-LABEL: fabs_v2f32_2:`
			`; X32: # BB#0:`
			`; X32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF`
			`; X32-NEXT: xorl %edx, %edx`
			`; X32-NEXT: retl`
			`;`
			`; X64-LABEL: fabs_v2f32_2:`
			`; X64: # BB#0:`
			`; X64-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF`
			`; X64-NEXT: retq`
Optimize vector fabs of bitcasted constant integer values. Allow vector fabs operations on bitcasted constant integer values to be optimized in the same way that we already optimize scalar fabs. So for code like this: %bitcast = bitcast i64 18446744069414584320 to <2 x float> ; 0xFFFF_FFFF_0000_0000 %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast) %ret = bitcast <2 x float> %fabs to i64 Instead of generating something like this: movabsq (constant pool load of mask for sign bits) vmovq (move from integer register to vector/fp register) vandps (mask off sign bits) vmovq (move vector/fp register back to integer return register) We should generate: mov (put constant value in return register) I have also removed a redundant clause in the first 'if' statement: N0.getOperand(0).getValueType().isInteger() is the same thing as: IntVT.isInteger() Testcases for x86 and ARM added to existing files that deal with vector fabs. One existing testcase for x86 removed because it is no longer ideal. For more background, please see: http://reviews.llvm.org/D4770 And: http://llvm.org/bugs/show_bug.cgi?id=20354 Differential Revision: http://reviews.llvm.org/D4785 llvm-svn: 214892 2014-08-06 01:35:22 +08:00			`%bitcast = bitcast i64 4294967295 to <2 x float> ; 0x0000_0000_FFFF_FFFF`
			`%fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)`
			`%ret = bitcast <2 x float> %fabs to i64`
			`ret i64 %ret`
fix for PR20354 - Miscompile of fabs due to vectorization This is intended to be the minimal change needed to fix PR20354 ( http://llvm.org/bugs/show_bug.cgi?id=20354 ). The check for a vector operation was wrong; we need to check that the fabs itself is not a vector operation. This patch will not generate the optimal code. A constant pool load and 'and' op will be generated instead of just returning a value that we can calculate in advance (as we do for the scalar case). I've put a 'TODO' comment for that here and expect to have that patch ready soon. There is a very similar optimization that we can do in visitFNEG, so I've put another 'TODO' there and expect to have another patch for that too. llvm-svn: 214670 2014-08-04 06:48:23 +08:00			`}`

			`declare <2 x float> @llvm.fabs.v2f32(<2 x float> %p)`