; llvm-project/llvm/test/CodeGen/X86/combine-fabs.ll

; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX

;
; NOTE: The assertions below were generated by utils/update_llc_test_checks.py.
; NaN constants cannot be checked (PR30443), so the generated output has to be
; edited by hand to remove the NaN constant comments.
;

; Constant folding of a scalar fabs.
; fabs(c1) -> c2
define float @combine_fabs_constant() {
; SSE-LABEL: combine_fabs_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_fabs_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    retq
  ; The expected output is one scalar load from memory followed by the return;
  ; no AND instruction is expected for the constant operand.
  %abs = call float @llvm.fabs.f32(float -2.000000e+00)
  ret float %abs
}

; Vector variant of the constant fold. The input lanes are +0.0, -0.0, +2.0 and
; -2.0; the expected result vector is [0.0, 0.0, 2.0, 2.0].
define <4 x float> @combine_vec_fabs_constant() {
; SSE-LABEL: combine_vec_fabs_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0.0E+0,0.0E+0,2.0E+0,2.0E+0]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fabs_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [0.0E+0,0.0E+0,2.0E+0,2.0E+0]
; AVX-NEXT:    retq
  %abs = call <4 x float> @llvm.fabs.v4f32(<4 x float> <float 0.000000e+00, float -0.000000e+00, float 2.000000e+00, float -2.000000e+00>)
  ret <4 x float> %abs
}

; A fabs applied to the result of another fabs.
; fabs(fabs(x)) -> fabs(x)
define float @combine_fabs_fabs(float %a) {
; SSE-LABEL: combine_fabs_fabs:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_fabs_fabs:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  ; Two fabs calls in the IR; the expected assembly contains only one AND
  ; against a RIP-relative memory operand.
  %inner = call float @llvm.fabs.f32(float %a)
  %outer = call float @llvm.fabs.f32(float %inner)
  ret float %outer
}

; Vector variant of the nested-fabs test: two <4 x float> fabs calls in the IR,
; a single AND in the expected assembly.
define <4 x float> @combine_vec_fabs_fabs(<4 x float> %a) {
; SSE-LABEL: combine_vec_fabs_fabs:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fabs_fabs:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %inner = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
  %outer = call <4 x float> @llvm.fabs.v4f32(<4 x float> %inner)
  ret <4 x float> %outer
}

; A fabs applied to a negated value.
; fabs(fneg(x)) -> fabs(x)
define float @combine_fabs_fneg(float %a) {
; SSE-LABEL: combine_fabs_fneg:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_fabs_fneg:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  ; The negation is written as a subtraction from -0.0. The expected assembly
  ; has no separate instruction for it, only the single AND.
  %neg = fsub float -0.000000e+00, %a
  %abs = call float @llvm.fabs.f32(float %neg)
  ret float %abs
}

; Vector variant of the fabs-of-negation test; every lane is subtracted from
; -0.0 before the fabs call.
define <4 x float> @combine_vec_fabs_fneg(<4 x float> %a) {
; SSE-LABEL: combine_vec_fabs_fneg:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fabs_fneg:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %neg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  %abs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %neg)
  ret <4 x float> %abs
}

; A fabs applied to the result of copysign.
; fabs(fcopysign(x, y)) -> fabs(x)
define float @combine_fabs_fcopysign(float %a, float %b) {
; SSE-LABEL: combine_fabs_fcopysign:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_fabs_fcopysign:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  ; The expected assembly is a single AND on xmm0 with no separate sign-copy
  ; sequence.
  %signed = call float @llvm.copysign.f32(float %a, float %b)
  %abs = call float @llvm.fabs.f32(float %signed)
  ret float %abs
}

; Vector variant of the fabs-of-copysign test; the expected assembly is again a
; single AND on xmm0.
define <4 x float> @combine_vec_fabs_fcopysign(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_vec_fabs_fcopysign:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fabs_fcopysign:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %signed = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
  %abs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %signed)
  ret <4 x float> %abs
}

; Scalar intrinsics called by the float tests in this file.
declare float @llvm.fabs.f32(float)
declare float @llvm.copysign.f32(float, float)

; <4 x float> intrinsics called by the vector tests in this file.
declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>)
[X86][SSE] Add some basic FABS combine tests llvm-svn: 322182 2018-01-10 21:28:34 +08:00			`; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 \| FileCheck %s --check-prefix=SSE`
			`; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 \| FileCheck %s --check-prefix=AVX`

			`;`
			`; NOTE: this is generated by utils/update_llc_test_checks.py but we can't check NAN types (PR30443),`
			`; so we need to edit it to remove the NAN constant comments`
			`;`

			`; fabs(c1) -> c2`
			`define float @combine_fabs_constant() {`
			`; SSE-LABEL: combine_fabs_constant:`
			`; SSE: # %bb.0:`
			`; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero`
			`; SSE-NEXT: retq`
			`;`
			`; AVX-LABEL: combine_fabs_constant:`
			`; AVX: # %bb.0:`
			`; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero`
			`; AVX-NEXT: retq`
			`%1 = call float @llvm.fabs.f32(float -2.0)`
			`ret float %1`
			`}`

			`define <4 x float> @combine_vec_fabs_constant() {`
			`; SSE-LABEL: combine_vec_fabs_constant:`
			`; SSE: # %bb.0:`
[X86] Force floating point values in constant pool decoding to print in scientific notation so they can't be confused with integers. When the floating point constants are whole numbers they have no decimal point so look like integers, but mean something very different in something like an 'and' instruction. Ideally we would just print a decimal point and a 0, but I couldn't see how to make APFloat::toString do that. llvm-svn: 345488 2018-10-29 12:52:04 +08:00			`; SSE-NEXT: movaps {{.*#+}} xmm0 = [0.0E+0,0.0E+0,2.0E+0,2.0E+0]`
[X86][SSE] Add some basic FABS combine tests llvm-svn: 322182 2018-01-10 21:28:34 +08:00			`; SSE-NEXT: retq`
			`;`
			`; AVX-LABEL: combine_vec_fabs_constant:`
			`; AVX: # %bb.0:`
[X86] Force floating point values in constant pool decoding to print in scientific notation so they can't be confused with integers. When the floating point constants are whole numbers they have no decimal point so look like integers, but mean something very different in something like an 'and' instruction. Ideally we would just print a decimal point and a 0, but I couldn't see how to make APFloat::toString do that. llvm-svn: 345488 2018-10-29 12:52:04 +08:00			`; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0.0E+0,0.0E+0,2.0E+0,2.0E+0]`
[X86][SSE] Add some basic FABS combine tests llvm-svn: 322182 2018-01-10 21:28:34 +08:00			`; AVX-NEXT: retq`
			`%1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> <float 0.0, float -0.0, float 2.0, float -2.0>)`
			`ret <4 x float> %1`
			`}`

			`; fabs(fabs(x)) -> fabs(x)`
			`define float @combine_fabs_fabs(float %a) {`
			`; SSE-LABEL: combine_fabs_fabs:`
			`; SSE: # %bb.0:`
			`; SSE-NEXT: andps {{.*}}(%rip), %xmm0`
			`; SSE-NEXT: retq`
			`;`
			`; AVX-LABEL: combine_fabs_fabs:`
			`; AVX: # %bb.0:`
			`; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1`
			`; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0`
			`; AVX-NEXT: retq`
			`%1 = call float @llvm.fabs.f32(float %a)`
			`%2 = call float @llvm.fabs.f32(float %1)`
			`ret float %2`
			`}`

			`define <4 x float> @combine_vec_fabs_fabs(<4 x float> %a) {`
			`; SSE-LABEL: combine_vec_fabs_fabs:`
			`; SSE: # %bb.0:`
			`; SSE-NEXT: andps {{.*}}(%rip), %xmm0`
			`; SSE-NEXT: retq`
			`;`
			`; AVX-LABEL: combine_vec_fabs_fabs:`
			`; AVX: # %bb.0:`
			`; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1`
			`; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0`
			`; AVX-NEXT: retq`
			`%1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)`
			`%2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %1)`
			`ret <4 x float> %2`
			`}`

			`; fabs(fneg(x)) -> fabs(x)`
			`define float @combine_fabs_fneg(float %a) {`
			`; SSE-LABEL: combine_fabs_fneg:`
			`; SSE: # %bb.0:`
			`; SSE-NEXT: andps {{.*}}(%rip), %xmm0`
			`; SSE-NEXT: retq`
			`;`
			`; AVX-LABEL: combine_fabs_fneg:`
			`; AVX: # %bb.0:`
			`; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1`
			`; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0`
			`; AVX-NEXT: retq`
			`%1 = fsub float -0.0, %a`
			`%2 = call float @llvm.fabs.f32(float %1)`
			`ret float %2`
			`}`

			`define <4 x float> @combine_vec_fabs_fneg(<4 x float> %a) {`
			`; SSE-LABEL: combine_vec_fabs_fneg:`
			`; SSE: # %bb.0:`
			`; SSE-NEXT: andps {{.*}}(%rip), %xmm0`
			`; SSE-NEXT: retq`
			`;`
			`; AVX-LABEL: combine_vec_fabs_fneg:`
			`; AVX: # %bb.0:`
			`; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1`
			`; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0`
			`; AVX-NEXT: retq`
			`%1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %a`
			`%2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %1)`
			`ret <4 x float> %2`
			`}`

			`; fabs(fcopysign(x, y)) -> fabs(x)`
			`define float @combine_fabs_fcopysign(float %a, float %b) {`
			`; SSE-LABEL: combine_fabs_fcopysign:`
			`; SSE: # %bb.0:`
			`; SSE-NEXT: andps {{.*}}(%rip), %xmm0`
			`; SSE-NEXT: retq`
			`;`
			`; AVX-LABEL: combine_fabs_fcopysign:`
			`; AVX: # %bb.0:`
			`; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1`
			`; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0`
			`; AVX-NEXT: retq`
			`%1 = call float @llvm.copysign.f32(float %a, float %b)`
			`%2 = call float @llvm.fabs.f32(float %1)`
			`ret float %2`
			`}`

			`define <4 x float> @combine_vec_fabs_fcopysign(<4 x float> %a, <4 x float> %b) {`
			`; SSE-LABEL: combine_vec_fabs_fcopysign:`
			`; SSE: # %bb.0:`
			`; SSE-NEXT: andps {{.*}}(%rip), %xmm0`
			`; SSE-NEXT: retq`
			`;`
			`; AVX-LABEL: combine_vec_fabs_fcopysign:`
			`; AVX: # %bb.0:`
			`; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1`
			`; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0`
			`; AVX-NEXT: retq`
			`%1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)`
			`%2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %1)`
			`ret <4 x float> %2`
			`}`

			`declare float @llvm.fabs.f32(float %p)`
			`declare float @llvm.copysign.f32(float %Mag, float %Sgn)`

			`declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)`
			`declare <4 x float> @llvm.copysign.v4f32(<4 x float> %Mag, <4 x float> %Sgn)`