; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -mattr=+avx512fp16 | FileCheck %s
; Provenance: forked from OSchip/llvm-project (LLVM, 586 lines, 20 KiB).

; Baseline: fadd of two <32 x half> register operands. The expected assembly
; is a single register-register vaddph on zmm registers.
define <32 x half> @vaddph_512_test(<32 x half> %i, <32 x half> %j) nounwind readnone {
; CHECK-LABEL: vaddph_512_test:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddph %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = fadd <32 x half> %i, %j
  ret <32 x half> %x
}

; fadd whose second operand is loaded (align 4) from the pointer argument.
; The expected assembly folds that load into vaddph's memory operand instead
; of emitting a separate vector load.
define <32 x half> @vaddph_512_fold_test(<32 x half> %i, <32 x half>* %j) nounwind {
; CHECK-LABEL: vaddph_512_fold_test:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddph (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
  %tmp = load <32 x half>, <32 x half>* %j, align 4
  %x = fadd <32 x half> %i, %tmp
  ret <32 x half> %x
}

define <32 x half> @vaddph_512_broadc_test(<32 x half> %a) nounwind {
|
|
; CHECK-LABEL: vaddph_512_broadc_test:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: vaddph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to32}, %zmm0, %zmm0
|
|
; CHECK-NEXT: retq
|
|
%b = fadd <32 x half> %a, <half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0>
|
|
ret <32 x half> %b
|
|
}
|
|
|
|
; 256-bit variant of the splat-constant add: <16 x half> plus a splat of 1.0.
; The expected assembly uses a {1to16} embedded-broadcast operand on ymm.
define <16 x half> @vaddph_256_broadc_test(<16 x half> %a) nounwind {
; CHECK-LABEL: vaddph_256_broadc_test:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %ymm0, %ymm0
; CHECK-NEXT: retq
  %b = fadd <16 x half> %a, <half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0>
  ret <16 x half> %b
}

; 128-bit variant of the splat-constant add: <8 x half> plus a splat of 1.0.
; The expected assembly uses a {1to8} embedded-broadcast operand on xmm.
define <8 x half> @vaddph_128_broadc_test(<8 x half> %a) nounwind {
; CHECK-LABEL: vaddph_128_broadc_test:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %xmm0, %xmm0
; CHECK-NEXT: retq
  %b = fadd <8 x half> %a, <half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0>
  ret <8 x half> %b
}

; fadd followed by a select on a <32 x i1> argument, with %i as the false
; value. The expected assembly moves the mask argument into %k1
; (vpsllw $7 + vpmovb2m) and emits a merge-masked vaddph.
define <32 x half> @vaddph_512_mask_test1(<32 x half> %i, <32 x half> %j, <32 x i1> %mask) nounwind readnone {
; CHECK-LABEL: vaddph_512_mask_test1:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsllw $7, %ymm2, %ymm2
; CHECK-NEXT: vpmovb2m %ymm2, %k1
; CHECK-NEXT: vaddph %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
  %x = fadd <32 x half> %i, %j
  %r = select <32 x i1> %mask, <32 x half> %x, <32 x half> %i
  ret <32 x half> %r
}

; Merge-masked add where the mask comes from `fcmp one %mask1, 0`. The
; expected assembly zeroes a scratch register, compares with vcmpneq_oqph
; into %k1, and applies %k1 to vaddph (lanes with a false mask keep %i).
define <32 x half> @vaddph_512_mask_test(<32 x half> %i, <32 x half> %j, <32 x half> %mask1) nounwind readnone {
; CHECK-LABEL: vaddph_512_mask_test:
; CHECK: ## %bb.0:
; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpneq_oqph %zmm3, %zmm2, %k1
; CHECK-NEXT: vaddph %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = fcmp one <32 x half> %mask1, zeroinitializer
  %x = fadd <32 x half> %i, %j
  %r = select <32 x i1> %mask, <32 x half> %x, <32 x half> %i
  ret <32 x half> %r
}

; Zero-masked add: same mask computation as the merge-masked test, but the
; select's false value is zeroinitializer. The expected assembly therefore
; uses the {%k1} {z} form of vaddph.
define <32 x half> @vaddph_512_maskz_test(<32 x half> %i, <32 x half> %j, <32 x half> %mask1) nounwind readnone {
; CHECK-LABEL: vaddph_512_maskz_test:
; CHECK: ## %bb.0:
; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpneq_oqph %zmm3, %zmm2, %k1
; CHECK-NEXT: vaddph %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = fcmp one <32 x half> %mask1, zeroinitializer
  %x = fadd <32 x half> %i, %j
  %r = select <32 x i1> %mask, <32 x half> %x, <32 x half> zeroinitializer
  ret <32 x half> %r
}

; Merge-masked add whose second operand is loaded from %j.ptr. The expected
; assembly folds the load into the masked vaddph.
; The function reads memory through %j.ptr, so it is only marked nounwind:
; a readnone attribute would contradict the load (reading memory in a
; readnone function is undefined behaviour per the LangRef).
define <32 x half> @vaddph_512_mask_fold_test(<32 x half> %i, <32 x half>* %j.ptr, <32 x half> %mask1) nounwind {
; CHECK-LABEL: vaddph_512_mask_fold_test:
; CHECK: ## %bb.0:
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpneq_oqph %zmm2, %zmm1, %k1
; CHECK-NEXT: vaddph (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = fcmp one <32 x half> %mask1, zeroinitializer
  %j = load <32 x half>, <32 x half>* %j.ptr
  %x = fadd <32 x half> %i, %j
  %r = select <32 x i1> %mask, <32 x half> %x, <32 x half> %i
  ret <32 x half> %r
}

; Zero-masked add whose second operand is loaded from %j.ptr. The expected
; assembly folds the load into the {%k1} {z} form of vaddph.
; The function reads memory through %j.ptr, so it is only marked nounwind:
; a readnone attribute would contradict the load (reading memory in a
; readnone function is undefined behaviour per the LangRef).
define <32 x half> @vaddph_512_maskz_fold_test(<32 x half> %i, <32 x half>* %j.ptr, <32 x half> %mask1) nounwind {
; CHECK-LABEL: vaddph_512_maskz_fold_test:
; CHECK: ## %bb.0:
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpneq_oqph %zmm2, %zmm1, %k1
; CHECK-NEXT: vaddph (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = fcmp one <32 x half> %mask1, zeroinitializer
  %j = load <32 x half>, <32 x half>* %j.ptr
  %x = fadd <32 x half> %i, %j
  %r = select <32 x i1> %mask, <32 x half> %x, <32 x half> zeroinitializer
  ret <32 x half> %r
}

; Same as vaddph_512_maskz_fold_test but with the fadd operands commuted
; (loaded value first). The expected assembly is identical: the load still
; folds into the zero-masked vaddph.
; The function reads memory through %j.ptr, so it is only marked nounwind:
; a readnone attribute would contradict the load (reading memory in a
; readnone function is undefined behaviour per the LangRef).
define <32 x half> @vaddph_512_maskz_fold_test_2(<32 x half> %i, <32 x half>* %j.ptr, <32 x half> %mask1) nounwind {
; CHECK-LABEL: vaddph_512_maskz_fold_test_2:
; CHECK: ## %bb.0:
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpneq_oqph %zmm2, %zmm1, %k1
; CHECK-NEXT: vaddph (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = fcmp one <32 x half> %mask1, zeroinitializer
  %j = load <32 x half>, <32 x half>* %j.ptr
  %x = fadd <32 x half> %j, %i
  %r = select <32 x i1> %mask, <32 x half> %x, <32 x half> zeroinitializer
  ret <32 x half> %r
}

; fsub of two <32 x half> register operands. The expected assembly is a
; single vsubph on zmm registers.
define <32 x half> @vsubph_512_test(<32 x half> %i, <32 x half> %j) nounwind readnone {
; CHECK-LABEL: vsubph_512_test:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubph %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = fsub <32 x half> %i, %j
  ret <32 x half> %x
}

; fmul of two <32 x half> register operands. The expected assembly is a
; single vmulph on zmm registers.
define <32 x half> @vmulph_512_test(<32 x half> %i, <32 x half> %j) nounwind readnone {
; CHECK-LABEL: vmulph_512_test:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmulph %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = fmul <32 x half> %i, %j
  ret <32 x half> %x
}

; fdiv of two <32 x half> register operands. The expected assembly is a
; single vdivph on zmm registers.
define <32 x half> @vdivph_512_test(<32 x half> %i, <32 x half> %j) nounwind readnone {
; CHECK-LABEL: vdivph_512_test:
; CHECK: ## %bb.0:
; CHECK-NEXT: vdivph %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = fdiv <32 x half> %i, %j
  ret <32 x half> %x
}

; Scalar half add chain: %y = %i + %j, then %x (loaded) + %y. The expected
; assembly is two vaddsh instructions, the second taking the loaded value
; directly as a memory operand.
; The function reads memory through %x.ptr, so it is only marked nounwind:
; a readnone attribute would contradict the load (reading memory in a
; readnone function is undefined behaviour per the LangRef).
define half @add_sh(half %i, half %j, half* %x.ptr) nounwind {
; CHECK-LABEL: add_sh:
; CHECK: ## %bb.0:
; CHECK-NEXT: vaddsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
  %x = load half, half* %x.ptr
  %y = fadd half %i, %j
  %r = fadd half %x, %y
  ret half %r
}

; Scalar half subtract where the loaded value is the LEFT operand of the
; final fsub. The expected assembly loads it with a separate vmovsh rather
; than folding it (presumably because fsub cannot be commuted to put the
; memory operand last).
; The function reads memory through %x.ptr, so it is only marked nounwind:
; a readnone attribute would contradict the load (reading memory in a
; readnone function is undefined behaviour per the LangRef).
define half @sub_sh(half %i, half %j, half* %x.ptr) nounwind {
; CHECK-LABEL: sub_sh:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovsh (%rdi), %xmm2
; CHECK-NEXT: vsubsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vsubsh %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
  %x = load half, half* %x.ptr
  %y = fsub half %i, %j
  %r = fsub half %x, %y
  ret half %r
}

; Scalar half subtract where the loaded value is the RIGHT operand of the
; final fsub. The expected assembly folds the load into the second vsubsh.
; The function reads memory through %x.ptr, so it is only marked nounwind:
; a readnone attribute would contradict the load (reading memory in a
; readnone function is undefined behaviour per the LangRef).
define half @sub_sh_2(half %i, half %j, half* %x.ptr) nounwind {
; CHECK-LABEL: sub_sh_2:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsubsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vsubsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
  %x = load half, half* %x.ptr
  %y = fsub half %i, %j
  %r = fsub half %y, %x
  ret half %r
}

; Scalar half multiply chain: %y = %i * %j, then %x (loaded) * %y. The
; expected assembly is two vmulsh instructions, the second taking the
; loaded value directly as a memory operand.
; The function reads memory through %x.ptr, so it is only marked nounwind:
; a readnone attribute would contradict the load (reading memory in a
; readnone function is undefined behaviour per the LangRef).
define half @mul_sh(half %i, half %j, half* %x.ptr) nounwind {
; CHECK-LABEL: mul_sh:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmulsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmulsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
  %x = load half, half* %x.ptr
  %y = fmul half %i, %j
  %r = fmul half %x, %y
  ret half %r
}

; Scalar half divide where the loaded value is the dividend (LEFT operand)
; of the final fdiv. The expected assembly loads it with a separate vmovsh
; rather than folding it (presumably because fdiv cannot be commuted).
; The function reads memory through %x.ptr, so it is only marked nounwind:
; a readnone attribute would contradict the load (reading memory in a
; readnone function is undefined behaviour per the LangRef).
define half @div_sh(half %i, half %j, half* %x.ptr) nounwind {
; CHECK-LABEL: div_sh:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmovsh (%rdi), %xmm2
; CHECK-NEXT: vdivsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vdivsh %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
  %x = load half, half* %x.ptr
  %y = fdiv half %i, %j
  %r = fdiv half %x, %y
  ret half %r
}

; Scalar half divide where the loaded value is the divisor (RIGHT operand)
; of the final fdiv. The expected assembly folds the load into the second
; vdivsh.
; The function reads memory through %x.ptr, so it is only marked nounwind:
; a readnone attribute would contradict the load (reading memory in a
; readnone function is undefined behaviour per the LangRef).
define half @div_sh_2(half %i, half %j, half* %x.ptr) nounwind {
; CHECK-LABEL: div_sh_2:
; CHECK: ## %bb.0:
; CHECK-NEXT: vdivsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vdivsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
  %x = load half, half* %x.ptr
  %y = fdiv half %i, %j
  %r = fdiv half %y, %x
  ret half %r
}

; Scalar `fcmp une` on half returning i1. The expected assembly compares
; with vcmpneqsh into mask register %k0 and copies the result to %eax with
; kmovd.
define i1 @cmp_une_sh(half %x, half %y) {
; CHECK-LABEL: cmp_une_sh:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vcmpneqsh %xmm1, %xmm0, %k0
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
entry:
  %0 = fcmp une half %x, %y
  ret i1 %0
}

; Scalar `fcmp oeq` on half returning i1. The expected assembly compares
; with vcmpeqsh into mask register %k0 and copies the result to %eax with
; kmovd.
define i1 @cmp_oeq_sh(half %x, half %y) {
; CHECK-LABEL: cmp_oeq_sh:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vcmpeqsh %xmm1, %xmm0, %k0
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
entry:
  %0 = fcmp oeq half %x, %y
  ret i1 %0
}

; Scalar `fcmp olt` on half returning i1. Unlike the une/oeq cases, the
; expected assembly uses the flag-setting vucomish followed by seta instead
; of a mask-register compare.
define i1 @cmp_olt_sh(half %x, half %y) {
; CHECK-LABEL: cmp_olt_sh:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vucomish %xmm0, %xmm1
; CHECK-NEXT: seta %al
; CHECK-NEXT: retq
entry:
  %0 = fcmp olt half %x, %y
  ret i1 %0
}

; Vector `fcmp une` on <32 x half> returning <32 x i1>. The expected
; assembly compares with vcmpneqph into %k0 and expands the mask into %ymm0
; with vpmovm2b.
define <32 x i1> @cmp_ph(<32 x half> %x, <32 x half> %y) {
; CHECK-LABEL: cmp_ph:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vcmpneqph %zmm1, %zmm0, %k0
; CHECK-NEXT: vpmovm2b %k0, %ymm0
; CHECK-NEXT: retq
entry:
  %0 = fcmp une <32 x half> %x, %y
  ret <32 x i1> %0
}

; Scalar fneg on half. The expected assembly XORs the value with a
; broadcast -0.0 constant (vpbroadcastw + vpxor).
define half @fneg(half %x) {
; CHECK-LABEL: fneg:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %a = fneg half %x
  ret half %a
}

; Negation spelled as `fsub -0.0, %x`. The expected assembly is the same
; broadcast -0.0 plus vpxor sequence as for the fneg instruction.
define half @fneg_idiom(half %x) {
; CHECK-LABEL: fneg_idiom:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %a = fsub half -0.0, %x
  ret half %a
}

; Scalar llvm.fabs on half. The expected assembly ANDs the value with a
; broadcast constant that the asm printer shows as NaN (presumably the
; 0x7FFF sign-clearing mask -- confirm).
define half @fabs(half %x) {
; CHECK-LABEL: fabs:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %a = call half @llvm.fabs.f16(half %x)
  ret half %a
}
declare half @llvm.fabs.f16(half)

; Scalar llvm.copysign on half. The expected assembly is a single
; vpternlogq $226 combining %x, %y and a broadcast constant that the asm
; printer shows as NaN (presumably the 0x7FFF magnitude mask -- confirm).
define half @fcopysign(half %x, half %y) {
; CHECK-LABEL: fcopysign:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; CHECK-NEXT: vpternlogq $226, %xmm1, %xmm2, %xmm0
; CHECK-NEXT: retq
  %a = call half @llvm.copysign.f16(half %x, half %y)
  ret half %a
}
declare half @llvm.copysign.f16(half, half)

; fneg on <8 x half>. The expected assembly XORs the vector with a broadcast
; -0.0 constant in xmm registers.
define <8 x half> @fnegv8f16(<8 x half> %x) {
; CHECK-LABEL: fnegv8f16:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %a = fneg <8 x half> %x
  ret <8 x half> %a
}

; <8 x half> negation spelled as `fsub splat(-0.0), %x`. The expected
; assembly matches the fneg form: broadcast -0.0 plus vpxor.
define <8 x half> @fneg_idiomv8f16(<8 x half> %x) {
; CHECK-LABEL: fneg_idiomv8f16:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %a = fsub <8 x half> <half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0>, %x
  ret <8 x half> %a
}

; llvm.fabs on <8 x half>. The expected assembly ANDs the vector with a
; broadcast constant that the asm printer shows as NaN (presumably the
; 0x7FFF sign-clearing mask -- confirm).
define <8 x half> @fabsv8f16(<8 x half> %x) {
; CHECK-LABEL: fabsv8f16:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %a = call <8 x half> @llvm.fabs.v8f16(<8 x half> %x)
  ret <8 x half> %a
}
declare <8 x half> @llvm.fabs.v8f16(<8 x half>)

; llvm.copysign on <8 x half>. The expected assembly is a single
; vpternlogq $226 over xmm registers, using a broadcast constant that the
; asm printer shows as NaN (presumably the 0x7FFF magnitude mask -- confirm).
define <8 x half> @fcopysignv8f16(<8 x half> %x, <8 x half> %y) {
; CHECK-LABEL: fcopysignv8f16:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; CHECK-NEXT: vpternlogq $226, %xmm1, %xmm2, %xmm0
; CHECK-NEXT: retq
  %a = call <8 x half> @llvm.copysign.v8f16(<8 x half> %x, <8 x half> %y)
  ret <8 x half> %a
}
declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>)

; fneg on <16 x half>. The expected assembly XORs the vector with a
; broadcast -0.0 constant in ymm registers.
define <16 x half> @fnegv16f16(<16 x half> %x) {
; CHECK-LABEL: fnegv16f16:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpbroadcastw {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT: vpxor %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
  %a = fneg <16 x half> %x
  ret <16 x half> %a
}

; <16 x half> negation spelled as `fsub splat(-0.0), %x`. The expected
; assembly matches the fneg form: broadcast -0.0 plus vpxor on ymm.
define <16 x half> @fneg_idiomv16f16(<16 x half> %x) {
; CHECK-LABEL: fneg_idiomv16f16:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpbroadcastw {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT: vpxor %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
  %a = fsub <16 x half> <half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0>, %x
  ret <16 x half> %a
}

; llvm.fabs on <16 x half>. The expected assembly ANDs the vector with a
; broadcast constant that the asm printer shows as NaN (presumably the
; 0x7FFF sign-clearing mask -- confirm).
define <16 x half> @fabsv16f16(<16 x half> %x) {
; CHECK-LABEL: fabsv16f16:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpbroadcastw {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; CHECK-NEXT: vpand %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
  %a = call <16 x half> @llvm.fabs.v16f16(<16 x half> %x)
  ret <16 x half> %a
}
declare <16 x half> @llvm.fabs.v16f16(<16 x half>)

; llvm.copysign on <16 x half>. The expected assembly is a single
; vpternlogq $226 over ymm registers, using a broadcast constant that the
; asm printer shows as NaN (presumably the 0x7FFF magnitude mask -- confirm).
define <16 x half> @fcopysignv16f16(<16 x half> %x, <16 x half> %y) {
; CHECK-LABEL: fcopysignv16f16:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpbroadcastw {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; CHECK-NEXT: vpternlogq $226, %ymm1, %ymm2, %ymm0
; CHECK-NEXT: retq
  %a = call <16 x half> @llvm.copysign.v16f16(<16 x half> %x, <16 x half> %y)
  ret <16 x half> %a
}
declare <16 x half> @llvm.copysign.v16f16(<16 x half>, <16 x half>)

; fneg on <32 x half>. The expected assembly XORs the vector with a
; broadcast -0.0 constant in zmm registers (vpxorq).
define <32 x half> @fnegv32f16(<32 x half> %x) {
; CHECK-LABEL: fnegv32f16:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpbroadcastw {{.*#+}} zmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %a = fneg <32 x half> %x
  ret <32 x half> %a
}

define <32 x half> @fneg_idiomv32f16(<32 x half> %x) {
|
|
; CHECK-LABEL: fneg_idiomv32f16:
|
|
; CHECK: ## %bb.0:
|
|
; CHECK-NEXT: vpbroadcastw {{.*#+}} zmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
|
|
; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0
|
|
; CHECK-NEXT: retq
|
|
%a = fsub <32 x half> <half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0>, %x
|
|
ret <32 x half> %a
|
|
}
|
|
|
|
; llvm.fabs on <32 x half>. The expected assembly ANDs the vector (vpandq)
; with a broadcast constant that the asm printer shows as NaN (presumably
; the 0x7FFF sign-clearing mask -- confirm).
define <32 x half> @fabsv32f16(<32 x half> %x) {
; CHECK-LABEL: fabsv32f16:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpbroadcastw {{.*#+}} zmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %a = call <32 x half> @llvm.fabs.v32f16(<32 x half> %x)
  ret <32 x half> %a
}
declare <32 x half> @llvm.fabs.v32f16(<32 x half>)

; llvm.copysign on <32 x half>. The expected assembly is a single
; vpternlogq $226 over zmm registers, using a broadcast constant that the
; asm printer shows as NaN (presumably the 0x7FFF magnitude mask -- confirm).
define <32 x half> @fcopysignv32f16(<32 x half> %x, <32 x half> %y) {
; CHECK-LABEL: fcopysignv32f16:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpbroadcastw {{.*#+}} zmm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; CHECK-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0
; CHECK-NEXT: retq
  %a = call <32 x half> @llvm.copysign.v32f16(<32 x half> %x, <32 x half> %y)
  ret <32 x half> %a
}
declare <32 x half> @llvm.copysign.v32f16(<32 x half>, <32 x half>)

; Regression test: fsub and fadd of the same <8 x half> operands, then a
; shuffle that takes even lanes from the fsub result and odd lanes from the
; fadd result. The expected assembly is vsubph + vaddph + vpblendw.
; NOTE(review): attribute group #0 is referenced here, but its definition is
; not in the visible part of the file -- confirm it exists.
define <8 x half> @regression_test1(<8 x half> %x, <8 x half> %y) #0 {
; CHECK-LABEL: regression_test1:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vsubph %xmm1, %xmm0, %xmm2
; CHECK-NEXT: vaddph %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3],xmm2[4],xmm0[5],xmm2[6],xmm0[7]
; CHECK-NEXT: retq
entry:
  %a = fsub <8 x half> %x, %y
  %b = fadd <8 x half> %x, %y
  %c = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x half> %c
}

; Regression test: fptoui from <8 x float> to <8 x i16>. The expected
; assembly converts to 32-bit lanes with vcvttps2udq and narrows to 16-bit
; lanes with vpmovdw, followed by vzeroupper.
define <8 x i16> @regression_test2(<8 x float> %x) #0 {
; CHECK-LABEL: regression_test2:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vcvttps2udq %ymm0, %ymm0
; CHECK-NEXT: vpmovdw %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
entry:
  %a = fptoui <8 x float> %x to <8 x i16>
  ret <8 x i16> %a
}

; Regression test: fptosi from <8 x float> to <8 x i16>. The expected
; assembly converts with vcvttps2dq and narrows to 16-bit lanes with
; vpmovdw, followed by vzeroupper.
define <8 x i16> @regression_test3(<8 x float> %x) #0 {
; CHECK-LABEL: regression_test3:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
; CHECK-NEXT: vpmovdw %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
entry:
  %a = fptosi <8 x float> %x to <8 x i16>
  ret <8 x i16> %a
}

; Regression test: fptoui from <8 x double> to <8 x i16>. The expected
; assembly converts zmm to ymm with vcvttpd2udq and narrows to 16-bit lanes
; with vpmovdw, followed by vzeroupper.
define <8 x i16> @regression_test4(<8 x double> %x) #0 {
; CHECK-LABEL: regression_test4:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vcvttpd2udq %zmm0, %ymm0
; CHECK-NEXT: vpmovdw %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
entry:
  %a = fptoui <8 x double> %x to <8 x i16>
  ret <8 x i16> %a
}

; Regression test: fptosi from <8 x double> to <8 x i16>. The expected
; assembly converts zmm to ymm with vcvttpd2dq and narrows to 16-bit lanes
; with vpmovdw, followed by vzeroupper.
define <8 x i16> @regression_test5(<8 x double> %x) #0 {
; CHECK-LABEL: regression_test5:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm0
; CHECK-NEXT: vpmovdw %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
entry:
  %a = fptosi <8 x double> %x to <8 x i16>
  ret <8 x i16> %a
}

; `fcmp oeq` on <8 x half> returning <8 x i1>. The expected assembly
; compares with vcmpeqph into %k0 and expands the mask into %xmm0 with
; vpmovm2w.
define <8 x i1> @fcmp_v8f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: fcmp_v8f16:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vcmpeqph %xmm1, %xmm0, %k0
; CHECK-NEXT: vpmovm2w %k0, %xmm0
; CHECK-NEXT: retq
entry:
  %0 = fcmp oeq <8 x half> %a, %b
  ret <8 x i1> %0
}

; `fcmp oeq` on <16 x half> returning <16 x i1>. The expected assembly
; compares with vcmpeqph on ymm into %k0, expands the mask into %xmm0 with
; vpmovm2b, and issues vzeroupper before returning.
define <16 x i1> @fcmp_v16f16(<16 x half> %a, <16 x half> %b) {
; CHECK-LABEL: fcmp_v16f16:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vcmpeqph %ymm1, %ymm0, %k0
; CHECK-NEXT: vpmovm2b %k0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
entry:
  %0 = fcmp oeq <16 x half> %a, %b
  ret <16 x i1> %0
}

; `fcmp oeq` on <32 x half> returning <32 x i1>. The expected assembly
; compares with vcmpeqph on zmm into %k0 and expands the mask into %ymm0
; with vpmovm2b.
define <32 x i1> @fcmp_v32f16(<32 x half> %a, <32 x half> %b) {
; CHECK-LABEL: fcmp_v32f16:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vcmpeqph %zmm1, %zmm0, %k0
; CHECK-NEXT: vpmovm2b %k0, %ymm0
; CHECK-NEXT: retq
entry:
  %0 = fcmp oeq <32 x half> %a, %b
  ret <32 x i1> %0
}

; `fcmp oeq` on <8 x half> zero-extended to <8 x i16>. The expected assembly
; compares into %k0, expands the mask with vpmovm2w, and shifts each lane
; right by 15 (vpsrlw $15).
define <8 x i16> @zext_fcmp_v8f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: zext_fcmp_v8f16:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vcmpeqph %xmm1, %xmm0, %k0
; CHECK-NEXT: vpmovm2w %k0, %xmm0
; CHECK-NEXT: vpsrlw $15, %xmm0, %xmm0
; CHECK-NEXT: retq
entry:
  %0 = fcmp oeq <8 x half> %a, %b
  %1 = zext <8 x i1> %0 to <8 x i16>
  ret <8 x i16> %1
}

; `fcmp oeq` on <16 x half> zero-extended to <16 x i16>. The expected
; assembly compares into %k0, expands the mask into ymm with vpmovm2w, and
; shifts each lane right by 15 (vpsrlw $15).
define <16 x i16> @zext_fcmp_v16f16(<16 x half> %a, <16 x half> %b) {
; CHECK-LABEL: zext_fcmp_v16f16:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vcmpeqph %ymm1, %ymm0, %k0
; CHECK-NEXT: vpmovm2w %k0, %ymm0
; CHECK-NEXT: vpsrlw $15, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
  %0 = fcmp oeq <16 x half> %a, %b
  %1 = zext <16 x i1> %0 to <16 x i16>
  ret <16 x i16> %1
}

; `fcmp oeq` on <32 x half> zero-extended to <32 x i16>. The expected
; assembly compares into %k0, expands the mask into zmm with vpmovm2w, and
; shifts each lane right by 15 (vpsrlw $15).
define <32 x i16> @zext_fcmp_v32f16(<32 x half> %a, <32 x half> %b) {
; CHECK-LABEL: zext_fcmp_v32f16:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vcmpeqph %zmm1, %zmm0, %k0
; CHECK-NEXT: vpmovm2w %k0, %zmm0
; CHECK-NEXT: vpsrlw $15, %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %0 = fcmp oeq <32 x half> %a, %b
  %1 = zext <32 x i1> %0 to <32 x i16>
  ret <32 x i16> %1
}
