llvm-project/llvm/test/CodeGen/X86/avx512-vec-cmp.ll

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1592 lines
78 KiB
LLVM
Raw Normal View History

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=KNL
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -show-mc-encoding -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; test1: `fcmp ole` on <16 x float> feeding a select between the two compared
; operands. All three run lines share the expectation below: one compare that
; writes mask register k1, then a masked blend.
define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
; CHECK-LABEL: test1:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcmpleps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x02]
; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp ole <16 x float> %x, %y
%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
ret <16 x float> %max
}
; test2: same shape as test1 but on <8 x double> (`fcmp ole` + select); the
; expected code is the packed-double compare and blend.
define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
; CHECK-LABEL: test2:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcmplepd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x02]
; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = fcmp ole <8 x double> %x, %y
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
ret <8 x double> %max
}
; test3: `icmp eq` on <16 x i32> where the second operand is loaded from %yp.
; The expected compare takes (%rdi) directly as its memory operand, i.e. the
; load is folded into the compare.
define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwind {
; CHECK-LABEL: test3:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x0f]
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%y = load <16 x i32>, <16 x i32>* %yp, align 4
%mask = icmp eq <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
ret <16 x i32> %max
}
; test4_unsigned: unsigned `icmp uge` on <16 x i32>; the select picks between
; %x1 and %y rather than the compared pair. Expected: an unsigned
; "not less than" compare into k1 and a masked blend of zmm2/zmm1.
define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
; CHECK-LABEL: test4_unsigned:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xc9,0x05]
; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc2]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp uge <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
ret <16 x i32> %max
}
; test5: `icmp eq` on <8 x i64> feeding a select between the compared
; operands; expected as a quadword equality compare plus masked blend.
define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
; CHECK-LABEL: test5:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc9]
; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp eq <8 x i64> %x, %y
%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
ret <8 x i64> %max
}
; test6_unsigned: unsigned `icmp ugt` on <8 x i64>; the select picks between
; %x1 and %y. Expected: an unsigned "not less or equal" quadword compare into
; k1 followed by a masked blend of zmm2/zmm1.
define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) nounwind {
; CHECK-LABEL: test6_unsigned:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc9,0x06]
; CHECK-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc2]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp ugt <8 x i64> %x, %y
%max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
ret <8 x i64> %max
}
; test7: 128-bit `fcmp olt` against zero on <4 x float>, then a select.
; The two runs without +avx512vl expect the operands to be treated as zmm
; registers (see the kill comments and the trailing vzeroupper); the
; +avx512vl run expects the compare and blend to stay on xmm registers.
define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
; AVX512-LABEL: test7:
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
; AVX512-NEXT: vcmpltps %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xca,0x01]
; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test7:
; SKX: ## %bb.0:
; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x57,0xd2]
; SKX-NEXT: vcmpltps %xmm2, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xca,0x01]
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
%mask = fcmp olt <4 x float> %a, zeroinitializer
%c = select <4 x i1>%mask, <4 x float>%a, <4 x float>%b
ret <4 x float>%c
}
; test8: the <2 x double> counterpart of test7 (`fcmp olt` against zero, then
; select). Without +avx512vl the expected code operates on zmm registers;
; with +avx512vl it stays on xmm registers.
define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
; AVX512-LABEL: test8:
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x57,0xd2]
; AVX512-NEXT: vcmpltpd %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xca,0x01]
; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test8:
; SKX: ## %bb.0:
; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x57,0xd2]
; SKX-NEXT: vcmpltpd %xmm2, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xca,0x01]
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
%mask = fcmp olt <2 x double> %a, zeroinitializer
%c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b
ret <2 x double>%c
}
; test9: 256-bit `icmp eq` on <8 x i32> with a select between the compared
; operands. Without +avx512vl the expected compare/blend use zmm registers;
; with +avx512vl they use ymm registers.
define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
; AVX512-LABEL: test9:
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc9]
; AVX512-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test9:
; SKX: ## %bb.0:
; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc9]
; SKX-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x64,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
%mask = icmp eq <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
ret <8 x i32> %max
}
; test10: 256-bit `fcmp oeq` on <8 x float> with a select between the
; compared operands. Expected on zmm registers without +avx512vl and on ymm
; registers with it.
define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
; AVX512-LABEL: test10:
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x00]
; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test10:
; SKX: ## %bb.0:
; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x00]
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
%mask = fcmp oeq <8 x float> %x, %y
%max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
ret <8 x float> %max
}
; test11_unsigned: `icmp ugt` + select of the same two <8 x i32> operands,
; i.e. an unsigned-max pattern. Every run expects a single vpmaxud on ymm
; registers and no mask-register compare.
define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
; AVX512-LABEL: test11_unsigned:
; AVX512: ## %bb.0:
; AVX512-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x3f,0xc1]
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test11_unsigned:
; SKX: ## %bb.0:
; SKX-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3f,0xc1]
; SKX-NEXT: retq ## encoding: [0xc3]
%mask = icmp ugt <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
ret <8 x i32> %max
}
; test12: `icmp eq` on <16 x i64> (two zmm registers per operand) with the
; <16 x i1> result bitcast to i16. Expected: two quadword compares into k0/k1
; joined by kunpckbw, then moved to eax. The +avx512f-only run expects kmovw
; for that move; the runs with +avx512bw expect kmovd.
define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
; KNL-LABEL: test12:
; KNL: ## %bb.0:
; KNL-NEXT: vpcmpeqq %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc2]
; KNL-NEXT: vpcmpeqq %zmm3, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x29,0xcb]
; KNL-NEXT: kunpckbw %k0, %k1, %k0 ## encoding: [0xc5,0xf5,0x4b,0xc0]
; KNL-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
; KNL-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; KNL-NEXT: retq ## encoding: [0xc3]
;
; AVX512BW-LABEL: test12:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpcmpeqq %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc2]
; AVX512BW-NEXT: vpcmpeqq %zmm3, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x29,0xcb]
; AVX512BW-NEXT: kunpckbw %k0, %k1, %k0 ## encoding: [0xc5,0xf5,0x4b,0xc0]
; AVX512BW-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; AVX512BW-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test12:
; SKX: ## %bb.0:
; SKX-NEXT: vpcmpeqq %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc2]
; SKX-NEXT: vpcmpeqq %zmm3, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x29,0xcb]
; SKX-NEXT: kunpckbw %k0, %k1, %k0 ## encoding: [0xc5,0xf5,0x4b,0xc0]
; SKX-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; SKX-NEXT: retq ## encoding: [0xc3]
%res = icmp eq <16 x i64> %a, %b
%res1 = bitcast <16 x i1> %res to i16
ret i16 %res1
}
; test12_v32i32: `icmp eq` on <32 x i32> with the <32 x i1> result bitcast to
; i32. The +avx512f-only run expects each 16-lane mask to be moved to a GPR
; and combined with shll/orl; the runs with +avx512bw expect the two masks to
; be joined with kunpckwd and moved out with one kmovd.
define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
; KNL-LABEL: test12_v32i32:
; KNL: ## %bb.0:
; KNL-NEXT: vpcmpeqd %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc2]
; KNL-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; KNL-NEXT: vpcmpeqd %zmm3, %zmm1, %k0 ## encoding: [0x62,0xf1,0x75,0x48,0x76,0xc3]
; KNL-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; KNL-NEXT: shll $16, %eax ## encoding: [0xc1,0xe0,0x10]
; KNL-NEXT: orl %ecx, %eax ## encoding: [0x09,0xc8]
; KNL-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; KNL-NEXT: retq ## encoding: [0xc3]
;
; AVX512BW-LABEL: test12_v32i32:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpcmpeqd %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc2]
; AVX512BW-NEXT: vpcmpeqd %zmm3, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x76,0xcb]
; AVX512BW-NEXT: kunpckwd %k0, %k1, %k0 ## encoding: [0xc5,0xf4,0x4b,0xc0]
; AVX512BW-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; AVX512BW-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; AVX512BW-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test12_v32i32:
; SKX: ## %bb.0:
; SKX-NEXT: vpcmpeqd %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc2]
; SKX-NEXT: vpcmpeqd %zmm3, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x76,0xcb]
; SKX-NEXT: kunpckwd %k0, %k1, %k0 ## encoding: [0xc5,0xf4,0x4b,0xc0]
; SKX-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; SKX-NEXT: retq ## encoding: [0xc3]
%res = icmp eq <32 x i32> %a, %b
%res1 = bitcast <32 x i1> %res to i32
ret i32 %res1
}
; test12_v64i16: `icmp eq` on <64 x i16> with the <64 x i1> result bitcast to
; i64. The +avx512f-only run expects four 256-bit vpcmpeqw compares, each
; sign-extended to dwords and turned into a 16-bit mask with vptestmd, with
; the four pieces assembled in GPRs via shifts and ors. The runs with
; +avx512bw expect two 512-bit word compares joined by kunpckdq and one kmovq.
define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
; KNL-LABEL: test12_v64i16:
; KNL: ## %bb.0:
; KNL-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm4 ## encoding: [0xc5,0xfd,0x75,0xe2]
; KNL-NEXT: vpmovsxwd %ymm4, %zmm4 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xe4]
; KNL-NEXT: vptestmd %zmm4, %zmm4, %k0 ## encoding: [0x62,0xf2,0x5d,0x48,0x27,0xc4]
; KNL-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; KNL-NEXT: vextracti64x4 $1, %zmm2, %ymm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x3b,0xd2,0x01]
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x3b,0xc0,0x01]
; KNL-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x75,0xc2]
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xc0]
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
; KNL-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; KNL-NEXT: shll $16, %ecx ## encoding: [0xc1,0xe1,0x10]
; KNL-NEXT: orl %eax, %ecx ## encoding: [0x09,0xc1]
; KNL-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm0 ## encoding: [0xc5,0xf5,0x75,0xc3]
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xc0]
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
; KNL-NEXT: kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
; KNL-NEXT: vextracti64x4 $1, %zmm3, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x3b,0xd8,0x01]
; KNL-NEXT: vextracti64x4 $1, %zmm1, %ymm1 ## encoding: [0x62,0xf3,0xfd,0x48,0x3b,0xc9,0x01]
; KNL-NEXT: vpcmpeqw %ymm0, %ymm1, %ymm0 ## encoding: [0xc5,0xf5,0x75,0xc0]
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xc0]
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc0]
; KNL-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; KNL-NEXT: shll $16, %eax ## encoding: [0xc1,0xe0,0x10]
; KNL-NEXT: orl %edx, %eax ## encoding: [0x09,0xd0]
; KNL-NEXT: shlq $32, %rax ## encoding: [0x48,0xc1,0xe0,0x20]
; KNL-NEXT: orq %rcx, %rax ## encoding: [0x48,0x09,0xc8]
; KNL-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; KNL-NEXT: retq ## encoding: [0xc3]
;
; AVX512BW-LABEL: test12_v64i16:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpcmpeqw %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc2]
; AVX512BW-NEXT: vpcmpeqw %zmm3, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x75,0xcb]
; AVX512BW-NEXT: kunpckdq %k0, %k1, %k0 ## encoding: [0xc4,0xe1,0xf4,0x4b,0xc0]
; AVX512BW-NEXT: kmovq %k0, %rax ## encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
; AVX512BW-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; AVX512BW-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test12_v64i16:
; SKX: ## %bb.0:
; SKX-NEXT: vpcmpeqw %zmm2, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc2]
; SKX-NEXT: vpcmpeqw %zmm3, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x75,0xcb]
; SKX-NEXT: kunpckdq %k0, %k1, %k0 ## encoding: [0xc4,0xe1,0xf4,0x4b,0xc0]
; SKX-NEXT: kmovq %k0, %rax ## encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; SKX-NEXT: retq ## encoding: [0xc3]
%res = icmp eq <64 x i16> %a, %b
%res1 = bitcast <64 x i1> %res to i64
ret i64 %res1
}
; test13: `fcmp oeq` on <16 x float> whose <16 x i1> result is zero-extended
; to <16 x i32>. Expected: the mask is first expanded to all-ones lanes
; (zero-masked vpternlogd in the first two runs, vpmovm2d in the third) and
; then shifted right by 31. Note the function's opening brace sits on its own
; line below the generated assertions.
define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
; AVX512-LABEL: test13:
; AVX512: ## %bb.0:
; AVX512-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x00]
; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
; AVX512-NEXT: vpsrld $31, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xd0,0x1f]
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test13:
; SKX: ## %bb.0:
; SKX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc1,0x00]
; SKX-NEXT: vpmovm2d %k0, %zmm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x38,0xc0]
; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xd0,0x1f]
; SKX-NEXT: retq ## encoding: [0xc3]
{
%cmpvector_i = fcmp oeq <16 x float> %a, %b
%conv = zext <16 x i1> %cmpvector_i to <16 x i32>
ret <16 x i32> %conv
}
; test14: computes %a - %b, compares the difference against %a with
; `icmp sgt`, sign-extends that mask, tests the extension against zero and
; selects zero where the extension is zero, else the difference. The
; expected code contains only the subtract, one vpcmpgtd into k1 and a
; zero-masked move, with no separate instructions for the sext / compare
; against zero.
define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
; CHECK-LABEL: test14:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc9]
; CHECK-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x66,0xc8]
; CHECK-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
%sub_r = sub <16 x i32> %a, %b
%cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
%sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32>
%mask = icmp eq <16 x i32> %sext.i3.i, zeroinitializer
%res = select <16 x i1> %mask, <16 x i32> zeroinitializer, <16 x i32> %sub_r
ret <16 x i32>%res
}
; test15: the <8 x i64> counterpart of test14: subtract, `icmp sgt` of the
; difference against %a, sext, compare against zero, select. Expected: the
; subtract, one vpcmpgtq into k1 and a zero-masked quadword move.
define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
; CHECK-LABEL: test15:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc9]
; CHECK-NEXT: vpcmpgtq %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xc8]
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
%sub_r = sub <8 x i64> %a, %b
%cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
%sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64>
%mask = icmp eq <8 x i64> %sext.i3.i, zeroinitializer
%res = select <8 x i1> %mask, <8 x i64> zeroinitializer, <8 x i64> %sub_r
ret <8 x i64>%res
}
; test16: signed `icmp sge` on <16 x i32>; the select picks between %x1 and
; %y. Expected: a signed "not less than" compare into k1 and a masked blend
; of zmm2/zmm1.
define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
; CHECK-LABEL: test16:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc9,0x05]
; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc2]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask = icmp sge <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
ret <16 x i32> %max
}
; test17: signed `icmp sgt` of %x against a vector loaded from %y.ptr, then a
; select between %x and %x1. The expected vpcmpgtd uses (%rdi) as its memory
; operand, so no separate load instruction appears.
define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
; CHECK-LABEL: test17:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0x0f]
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask = icmp sgt <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
ret <16 x i32> %max
}
; test18: signed `icmp sle` of %x against a vector loaded from %y.ptr, then a
; select between %x and %x1. Expected: vpcmpled with (%rdi) as the memory
; operand.
define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
; CHECK-LABEL: test18:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpcmpled (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0x0f,0x02]
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask = icmp sle <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
ret <16 x i32> %max
}
; test19: unsigned `icmp ule` of %x against a vector loaded from %y.ptr, then
; a select between %x and %x1. Expected: the unsigned compare vpcmpleud with
; (%rdi) as the memory operand.
define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
; CHECK-LABEL: test19:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0x0f,0x02]
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask = icmp ule <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
ret <16 x i32> %max
}
; test20: two `icmp eq` masks combined with `select %mask0, %mask1, zero`
; (a lane is set only when both compares are true). Expected: the second
; compare is itself predicated on k1 produced by the first, so no separate
; mask-AND instruction appears.
define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
; CHECK-LABEL: test20:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc9]
; CHECK-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf1,0x6d,0x49,0x76,0xcb]
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask1 = icmp eq <16 x i32> %x1, %y1
%mask0 = icmp eq <16 x i32> %x, %y
%mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
ret <16 x i32> %max
}
; test21: combines `icmp sle %x, %y` with `icmp sge %x1, %y1` on <8 x i64>
; via `select %mask0, %mask1, zero`, then selects between %x and %x1.
; Expected: vpcmpleq into k1, then vpcmpnltq predicated on k1, then the blend.
define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
; CHECK-LABEL: test21:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x02]
; CHECK-NEXT: vpcmpnltq %zmm3, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf3,0xed,0x49,0x1f,0xcb,0x05]
; CHECK-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x64,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask1 = icmp sge <8 x i64> %x1, %y1
%mask0 = icmp sle <8 x i64> %x, %y
%mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
ret <8 x i64> %max
}
; test22: combines `icmp sgt %x1, %y1` with `icmp sgt %x, %y` where %y is
; loaded from %y.ptr. Expected: the register-register compare comes first
; and writes k1; the second compare uses (%rdi) as its memory operand and is
; predicated on k1.
define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
; CHECK-LABEL: test22:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpcmpgtq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xca]
; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0x0f]
; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask1 = icmp sgt <8 x i64> %x1, %y1
%y = load <8 x i64>, <8 x i64>* %y.ptr, align 4
%mask0 = icmp sgt <8 x i64> %x, %y
%mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
ret <8 x i64> %max
}
; test23: combines signed `icmp sge %x1, %y1` with unsigned `icmp ule %x, %y`
; where %y is loaded from %y.ptr. Expected: vpcmpnltd into k1, then vpcmpleud
; with (%rdi) as memory operand predicated on k1, then the blend.
define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
; CHECK-LABEL: test23:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpcmpnltd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x05]
; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0x0f,0x02]
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask1 = icmp sge <16 x i32> %x1, %y1
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask0 = icmp ule <16 x i32> %x, %y
%mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
ret <16 x i32> %max
}
; test24: `icmp eq` of %x against a splat of a scalar i64 loaded from
; %yb.ptr. Expected: the scalar load and splat appear as a {1to8} broadcast
; memory operand on the compare itself.
define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
; CHECK-LABEL: test24:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x58,0x29,0x0f]
; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%yb = load i64, i64* %yb.ptr, align 4
; insertelement into lane 0 + all-zero shuffle mask = splat of %yb.
%y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
%y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
%mask = icmp eq <8 x i64> %x, %y
%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
ret <8 x i64> %max
}
; test25: signed `icmp sle` of %x against a splat of a scalar i32 loaded from
; %yb.ptr. Expected: vpcmpled with a {1to16} broadcast memory operand.
define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind {
; CHECK-LABEL: test25:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x58,0x1f,0x0f,0x02]
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%yb = load i32, i32* %yb.ptr, align 4
; insertelement into lane 0 + all-zero shuffle mask = splat of %yb.
%y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
%y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
%mask = icmp sle <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
ret <16 x i32> %max
}
; test26: combines `icmp sge %x1, %y1` with `icmp sgt %x, splat(load)`.
; Expected: the register compare writes k1, and the second compare uses a
; {1to16} broadcast memory operand while being predicated on k1.
define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
; CHECK-LABEL: test26:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpcmpnltd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x05]
; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0x66,0x0f]
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x64,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask1 = icmp sge <16 x i32> %x1, %y1
%yb = load i32, i32* %yb.ptr, align 4
; insertelement into lane 0 + all-zero shuffle mask = splat of %yb.
%y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
%y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
%mask0 = icmp sgt <16 x i32> %x, %y
%mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
ret <16 x i32> %max
}
; test27: the <8 x i64> counterpart of test26 with `icmp sle` as the
; broadcast compare. Expected: vpcmpnltq into k1, then vpcmpleq with a
; {1to8} broadcast memory operand predicated on k1.
define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
; CHECK-LABEL: test27:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpcmpnltq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x05]
; CHECK-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x59,0x1f,0x0f,0x02]
; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%mask1 = icmp sge <8 x i64> %x1, %y1
%yb = load i64, i64* %yb.ptr, align 4
; insertelement into lane 0 + all-zero shuffle mask = splat of %yb.
%y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
%y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
%mask0 = icmp sle <8 x i64> %x, %y
%mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
ret <8 x i64> %max
}
; test28: compares two <8 x i1> compare results for equality and
; sign-extends the outcome to <8 x i32>. Expected: the mask equality becomes
; a kxnor of k0/k1 (word form in the first two runs, byte form in the third),
; followed by materialising the mask as a vector.
define <8 x i32>@test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) {
; AVX512-LABEL: test28:
; AVX512: ## %bb.0:
; AVX512-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xc1]
; AVX512-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf2,0xed,0x48,0x37,0xcb]
; AVX512-NEXT: kxnorw %k1, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc9]
; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test28:
; SKX: ## %bb.0:
; SKX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xc1]
; SKX-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf2,0xed,0x48,0x37,0xcb]
; SKX-NEXT: kxnorb %k1, %k0, %k0 ## encoding: [0xc5,0xfd,0x46,0xc1]
; SKX-NEXT: vpmovm2d %k0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x38,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
%x_gt_y = icmp sgt <8 x i64> %x, %y
%x1_gt_y1 = icmp sgt <8 x i64> %x1, %y1
%res = icmp eq <8 x i1>%x_gt_y, %x1_gt_y1
%resse = sext <8 x i1>%res to <8 x i32>
ret <8 x i32> %resse
}
; test29: compares two <16 x i1> compare results for inequality and
; sign-extends the outcome to <16 x i8>. Expected: the mask inequality
; becomes kxorw. The +avx512f-only run then builds dword lanes and truncates
; with vpmovdb; the runs with +avx512bw convert the mask with vpmovm2b
; (zmm destination in the second run, xmm destination in the third).
define <16 x i8>@test29(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) {
; KNL-LABEL: test29:
; KNL: ## %bb.0:
; KNL-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xc1]
; KNL-NEXT: vpcmpgtd %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6d,0x48,0x66,0xcb]
; KNL-NEXT: kxorw %k1, %k0, %k1 ## encoding: [0xc5,0xfc,0x47,0xc9]
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
; KNL-NEXT: vpmovdb %zmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
; KNL-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; KNL-NEXT: retq ## encoding: [0xc3]
;
; AVX512BW-LABEL: test29:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xc1]
; AVX512BW-NEXT: vpcmpgtd %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6d,0x48,0x66,0xcb]
; AVX512BW-NEXT: kxorw %k1, %k0, %k0 ## encoding: [0xc5,0xfc,0x47,0xc1]
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x28,0xc0]
; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; AVX512BW-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test29:
; SKX: ## %bb.0:
; SKX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xc1]
; SKX-NEXT: vpcmpgtd %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6d,0x48,0x66,0xcb]
; SKX-NEXT: kxorw %k1, %k0, %k0 ## encoding: [0xc5,0xfc,0x47,0xc1]
; SKX-NEXT: vpmovm2b %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; SKX-NEXT: retq ## encoding: [0xc3]
%x_gt_y = icmp sgt <16 x i32> %x, %y
%x1_gt_y1 = icmp sgt <16 x i32> %x1, %y1
%res = icmp ne <16 x i1>%x_gt_y, %x1_gt_y1
%resse = sext <16 x i1>%res to <16 x i8>
ret <16 x i8> %resse
}
; test30: 256-bit `fcmp oeq` on <4 x double> with a select between the
; compared operands. Expected on zmm registers without +avx512vl and on ymm
; registers with it.
define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind {
; AVX512-LABEL: test30:
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x00]
; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test30:
; SKX: ## %bb.0:
; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc9,0x00]
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
%mask = fcmp oeq <4 x double> %x, %y
%max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %y
ret <4 x double> %max
}
; test31: 128-bit `fcmp olt %x, %y` where %y is loaded from %yp. Without
; +avx512vl the expected code loads into xmm2 with vmovupd and compares on
; zmm registers; with +avx512vl the compare takes (%rdi) directly as its
; memory operand.
define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp) nounwind {
; AVX512-LABEL: test31:
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT: vmovupd (%rdi), %xmm2 ## encoding: [0xc5,0xf9,0x10,0x17]
; AVX512-NEXT: vcmpltpd %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xca,0x01]
; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test31:
; SKX: ## %bb.0:
; SKX-NEXT: vcmpltpd (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0x0f,0x01]
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
%y = load <2 x double>, <2 x double>* %yp, align 4
%mask = fcmp olt <2 x double> %x, %y
%max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
ret <2 x double> %max
}
; test31_commute: like test31 but the loaded value is the FIRST operand of
; the compare (`fcmp olt %y, %x`). With +avx512vl the expected code still
; uses (%rdi) as the memory operand and switches the mnemonic to vcmpgtpd;
; without +avx512vl it keeps vcmpltpd and swaps the register operands.
define <2 x double> @test31_commute(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp) nounwind {
; AVX512-LABEL: test31_commute:
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT: vmovupd (%rdi), %xmm2 ## encoding: [0xc5,0xf9,0x10,0x17]
; AVX512-NEXT: vcmpltpd %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0xed,0x48,0xc2,0xc8,0x01]
; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test31_commute:
; SKX: ## %bb.0:
; SKX-NEXT: vcmpgtpd (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
%y = load <2 x double>, <2 x double>* %yp, align 4
%mask = fcmp olt <2 x double> %y, %x
%max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
ret <2 x double> %max
}
; test32: 256-bit `fcmp ogt %y, %x` where %y is loaded from %yp. The expected
; code expresses this as a less-than compare of %x against the loaded value:
; vcmpltpd with a separate vmovupd load on zmm registers without +avx512vl,
; and vcmpltpd with (%rdi) as memory operand on ymm registers with it.
define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp) nounwind {
; AVX512-LABEL: test32:
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vmovupd (%rdi), %ymm2 ## encoding: [0xc5,0xfd,0x10,0x17]
; AVX512-NEXT: vcmpltpd %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xca,0x01]
; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test32:
; SKX: ## %bb.0:
; SKX-NEXT: vcmpltpd (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0x0f,0x01]
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
%y = load <4 x double>, <4 x double>* %yp, align 4
%mask = fcmp ogt <4 x double> %y, %x
%max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
ret <4 x double> %max
}
; test32_commute: same as test32 with the compare operands exchanged
; (`fcmp ogt %x, %y`, %y loaded from %yp). With +avx512vl the expected code
; uses vcmpgtpd with (%rdi) as memory operand; without it, a separate load
; followed by vcmpltpd with the register operands swapped.
define <4 x double> @test32_commute(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp) nounwind {
; AVX512-LABEL: test32_commute:
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vmovupd (%rdi), %ymm2 ## encoding: [0xc5,0xfd,0x10,0x17]
; AVX512-NEXT: vcmpltpd %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0xed,0x48,0xc2,0xc8,0x01]
; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test32_commute:
; SKX: ## %bb.0:
; SKX-NEXT: vcmpgtpd (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
%y = load <4 x double>, <4 x double>* %yp, align 4
%mask = fcmp ogt <4 x double> %x, %y
%max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
ret <4 x double> %max
}
; Ordered less-than compare of %x against a loaded <8 x double>, feeding a
; select. All runs share the same expectations: the load is folded into a zmm
; vcmpltpd that writes %k1, followed by a masked vblendmpd.
define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp) nounwind {
; CHECK-LABEL: test33:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcmpltpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x01]
; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%y = load <8 x double>, <8 x double>* %yp, align 4
%mask = fcmp olt <8 x double> %x, %y
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
ret <8 x double> %max
}
; Same shape as test33 but with the loaded <8 x double> as the first fcmp
; operand. The expected code keeps the load folded by switching the predicate
; to vcmpgtpd instead of emitting a separate load.
define <8 x double> @test33_commute(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp) nounwind {
; CHECK-LABEL: test33_commute:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcmpgtpd (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0x0f,0x0e]
; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%y = load <8 x double>, <8 x double>* %yp, align 4
%mask = fcmp olt <8 x double> %y, %x
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
ret <8 x double> %max
}
; Ordered less-than compare of %x against a loaded <4 x float>, feeding a
; select. With avx512vl the expected code folds the load into an xmm vcmpltps;
; without it the compare is widened to zmm, the load is a separate vmovups,
; and a vzeroupper precedes the return.
define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) nounwind {
; AVX512-LABEL: test34:
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT: vmovups (%rdi), %xmm2 ## encoding: [0xc5,0xf8,0x10,0x17]
; AVX512-NEXT: vcmpltps %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xca,0x01]
; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test34:
; SKX: ## %bb.0:
; SKX-NEXT: vcmpltps (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0x0f,0x01]
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
%y = load <4 x float>, <4 x float>* %yp, align 4
%mask = fcmp olt <4 x float> %x, %y
%max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
ret <4 x float> %max
}
; Same shape as test34 but with the loaded <4 x float> as the first fcmp
; operand. With avx512vl the expected code folds the load using vcmpgtps;
; without it the widened zmm vcmpltps is emitted with its register operands
; swapped.
define <4 x float> @test34_commute(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) nounwind {
; AVX512-LABEL: test34_commute:
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT: vmovups (%rdi), %xmm2 ## encoding: [0xc5,0xf8,0x10,0x17]
; AVX512-NEXT: vcmpltps %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x48,0xc2,0xc8,0x01]
; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test34_commute:
; SKX: ## %bb.0:
; SKX-NEXT: vcmpgtps (%rdi), %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
%y = load <4 x float>, <4 x float>* %yp, align 4
%mask = fcmp olt <4 x float> %y, %x
%max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
ret <4 x float> %max
}
; Ordered greater-than compare with a loaded <8 x float> as the first operand,
; feeding a select. With avx512vl the expected code folds the load into a ymm
; vcmpltps; without it the compare is widened to zmm and the load stays a
; separate vmovups.
define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) nounwind {
; AVX512-LABEL: test35:
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vmovups (%rdi), %ymm2 ## encoding: [0xc5,0xfc,0x10,0x17]
; AVX512-NEXT: vcmpltps %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xca,0x01]
; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test35:
; SKX: ## %bb.0:
; SKX-NEXT: vcmpltps (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0x0f,0x01]
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
%y = load <8 x float>, <8 x float>* %yp, align 4
%mask = fcmp ogt <8 x float> %y, %x
%max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
ret <8 x float> %max
}
; Same shape as test35 but with the loaded <8 x float> as the second fcmp
; operand. With avx512vl the expected code folds the load using vcmpgtps;
; without it the widened zmm vcmpltps is emitted with its register operands
; swapped.
define <8 x float> @test35_commute(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) nounwind {
; AVX512-LABEL: test35_commute:
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vmovups (%rdi), %ymm2 ## encoding: [0xc5,0xfc,0x10,0x17]
; AVX512-NEXT: vcmpltps %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x48,0xc2,0xc8,0x01]
; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test35_commute:
; SKX: ## %bb.0:
; SKX-NEXT: vcmpgtps (%rdi), %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
%y = load <8 x float>, <8 x float>* %yp, align 4
%mask = fcmp ogt <8 x float> %x, %y
%max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
ret <8 x float> %max
}
; Ordered less-than compare of %x against a loaded <16 x float>, feeding a
; select. All runs share the same expectations: the load is folded into a zmm
; vcmpltps that writes %k1, followed by a masked vblendmps.
define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp) nounwind {
; CHECK-LABEL: test36:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcmpltps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x01]
; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%y = load <16 x float>, <16 x float>* %yp, align 4
%mask = fcmp olt <16 x float> %x, %y
%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
ret <16 x float> %max
}
; Same shape as test36 but with the loaded <16 x float> as the first fcmp
; operand. The expected code keeps the load folded by switching the predicate
; to vcmpgtps instead of emitting a separate load.
define <16 x float> @test36_commute(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp) nounwind {
; CHECK-LABEL: test36_commute:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcmpgtps (%rdi), %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0x0f,0x0e]
; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%y = load <16 x float>, <16 x float>* %yp, align 4
%mask = fcmp olt <16 x float> %y, %x
%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
ret <16 x float> %max
}
; Ordered greater-than compare of a splatted scalar double load (first
; operand) against %x, feeding a select. All runs expect the scalar load to be
; folded as a {1to8} broadcast memory operand of a zmm vcmpltpd.
define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, double* %ptr) nounwind {
; CHECK-LABEL: test37:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%a = load double, double* %ptr
%v = insertelement <8 x double> undef, double %a, i32 0
%shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
%mask = fcmp ogt <8 x double> %shuffle, %x
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
ret <8 x double> %max
}
; Same shape as test37 but with the splatted load as the second fcmp operand.
; All runs expect the {1to8} broadcast to stay folded, with the predicate
; emitted as vcmpgtpd.
define <8 x double> @test37_commute(<8 x double> %x, <8 x double> %x1, double* %ptr) nounwind {
; CHECK-LABEL: test37_commute:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%a = load double, double* %ptr
%v = insertelement <8 x double> undef, double %a, i32 0
%shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
%mask = fcmp ogt <8 x double> %x, %shuffle
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
ret <8 x double> %max
}
; <4 x double> variant of the broadcast compare: ordered greater-than of a
; splatted scalar load (first operand) against %x. With avx512vl the expected
; code is a ymm vcmpltpd with a {1to4} operand; without it the compare is
; widened to zmm with a {1to8} operand.
define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nounwind {
; AVX512-LABEL: test38:
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test38:
; SKX: ## %bb.0:
; SKX-NEXT: vcmpltpd (%rdi){1to4}, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x38,0xc2,0x0f,0x01]
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
%a = load double, double* %ptr
%v = insertelement <4 x double> undef, double %a, i32 0
%shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> zeroinitializer
%mask = fcmp ogt <4 x double> %shuffle, %x
%max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
ret <4 x double> %max
}
; Same shape as test38 but with the splatted load as the second fcmp operand.
; The broadcast is expected to stay folded in both configurations, with the
; predicate emitted as vcmpgtpd ({1to4} ymm with avx512vl, {1to8} zmm without).
define <4 x double> @test38_commute(<4 x double> %x, <4 x double> %x1, double* %ptr) nounwind {
; AVX512-LABEL: test38_commute:
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test38_commute:
; SKX: ## %bb.0:
; SKX-NEXT: vcmpgtpd (%rdi){1to4}, %ymm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x38,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
%a = load double, double* %ptr
%v = insertelement <4 x double> undef, double %a, i32 0
%shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> zeroinitializer
%mask = fcmp ogt <4 x double> %x, %shuffle
%max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
ret <4 x double> %max
}
; <2 x double> variant of the broadcast compare: ordered greater-than of a
; splatted scalar load (first operand) against %x. With avx512vl the expected
; code is an xmm vcmpltpd with a {1to2} operand; without it the compare is
; widened to zmm with a {1to8} operand and a vzeroupper precedes the return.
define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nounwind {
; AVX512-LABEL: test39:
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test39:
; SKX: ## %bb.0:
; SKX-NEXT: vcmpltpd (%rdi){1to2}, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x18,0xc2,0x0f,0x01]
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
%a = load double, double* %ptr
%v = insertelement <2 x double> undef, double %a, i32 0
%shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
%mask = fcmp ogt <2 x double> %shuffle, %x
%max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
ret <2 x double> %max
}
; Same shape as test39 but with the splatted load as the second fcmp operand.
; The broadcast is expected to stay folded in both configurations, with the
; predicate emitted as vcmpgtpd ({1to2} xmm with avx512vl, {1to8} zmm without).
define <2 x double> @test39_commute(<2 x double> %x, <2 x double> %x1, double* %ptr) nounwind {
; AVX512-LABEL: test39_commute:
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test39_commute:
; SKX: ## %bb.0:
; SKX-NEXT: vcmpgtpd (%rdi){1to2}, %xmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x18,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
%a = load double, double* %ptr
%v = insertelement <2 x double> undef, double %a, i32 0
%shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
%mask = fcmp ogt <2 x double> %x, %shuffle
%max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
ret <2 x double> %max
}
; Ordered greater-than compare of a splatted scalar float load (first operand)
; against %x, feeding a select. All runs expect the scalar load to be folded
; as a {1to16} broadcast memory operand of a zmm vcmpltps.
define <16 x float> @test40(<16 x float> %x, <16 x float> %x1, float* %ptr) nounwind {
; CHECK-LABEL: test40:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01]
; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%a = load float, float* %ptr
%v = insertelement <16 x float> undef, float %a, i32 0
%shuffle = shufflevector <16 x float> %v, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%mask = fcmp ogt <16 x float> %shuffle, %x
%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
ret <16 x float> %max
}
; Same shape as test40 but with the splatted load as the second fcmp operand.
; All runs expect the {1to16} broadcast to stay folded, with the predicate
; emitted as vcmpgtps.
define <16 x float> @test40_commute(<16 x float> %x, <16 x float> %x1, float* %ptr) nounwind {
; CHECK-LABEL: test40_commute:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e]
; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
%a = load float, float* %ptr
%v = insertelement <16 x float> undef, float %a, i32 0
%shuffle = shufflevector <16 x float> %v, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%mask = fcmp ogt <16 x float> %x, %shuffle
%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
ret <16 x float> %max
}
; <8 x float> variant of the broadcast compare: ordered greater-than of a
; splatted scalar load (first operand) against %x. With avx512vl the expected
; code is a ymm vcmpltps with a {1to8} operand; without it the compare is
; widened to zmm with a {1to16} operand.
define <8 x float> @test41(<8 x float> %x, <8 x float> %x1, float* %ptr) nounwind {
; AVX512-LABEL: test41:
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01]
; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test41:
; SKX: ## %bb.0:
; SKX-NEXT: vcmpltps (%rdi){1to8}, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x38,0xc2,0x0f,0x01]
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
%a = load float, float* %ptr
%v = insertelement <8 x float> undef, float %a, i32 0
%shuffle = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%mask = fcmp ogt <8 x float> %shuffle, %x
%max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
ret <8 x float> %max
}
; Same shape as test41 but with the splatted load as the second fcmp operand.
; The broadcast is expected to stay folded in both configurations, with the
; predicate emitted as vcmpgtps ({1to8} ymm with avx512vl, {1to16} zmm without).
define <8 x float> @test41_commute(<8 x float> %x, <8 x float> %x1, float* %ptr) nounwind {
; AVX512-LABEL: test41_commute:
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e]
; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test41_commute:
; SKX: ## %bb.0:
; SKX-NEXT: vcmpgtps (%rdi){1to8}, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x38,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
%a = load float, float* %ptr
%v = insertelement <8 x float> undef, float %a, i32 0
%shuffle = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%mask = fcmp ogt <8 x float> %x, %shuffle
%max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
ret <8 x float> %max
}
; <4 x float> variant of the broadcast compare: ordered greater-than of a
; splatted scalar load (first operand) against %x. With avx512vl the expected
; code is an xmm vcmpltps with a {1to4} operand; without it the compare is
; widened to zmm with a {1to16} operand and a vzeroupper precedes the return.
define <4 x float> @test42(<4 x float> %x, <4 x float> %x1, float* %ptr) nounwind {
; AVX512-LABEL: test42:
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01]
; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test42:
; SKX: ## %bb.0:
; SKX-NEXT: vcmpltps (%rdi){1to4}, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0x0f,0x01]
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
%a = load float, float* %ptr
%v = insertelement <4 x float> undef, float %a, i32 0
%shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%mask = fcmp ogt <4 x float> %shuffle, %x
%max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
ret <4 x float> %max
}
; Same shape as test42 but with the splatted load as the second fcmp operand.
; The broadcast is expected to stay folded in both configurations, with the
; predicate emitted as vcmpgtps ({1to4} xmm with avx512vl, {1to16} zmm without).
define <4 x float> @test42_commute(<4 x float> %x, <4 x float> %x1, float* %ptr) nounwind {
; AVX512-LABEL: test42_commute:
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT: vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e]
; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0]
; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test42_commute:
; SKX: ## %bb.0:
; SKX-NEXT: vcmpgtps (%rdi){1to4}, %xmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
%a = load float, float* %ptr
%v = insertelement <4 x float> undef, float %a, i32 0
%shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%mask = fcmp ogt <4 x float> %x, %shuffle
%max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
ret <4 x float> %max
}
; Broadcast compare (ordered greater-than of a splatted scalar load against
; %x) whose result is ANDed with an incoming <8 x i1> mask before the select.
; With avx512bw the expected code converts %mask_in to %k1 via vpsllw/vpmovw2m
; and uses it as the write mask of the {1to8} vcmpltpd; with only avx512f the
; compare runs first and %mask_in is applied through a masked vptestmq.
define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x i1> %mask_in) nounwind {
; KNL-LABEL: test43:
; KNL: ## %bb.0:
; KNL-NEXT: vpmovsxwq %xmm2, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x24,0xd2]
; KNL-NEXT: vpsllq $63, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xf2,0x3f]
; KNL-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01]
; KNL-NEXT: vptestmq %zmm2, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x27,0xca]
; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; KNL-NEXT: retq ## encoding: [0xc3]
;
; AVX512BW-LABEL: test43:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpsllw $15, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x71,0xf2,0x0f]
; AVX512BW-NEXT: vpmovw2m %zmm2, %k1 ## encoding: [0x62,0xf2,0xfe,0x48,0x29,0xca]
; AVX512BW-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xc2,0x0f,0x01]
; AVX512BW-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; AVX512BW-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test43:
; SKX: ## %bb.0:
; SKX-NEXT: vpsllw $15, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xf2,0x0f]
; SKX-NEXT: vpmovw2m %xmm2, %k1 ## encoding: [0x62,0xf2,0xfe,0x08,0x29,0xca]
; SKX-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xc2,0x0f,0x01]
; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
%a = load double, double* %ptr
%v = insertelement <8 x double> undef, double %a, i32 0
%shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
%mask_cmp = fcmp ogt <8 x double> %shuffle, %x
%mask = and <8 x i1> %mask_cmp, %mask_in
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
ret <8 x double> %max
}
; Same shape as test43 but with the splatted load as the second fcmp operand.
; The expected instruction sequences match test43 except that the broadcast
; compare is emitted as vcmpgtpd.
define <8 x double> @test43_commute(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x i1> %mask_in) nounwind {
; KNL-LABEL: test43_commute:
; KNL: ## %bb.0:
; KNL-NEXT: vpmovsxwq %xmm2, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x24,0xd2]
; KNL-NEXT: vpsllq $63, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xf2,0x3f]
; KNL-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e]
; KNL-NEXT: vptestmq %zmm2, %zmm2, %k1 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x27,0xca]
; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; KNL-NEXT: retq ## encoding: [0xc3]
;
; AVX512BW-LABEL: test43_commute:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpsllw $15, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x71,0xf2,0x0f]
; AVX512BW-NEXT: vpmovw2m %zmm2, %k1 ## encoding: [0x62,0xf2,0xfe,0x48,0x29,0xca]
; AVX512BW-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xc2,0x0f,0x0e]
; AVX512BW-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; AVX512BW-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test43_commute:
; SKX: ## %bb.0:
; SKX-NEXT: vpsllw $15, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xf2,0x0f]
; SKX-NEXT: vpmovw2m %xmm2, %k1 ## encoding: [0x62,0xf2,0xfe,0x08,0x29,0xca]
; SKX-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xc2,0x0f,0x0e]
; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
%a = load double, double* %ptr
%v = insertelement <8 x double> undef, double %a, i32 0
%shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
%mask_cmp = fcmp ogt <8 x double> %x, %shuffle
%mask = and <8 x i1> %mask_cmp, %mask_in
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
ret <8 x double> %max
}
; Equality compare of <4 x i16> vectors, sign-extended to <4 x i32>. Without
; avx512vl the expected code is a VEX vpcmpeqw followed by vpmovsxwd; with it
; the compare writes %k0 and the result is expanded with vpmovm2d.
; NOTE(review): attribute group #0 is defined outside this part of the file.
define <4 x i32> @test44(<4 x i16> %x, <4 x i16> %y) #0 {
; AVX512-LABEL: test44:
; AVX512: ## %bb.0:
; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x75,0xc1]
; AVX512-NEXT: vpmovsxwd %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x23,0xc0]
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test44:
; SKX: ## %bb.0:
; SKX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
; SKX-NEXT: vpmovm2d %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
%mask = icmp eq <4 x i16> %x, %y
%1 = sext <4 x i1> %mask to <4 x i32>
ret <4 x i32> %1
}
; Equality compare of <2 x i16> vectors, zero-extended to <2 x i64>. Without
; avx512vl the expected code is vpcmpeqw, vpmovzxwq and a vpand with a
; constant-pool operand; with it the compare writes %k0, which is expanded
; with vpmovm2q and shifted right by 63.
; NOTE(review): attribute group #0 is defined outside this part of the file.
define <2 x i64> @test45(<2 x i16> %x, <2 x i16> %y) #0 {
; AVX512-LABEL: test45:
; AVX512: ## %bb.0:
; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x75,0xc1]
; AVX512-NEXT: vpmovzxwq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x34,0xc0]
; AVX512-NEXT: ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A]
; AVX512-NEXT: ## fixup A - offset: 4, value: LCPI59_0-4, kind: reloc_riprel_4byte
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test45:
; SKX: ## %bb.0:
; SKX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
; SKX-NEXT: vpmovm2q %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; SKX-NEXT: vpsrlq $63, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x3f]
; SKX-NEXT: retq ## encoding: [0xc3]
%mask = icmp eq <2 x i16> %x, %y
%1 = zext <2 x i1> %mask to <2 x i64>
ret <2 x i64> %1
}
; Ordered-equal compare of <2 x float> vectors, zero-extended to <2 x i64>.
; Without avx512vl the expected code is a VEX vcmpeqps, a vpermilps and a
; vandps with a constant-pool operand; with it the compare writes %k0, which
; is expanded with vpmovm2q and shifted right by 63.
; NOTE(review): attribute group #0 is defined outside this part of the file.
define <2 x i64> @test46(<2 x float> %x, <2 x float> %y) #0 {
; AVX512-LABEL: test46:
; AVX512: ## %bb.0:
; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0xc2,0xc1,0x00]
; AVX512-NEXT: vpermilps $212, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x04,0xc0,0xd4]
; AVX512-NEXT: ## xmm0 = xmm0[0,1,1,3]
; AVX512-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x54,0x05,A,A,A,A]
; AVX512-NEXT: ## fixup A - offset: 4, value: LCPI60_0-4, kind: reloc_riprel_4byte
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test46:
; SKX: ## %bb.0:
; SKX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x00]
; SKX-NEXT: vpmovm2q %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
; SKX-NEXT: vpsrlq $63, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x3f]
; SKX-NEXT: retq ## encoding: [0xc3]
%mask = fcmp oeq <2 x float> %x, %y
%1 = zext <2 x i1> %mask to <2 x i64>
ret <2 x i64> %1
}
; Compare of <16 x i32> against zero used to select between <16 x i8>
; vectors. Every run expects the compare as vptestnmd into %k1. With only
; avx512f the mask is turned back into a byte vector (vpternlogd + vpmovdb)
; for vpblendvb; with avx512bw a zmm vpblendmb is used; with avx512vl as well
; the vpblendmb operates on xmm registers.
define <16 x i8> @test47(<16 x i32> %a, <16 x i8> %b, <16 x i8> %c) {
; KNL-LABEL: test47:
; KNL: ## %bb.0:
; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc8]
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
; KNL-NEXT: vpmovdb %zmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x31,0xc0]
; KNL-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 ## encoding: [0xc4,0xe3,0x69,0x4c,0xc1,0x00]
; KNL-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; KNL-NEXT: retq ## encoding: [0xc3]
;
; AVX512BW-LABEL: test47:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: ## kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512BW-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc8]
; AVX512BW-NEXT: vpblendmb %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x66,0xc1]
; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; AVX512BW-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test47:
; SKX: ## %bb.0:
; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc8]
; SKX-NEXT: vpblendmb %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x09,0x66,0xc1]
; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; SKX-NEXT: retq ## encoding: [0xc3]
%cmp = icmp eq <16 x i32> %a, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i8> %b, <16 x i8> %c
ret <16 x i8> %res
}
; Compare of <16 x i32> against zero used to select between <16 x i16>
; vectors. Every run expects the compare as vptestnmd into %k1. With only
; avx512f the mask is turned back into a word vector (vpternlogd + vpmovdw)
; for vpblendvb; with avx512bw a zmm vpblendmw is used; with avx512vl as well
; the vpblendmw operates on ymm registers.
define <16 x i16> @test48(<16 x i32> %a, <16 x i16> %b, <16 x i16> %c) {
; KNL-LABEL: test48:
; KNL: ## %bb.0:
; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc8]
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
; KNL-NEXT: vpmovdw %zmm0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x33,0xc0]
; KNL-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 ## encoding: [0xc4,0xe3,0x6d,0x4c,0xc1,0x00]
; KNL-NEXT: retq ## encoding: [0xc3]
;
; AVX512BW-LABEL: test48:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: ## kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512BW-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc8]
; AVX512BW-NEXT: vpblendmw %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x66,0xc1]
; AVX512BW-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test48:
; SKX: ## %bb.0:
; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc8]
; SKX-NEXT: vpblendmw %ymm1, %ymm2, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x29,0x66,0xc1]
; SKX-NEXT: retq ## encoding: [0xc3]
%cmp = icmp eq <16 x i32> %a, zeroinitializer
%res = select <16 x i1> %cmp, <16 x i16> %b, <16 x i16> %c
ret <16 x i16> %res
}
; Compare of <8 x i64> against zero used to select between <8 x i16> vectors.
; Every run expects the compare as vptestnmq into %k1. With only avx512f the
; mask is turned back into a word vector (vpternlogd + vpmovdw) for an xmm
; vpblendvb; with avx512bw a zmm vpblendmw is used; with avx512vl as well the
; vpblendmw operates on xmm registers.
define <8 x i16> @test49(<8 x i64> %a, <8 x i16> %b, <8 x i16> %c) {
; KNL-LABEL: test49:
; KNL: ## %bb.0:
; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc8]
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
; KNL-NEXT: vpmovdw %zmm0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x33,0xc0]
; KNL-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 ## encoding: [0xc4,0xe3,0x69,0x4c,0xc1,0x00]
; KNL-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; KNL-NEXT: retq ## encoding: [0xc3]
;
; AVX512BW-LABEL: test49:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: ## kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512BW-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc8]
; AVX512BW-NEXT: vpblendmw %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x66,0xc1]
; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; AVX512BW-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: test49:
; SKX: ## %bb.0:
; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc8]
; SKX-NEXT: vpblendmw %xmm1, %xmm2, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x09,0x66,0xc1]
; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; SKX-NEXT: retq ## encoding: [0xc3]
%cmp = icmp eq <8 x i64> %a, zeroinitializer
%res = select <8 x i1> %cmp, <8 x i16> %b, <8 x i16> %c
ret <8 x i16> %res
}
; Integer equality compare of %a against a loaded <16 x i32> (load is the
; second icmp operand), with the <16 x i1> result bitcast to i16. Every run
; expects the load folded into vpcmpeqd and the mask moved to %eax (kmovw with
; only avx512f, kmovd once avx512bw is enabled).
define i16 @pcmpeq_mem_1(<16 x i32> %a, <16 x i32>* %b) {
; KNL-LABEL: pcmpeq_mem_1:
; KNL: ## %bb.0:
; KNL-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07]
; KNL-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
; KNL-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; KNL-NEXT: retq ## encoding: [0xc3]
;
; AVX512BW-LABEL: pcmpeq_mem_1:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07]
; AVX512BW-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; AVX512BW-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: pcmpeq_mem_1:
; SKX: ## %bb.0:
; SKX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07]
; SKX-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; SKX-NEXT: retq ## encoding: [0xc3]
%load = load <16 x i32>, <16 x i32>* %b
%cmp = icmp eq <16 x i32> %a, %load
%cast = bitcast <16 x i1> %cmp to i16
ret i16 %cast
}
; Make sure we use the short pcmpeq encoding, as in pcmpeq_mem_1 above, when
; the memory operand is the first icmp argument instead of the second. The
; expected output is identical to pcmpeq_mem_1 in every run.
define i16 @pcmpeq_mem_2(<16 x i32> %a, <16 x i32>* %b) {
; KNL-LABEL: pcmpeq_mem_2:
; KNL: ## %bb.0:
; KNL-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07]
; KNL-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
; KNL-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; KNL-NEXT: retq ## encoding: [0xc3]
;
; AVX512BW-LABEL: pcmpeq_mem_2:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07]
; AVX512BW-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; AVX512BW-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: pcmpeq_mem_2:
; SKX: ## %bb.0:
; SKX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0x07]
; SKX-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; SKX-NEXT: retq ## encoding: [0xc3]
%load = load <16 x i32>, <16 x i32>* %b
%cmp = icmp eq <16 x i32> %load, %a
%cast = bitcast <16 x i1> %cmp to i16
ret i16 %cast
}
; Don't let a degenerate case trigger an infinite loop.
; This should get simplified before it even exists as a vselect node,
; but that does not happen as of this change.
define <2 x i64> @PR41066(<2 x i64> %t0, <2 x double> %x, <2 x double> %y) {
; AVX512-LABEL: PR41066:
; AVX512: ## %bb.0:
; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x57,0xc0]
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: PR41066:
; SKX: ## %bb.0:
; SKX-NEXT: vxorps %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
; %t0 is not referenced in the body; only %x and %y feed the compare.
%t1 = fcmp ogt <2 x double> %x, %y
; Degenerate select: the true arm is <undef, 0> and the false arm is all zeros.
; The expected assembly contains no compare at all, only a zeroing vxorps of
; the return register.
%t2 = select <2 x i1> %t1, <2 x i64> <i64 undef, i64 0>, <2 x i64> zeroinitializer
ret <2 x i64> %t2
}
; OR together two per-lane "equals zero" tests on <4 x i64> inputs, zero-extend
; the resulting <4 x i1> to <4 x i32>, and add it to %x.
; The expected assembly performs both tests with vptestnmq, ORs the mask
; registers, turns the mask into a vector (zero-masked vpternlogd $255 without
; VL/DQ, vpmovm2d with them), and uses vpsubd of that vector from %x rather
; than an add.
define <4 x i32> @zext_bool_logic(<4 x i64> %cond1, <4 x i64> %cond2, <4 x i32> %x) {
; AVX512-LABEL: zext_bool_logic:
; AVX512: ## %bb.0:
; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vptestnmq %zmm0, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc0]
; AVX512-NEXT: vptestnmq %zmm1, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf6,0x48,0x27,0xc9]
; AVX512-NEXT: korw %k1, %k0, %k1 ## encoding: [0xc5,0xfc,0x45,0xc9]
; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
; AVX512-NEXT: vpsubd %xmm0, %xmm2, %xmm0 ## encoding: [0xc5,0xe9,0xfa,0xc0]
; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: zext_bool_logic:
; SKX: ## %bb.0:
; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc0]
; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf6,0x28,0x27,0xc9]
; SKX-NEXT: korw %k1, %k0, %k0 ## encoding: [0xc5,0xfc,0x45,0xc1]
; SKX-NEXT: vpmovm2d %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; SKX-NEXT: vpsubd %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfa,0xc0]
; SKX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; SKX-NEXT: retq ## encoding: [0xc3]
; Per-lane "is zero" test of each condition vector.
%a = icmp eq <4 x i64> %cond1, zeroinitializer
%b = icmp eq <4 x i64> %cond2, zeroinitializer
; Boolean logic on the i1 vectors before the extension.
%c = or <4 x i1> %a, %b
; Each true lane becomes 1, each false lane 0, then is added to %x.
%d = zext <4 x i1> %c to <4 x i32>
%e = add <4 x i32> %d, %x
ret <4 x i32> %e
}
; This used to crash in WidenVecRes_SETCC due to generating the wrong
; result type.
; Loads a <2 x half>, compares each lane against zero with the "une"
; (unordered or not equal) predicate, zero-extends the <2 x i1> result to
; <2 x i8> and stores it through %y.
; In the expected assembly each half is loaded as a 16-bit integer, converted
; to float with vcvtph2ps and compared as a scalar with vucomiss; setp and
; setne are ORed to form the "une" result, and the two bits are then assembled
; in mask registers.
define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) {
; KNL-LABEL: half_vec_compare:
; KNL: ## %bb.0: ## %entry
; KNL-NEXT: movzwl 2(%rdi), %eax ## encoding: [0x0f,0xb7,0x47,0x02]
; KNL-NEXT: movzwl (%rdi), %ecx ## encoding: [0x0f,0xb7,0x0f]
; KNL-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; KNL-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
; KNL-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
; KNL-NEXT: setp %cl ## encoding: [0x0f,0x9a,0xc1]
; KNL-NEXT: setne %dl ## encoding: [0x0f,0x95,0xc2]
; KNL-NEXT: orb %cl, %dl ## encoding: [0x08,0xca]
; KNL-NEXT: andl $1, %edx ## encoding: [0x83,0xe2,0x01]
; KNL-NEXT: kmovw %edx, %k0 ## encoding: [0xc5,0xf8,0x92,0xc2]
; KNL-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; KNL-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
; KNL-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
; KNL-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0]
; KNL-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1]
; KNL-NEXT: orb %al, %cl ## encoding: [0x08,0xc1]
; KNL-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; KNL-NEXT: kshiftlw $1, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x01]
; KNL-NEXT: korw %k1, %k0, %k1 ## encoding: [0xc5,0xfc,0x45,0xc9]
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
; KNL-NEXT: vpmovdw %zmm0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x33,0xc0]
; KNL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc0]
; KNL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A]
; KNL-NEXT: ## fixup A - offset: 4, value: LCPI68_0-4, kind: reloc_riprel_4byte
; KNL-NEXT: vpextrw $0, %xmm0, (%rsi) ## encoding: [0xc4,0xe3,0x79,0x15,0x06,0x00]
; KNL-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; KNL-NEXT: retq ## encoding: [0xc3]
;
; AVX512BW-LABEL: half_vec_compare:
; AVX512BW: ## %bb.0: ## %entry
; AVX512BW-NEXT: movzwl 2(%rdi), %eax ## encoding: [0x0f,0xb7,0x47,0x02]
; AVX512BW-NEXT: movzwl (%rdi), %ecx ## encoding: [0x0f,0xb7,0x0f]
; AVX512BW-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
; AVX512BW-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
; AVX512BW-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
; AVX512BW-NEXT: setp %cl ## encoding: [0x0f,0x9a,0xc1]
; AVX512BW-NEXT: setne %dl ## encoding: [0x0f,0x95,0xc2]
; AVX512BW-NEXT: orb %cl, %dl ## encoding: [0x08,0xca]
; AVX512BW-NEXT: andl $1, %edx ## encoding: [0x83,0xe2,0x01]
; AVX512BW-NEXT: kmovw %edx, %k0 ## encoding: [0xc5,0xf8,0x92,0xc2]
; AVX512BW-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
; AVX512BW-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
; AVX512BW-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0]
; AVX512BW-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1]
; AVX512BW-NEXT: orb %al, %cl ## encoding: [0x08,0xc1]
; AVX512BW-NEXT: kmovd %ecx, %k1 ## encoding: [0xc5,0xfb,0x92,0xc9]
; AVX512BW-NEXT: kshiftlw $1, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x01]
; AVX512BW-NEXT: korw %k1, %k0, %k0 ## encoding: [0xc5,0xfc,0x45,0xc1]
; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ## encoding: [0x62,0xf2,0xfe,0x48,0x28,0xc0]
; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc0]
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A]
; AVX512BW-NEXT: ## fixup A - offset: 4, value: LCPI68_0-4, kind: reloc_riprel_4byte
; AVX512BW-NEXT: vpextrw $0, %xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x15,0x06,0x00]
; AVX512BW-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; AVX512BW-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: half_vec_compare:
; SKX: ## %bb.0: ## %entry
; SKX-NEXT: movzwl (%rdi), %eax ## encoding: [0x0f,0xb7,0x07]
; SKX-NEXT: movzwl 2(%rdi), %ecx ## encoding: [0x0f,0xb7,0x4f,0x02]
; SKX-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; SKX-NEXT: vcvtph2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x57,0xc9]
; SKX-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
; SKX-NEXT: setp %cl ## encoding: [0x0f,0x9a,0xc1]
; SKX-NEXT: setne %dl ## encoding: [0x0f,0x95,0xc2]
; SKX-NEXT: orb %cl, %dl ## encoding: [0x08,0xca]
; SKX-NEXT: kmovd %edx, %k0 ## encoding: [0xc5,0xfb,0x92,0xc2]
; SKX-NEXT: kshiftlb $1, %k0, %k0 ## encoding: [0xc4,0xe3,0x79,0x32,0xc0,0x01]
; SKX-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; SKX-NEXT: vcvtph2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
; SKX-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
; SKX-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0]
; SKX-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1]
; SKX-NEXT: orb %al, %cl ## encoding: [0x08,0xc1]
; SKX-NEXT: kmovd %ecx, %k1 ## encoding: [0xc5,0xfb,0x92,0xc9]
; SKX-NEXT: kshiftlb $7, %k1, %k1 ## encoding: [0xc4,0xe3,0x79,0x32,0xc9,0x07]
; SKX-NEXT: kshiftrb $7, %k1, %k1 ## encoding: [0xc4,0xe3,0x79,0x30,0xc9,0x07]
; SKX-NEXT: korw %k0, %k1, %k0 ## encoding: [0xc5,0xf4,0x45,0xc0]
; SKX-NEXT: vpmovm2w %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
; SKX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc0]
; SKX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A]
; SKX-NEXT: ## fixup A - offset: 4, value: LCPI68_0-4, kind: reloc_riprel_4byte
; SKX-NEXT: vpextrw $0, %xmm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x15,0x06,0x00]
; SKX-NEXT: retq ## encoding: [0xc3]
entry:
%0 = load <2 x half>, <2 x half>* %x
; "une" is true when the operands are unordered (NaN) or not equal.
%1 = fcmp une <2 x half> %0, zeroinitializer
; Widen each i1 lane to an i8 holding 0 or 1 before storing both bytes.
%2 = zext <2 x i1> %1 to <2 x i8>
store <2 x i8> %2, <2 x i8>* %y
ret void
}
; This test used to end up with the vpcmpgtb on KNL having its operands in the wrong order.
define <8 x i64> @cmp_swap_bug(<16 x i8>* %x, <8 x i64> %y, <8 x i64> %z) {
; KNL-LABEL: cmp_swap_bug:
; KNL: ## %bb.0: ## %entry
; KNL-NEXT: vmovdqa (%rdi), %xmm2 ## encoding: [0xc5,0xf9,0x6f,0x17]
; KNL-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; KNL-NEXT: ## encoding: [0xc4,0xe2,0x69,0x00,0x15,A,A,A,A]
; KNL-NEXT: ## fixup A - offset: 5, value: LCPI69_0-4, kind: reloc_riprel_4byte
; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0xef,0xdb]
; KNL-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2 ## encoding: [0xc5,0xe1,0x64,0xd2]
; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x21,0xd2]
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1 ## encoding: [0x62,0xf2,0x6d,0x48,0x27,0xca]
; KNL-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
; KNL-NEXT: retq ## encoding: [0xc3]
;
; AVX512BW-LABEL: cmp_swap_bug:
; AVX512BW: ## %bb.0: ## %entry
; AVX512BW-NEXT: vmovdqa (%rdi), %xmm2 ## encoding: [0xc5,0xf9,0x6f,0x17]
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT: ## encoding: [0xc4,0xe2,0x69,0x00,0x15,A,A,A,A]
; AVX512BW-NEXT: ## fixup A - offset: 5, value: LCPI69_0-4, kind: reloc_riprel_4byte
; AVX512BW-NEXT: vpmovb2m %zmm2, %k1 ## encoding: [0x62,0xf2,0x7e,0x48,0x29,0xca]
; AVX512BW-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
; AVX512BW-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: cmp_swap_bug:
; SKX: ## %bb.0: ## %entry
; SKX-NEXT: vmovdqa (%rdi), %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x17]
; SKX-NEXT: vpmovwb %xmm2, %xmm2 ## encoding: [0x62,0xf2,0x7e,0x08,0x30,0xd2]
; SKX-NEXT: vpmovb2m %xmm2, %k1 ## encoding: [0x62,0xf2,0x7e,0x08,0x29,0xca]
; SKX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x64,0xc0]
; SKX-NEXT: retq ## encoding: [0xc3]
entry:
%0 = load <16 x i8>, <16 x i8>* %x
; Keep only the even-indexed bytes, producing an <8 x i8>.
%1 = shufflevector <16 x i8> %0, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; Signed "less than zero" per byte. The expected vpcmpgtb for the AVX512F-only
; run has the zeroed register (%xmm3) as the greater-than side, i.e. 0 > byte.
%2 = icmp slt <8 x i8> %1, zeroinitializer
; Pick %y where the byte is negative and %z otherwise.
%3 = select <8 x i1> %2, <8 x i64> %y, <8 x i64> %z
ret <8 x i64> %3
}
; A <2 x i64> equality-with-zero compare drives a select over narrower
; <2 x i32> values: lanes where %x is zero yield 0, all other lanes yield %y.
; Expected lowering without VL/DQ: vpcmpeqq, a vpshufd that narrows the compare
; result, then vpandn with %y. With VL/DQ: vptestmq into a mask register and a
; zero-masked vmovdqa32 of %y.
define <2 x i32> @narrow_cmp_select_reverse(<2 x i64> %x, <2 x i32> %y) nounwind {
; AVX512-LABEL: narrow_cmp_select_reverse:
; AVX512: ## %bb.0:
; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0xef,0xd2]
; AVX512-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x29,0xc2]
; AVX512-NEXT: vpshufd $232, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x70,0xc0,0xe8]
; AVX512-NEXT: ## xmm0 = xmm0[0,2,2,3]
; AVX512-NEXT: vpandn %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdf,0xc1]
; AVX512-NEXT: retq ## encoding: [0xc3]
;
; SKX-LABEL: narrow_cmp_select_reverse:
; SKX: ## %bb.0:
; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
; SKX-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xc1]
; SKX-NEXT: retq ## encoding: [0xc3]
%mask = icmp eq <2 x i64> %x, zeroinitializer
; "Reverse" form: the zero vector is the TRUE arm and %y is the false arm.
%res = select <2 x i1> %mask, <2 x i32> zeroinitializer, <2 x i32> %y
ret <2 x i32> %res
}