Updated and extended the information about each instruction in Haswell (HSW) and Sandy Bridge (SNB) to include the following data:

• static latency
• number of uOps of which the instruction consists
• all ports used by the instruction

Reviewers: 
 RKSimon 
 zvi  
aymanmus  
m_zuckerman 

Differential Revision: https://reviews.llvm.org/D33897
 

llvm-svn: 306414
This commit is contained in:
Gadi Haber 2017-06-27 15:05:13 +00:00
parent a179d25b99
commit 13759a7ed6
33 changed files with 10003 additions and 5705 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1619,10 +1619,10 @@ define <8 x float> @test_gather_mask(<8 x float> %a0, float* %a, <8 x i32> %idx
; ;
; AVX512VL-LABEL: test_gather_mask: ; AVX512VL-LABEL: test_gather_mask:
; AVX512VL: ## BB#0: ; AVX512VL: ## BB#0:
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; AVX512VL-NEXT: vmovaps %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xda]
; AVX512VL-NEXT: vgatherdps %ymm3, (%eax,%ymm1,4), %ymm0 ## encoding: [0xc4,0xe2,0x65,0x92,0x04,0x88]
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08] ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; AVX512VL-NEXT: vmovaps %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xda]
; AVX512VL-NEXT: vgatherdps %ymm3, (%ecx,%ymm1,4), %ymm0 ## encoding: [0xc4,0xe2,0x65,0x92,0x04,0x89]
; AVX512VL-NEXT: vmovups %ymm2, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x10] ; AVX512VL-NEXT: vmovups %ymm2, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x10]
; AVX512VL-NEXT: retl ## encoding: [0xc3] ; AVX512VL-NEXT: retl ## encoding: [0xc3]
%a_i8 = bitcast float* %a to i8* %a_i8 = bitcast float* %a to i8*

View File

@ -9,7 +9,7 @@ define <32 x i8> @test_pabsb(<32 x i8> %a0, <32 x i8> *%a1) {
; HASWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [5:0.50] ; HASWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [5:0.50]
; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; ZNVER1-LABEL: test_pabsb: ; ZNVER1-LABEL: test_pabsb:
; ZNVER1: # BB#0: ; ZNVER1: # BB#0:
@ -29,9 +29,9 @@ define <8 x i32> @test_pabsd(<8 x i32> %a0, <8 x i32> *%a1) {
; HASWELL-LABEL: test_pabsd: ; HASWELL-LABEL: test_pabsd:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [5:0.50] ; HASWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [1:0.50]
; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; ZNVER1-LABEL: test_pabsd: ; ZNVER1-LABEL: test_pabsd:
; ZNVER1: # BB#0: ; ZNVER1: # BB#0:
@ -51,9 +51,9 @@ define <16 x i16> @test_pabsw(<16 x i16> %a0, <16 x i16> *%a1) {
; HASWELL-LABEL: test_pabsw: ; HASWELL-LABEL: test_pabsw:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [5:0.50] ; HASWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [1:0.50]
; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; ZNVER1-LABEL: test_pabsw: ; ZNVER1-LABEL: test_pabsw:
; ZNVER1: # BB#0: ; ZNVER1: # BB#0:
@ -74,7 +74,7 @@ define <32 x i8> @test_paddb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; ZNVER1-LABEL: test_paddb: ; ZNVER1-LABEL: test_paddb:
; ZNVER1: # BB#0: ; ZNVER1: # BB#0:
@ -92,7 +92,7 @@ define <8 x i32> @test_paddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; ZNVER1-LABEL: test_paddd: ; ZNVER1-LABEL: test_paddd:
; ZNVER1: # BB#0: ; ZNVER1: # BB#0:
@ -109,8 +109,8 @@ define <4 x i64> @test_paddq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; HASWELL-LABEL: test_paddq: ; HASWELL-LABEL: test_paddq:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; ZNVER1-LABEL: test_paddq: ; ZNVER1-LABEL: test_paddq:
; ZNVER1: # BB#0: ; ZNVER1: # BB#0:
@ -128,7 +128,7 @@ define <16 x i16> @test_paddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; ZNVER1-LABEL: test_paddw: ; ZNVER1-LABEL: test_paddw:
; ZNVER1: # BB#0: ; ZNVER1: # BB#0:
@ -145,9 +145,9 @@ define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; HASWELL-LABEL: test_pand: ; HASWELL-LABEL: test_pand:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; HASWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; HASWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; ZNVER1-LABEL: test_pand: ; ZNVER1-LABEL: test_pand:
; ZNVER1: # BB#0: ; ZNVER1: # BB#0:
@ -166,9 +166,9 @@ define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; HASWELL-LABEL: test_pandn: ; HASWELL-LABEL: test_pandn:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; HASWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; HASWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [5:0.50] ; HASWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [1:0.50]
; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; ZNVER1-LABEL: test_pandn: ; ZNVER1-LABEL: test_pandn:
; ZNVER1: # BB#0: ; ZNVER1: # BB#0:
@ -190,7 +190,7 @@ define <8 x i32> @test_pmulld(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00] ; HASWELL-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00]
; HASWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [10:2.00] ; HASWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; ZNVER1-LABEL: test_pmulld: ; ZNVER1-LABEL: test_pmulld:
; ZNVER1: # BB#0: ; ZNVER1: # BB#0:
@ -207,8 +207,8 @@ define <16 x i16> @test_pmullw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2)
; HASWELL-LABEL: test_pmullw: ; HASWELL-LABEL: test_pmullw:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [9:1.00] ; HASWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; ZNVER1-LABEL: test_pmullw: ; ZNVER1-LABEL: test_pmullw:
; ZNVER1: # BB#0: ; ZNVER1: # BB#0:
@ -225,9 +225,9 @@ define <4 x i64> @test_por(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; HASWELL-LABEL: test_por: ; HASWELL-LABEL: test_por:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; HASWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; ZNVER1-LABEL: test_por: ; ZNVER1-LABEL: test_por:
; ZNVER1: # BB#0: ; ZNVER1: # BB#0:
@ -246,8 +246,8 @@ define <32 x i8> @test_psubb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; HASWELL-LABEL: test_psubb: ; HASWELL-LABEL: test_psubb:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; ZNVER1-LABEL: test_psubb: ; ZNVER1-LABEL: test_psubb:
; ZNVER1: # BB#0: ; ZNVER1: # BB#0:
@ -264,8 +264,8 @@ define <8 x i32> @test_psubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; HASWELL-LABEL: test_psubd: ; HASWELL-LABEL: test_psubd:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; ZNVER1-LABEL: test_psubd: ; ZNVER1-LABEL: test_psubd:
; ZNVER1: # BB#0: ; ZNVER1: # BB#0:
@ -282,8 +282,8 @@ define <4 x i64> @test_psubq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; HASWELL-LABEL: test_psubq: ; HASWELL-LABEL: test_psubq:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; ZNVER1-LABEL: test_psubq: ; ZNVER1-LABEL: test_psubq:
; ZNVER1: # BB#0: ; ZNVER1: # BB#0:
@ -300,8 +300,8 @@ define <16 x i16> @test_psubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; HASWELL-LABEL: test_psubw: ; HASWELL-LABEL: test_psubw:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; ZNVER1-LABEL: test_psubw: ; ZNVER1-LABEL: test_psubw:
; ZNVER1: # BB#0: ; ZNVER1: # BB#0:
@ -318,9 +318,9 @@ define <4 x i64> @test_pxor(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; HASWELL-LABEL: test_pxor: ; HASWELL-LABEL: test_pxor:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; HASWELL-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; HASWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; ZNVER1-LABEL: test_pxor: ; ZNVER1-LABEL: test_pxor:
; ZNVER1: # BB#0: ; ZNVER1: # BB#0:

View File

@ -381,6 +381,7 @@ define <4 x i32> @srl_trunc_and_v4i64(<4 x i32> %x, <4 x i64> %y) nounwind {
; X32-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 ; X32-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; X32-NEXT: vzeroupper ; X32-NEXT: vzeroupper
; X32-NEXT: retl ; X32-NEXT: retl
; X32-NEXT: ## -- End function
; ;
; X64-LABEL: srl_trunc_and_v4i64: ; X64-LABEL: srl_trunc_and_v4i64:
; X64: ## BB#0: ; X64: ## BB#0:
@ -391,6 +392,7 @@ define <4 x i32> @srl_trunc_and_v4i64(<4 x i32> %x, <4 x i64> %y) nounwind {
; X64-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 ; X64-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; X64-NEXT: vzeroupper ; X64-NEXT: vzeroupper
; X64-NEXT: retq ; X64-NEXT: retq
; X64-NEXT: ## -- End function
%and = and <4 x i64> %y, <i64 8, i64 8, i64 8, i64 8> %and = and <4 x i64> %y, <i64 8, i64 8, i64 8, i64 8>
%trunc = trunc <4 x i64> %and to <4 x i32> %trunc = trunc <4 x i64> %and to <4 x i32>
%sra = lshr <4 x i32> %x, %trunc %sra = lshr <4 x i32> %x, %trunc
@ -412,6 +414,7 @@ define <8 x i16> @shl_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill> ; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X32-NEXT: vzeroupper ; X32-NEXT: vzeroupper
; X32-NEXT: retl ; X32-NEXT: retl
; X32-NEXT: ## -- End function
; ;
; X64-LABEL: shl_8i16: ; X64-LABEL: shl_8i16:
; X64: ## BB#0: ; X64: ## BB#0:
@ -423,6 +426,7 @@ define <8 x i16> @shl_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill> ; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X64-NEXT: vzeroupper ; X64-NEXT: vzeroupper
; X64-NEXT: retq ; X64-NEXT: retq
; X64-NEXT: ## -- End function
%shl = shl <8 x i16> %r, %a %shl = shl <8 x i16> %r, %a
ret <8 x i16> %shl ret <8 x i16> %shl
} }
@ -434,13 +438,14 @@ define <16 x i16> @shl_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] ; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15] ; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; X32-NEXT: vpsllvd %ymm3, %ymm4, %ymm3 ; X32-NEXT: vpsllvd %ymm3, %ymm4, %ymm3
; X32-NEXT: vpsrld $16, %ymm3, %ymm3
; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] ; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11] ; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; X32-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 ; X32-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; X32-NEXT: vpsrld $16, %ymm3, %ymm1
; X32-NEXT: vpsrld $16, %ymm0, %ymm0 ; X32-NEXT: vpsrld $16, %ymm0, %ymm0
; X32-NEXT: vpackusdw %ymm3, %ymm0, %ymm0 ; X32-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
; X32-NEXT: retl ; X32-NEXT: retl
; X32-NEXT: ## -- End function
; ;
; X64-LABEL: shl_16i16: ; X64-LABEL: shl_16i16:
; X64: ## BB#0: ; X64: ## BB#0:
@ -448,13 +453,14 @@ define <16 x i16> @shl_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] ; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15] ; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; X64-NEXT: vpsllvd %ymm3, %ymm4, %ymm3 ; X64-NEXT: vpsllvd %ymm3, %ymm4, %ymm3
; X64-NEXT: vpsrld $16, %ymm3, %ymm3
; X64-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] ; X64-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11] ; X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 ; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; X64-NEXT: vpsrld $16, %ymm3, %ymm1
; X64-NEXT: vpsrld $16, %ymm0, %ymm0 ; X64-NEXT: vpsrld $16, %ymm0, %ymm0
; X64-NEXT: vpackusdw %ymm3, %ymm0, %ymm0 ; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
; X64-NEXT: retq ; X64-NEXT: retq
; X64-NEXT: ## -- End function
%shl = shl <16 x i16> %r, %a %shl = shl <16 x i16> %r, %a
ret <16 x i16> %shl ret <16 x i16> %shl
} }
@ -474,6 +480,7 @@ define <32 x i8> @shl_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X32-NEXT: retl ; X32-NEXT: retl
; X32-NEXT: ## -- End function
; ;
; X64-LABEL: shl_32i8: ; X64-LABEL: shl_32i8:
; X64: ## BB#0: ; X64: ## BB#0:
@ -489,6 +496,7 @@ define <32 x i8> @shl_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; X64-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; X64-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; X64-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; X64-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X64-NEXT: retq ; X64-NEXT: retq
; X64-NEXT: ## -- End function
%shl = shl <32 x i8> %r, %a %shl = shl <32 x i8> %r, %a
ret <32 x i8> %shl ret <32 x i8> %shl
} }
@ -504,6 +512,7 @@ define <8 x i16> @ashr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill> ; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X32-NEXT: vzeroupper ; X32-NEXT: vzeroupper
; X32-NEXT: retl ; X32-NEXT: retl
; X32-NEXT: ## -- End function
; ;
; X64-LABEL: ashr_8i16: ; X64-LABEL: ashr_8i16:
; X64: ## BB#0: ; X64: ## BB#0:
@ -515,6 +524,7 @@ define <8 x i16> @ashr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill> ; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X64-NEXT: vzeroupper ; X64-NEXT: vzeroupper
; X64-NEXT: retq ; X64-NEXT: retq
; X64-NEXT: ## -- End function
%ashr = ashr <8 x i16> %r, %a %ashr = ashr <8 x i16> %r, %a
ret <8 x i16> %ashr ret <8 x i16> %ashr
} }
@ -526,13 +536,14 @@ define <16 x i16> @ashr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] ; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15] ; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; X32-NEXT: vpsravd %ymm3, %ymm4, %ymm3 ; X32-NEXT: vpsravd %ymm3, %ymm4, %ymm3
; X32-NEXT: vpsrld $16, %ymm3, %ymm3
; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] ; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11] ; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; X32-NEXT: vpsravd %ymm1, %ymm0, %ymm0 ; X32-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; X32-NEXT: vpsrld $16, %ymm3, %ymm1
; X32-NEXT: vpsrld $16, %ymm0, %ymm0 ; X32-NEXT: vpsrld $16, %ymm0, %ymm0
; X32-NEXT: vpackusdw %ymm3, %ymm0, %ymm0 ; X32-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
; X32-NEXT: retl ; X32-NEXT: retl
; X32-NEXT: ## -- End function
; ;
; X64-LABEL: ashr_16i16: ; X64-LABEL: ashr_16i16:
; X64: ## BB#0: ; X64: ## BB#0:
@ -540,13 +551,14 @@ define <16 x i16> @ashr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] ; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15] ; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; X64-NEXT: vpsravd %ymm3, %ymm4, %ymm3 ; X64-NEXT: vpsravd %ymm3, %ymm4, %ymm3
; X64-NEXT: vpsrld $16, %ymm3, %ymm3
; X64-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] ; X64-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11] ; X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm0 ; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; X64-NEXT: vpsrld $16, %ymm3, %ymm1
; X64-NEXT: vpsrld $16, %ymm0, %ymm0 ; X64-NEXT: vpsrld $16, %ymm0, %ymm0
; X64-NEXT: vpackusdw %ymm3, %ymm0, %ymm0 ; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
; X64-NEXT: retq ; X64-NEXT: retq
; X64-NEXT: ## -- End function
%ashr = ashr <16 x i16> %r, %a %ashr = ashr <16 x i16> %r, %a
ret <16 x i16> %ashr ret <16 x i16> %ashr
} }
@ -579,6 +591,7 @@ define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; X32-NEXT: vpsrlw $8, %ymm0, %ymm0 ; X32-NEXT: vpsrlw $8, %ymm0, %ymm0
; X32-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 ; X32-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; X32-NEXT: retl ; X32-NEXT: retl
; X32-NEXT: ## -- End function
; ;
; X64-LABEL: ashr_32i8: ; X64-LABEL: ashr_32i8:
; X64: ## BB#0: ; X64: ## BB#0:
@ -607,6 +620,7 @@ define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; X64-NEXT: vpsrlw $8, %ymm0, %ymm0 ; X64-NEXT: vpsrlw $8, %ymm0, %ymm0
; X64-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 ; X64-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; X64-NEXT: retq ; X64-NEXT: retq
; X64-NEXT: ## -- End function
%ashr = ashr <32 x i8> %r, %a %ashr = ashr <32 x i8> %r, %a
ret <32 x i8> %ashr ret <32 x i8> %ashr
} }
@ -622,6 +636,7 @@ define <8 x i16> @lshr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill> ; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X32-NEXT: vzeroupper ; X32-NEXT: vzeroupper
; X32-NEXT: retl ; X32-NEXT: retl
; X32-NEXT: ## -- End function
; ;
; X64-LABEL: lshr_8i16: ; X64-LABEL: lshr_8i16:
; X64: ## BB#0: ; X64: ## BB#0:
@ -633,6 +648,7 @@ define <8 x i16> @lshr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill> ; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X64-NEXT: vzeroupper ; X64-NEXT: vzeroupper
; X64-NEXT: retq ; X64-NEXT: retq
; X64-NEXT: ## -- End function
%lshr = lshr <8 x i16> %r, %a %lshr = lshr <8 x i16> %r, %a
ret <8 x i16> %lshr ret <8 x i16> %lshr
} }
@ -644,13 +660,14 @@ define <16 x i16> @lshr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] ; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15] ; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; X32-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3 ; X32-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
; X32-NEXT: vpsrld $16, %ymm3, %ymm3
; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] ; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11] ; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; X32-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 ; X32-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; X32-NEXT: vpsrld $16, %ymm3, %ymm1
; X32-NEXT: vpsrld $16, %ymm0, %ymm0 ; X32-NEXT: vpsrld $16, %ymm0, %ymm0
; X32-NEXT: vpackusdw %ymm3, %ymm0, %ymm0 ; X32-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
; X32-NEXT: retl ; X32-NEXT: retl
; X32-NEXT: ## -- End function
; ;
; X64-LABEL: lshr_16i16: ; X64-LABEL: lshr_16i16:
; X64: ## BB#0: ; X64: ## BB#0:
@ -658,13 +675,14 @@ define <16 x i16> @lshr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] ; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15] ; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
; X64-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3 ; X64-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
; X64-NEXT: vpsrld $16, %ymm3, %ymm3
; X64-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] ; X64-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11] ; X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 ; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; X64-NEXT: vpsrld $16, %ymm3, %ymm1
; X64-NEXT: vpsrld $16, %ymm0, %ymm0 ; X64-NEXT: vpsrld $16, %ymm0, %ymm0
; X64-NEXT: vpackusdw %ymm3, %ymm0, %ymm0 ; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
; X64-NEXT: retq ; X64-NEXT: retq
; X64-NEXT: ## -- End function
%lshr = lshr <16 x i16> %r, %a %lshr = lshr <16 x i16> %r, %a
ret <16 x i16> %lshr ret <16 x i16> %lshr
} }
@ -685,6 +703,7 @@ define <32 x i8> @lshr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X32-NEXT: retl ; X32-NEXT: retl
; X32-NEXT: ## -- End function
; ;
; X64-LABEL: lshr_32i8: ; X64-LABEL: lshr_32i8:
; X64: ## BB#0: ; X64: ## BB#0:
@ -701,6 +720,7 @@ define <32 x i8> @lshr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; X64-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; X64-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; X64-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; X64-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; X64-NEXT: retq ; X64-NEXT: retq
; X64-NEXT: ## -- End function
%lshr = lshr <32 x i8> %r, %a %lshr = lshr <32 x i8> %r, %a
ret <32 x i8> %lshr ret <32 x i8> %lshr
} }

View File

@ -14,6 +14,7 @@ define double @test1(double %a, double %b) nounwind {
; ALL-NEXT: LBB0_2: ## %l2 ; ALL-NEXT: LBB0_2: ## %l2
; ALL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; ALL-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; ALL-NEXT: retq ; ALL-NEXT: retq
; ALL-NEXT: ## -- End function
%tobool = fcmp une double %a, %b %tobool = fcmp une double %a, %b
br i1 %tobool, label %l1, label %l2 br i1 %tobool, label %l1, label %l2
@ -36,6 +37,7 @@ define float @test2(float %a, float %b) nounwind {
; ALL-NEXT: LBB1_2: ## %l2 ; ALL-NEXT: LBB1_2: ## %l2
; ALL-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; ALL-NEXT: vaddss %xmm1, %xmm0, %xmm0
; ALL-NEXT: retq ; ALL-NEXT: retq
; ALL-NEXT: ## -- End function
%tobool = fcmp olt float %a, %b %tobool = fcmp olt float %a, %b
br i1 %tobool, label %l1, label %l2 br i1 %tobool, label %l1, label %l2
@ -124,11 +126,11 @@ entry:
define i32 @test8(i32 %a1, i32 %a2, i32 %a3) { define i32 @test8(i32 %a1, i32 %a2, i32 %a3) {
; ALL-LABEL: test8: ; ALL-LABEL: test8:
; ALL: ## BB#0: ; ALL: ## BB#0:
; ALL-NEXT: notl %edi
; ALL-NEXT: xorl $-2147483648, %esi ## imm = 0x80000000 ; ALL-NEXT: xorl $-2147483648, %esi ## imm = 0x80000000
; ALL-NEXT: testl %edx, %edx ; ALL-NEXT: testl %edx, %edx
; ALL-NEXT: movl $1, %eax ; ALL-NEXT: movl $1, %eax
; ALL-NEXT: cmovel %eax, %edx ; ALL-NEXT: cmovel %eax, %edx
; ALL-NEXT: notl %edi
; ALL-NEXT: orl %edi, %esi ; ALL-NEXT: orl %edi, %esi
; ALL-NEXT: cmovnel %edx, %eax ; ALL-NEXT: cmovnel %edx, %eax
; ALL-NEXT: retq ; ALL-NEXT: retq

View File

@ -1545,19 +1545,19 @@ define <4 x double> @uitofp_4i1_double(<4 x i32> %a) {
} }
define <2 x float> @uitofp_2i1_float(<2 x i32> %a) { define <2 x float> @uitofp_2i1_float(<2 x i32> %a) {
; NOVL-LABEL: uitofp_2i1_float: ; KNL-LABEL: uitofp_2i1_float:
; NOVL: # BB#0: ; KNL: # BB#0:
; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] ; KNL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NOVL-NEXT: vpextrb $8, %xmm0, %eax ; KNL-NEXT: vpextrb $8, %xmm0, %eax
; NOVL-NEXT: andl $1, %eax ; KNL-NEXT: andl $1, %eax
; NOVL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm1 ; KNL-NEXT: vpextrb $0, %xmm0, %ecx
; NOVL-NEXT: vpextrb $0, %xmm0, %eax ; KNL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
; NOVL-NEXT: andl $1, %eax ; KNL-NEXT: andl $1, %ecx
; NOVL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0 ; KNL-NEXT: vcvtsi2ssl %ecx, %xmm2, %xmm1
; NOVL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] ; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; NOVL-NEXT: retq ; KNL-NEXT: retq
; ;
; VL-LABEL: uitofp_2i1_float: ; VL-LABEL: uitofp_2i1_float:
; VL: # BB#0: ; VL: # BB#0:
@ -1567,6 +1567,34 @@ define <2 x float> @uitofp_2i1_float(<2 x i32> %a) {
; VL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} ; VL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
; VL-NEXT: vcvtudq2ps %xmm0, %xmm0 ; VL-NEXT: vcvtudq2ps %xmm0, %xmm0
; VL-NEXT: retq ; VL-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_2i1_float:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512DQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX512DQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512DQ-NEXT: vpextrb $8, %xmm0, %eax
; AVX512DQ-NEXT: andl $1, %eax
; AVX512DQ-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm1
; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax
; AVX512DQ-NEXT: andl $1, %eax
; AVX512DQ-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: uitofp_2i1_float:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX512BW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax
; AVX512BW-NEXT: andl $1, %eax
; AVX512BW-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm1
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
; AVX512BW-NEXT: andl $1, %eax
; AVX512BW-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
; AVX512BW-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512BW-NEXT: retq
%mask = icmp ult <2 x i32> %a, zeroinitializer %mask = icmp ult <2 x i32> %a, zeroinitializer
%1 = uitofp <2 x i1> %mask to <2 x float> %1 = uitofp <2 x i1> %mask to <2 x float>
ret <2 x float> %1 ret <2 x float> %1

View File

@ -12,6 +12,7 @@ define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] ; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; KNL-NEXT: vinsertf32x4 $3, %xmm0, %zmm2, %zmm0 ; KNL-NEXT: vinsertf32x4 $3, %xmm0, %zmm2, %zmm0
; KNL-NEXT: retq ; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
; ;
; SKX-LABEL: test1: ; SKX-LABEL: test1:
; SKX: ## BB#0: ; SKX: ## BB#0:
@ -21,6 +22,7 @@ define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
; SKX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] ; SKX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; SKX-NEXT: vinsertf32x4 $3, %xmm0, %zmm2, %zmm0 ; SKX-NEXT: vinsertf32x4 $3, %xmm0, %zmm2, %zmm0
; SKX-NEXT: retq ; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%rrr = load float, float* %br %rrr = load float, float* %br
%rrr2 = insertelement <16 x float> %x, float %rrr, i32 1 %rrr2 = insertelement <16 x float> %x, float %rrr, i32 1
%rrr3 = insertelement <16 x float> %rrr2, float %y, i32 14 %rrr3 = insertelement <16 x float> %rrr2, float %y, i32 14
@ -36,6 +38,7 @@ define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
; KNL-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; KNL-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; KNL-NEXT: vinsertf32x4 $3, %xmm0, %zmm2, %zmm0 ; KNL-NEXT: vinsertf32x4 $3, %xmm0, %zmm2, %zmm0
; KNL-NEXT: retq ; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
; ;
; SKX-LABEL: test2: ; SKX-LABEL: test2:
; SKX: ## BB#0: ; SKX: ## BB#0:
@ -45,6 +48,7 @@ define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
; SKX-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SKX-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SKX-NEXT: vinsertf64x2 $3, %xmm0, %zmm2, %zmm0 ; SKX-NEXT: vinsertf64x2 $3, %xmm0, %zmm2, %zmm0
; SKX-NEXT: retq ; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%rrr = load double, double* %br %rrr = load double, double* %br
%rrr2 = insertelement <8 x double> %x, double %rrr, i32 1 %rrr2 = insertelement <8 x double> %x, double %rrr, i32 1
%rrr3 = insertelement <8 x double> %rrr2, double %y, i32 6 %rrr3 = insertelement <8 x double> %rrr2, double %y, i32 6
@ -58,6 +62,7 @@ define <16 x float> @test3(<16 x float> %x) nounwind {
; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3] ; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
; KNL-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0 ; KNL-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT: retq ; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
; ;
; SKX-LABEL: test3: ; SKX-LABEL: test3:
; SKX: ## BB#0: ; SKX: ## BB#0:
@ -65,6 +70,7 @@ define <16 x float> @test3(<16 x float> %x) nounwind {
; SKX-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3] ; SKX-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
; SKX-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0 ; SKX-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT: retq ; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%eee = extractelement <16 x float> %x, i32 4 %eee = extractelement <16 x float> %x, i32 4
%rrr2 = insertelement <16 x float> %x, float %eee, i32 1 %rrr2 = insertelement <16 x float> %x, float %eee, i32 1
ret <16 x float> %rrr2 ret <16 x float> %rrr2
@ -78,6 +84,7 @@ define <8 x i64> @test4(<8 x i64> %x) nounwind {
; KNL-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1 ; KNL-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1
; KNL-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0 ; KNL-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT: retq ; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
; ;
; SKX-LABEL: test4: ; SKX-LABEL: test4:
; SKX: ## BB#0: ; SKX: ## BB#0:
@ -86,6 +93,7 @@ define <8 x i64> @test4(<8 x i64> %x) nounwind {
; SKX-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1 ; SKX-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1
; SKX-NEXT: vinserti64x2 $0, %xmm1, %zmm0, %zmm0 ; SKX-NEXT: vinserti64x2 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT: retq ; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%eee = extractelement <8 x i64> %x, i32 4 %eee = extractelement <8 x i64> %x, i32 4
%rrr2 = insertelement <8 x i64> %x, i64 %eee, i32 1 %rrr2 = insertelement <8 x i64> %x, i64 %eee, i32 1
ret <8 x i64> %rrr2 ret <8 x i64> %rrr2
@ -96,11 +104,13 @@ define i32 @test5(<4 x float> %x) nounwind {
; KNL: ## BB#0: ; KNL: ## BB#0:
; KNL-NEXT: vextractps $3, %xmm0, %eax ; KNL-NEXT: vextractps $3, %xmm0, %eax
; KNL-NEXT: retq ; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
; ;
; SKX-LABEL: test5: ; SKX-LABEL: test5:
; SKX: ## BB#0: ; SKX: ## BB#0:
; SKX-NEXT: vextractps $3, %xmm0, %eax ; SKX-NEXT: vextractps $3, %xmm0, %eax
; SKX-NEXT: retq ; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%ef = extractelement <4 x float> %x, i32 3 %ef = extractelement <4 x float> %x, i32 3
%ei = bitcast float %ef to i32 %ei = bitcast float %ef to i32
ret i32 %ei ret i32 %ei
@ -111,11 +121,13 @@ define void @test6(<4 x float> %x, float* %out) nounwind {
; KNL: ## BB#0: ; KNL: ## BB#0:
; KNL-NEXT: vextractps $3, %xmm0, (%rdi) ; KNL-NEXT: vextractps $3, %xmm0, (%rdi)
; KNL-NEXT: retq ; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
; ;
; SKX-LABEL: test6: ; SKX-LABEL: test6:
; SKX: ## BB#0: ; SKX: ## BB#0:
; SKX-NEXT: vextractps $3, %xmm0, (%rdi) ; SKX-NEXT: vextractps $3, %xmm0, (%rdi)
; SKX-NEXT: retq ; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%ef = extractelement <4 x float> %x, i32 3 %ef = extractelement <4 x float> %x, i32 3
store float %ef, float* %out, align 4 store float %ef, float* %out, align 4
ret void ret void
@ -135,6 +147,7 @@ define float @test7(<16 x float> %x, i32 %ind) nounwind {
; KNL-NEXT: movq %rbp, %rsp ; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp ; KNL-NEXT: popq %rbp
; KNL-NEXT: retq ; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
; ;
; SKX-LABEL: test7: ; SKX-LABEL: test7:
; SKX: ## BB#0: ; SKX: ## BB#0:
@ -150,6 +163,7 @@ define float @test7(<16 x float> %x, i32 %ind) nounwind {
; SKX-NEXT: popq %rbp ; SKX-NEXT: popq %rbp
; SKX-NEXT: vzeroupper ; SKX-NEXT: vzeroupper
; SKX-NEXT: retq ; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%e = extractelement <16 x float> %x, i32 %ind %e = extractelement <16 x float> %x, i32 %ind
ret float %e ret float %e
} }
@ -168,6 +182,7 @@ define double @test8(<8 x double> %x, i32 %ind) nounwind {
; KNL-NEXT: movq %rbp, %rsp ; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp ; KNL-NEXT: popq %rbp
; KNL-NEXT: retq ; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
; ;
; SKX-LABEL: test8: ; SKX-LABEL: test8:
; SKX: ## BB#0: ; SKX: ## BB#0:
@ -183,6 +198,7 @@ define double @test8(<8 x double> %x, i32 %ind) nounwind {
; SKX-NEXT: popq %rbp ; SKX-NEXT: popq %rbp
; SKX-NEXT: vzeroupper ; SKX-NEXT: vzeroupper
; SKX-NEXT: retq ; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%e = extractelement <8 x double> %x, i32 %ind %e = extractelement <8 x double> %x, i32 %ind
ret double %e ret double %e
} }
@ -201,6 +217,7 @@ define float @test9(<8 x float> %x, i32 %ind) nounwind {
; KNL-NEXT: movq %rbp, %rsp ; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp ; KNL-NEXT: popq %rbp
; KNL-NEXT: retq ; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
; ;
; SKX-LABEL: test9: ; SKX-LABEL: test9:
; SKX: ## BB#0: ; SKX: ## BB#0:
@ -216,6 +233,7 @@ define float @test9(<8 x float> %x, i32 %ind) nounwind {
; SKX-NEXT: popq %rbp ; SKX-NEXT: popq %rbp
; SKX-NEXT: vzeroupper ; SKX-NEXT: vzeroupper
; SKX-NEXT: retq ; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%e = extractelement <8 x float> %x, i32 %ind %e = extractelement <8 x float> %x, i32 %ind
ret float %e ret float %e
} }
@ -234,6 +252,7 @@ define i32 @test10(<16 x i32> %x, i32 %ind) nounwind {
; KNL-NEXT: movq %rbp, %rsp ; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp ; KNL-NEXT: popq %rbp
; KNL-NEXT: retq ; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
; ;
; SKX-LABEL: test10: ; SKX-LABEL: test10:
; SKX: ## BB#0: ; SKX: ## BB#0:
@ -249,6 +268,7 @@ define i32 @test10(<16 x i32> %x, i32 %ind) nounwind {
; SKX-NEXT: popq %rbp ; SKX-NEXT: popq %rbp
; SKX-NEXT: vzeroupper ; SKX-NEXT: vzeroupper
; SKX-NEXT: retq ; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%e = extractelement <16 x i32> %x, i32 %ind %e = extractelement <16 x i32> %x, i32 %ind
ret i32 %e ret i32 %e
} }
@ -1114,137 +1134,137 @@ define i32 @test_insertelement_v32i1(i32 %a, i32 %b, <32 x i32> %x , <32 x i32>
; KNL-NEXT: .cfi_def_cfa_register %rbp ; KNL-NEXT: .cfi_def_cfa_register %rbp
; KNL-NEXT: andq $-32, %rsp ; KNL-NEXT: andq $-32, %rsp
; KNL-NEXT: subq $32, %rsp ; KNL-NEXT: subq $32, %rsp
; KNL-NEXT: xorl %eax, %eax
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: setb %al
; KNL-NEXT: vpcmpltud %zmm3, %zmm1, %k0 ; KNL-NEXT: vpcmpltud %zmm3, %zmm1, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1 ; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1 ; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %edx ; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vmovd %edx, %xmm1 ; KNL-NEXT: vmovd %ecx, %xmm1
; KNL-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1 ; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $13, %k0, %k1 ; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1 ; KNL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $12, %k0, %k1 ; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1 ; KNL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $11, %k0, %k1 ; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1 ; KNL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $10, %k0, %k1 ; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm1 ; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $9, %k0, %k1 ; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1 ; KNL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $8, %k0, %k1 ; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1 ; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $7, %k0, %k1 ; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 ; KNL-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $6, %k0, %k1 ; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $9, %ecx, %xmm1, %xmm1 ; KNL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $5, %k0, %k1 ; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1 ; KNL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $4, %k0, %k1 ; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $11, %ecx, %xmm1, %xmm1 ; KNL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $3, %k0, %k1 ; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1 ; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $2, %k0, %k1 ; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1 ; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $1, %k0, %k1 ; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1 ; KNL-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx ; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %ecx, %xmm1, %xmm1 ; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; KNL-NEXT: vpcmpltud %zmm2, %zmm0, %k0 ; KNL-NEXT: vpcmpltud %zmm2, %zmm0, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $14, %k0, %k1 ; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: vmovd %eax, %xmm0
; KNL-NEXT: kshiftlw $15, %k0, %k1 ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: vmovd %edx, %xmm0
; KNL-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $13, %k0, %k1 ; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $12, %k0, %k1 ; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $11, %k0, %k1 ; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $10, %k0, %k1 ; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $9, %k0, %k1 ; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $8, %k0, %k1 ; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $7, %k0, %k1 ; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $6, %k0, %k1 ; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $5, %k0, %k1 ; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $4, %k0, %k1 ; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $3, %k0, %k1 ; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $2, %k0, %k1 ; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $1, %k0, %k1 ; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; KNL-NEXT: xorl %eax, %eax
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx ; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0
; KNL-NEXT: setb %al
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT: vpsllw $7, %ymm0, %ymm0 ; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 ; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
@ -1299,8 +1319,8 @@ define i8 @test_iinsertelement_v4i1(i32 %a, i32 %b, <4 x i32> %x , <4 x i32> %y)
; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; KNL-NEXT: vpextrb $4, %xmm0, %ecx ; KNL-NEXT: vpextrb $4, %xmm0, %ecx
; KNL-NEXT: kmovw %ecx, %k1 ; KNL-NEXT: kmovw %ecx, %k1
; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vpextrb $0, %xmm0, %ecx ; KNL-NEXT: vpextrb $0, %xmm0, %ecx
; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: kmovw %ecx, %k1 ; KNL-NEXT: kmovw %ecx, %k1
; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
@ -2124,8 +2144,8 @@ define i16 @test_extractelement_variable_v32i16(<32 x i16> %t1, i32 %index) {
define i8 @test_extractelement_variable_v16i8(<16 x i8> %t1, i32 %index) { define i8 @test_extractelement_variable_v16i8(<16 x i8> %t1, i32 %index) {
; KNL-LABEL: test_extractelement_variable_v16i8: ; KNL-LABEL: test_extractelement_variable_v16i8:
; KNL: ## BB#0: ; KNL: ## BB#0:
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; KNL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: andl $15, %edi ; KNL-NEXT: andl $15, %edi
; KNL-NEXT: leaq -{{[0-9]+}}(%rsp), %rax ; KNL-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; KNL-NEXT: movb (%rdi,%rax), %al ; KNL-NEXT: movb (%rdi,%rax), %al
@ -2156,8 +2176,8 @@ define i8 @test_extractelement_variable_v32i8(<32 x i8> %t1, i32 %index) {
; KNL-NEXT: .cfi_def_cfa_register %rbp ; KNL-NEXT: .cfi_def_cfa_register %rbp
; KNL-NEXT: andq $-32, %rsp ; KNL-NEXT: andq $-32, %rsp
; KNL-NEXT: subq $64, %rsp ; KNL-NEXT: subq $64, %rsp
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: vmovaps %ymm0, (%rsp) ; KNL-NEXT: vmovaps %ymm0, (%rsp)
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: andl $31, %edi ; KNL-NEXT: andl $31, %edi
; KNL-NEXT: movq %rsp, %rax ; KNL-NEXT: movq %rsp, %rax
; KNL-NEXT: movb (%rdi,%rax), %al ; KNL-NEXT: movb (%rdi,%rax), %al
@ -2204,9 +2224,9 @@ define i8 @test_extractelement_variable_v64i8(<64 x i8> %t1, i32 %index) {
; KNL-NEXT: .cfi_def_cfa_register %rbp ; KNL-NEXT: .cfi_def_cfa_register %rbp
; KNL-NEXT: andq $-64, %rsp ; KNL-NEXT: andq $-64, %rsp
; KNL-NEXT: subq $128, %rsp ; KNL-NEXT: subq $128, %rsp
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) ; KNL-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
; KNL-NEXT: vmovaps %ymm0, (%rsp) ; KNL-NEXT: vmovaps %ymm0, (%rsp)
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: andl $63, %edi ; KNL-NEXT: andl $63, %edi
; KNL-NEXT: movq %rsp, %rax ; KNL-NEXT: movq %rsp, %rax
; KNL-NEXT: movb (%rdi,%rax), %al ; KNL-NEXT: movb (%rdi,%rax), %al
@ -2295,12 +2315,12 @@ define i8 @test_extractelement_variable_v64i8_indexi8(<64 x i8> %t1, i8 %index)
define zeroext i8 @test_extractelement_varible_v2i1(<2 x i64> %a, <2 x i64> %b, i32 %index) { define zeroext i8 @test_extractelement_varible_v2i1(<2 x i64> %a, <2 x i64> %b, i32 %index) {
; KNL-LABEL: test_extractelement_varible_v2i1: ; KNL-LABEL: test_extractelement_varible_v2i1:
; KNL: ## BB#0: ; KNL: ## BB#0:
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0
; KNL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; KNL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) ; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: andl $1, %edi ; KNL-NEXT: andl $1, %edi
; KNL-NEXT: movl -24(%rsp,%rdi,8), %eax ; KNL-NEXT: movl -24(%rsp,%rdi,8), %eax
; KNL-NEXT: andl $1, %eax ; KNL-NEXT: andl $1, %eax
@ -2325,12 +2345,12 @@ define zeroext i8 @test_extractelement_varible_v2i1(<2 x i64> %a, <2 x i64> %b,
define zeroext i8 @test_extractelement_varible_v4i1(<4 x i32> %a, <4 x i32> %b, i32 %index) { define zeroext i8 @test_extractelement_varible_v4i1(<4 x i32> %a, <4 x i32> %b, i32 %index) {
; KNL-LABEL: test_extractelement_varible_v4i1: ; KNL-LABEL: test_extractelement_varible_v4i1:
; KNL: ## BB#0: ; KNL: ## BB#0:
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2 ; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0
; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) ; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; KNL-NEXT: andl $3, %edi ; KNL-NEXT: andl $3, %edi
; KNL-NEXT: movl -24(%rsp,%rdi,4), %eax ; KNL-NEXT: movl -24(%rsp,%rdi,4), %eax
; KNL-NEXT: andl $1, %eax ; KNL-NEXT: andl $1, %eax

View File

@ -2880,7 +2880,6 @@ declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8
define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) { define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
; CHECK-LABEL: test_mask_vextractf32x4: ; CHECK-LABEL: test_mask_vextractf32x4:
; CHECK: ## BB#0: ; CHECK: ## BB#0:
; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm1
; CHECK-NEXT: kmovw %edi, %k0 ; CHECK-NEXT: kmovw %edi, %k0
; CHECK-NEXT: kshiftlw $12, %k0, %k1 ; CHECK-NEXT: kshiftlw $12, %k0, %k1
; CHECK-NEXT: kshiftrw $15, %k1, %k1 ; CHECK-NEXT: kshiftrw $15, %k1, %k1
@ -2898,6 +2897,7 @@ define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8
; CHECK-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 ; CHECK-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; CHECK-NEXT: kmovw %k1, %eax ; CHECK-NEXT: kmovw %k1, %eax
; CHECK-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm1
; CHECK-NEXT: vpslld $31, %xmm2, %xmm2 ; CHECK-NEXT: vpslld $31, %xmm2, %xmm2
; CHECK-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq ; CHECK-NEXT: retq
@ -2941,7 +2941,6 @@ declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i32, <4 x i
define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) { define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
; CHECK-LABEL: test_maskz_vextracti32x4: ; CHECK-LABEL: test_maskz_vextracti32x4:
; CHECK: ## BB#0: ; CHECK: ## BB#0:
; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm0
; CHECK-NEXT: kmovw %edi, %k0 ; CHECK-NEXT: kmovw %edi, %k0
; CHECK-NEXT: kshiftlw $12, %k0, %k1 ; CHECK-NEXT: kshiftlw $12, %k0, %k1
; CHECK-NEXT: kshiftrw $15, %k1, %k1 ; CHECK-NEXT: kshiftrw $15, %k1, %k1
@ -2959,6 +2958,7 @@ define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
; CHECK-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; CHECK-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; CHECK-NEXT: kmovw %k1, %eax ; CHECK-NEXT: kmovw %k1, %eax
; CHECK-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; CHECK-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm0
; CHECK-NEXT: vpslld $31, %xmm1, %xmm1 ; CHECK-NEXT: vpslld $31, %xmm1, %xmm1
; CHECK-NEXT: vpsrad $31, %xmm1, %xmm1 ; CHECK-NEXT: vpsrad $31, %xmm1, %xmm1
; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0 ; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0

View File

@ -1837,73 +1837,8 @@ define void @ktest_2(<32 x float> %in, float * %base) {
; KNL-NEXT: .cfi_def_cfa_register %rbp ; KNL-NEXT: .cfi_def_cfa_register %rbp
; KNL-NEXT: andq $-32, %rsp ; KNL-NEXT: andq $-32, %rsp
; KNL-NEXT: subq $32, %rsp ; KNL-NEXT: subq $32, %rsp
; KNL-NEXT: vmovups (%rdi), %zmm2 ; KNL-NEXT: vmovups 64(%rdi), %zmm2
; KNL-NEXT: vmovups 64(%rdi), %zmm3 ; KNL-NEXT: vcmpltps %zmm1, %zmm2, %k2
; KNL-NEXT: vcmpltps %zmm1, %zmm3, %k1
; KNL-NEXT: kshiftlw $14, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $15, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: vmovd %ecx, %xmm3
; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $13, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $12, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $11, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $10, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $9, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $8, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $7, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $6, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $5, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $4, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $3, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $2, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $1, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
; KNL-NEXT: vcmpltps %zmm0, %zmm2, %k2
; KNL-NEXT: kshiftlw $14, %k2, %k0 ; KNL-NEXT: kshiftlw $14, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: kmovw %k0, %eax
@ -1967,10 +1902,139 @@ define void @ktest_2(<32 x float> %in, float * %base) {
; KNL-NEXT: kshiftrw $15, %k2, %k0 ; KNL-NEXT: kshiftrw $15, %k2, %k0
; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 ; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; KNL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 ; KNL-NEXT: vmovups (%rdi), %zmm3
; KNL-NEXT: vmovups 4(%rdi), %zmm3 {%k2} {z} ; KNL-NEXT: vcmpltps %zmm0, %zmm3, %k1
; KNL-NEXT: vmovups 68(%rdi), %zmm4 {%k1} {z} ; KNL-NEXT: kshiftlw $14, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $15, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: vmovd %ecx, %xmm3
; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $13, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $12, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $11, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $10, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $9, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $8, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $7, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $6, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $5, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $4, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $3, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $2, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $1, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
; KNL-NEXT: vmovups 68(%rdi), %zmm4 {%k2} {z}
; KNL-NEXT: vcmpltps %zmm4, %zmm1, %k0 ; KNL-NEXT: vcmpltps %zmm4, %zmm1, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %ecx
; KNL-NEXT: vmovd %ecx, %xmm4
; KNL-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $13, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $12, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $11, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $10, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $9, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $8, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $7, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $6, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $5, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $4, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $3, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $2, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $1, %k0, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: kmovw %k2, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4
; KNL-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm4, %xmm3
; KNL-NEXT: vmovups 4(%rdi), %zmm4 {%k1} {z}
; KNL-NEXT: vcmpltps %zmm4, %zmm0, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1 ; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kmovw %k1, %eax
@ -2034,71 +2098,7 @@ define void @ktest_2(<32 x float> %in, float * %base) {
; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4 ; KNL-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4
; KNL-NEXT: vcmpltps %zmm3, %zmm0, %k0 ; KNL-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vmovd %ecx, %xmm3
; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
; KNL-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; KNL-NEXT: vpor %ymm3, %ymm2, %ymm2 ; KNL-NEXT: vpor %ymm3, %ymm2, %ymm2
; KNL-NEXT: vextracti128 $1, %ymm2, %xmm3 ; KNL-NEXT: vextracti128 $1, %ymm2, %xmm3
; KNL-NEXT: vpmovsxbd %xmm3, %zmm3 ; KNL-NEXT: vpmovsxbd %xmm3, %zmm3
@ -2943,36 +2943,6 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; ;
; KNL-LABEL: store_64i1: ; KNL-LABEL: store_64i1:
; KNL: ## BB#0: ; KNL: ## BB#0:
; KNL-NEXT: pushq %rbp
; KNL-NEXT: Lcfi9:
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: pushq %r15
; KNL-NEXT: Lcfi10:
; KNL-NEXT: .cfi_def_cfa_offset 24
; KNL-NEXT: pushq %r14
; KNL-NEXT: Lcfi11:
; KNL-NEXT: .cfi_def_cfa_offset 32
; KNL-NEXT: pushq %r13
; KNL-NEXT: Lcfi12:
; KNL-NEXT: .cfi_def_cfa_offset 40
; KNL-NEXT: pushq %r12
; KNL-NEXT: Lcfi13:
; KNL-NEXT: .cfi_def_cfa_offset 48
; KNL-NEXT: pushq %rbx
; KNL-NEXT: Lcfi14:
; KNL-NEXT: .cfi_def_cfa_offset 56
; KNL-NEXT: Lcfi15:
; KNL-NEXT: .cfi_offset %rbx, -56
; KNL-NEXT: Lcfi16:
; KNL-NEXT: .cfi_offset %r12, -48
; KNL-NEXT: Lcfi17:
; KNL-NEXT: .cfi_offset %r13, -40
; KNL-NEXT: Lcfi18:
; KNL-NEXT: .cfi_offset %r14, -32
; KNL-NEXT: Lcfi19:
; KNL-NEXT: .cfi_offset %r15, -24
; KNL-NEXT: Lcfi20:
; KNL-NEXT: .cfi_offset %rbp, -16
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
@ -2984,281 +2954,275 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0 ; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1 ; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r8d ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1 ; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r9d ; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $13, %k0, %k1 ; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r10d ; KNL-NEXT: vmovd %ecx, %xmm3
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $12, %k0, %k1 ; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r11d ; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $11, %k0, %k1 ; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r14d ; KNL-NEXT: vpinsrb $2, %ecx, %xmm3, %xmm3
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $10, %k0, %k1 ; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r15d ; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $9, %k0, %k1 ; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r12d ; KNL-NEXT: vpinsrb $4, %ecx, %xmm3, %xmm3
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $8, %k0, %k1 ; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r13d ; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $7, %k0, %k1 ; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ebx ; KNL-NEXT: vpinsrb $6, %ecx, %xmm3, %xmm3
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $6, %k0, %k1 ; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ebp ; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $5, %k0, %k1 ; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: vpinsrb $8, %ecx, %xmm3, %xmm3
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $4, %k0, %k1 ; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $3, %k0, %k1 ; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %edx ; KNL-NEXT: vpinsrb $10, %ecx, %xmm3, %xmm3
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $2, %k0, %k1 ; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %esi ; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $1, %k0, %k1 ; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vmovd %r9d, %xmm3 ; KNL-NEXT: vpinsrb $12, %ecx, %xmm3, %xmm3
; KNL-NEXT: kmovw %k1, %r9d ; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k2 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $1, %r8d, %xmm3, %xmm2 ; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm2
; KNL-NEXT: vpinsrb $2, %r10d, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $4, %r14d, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $5, %r15d, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $6, %r12d, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $7, %r13d, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $8, %ebx, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $9, %ebp, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $12, %edx, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $13, %esi, %xmm2, %xmm2
; KNL-NEXT: vpinsrb $14, %r9d, %xmm2, %xmm2
; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
; KNL-NEXT: kmovw %k0, 6(%rdi)
; KNL-NEXT: kshiftlw $14, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r8d
; KNL-NEXT: kshiftlw $15, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r10d
; KNL-NEXT: kshiftlw $13, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r9d
; KNL-NEXT: kshiftlw $12, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r11d
; KNL-NEXT: kshiftlw $11, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r14d
; KNL-NEXT: kshiftlw $10, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r15d
; KNL-NEXT: kshiftlw $9, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r12d
; KNL-NEXT: kshiftlw $8, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r13d
; KNL-NEXT: kshiftlw $7, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $6, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %esi
; KNL-NEXT: kshiftlw $5, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ebp
; KNL-NEXT: kshiftlw $4, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ebx
; KNL-NEXT: kshiftlw $3, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $2, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %edx
; KNL-NEXT: kshiftlw $1, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vmovd %r10d, %xmm2
; KNL-NEXT: kmovw %k0, %r10d
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL-NEXT: kshiftrw $15, %k2, %k0
; KNL-NEXT: vpinsrb $1, %r8d, %xmm2, %xmm1
; KNL-NEXT: vpinsrb $2, %r9d, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $3, %r11d, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $4, %r14d, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $5, %r15d, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $6, %r12d, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $7, %r13d, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $9, %esi, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $10, %ebp, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $11, %ebx, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $13, %edx, %xmm1, %xmm1
; KNL-NEXT: vpinsrb $14, %r10d, %xmm1, %xmm1
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kmovw %k0, 4(%rdi)
; KNL-NEXT: kshiftlw $14, %k1, %k0 ; KNL-NEXT: kshiftlw $14, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r8d ; KNL-NEXT: vpinsrb $14, %ecx, %xmm2, %xmm2
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $15, %k1, %k0 ; KNL-NEXT: kshiftlw $15, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r10d ; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $13, %k1, %k0 ; KNL-NEXT: kshiftlw $13, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r9d ; KNL-NEXT: vmovd %eax, %xmm3
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $12, %k1, %k0 ; KNL-NEXT: kshiftlw $12, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: vpinsrb $1, %ecx, %xmm3, %xmm3
; KNL-NEXT: kmovw %k0, %r11d
; KNL-NEXT: kshiftlw $11, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r14d
; KNL-NEXT: kshiftlw $10, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r15d
; KNL-NEXT: kshiftlw $9, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r12d
; KNL-NEXT: kshiftlw $8, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %r13d
; KNL-NEXT: kshiftlw $7, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx ; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $11, %k1, %k0
; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $10, %k1, %k0
; KNL-NEXT: vpinsrb $3, %ecx, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $9, %k1, %k0
; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $8, %k1, %k0
; KNL-NEXT: vpinsrb $5, %ecx, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $7, %k1, %k0
; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $6, %k1, %k0 ; KNL-NEXT: kshiftlw $6, %k1, %k0
; KNL-NEXT: vpinsrb $7, %ecx, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %esi ; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $5, %k1, %k0 ; KNL-NEXT: kshiftlw $5, %k1, %k0
; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ebp ; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $4, %k1, %k0 ; KNL-NEXT: kshiftlw $4, %k1, %k0
; KNL-NEXT: vpinsrb $9, %ecx, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ebx ; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $3, %k1, %k0 ; KNL-NEXT: kshiftlw $3, %k1, %k0
; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $2, %k1, %k0 ; KNL-NEXT: kshiftlw $2, %k1, %k0
; KNL-NEXT: vpinsrb $11, %ecx, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %edx ; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $1, %k1, %k0 ; KNL-NEXT: kshiftlw $1, %k1, %k0
; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vmovd %r10d, %xmm1 ; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kmovw %k0, %r10d ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpmovsxbd %xmm2, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vpinsrb $13, %ecx, %xmm3, %xmm2
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL-NEXT: vpinsrb $14, %eax, %xmm2, %xmm1
; KNL-NEXT: vpinsrb $15, %ecx, %xmm1, %xmm1
; KNL-NEXT: kmovw %k1, 6(%rdi)
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL-NEXT: kmovw %k1, 4(%rdi)
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm0
; KNL-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $14, %r10d, %xmm0, %xmm0
; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vmovd %ecx, %xmm1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm0
; KNL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k1, 2(%rdi) ; KNL-NEXT: kmovw %k0, 2(%rdi)
; KNL-NEXT: kshiftlw $14, %k0, %k1 ; KNL-NEXT: kshiftlw $14, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r8d
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r9d
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r10d
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r11d
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r14d
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r15d
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r12d
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %r13d
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %esi
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ebp
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ebx
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: vmovd %r9d, %xmm0
; KNL-NEXT: kmovw %k1, %r9d
; KNL-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $2, %r10d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $14, %r9d, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $15, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $13, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: vmovd %ecx, %xmm0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $12, %k1, %k0
; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $11, %k1, %k0
; KNL-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $10, %k1, %k0
; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $9, %k1, %k0
; KNL-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $8, %k1, %k0
; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $7, %k1, %k0
; KNL-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $6, %k1, %k0
; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $5, %k1, %k0
; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $4, %k1, %k0
; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $3, %k1, %k0
; KNL-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftlw $2, %k1, %k0
; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $1, %k1, %k0
; KNL-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: kshiftrw $15, %k1, %k0
; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; KNL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, (%rdi) ; KNL-NEXT: kmovw %k0, (%rdi)
; KNL-NEXT: popq %rbx
; KNL-NEXT: popq %r12
; KNL-NEXT: popq %r13
; KNL-NEXT: popq %r14
; KNL-NEXT: popq %r15
; KNL-NEXT: popq %rbp
; KNL-NEXT: retq ; KNL-NEXT: retq
; ;
; SKX-LABEL: store_64i1: ; SKX-LABEL: store_64i1:

View File

@ -8,6 +8,7 @@ define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
; CHECK-NEXT: vcmpleps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vcmpleps %zmm1, %zmm0, %k1
; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq ; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%mask = fcmp ole <16 x float> %x, %y %mask = fcmp ole <16 x float> %x, %y
%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
ret <16 x float> %max ret <16 x float> %max
@ -19,6 +20,7 @@ define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
; CHECK-NEXT: vcmplepd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vcmplepd %zmm1, %zmm0, %k1
; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq ; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%mask = fcmp ole <8 x double> %x, %y %mask = fcmp ole <8 x double> %x, %y
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
ret <8 x double> %max ret <8 x double> %max
@ -30,6 +32,7 @@ define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwin
; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k1 ; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k1
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq ; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%y = load <16 x i32>, <16 x i32>* %yp, align 4 %y = load <16 x i32>, <16 x i32>* %yp, align 4
%mask = icmp eq <16 x i32> %x, %y %mask = icmp eq <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@ -42,6 +45,7 @@ define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1)
; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k1
; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq ; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%mask = icmp uge <16 x i32> %x, %y %mask = icmp uge <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
ret <16 x i32> %max ret <16 x i32> %max
@ -53,6 +57,7 @@ define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq ; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%mask = icmp eq <8 x i64> %x, %y %mask = icmp eq <8 x i64> %x, %y
%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
ret <8 x i64> %max ret <8 x i64> %max
@ -64,6 +69,7 @@ define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) noun
; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1
; CHECK-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq ; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%mask = icmp ugt <8 x i64> %x, %y %mask = icmp ugt <8 x i64> %x, %y
%max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y %max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
ret <8 x i64> %max ret <8 x i64> %max
@ -117,12 +123,14 @@ define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill> ; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq ; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
; ;
; SKX-LABEL: test9: ; SKX-LABEL: test9:
; SKX: ## BB#0: ; SKX: ## BB#0:
; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %k1 ; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %k1
; SKX-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq ; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%mask = icmp eq <8 x i32> %x, %y %mask = icmp eq <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
ret <8 x i32> %max ret <8 x i32> %max
@ -137,12 +145,14 @@ define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill> ; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq ; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
; ;
; SKX-LABEL: test10: ; SKX-LABEL: test10:
; SKX: ## BB#0: ; SKX: ## BB#0:
; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %k1
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq ; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%mask = fcmp oeq <8 x float> %x, %y %mask = fcmp oeq <8 x float> %x, %y
%max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
@ -154,6 +164,7 @@ define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
; CHECK: ## BB#0: ; CHECK: ## BB#0:
; CHECK-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq ; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%mask = icmp ugt <8 x i32> %x, %y %mask = icmp ugt <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
ret <8 x i32> %max ret <8 x i32> %max
@ -168,6 +179,7 @@ define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill> ; KNL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT: retq ; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
; ;
; SKX-LABEL: test12: ; SKX-LABEL: test12:
; SKX: ## BB#0: ; SKX: ## BB#0:
@ -178,6 +190,7 @@ define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
; SKX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill> ; SKX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT: vzeroupper ; SKX-NEXT: vzeroupper
; SKX-NEXT: retq ; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%res = icmp eq <16 x i64> %a, %b %res = icmp eq <16 x i64> %a, %b
%res1 = bitcast <16 x i1> %res to i16 %res1 = bitcast <16 x i1> %res to i16
ret i16 %res1 ret i16 %res1
@ -330,6 +343,7 @@ define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
; KNL-NEXT: movq %rbp, %rsp ; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp ; KNL-NEXT: popq %rbp
; KNL-NEXT: retq ; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
; ;
; SKX-LABEL: test12_v32i32: ; SKX-LABEL: test12_v32i32:
; SKX: ## BB#0: ; SKX: ## BB#0:
@ -339,6 +353,7 @@ define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: vzeroupper ; SKX-NEXT: vzeroupper
; SKX-NEXT: retq ; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%res = icmp eq <32 x i32> %a, %b %res = icmp eq <32 x i32> %a, %b
%res1 = bitcast <32 x i1> %res to i32 %res1 = bitcast <32 x i1> %res to i32
ret i32 %res1 ret i32 %res1
@ -562,72 +577,72 @@ define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: vpcmpeqw %ymm6, %ymm2, %ymm1
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
; KNL-NEXT: vpcmpeqw %ymm6, %ymm2, %ymm0 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1 ; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1 ; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vmovd %ecx, %xmm0 ; KNL-NEXT: vmovd %ecx, %xmm1
; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $13, %k0, %k1 ; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $12, %k0, %k1 ; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $11, %k0, %k1 ; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $10, %k0, %k1 ; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $9, %k0, %k1 ; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $8, %k0, %k1 ; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $7, %k0, %k1 ; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $6, %k0, %k1 ; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $5, %k0, %k1 ; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $4, %k0, %k1 ; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $3, %k0, %k1 ; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $2, %k0, %k1 ; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $1, %k0, %k1 ; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm0
; KNL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; KNL-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@ -642,6 +657,7 @@ define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
; KNL-NEXT: movq %rbp, %rsp ; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp ; KNL-NEXT: popq %rbp
; KNL-NEXT: retq ; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
; ;
; SKX-LABEL: test12_v64i16: ; SKX-LABEL: test12_v64i16:
; SKX: ## BB#0: ; SKX: ## BB#0:
@ -651,6 +667,7 @@ define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
; SKX-NEXT: kmovq %k0, %rax ; SKX-NEXT: kmovq %k0, %rax
; SKX-NEXT: vzeroupper ; SKX-NEXT: vzeroupper
; SKX-NEXT: retq ; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%res = icmp eq <64 x i16> %a, %b %res = icmp eq <64 x i16> %a, %b
%res1 = bitcast <64 x i1> %res to i64 %res1 = bitcast <64 x i1> %res to i64
ret i64 %res1 ret i64 %res1
@ -704,6 +721,7 @@ define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind
; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k1 ; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k1
; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq ; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%mask = icmp sge <16 x i32> %x, %y %mask = icmp sge <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
ret <16 x i32> %max ret <16 x i32> %max
@ -715,6 +733,7 @@ define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nou
; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k1 ; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k1
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq ; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4 %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask = icmp sgt <16 x i32> %x, %y %mask = icmp sgt <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@ -727,6 +746,7 @@ define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nou
; CHECK-NEXT: vpcmpled (%rdi), %zmm0, %k1 ; CHECK-NEXT: vpcmpled (%rdi), %zmm0, %k1
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq ; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4 %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask = icmp sle <16 x i32> %x, %y %mask = icmp sle <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@ -739,6 +759,7 @@ define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nou
; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 ; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq ; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4 %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask = icmp ule <16 x i32> %x, %y %mask = icmp ule <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1 %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@ -752,6 +773,7 @@ define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i3
; CHECK-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1} ; CHECK-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq ; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%mask1 = icmp eq <16 x i32> %x1, %y1 %mask1 = icmp eq <16 x i32> %x1, %y1
%mask0 = icmp eq <16 x i32> %x, %y %mask0 = icmp eq <16 x i32> %x, %y
%mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
@ -766,6 +788,7 @@ define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y
; CHECK-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1} ; CHECK-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1}
; CHECK-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1} ; CHECK-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq ; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%mask1 = icmp sge <8 x i64> %x1, %y1 %mask1 = icmp sge <8 x i64> %x1, %y1
%mask0 = icmp sle <8 x i64> %x, %y %mask0 = icmp sle <8 x i64> %x, %y
%mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
@ -780,6 +803,7 @@ define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i6
; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1} ; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1}
; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq ; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%mask1 = icmp sgt <8 x i64> %x1, %y1 %mask1 = icmp sgt <8 x i64> %x1, %y1
%y = load <8 x i64>, <8 x i64>* %y.ptr, align 4 %y = load <8 x i64>, <8 x i64>* %y.ptr, align 4
%mask0 = icmp sgt <8 x i64> %x, %y %mask0 = icmp sgt <8 x i64> %x, %y
@ -795,6 +819,7 @@ define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16
; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1} ; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1}
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq ; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%mask1 = icmp sge <16 x i32> %x1, %y1 %mask1 = icmp sge <16 x i32> %x1, %y1
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4 %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask0 = icmp ule <16 x i32> %x, %y %mask0 = icmp ule <16 x i32> %x, %y
@ -809,6 +834,7 @@ define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1 ; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1
; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq ; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%yb = load i64, i64* %yb.ptr, align 4 %yb = load i64, i64* %yb.ptr, align 4
%y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0 %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
%y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
@ -823,6 +849,7 @@ define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind
; CHECK-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1 ; CHECK-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq ; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%yb = load i32, i32* %yb.ptr, align 4 %yb = load i32, i32* %yb.ptr, align 4
%y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0 %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
%y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
@ -838,6 +865,7 @@ define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32
; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1} ; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq ; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%mask1 = icmp sge <16 x i32> %x1, %y1 %mask1 = icmp sge <16 x i32> %x1, %y1
%yb = load i32, i32* %yb.ptr, align 4 %yb = load i32, i32* %yb.ptr, align 4
%y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0 %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
@ -855,6 +883,7 @@ define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y
; CHECK-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1} ; CHECK-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1}
; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq ; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%mask1 = icmp sge <8 x i64> %x1, %y1 %mask1 = icmp sge <8 x i64> %x1, %y1
%yb = load i64, i64* %yb.ptr, align 4 %yb = load i64, i64* %yb.ptr, align 4
%y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0 %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
@ -920,12 +949,14 @@ define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind {
; KNL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm2 ; KNL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm2
; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 ; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; KNL-NEXT: retq ; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
; ;
; SKX-LABEL: test30: ; SKX-LABEL: test30:
; SKX: ## BB#0: ; SKX: ## BB#0:
; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq ; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%mask = fcmp oeq <4 x double> %x, %y %mask = fcmp oeq <4 x double> %x, %y
%max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %y %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %y
@ -938,12 +969,14 @@ define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp
; KNL-NEXT: vcmpltpd (%rdi), %xmm0, %xmm2 ; KNL-NEXT: vcmpltpd (%rdi), %xmm0, %xmm2
; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; KNL-NEXT: retq ; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
; ;
; SKX-LABEL: test31: ; SKX-LABEL: test31:
; SKX: ## BB#0: ; SKX: ## BB#0:
; SKX-NEXT: vcmpltpd (%rdi), %xmm0, %k1 ; SKX-NEXT: vcmpltpd (%rdi), %xmm0, %k1
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq ; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%y = load <2 x double>, <2 x double>* %yp, align 4 %y = load <2 x double>, <2 x double>* %yp, align 4
%mask = fcmp olt <2 x double> %x, %y %mask = fcmp olt <2 x double> %x, %y
@ -957,12 +990,14 @@ define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp
; KNL-NEXT: vcmpltpd (%rdi), %ymm0, %ymm2 ; KNL-NEXT: vcmpltpd (%rdi), %ymm0, %ymm2
; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 ; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; KNL-NEXT: retq ; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
; ;
; SKX-LABEL: test32: ; SKX-LABEL: test32:
; SKX: ## BB#0: ; SKX: ## BB#0:
; SKX-NEXT: vcmpltpd (%rdi), %ymm0, %k1 ; SKX-NEXT: vcmpltpd (%rdi), %ymm0, %k1
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq ; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%y = load <4 x double>, <4 x double>* %yp, align 4 %y = load <4 x double>, <4 x double>* %yp, align 4
%mask = fcmp ogt <4 x double> %y, %x %mask = fcmp ogt <4 x double> %y, %x
@ -976,6 +1011,7 @@ define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp
; CHECK-NEXT: vcmpltpd (%rdi), %zmm0, %k1 ; CHECK-NEXT: vcmpltpd (%rdi), %zmm0, %k1
; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq ; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%y = load <8 x double>, <8 x double>* %yp, align 4 %y = load <8 x double>, <8 x double>* %yp, align 4
%mask = fcmp olt <8 x double> %x, %y %mask = fcmp olt <8 x double> %x, %y
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1 %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
@ -988,12 +1024,14 @@ define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) no
; KNL-NEXT: vcmpltps (%rdi), %xmm0, %xmm2 ; KNL-NEXT: vcmpltps (%rdi), %xmm0, %xmm2
; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; KNL-NEXT: retq ; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
; ;
; SKX-LABEL: test34: ; SKX-LABEL: test34:
; SKX: ## BB#0: ; SKX: ## BB#0:
; SKX-NEXT: vcmpltps (%rdi), %xmm0, %k1 ; SKX-NEXT: vcmpltps (%rdi), %xmm0, %k1
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq ; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%y = load <4 x float>, <4 x float>* %yp, align 4 %y = load <4 x float>, <4 x float>* %yp, align 4
%mask = fcmp olt <4 x float> %x, %y %mask = fcmp olt <4 x float> %x, %y
%max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1 %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
@ -1010,12 +1048,14 @@ define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) no
; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill> ; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq ; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
; ;
; SKX-LABEL: test35: ; SKX-LABEL: test35:
; SKX: ## BB#0: ; SKX: ## BB#0:
; SKX-NEXT: vcmpltps (%rdi), %ymm0, %k1 ; SKX-NEXT: vcmpltps (%rdi), %ymm0, %k1
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq ; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%y = load <8 x float>, <8 x float>* %yp, align 4 %y = load <8 x float>, <8 x float>* %yp, align 4
%mask = fcmp ogt <8 x float> %y, %x %mask = fcmp ogt <8 x float> %y, %x
@ -1029,6 +1069,7 @@ define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp
; CHECK-NEXT: vcmpltps (%rdi), %zmm0, %k1 ; CHECK-NEXT: vcmpltps (%rdi), %zmm0, %k1
; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq ; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%y = load <16 x float>, <16 x float>* %yp, align 4 %y = load <16 x float>, <16 x float>* %yp, align 4
%mask = fcmp olt <16 x float> %x, %y %mask = fcmp olt <16 x float> %x, %y
%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1 %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
@ -1041,6 +1082,7 @@ define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, double* %ptr) nou
; CHECK-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 ; CHECK-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1
; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq ; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%a = load double, double* %ptr %a = load double, double* %ptr
%v = insertelement <8 x double> undef, double %a, i32 0 %v = insertelement <8 x double> undef, double %a, i32 0
@ -1058,12 +1100,14 @@ define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nou
; KNL-NEXT: vcmpltpd %ymm2, %ymm0, %ymm2 ; KNL-NEXT: vcmpltpd %ymm2, %ymm0, %ymm2
; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 ; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; KNL-NEXT: retq ; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
; ;
; SKX-LABEL: test38: ; SKX-LABEL: test38:
; SKX: ## BB#0: ; SKX: ## BB#0:
; SKX-NEXT: vcmpltpd (%rdi){1to4}, %ymm0, %k1 ; SKX-NEXT: vcmpltpd (%rdi){1to4}, %ymm0, %k1
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq ; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%a = load double, double* %ptr %a = load double, double* %ptr
%v = insertelement <4 x double> undef, double %a, i32 0 %v = insertelement <4 x double> undef, double %a, i32 0
@ -1081,12 +1125,14 @@ define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nou
; KNL-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2 ; KNL-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2
; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; KNL-NEXT: retq ; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
; ;
; SKX-LABEL: test39: ; SKX-LABEL: test39:
; SKX: ## BB#0: ; SKX: ## BB#0:
; SKX-NEXT: vcmpltpd (%rdi){1to2}, %xmm0, %k1 ; SKX-NEXT: vcmpltpd (%rdi){1to2}, %xmm0, %k1
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq ; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%a = load double, double* %ptr %a = load double, double* %ptr
%v = insertelement <2 x double> undef, double %a, i32 0 %v = insertelement <2 x double> undef, double %a, i32 0
@ -1104,6 +1150,7 @@ define <16 x float> @test40(<16 x float> %x, <16 x float> %x1, float* %ptr) n
; CHECK-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1 ; CHECK-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1
; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq ; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%a = load float, float* %ptr %a = load float, float* %ptr
%v = insertelement <16 x float> undef, float %a, i32 0 %v = insertelement <16 x float> undef, float %a, i32 0
@ -1124,12 +1171,14 @@ define <8 x float> @test41(<8 x float> %x, <8 x float> %x1, float* %ptr) noun
; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill> ; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq ; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
; ;
; SKX-LABEL: test41: ; SKX-LABEL: test41:
; SKX: ## BB#0: ; SKX: ## BB#0:
; SKX-NEXT: vcmpltps (%rdi){1to8}, %ymm0, %k1 ; SKX-NEXT: vcmpltps (%rdi){1to8}, %ymm0, %k1
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq ; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%a = load float, float* %ptr %a = load float, float* %ptr
%v = insertelement <8 x float> undef, float %a, i32 0 %v = insertelement <8 x float> undef, float %a, i32 0
@ -1147,12 +1196,14 @@ define <4 x float> @test42(<4 x float> %x, <4 x float> %x1, float* %ptr) noun
; KNL-NEXT: vcmpltps %xmm2, %xmm0, %xmm2 ; KNL-NEXT: vcmpltps %xmm2, %xmm0, %xmm2
; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; KNL-NEXT: retq ; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
; ;
; SKX-LABEL: test42: ; SKX-LABEL: test42:
; SKX: ## BB#0: ; SKX: ## BB#0:
; SKX-NEXT: vcmpltps (%rdi){1to4}, %xmm0, %k1 ; SKX-NEXT: vcmpltps (%rdi){1to4}, %xmm0, %k1
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq ; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%a = load float, float* %ptr %a = load float, float* %ptr
%v = insertelement <4 x float> undef, float %a, i32 0 %v = insertelement <4 x float> undef, float %a, i32 0
@ -1172,6 +1223,7 @@ define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x
; KNL-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1} ; KNL-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1}
; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: retq ; KNL-NEXT: retq
; KNL-NEXT: ## -- End function
; ;
; SKX-LABEL: test43: ; SKX-LABEL: test43:
; SKX: ## BB#0: ; SKX: ## BB#0:
@ -1180,6 +1232,7 @@ define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x
; SKX-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1} ; SKX-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1}
; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; SKX-NEXT: retq ; SKX-NEXT: retq
; SKX-NEXT: ## -- End function
%a = load double, double* %ptr %a = load double, double* %ptr
%v = insertelement <8 x double> undef, double %a, i32 0 %v = insertelement <8 x double> undef, double %a, i32 0

File diff suppressed because it is too large Load Diff

View File

@ -2695,32 +2695,32 @@ declare <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16>, <16 x i16>, <32
define <8 x i32> @test_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1) { define <8 x i32> @test_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: test_cmp_b_256: ; CHECK-LABEL: test_cmp_b_256:
; CHECK: ## BB#0: ; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0 ## encoding: [0x62,0xf3,0x75,0x28,0x3f,0xc0,0x02]
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
; CHECK-NEXT: vmovd %eax, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
; CHECK-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd1,0x01]
; CHECK-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd2,0x02]
; CHECK-NEXT: kxnord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x46,0xc0]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpinsrd $3, %eax, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd0,0x03]
; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1] ; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
; CHECK-NEXT: kmovd %k0, %r8d ## encoding: [0xc5,0x7b,0x93,0xc0] ; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 ## encoding: [0x62,0xf1,0x75,0x28,0x64,0xc0] ; CHECK-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 ## encoding: [0x62,0xf1,0x75,0x28,0x64,0xc0]
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] ; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
; CHECK-NEXT: vpcmpleb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x02] ; CHECK-NEXT: vpcmpleb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x02]
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] ; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04] ; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0] ; CHECK-NEXT: vmovd %eax, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc8]
; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0 ## encoding: [0x62,0xf3,0x75,0x28,0x3f,0xc0,0x02] ; CHECK-NEXT: vpunpckldq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x62,0xc0]
; CHECK-NEXT: kmovd %k0, %edi ## encoding: [0xc5,0xfb,0x93,0xf8] ; CHECK-NEXT: ## xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1] ; CHECK-NEXT: vmovd %edx, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] ; CHECK-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6c,0xc1]
; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] ; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01] ; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xc2,0x01]
; CHECK-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x02]
; CHECK-NEXT: kxnord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x46,0xc0]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03]
; CHECK-NEXT: vmovd %ecx, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
; CHECK-NEXT: vmovd %r8d, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0]
; CHECK-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
; CHECK-NEXT: ## xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-NEXT: vmovd %edx, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2]
; CHECK-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
; CHECK-NEXT: ## xmm1 = xmm1[0],xmm2[0]
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
; CHECK-NEXT: retq ## encoding: [0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1) %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
%vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
@ -2750,23 +2750,23 @@ define <8 x i32> @test_mask_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
; CHECK-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x64,0xc0] ; CHECK-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x64,0xc0]
; CHECK-NEXT: kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8] ; CHECK-NEXT: kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8]
; CHECK-NEXT: vpcmpleb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x02] ; CHECK-NEXT: vpcmpleb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x02]
; CHECK-NEXT: kmovd %k0, %r10d ## encoding: [0xc5,0x7b,0x93,0xd0]
; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0]
; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x3f,0xc0,0x02]
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1]
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] ; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
; CHECK-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] ; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x01] ; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
; CHECK-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02] ; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x3f,0xc0,0x02]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1]
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
; CHECK-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01]
; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02]
; CHECK-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03] ; CHECK-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
; CHECK-NEXT: vmovd %r8d, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc8] ; CHECK-NEXT: vmovd %r8d, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc8]
; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01] ; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01]
; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02] ; CHECK-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xca,0x02]
; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03] ; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xc8,0x03]
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01] ; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
; CHECK-NEXT: retq ## encoding: [0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask) %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
@ -2793,32 +2793,32 @@ declare i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8>, <32 x i8>, i32, i32) noun
define <8 x i32> @test_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1) { define <8 x i32> @test_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: test_ucmp_b_256: ; CHECK-LABEL: test_ucmp_b_256:
; CHECK: ## BB#0: ; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x05]
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x06]
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
; CHECK-NEXT: vmovd %eax, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
; CHECK-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd1,0x01]
; CHECK-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd2,0x02]
; CHECK-NEXT: kxnord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x46,0xc0]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpinsrd $3, %eax, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd0,0x03]
; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1] ; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
; CHECK-NEXT: kmovd %k0, %r8d ## encoding: [0xc5,0x7b,0x93,0xc0] ; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpcmpltub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x01] ; CHECK-NEXT: vpcmpltub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x01]
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] ; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
; CHECK-NEXT: vpcmpleub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x02] ; CHECK-NEXT: vpcmpleub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x02]
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] ; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04] ; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0] ; CHECK-NEXT: vmovd %eax, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc8]
; CHECK-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x05] ; CHECK-NEXT: vpunpckldq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x62,0xc0]
; CHECK-NEXT: kmovd %k0, %edi ## encoding: [0xc5,0xfb,0x93,0xf8] ; CHECK-NEXT: ## xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x06] ; CHECK-NEXT: vmovd %edx, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] ; CHECK-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6c,0xc1]
; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] ; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01] ; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xc2,0x01]
; CHECK-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x02]
; CHECK-NEXT: kxnord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x46,0xc0]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03]
; CHECK-NEXT: vmovd %ecx, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
; CHECK-NEXT: vmovd %r8d, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0]
; CHECK-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
; CHECK-NEXT: ## xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-NEXT: vmovd %edx, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2]
; CHECK-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
; CHECK-NEXT: ## xmm1 = xmm1[0],xmm2[0]
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
; CHECK-NEXT: retq ## encoding: [0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1) %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
%vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
@ -2848,23 +2848,23 @@ define <8 x i32> @test_mask_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask)
; CHECK-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x01] ; CHECK-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x01]
; CHECK-NEXT: kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8] ; CHECK-NEXT: kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8]
; CHECK-NEXT: vpcmpleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x02] ; CHECK-NEXT: vpcmpleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x02]
; CHECK-NEXT: kmovd %k0, %r10d ## encoding: [0xc5,0x7b,0x93,0xd0]
; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0]
; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05]
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06]
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] ; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
; CHECK-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] ; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x01] ; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
; CHECK-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02] ; CHECK-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06]
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
; CHECK-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01]
; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02]
; CHECK-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03] ; CHECK-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
; CHECK-NEXT: vmovd %r8d, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc8] ; CHECK-NEXT: vmovd %r8d, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc8]
; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01] ; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01]
; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02] ; CHECK-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xca,0x02]
; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03] ; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xc8,0x03]
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01] ; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
; CHECK-NEXT: retq ## encoding: [0xc3] ; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask) %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)

View File

@ -453,10 +453,10 @@ define i32 @v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i8> %d) {
; SSE2-SSSE3-NEXT: pcmpgtb %xmm2, %xmm0 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpgtb %xmm3, %xmm1 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm3, %xmm1
; SSE2-SSSE3-NEXT: pcmpgtb %xmm6, %xmm4 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm6, %xmm4
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4
; SSE2-SSSE3-NEXT: pcmpgtb %xmm7, %xmm5 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm7, %xmm5
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm5 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm5
; SSE2-SSSE3-NEXT: movdqa %xmm5, -{{[0-9]+}}(%rsp) ; SSE2-SSSE3-NEXT: movdqa %xmm5, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4
; SSE2-SSSE3-NEXT: movdqa %xmm4, -{{[0-9]+}}(%rsp) ; SSE2-SSSE3-NEXT: movdqa %xmm4, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al ; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT: andb $1, %al ; SSE2-SSSE3-NEXT: andb $1, %al

View File

@ -1,3 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple i386-apple-darwin -mcpu=yonah | FileCheck %s ; RUN: llc < %s -mtriple i386-apple-darwin -mcpu=yonah | FileCheck %s
target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128" target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
@ -6,31 +7,32 @@ target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
; into loads, off the stack or a previous store. ; into loads, off the stack or a previous store.
; Be very explicit about the ordering/stack offsets. ; Be very explicit about the ordering/stack offsets.
; CHECK-LABEL: test_extractelement_legalization_storereuse:
; CHECK: # BB#0
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: movl 16(%esp), %eax
; CHECK-NEXT: movl 24(%esp), %ecx
; CHECK-NEXT: movl 20(%esp), %edx
; CHECK-NEXT: paddd (%edx), %xmm0
; CHECK-NEXT: movdqa %xmm0, (%edx)
; CHECK-NEXT: movl (%edx), %esi
; CHECK-NEXT: movl 4(%edx), %edi
; CHECK-NEXT: shll $4, %ecx
; CHECK-NEXT: movl 8(%edx), %ebx
; CHECK-NEXT: movl 12(%edx), %edx
; CHECK-NEXT: movl %esi, 12(%eax,%ecx)
; CHECK-NEXT: movl %edi, (%eax,%ecx)
; CHECK-NEXT: movl %ebx, 8(%eax,%ecx)
; CHECK-NEXT: movl %edx, 4(%eax,%ecx)
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
; CHECK-NEXT: popl %ebx
; CHECK-NEXT: retl
define void @test_extractelement_legalization_storereuse(<4 x i32> %a, i32* nocapture %x, i32* nocapture readonly %y, i32 %i) #0 { define void @test_extractelement_legalization_storereuse(<4 x i32> %a, i32* nocapture %x, i32* nocapture readonly %y, i32 %i) #0 {
; CHECK-LABEL: _test_extractelement_legalization_storereuse: ## @test_extractelement_legalization_storereuse
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: paddd (%ecx), %xmm0
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: movdqa %xmm0, (%ecx)
; CHECK-NEXT: movl (%ecx), %esi
; CHECK-NEXT: movl 4(%ecx), %edi
; CHECK-NEXT: shll $4, %edx
; CHECK-NEXT: movl 8(%ecx), %ebx
; CHECK-NEXT: movl 12(%ecx), %ecx
; CHECK-NEXT: movl %esi, 12(%eax,%edx)
; CHECK-NEXT: movl %edi, (%eax,%edx)
; CHECK-NEXT: movl %ebx, 8(%eax,%edx)
; CHECK-NEXT: movl %ecx, 4(%eax,%edx)
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
; CHECK-NEXT: popl %ebx
; CHECK-NEXT: retl
; CHECK-NEXT: ## -- End function
entry: entry:
%0 = bitcast i32* %y to <4 x i32>* %0 = bitcast i32* %y to <4 x i32>*
%1 = load <4 x i32>, <4 x i32>* %0, align 16 %1 = load <4 x i32>, <4 x i32>* %0, align 16

View File

@ -50,8 +50,8 @@ define void @TestUnionLD1(fp128 %s, i64 %n) #0 {
; CHECK-NEXT: andq %rdi, %rcx ; CHECK-NEXT: andq %rdi, %rcx
; CHECK-NEXT: movabsq $-281474976710656, %rdx # imm = 0xFFFF000000000000 ; CHECK-NEXT: movabsq $-281474976710656, %rdx # imm = 0xFFFF000000000000
; CHECK-NEXT: andq -{{[0-9]+}}(%rsp), %rdx ; CHECK-NEXT: andq -{{[0-9]+}}(%rsp), %rdx
; CHECK-NEXT: orq %rcx, %rdx
; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: orq %rcx, %rdx
; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 ; CHECK-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
; CHECK-NEXT: jmp foo # TAILCALL ; CHECK-NEXT: jmp foo # TAILCALL

View File

@ -16,11 +16,10 @@
; LIN: sarq $32, %r[[REG2]] ; LIN: sarq $32, %r[[REG2]]
; LIN: movslq %e[[REG4]], %r[[REG3:.+]] ; LIN: movslq %e[[REG4]], %r[[REG3:.+]]
; LIN: sarq $32, %r[[REG4]] ; LIN: sarq $32, %r[[REG4]]
; LIN: movsd (%rdi,%r[[REG1]],8), %xmm0 ; LIN: movsd (%rdi,%rsi,8), %xmm1
; LIN: movhpd (%rdi,%r[[REG2]],8), %xmm0 ; LIN: movhpd (%rdi,%rax,8), %xmm1
; LIN: movsd (%rdi,%r[[REG3]],8), %xmm1 ; LIN: movdqa (%rsi), %xmm0
; LIN: movhpd (%rdi,%r[[REG4]],8), %xmm1 ; LIN: movq %rdi, %xmm1
; WIN: movdqa (%rdx), %xmm0 ; WIN: movdqa (%rdx), %xmm0
; WIN: pand (%r8), %xmm0 ; WIN: pand (%r8), %xmm0
; WIN: pextrq $1, %xmm0, %r[[REG4:.+]] ; WIN: pextrq $1, %xmm0, %r[[REG4:.+]]
@ -29,10 +28,10 @@
; WIN: sarq $32, %r[[REG2]] ; WIN: sarq $32, %r[[REG2]]
; WIN: movslq %e[[REG4]], %r[[REG3:.+]] ; WIN: movslq %e[[REG4]], %r[[REG3:.+]]
; WIN: sarq $32, %r[[REG4]] ; WIN: sarq $32, %r[[REG4]]
; WIN: movsd (%rcx,%r[[REG1]],8), %xmm0 ; WIN: movsd (%rcx,%r9,8), %xmm1
; WIN: movhpd (%rcx,%r[[REG2]],8), %xmm0 ; WIN: movhpd (%rcx,%rax,8), %xmm1
; WIN: movsd (%rcx,%r[[REG3]],8), %xmm1 ; WIN: movdqa (%rdx), %xmm0
; WIN: movhpd (%rcx,%r[[REG4]],8), %xmm1 ; WIN: movq %rdx, %xmm1
define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind { define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
%a = load <4 x i32>, <4 x i32>* %i %a = load <4 x i32>, <4 x i32>* %i

File diff suppressed because it is too large Load Diff

View File

@ -112,23 +112,23 @@ define void @i56_and_or(i56* %a) {
define void @i56_insert_bit(i56* %a, i1 zeroext %bit) { define void @i56_insert_bit(i56* %a, i1 zeroext %bit) {
; CHECK-LABEL: i56_insert_bit: ; CHECK-LABEL: i56_insert_bit:
; CHECK: # BB#0: ; CHECK: # BB#0:
; CHECK-NEXT: movzbl %sil, %eax ; CHECK-NEXT: movzwl 4(%rdi), %eax
; CHECK-NEXT: movzwl 4(%rdi), %ecx ; CHECK-NEXT: movzbl 6(%rdi), %ecx
; CHECK-NEXT: movzbl 6(%rdi), %edx ; CHECK-NEXT: movl (%rdi), %edx
; CHECK-NEXT: movl (%rdi), %esi ; CHECK-NEXT: movb %cl, 6(%rdi)
; CHECK-NEXT: movb %dl, 6(%rdi) ; CHECK-NEXT: movzbl %sil, %esi
; CHECK-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<kill> %RDX<def> ; CHECK-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<kill> %RCX<def>
; CHECK-NEXT: shll $16, %edx ; CHECK-NEXT: shll $16, %ecx
; CHECK-NEXT: orl %ecx, %edx ; CHECK-NEXT: orl %eax, %ecx
; CHECK-NEXT: shlq $32, %rdx ; CHECK-NEXT: shlq $32, %rcx
; CHECK-NEXT: orq %rdx, %rsi ; CHECK-NEXT: orq %rcx, %rdx
; CHECK-NEXT: shlq $13, %rax ; CHECK-NEXT: shlq $13, %rsi
; CHECK-NEXT: movabsq $72057594037919743, %rcx # imm = 0xFFFFFFFFFFDFFF ; CHECK-NEXT: movabsq $72057594037919743, %rax # imm = 0xFFFFFFFFFFDFFF
; CHECK-NEXT: andq %rsi, %rcx ; CHECK-NEXT: andq %rdx, %rax
; CHECK-NEXT: orq %rax, %rcx ; CHECK-NEXT: orq %rsi, %rax
; CHECK-NEXT: movl %ecx, (%rdi) ; CHECK-NEXT: movl %eax, (%rdi)
; CHECK-NEXT: shrq $32, %rcx ; CHECK-NEXT: shrq $32, %rax
; CHECK-NEXT: movw %cx, 4(%rdi) ; CHECK-NEXT: movw %ax, 4(%rdi)
; CHECK-NEXT: retq ; CHECK-NEXT: retq
%extbit = zext i1 %bit to i56 %extbit = zext i1 %bit to i56
%b = load i56, i56* %a, align 1 %b = load i56, i56* %a, align 1

View File

@ -17,7 +17,7 @@ define i32 @test_mul_by_1(i32 %x) {
; X64-HSW-LABEL: test_mul_by_1: ; X64-HSW-LABEL: test_mul_by_1:
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] ; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_1: ; X64-JAG-LABEL: test_mul_by_1:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -32,7 +32,7 @@ define i32 @test_mul_by_1(i32 %x) {
; HSW-NOOPT-LABEL: test_mul_by_1: ; HSW-NOOPT-LABEL: test_mul_by_1:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25] ; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_1: ; JAG-NOOPT-LABEL: test_mul_by_1:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -63,7 +63,7 @@ define i32 @test_mul_by_2(i32 %x) {
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_2: ; X64-JAG-LABEL: test_mul_by_2:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -81,7 +81,7 @@ define i32 @test_mul_by_2(i32 %x) {
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HSW-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50] ; HSW-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_2: ; JAG-NOOPT-LABEL: test_mul_by_2:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -114,7 +114,7 @@ define i32 @test_mul_by_3(i32 %x) {
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_3: ; X64-JAG-LABEL: test_mul_by_3:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -131,7 +131,7 @@ define i32 @test_mul_by_3(i32 %x) {
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HSW-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] ; HSW-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_3: ; JAG-NOOPT-LABEL: test_mul_by_3:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -165,7 +165,7 @@ define i32 @test_mul_by_4(i32 %x) {
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_4: ; X64-JAG-LABEL: test_mul_by_4:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -183,7 +183,7 @@ define i32 @test_mul_by_4(i32 %x) {
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HSW-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50] ; HSW-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_4: ; JAG-NOOPT-LABEL: test_mul_by_4:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -216,7 +216,7 @@ define i32 @test_mul_by_5(i32 %x) {
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_5: ; X64-JAG-LABEL: test_mul_by_5:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -233,7 +233,7 @@ define i32 @test_mul_by_5(i32 %x) {
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] ; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_5: ; JAG-NOOPT-LABEL: test_mul_by_5:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -269,7 +269,7 @@ define i32 @test_mul_by_6(i32 %x) {
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25] ; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25]
; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_6: ; X64-JAG-LABEL: test_mul_by_6:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -285,8 +285,8 @@ define i32 @test_mul_by_6(i32 %x) {
; ;
; HSW-NOOPT-LABEL: test_mul_by_6: ; HSW-NOOPT-LABEL: test_mul_by_6:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $6, %edi, %eax # sched: [4:1.00] ; HSW-NOOPT-NEXT: imull $6, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_6: ; JAG-NOOPT-LABEL: test_mul_by_6:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -321,7 +321,7 @@ define i32 @test_mul_by_7(i32 %x) {
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] ; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_7: ; X64-JAG-LABEL: test_mul_by_7:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -337,8 +337,8 @@ define i32 @test_mul_by_7(i32 %x) {
; ;
; HSW-NOOPT-LABEL: test_mul_by_7: ; HSW-NOOPT-LABEL: test_mul_by_7:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $7, %edi, %eax # sched: [4:1.00] ; HSW-NOOPT-NEXT: imull $7, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_7: ; JAG-NOOPT-LABEL: test_mul_by_7:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -371,7 +371,7 @@ define i32 @test_mul_by_8(i32 %x) {
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_8: ; X64-JAG-LABEL: test_mul_by_8:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -389,7 +389,7 @@ define i32 @test_mul_by_8(i32 %x) {
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HSW-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] ; HSW-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_8: ; JAG-NOOPT-LABEL: test_mul_by_8:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -422,7 +422,7 @@ define i32 @test_mul_by_9(i32 %x) {
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_9: ; X64-JAG-LABEL: test_mul_by_9:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -439,7 +439,7 @@ define i32 @test_mul_by_9(i32 %x) {
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; HSW-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] ; HSW-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_9: ; JAG-NOOPT-LABEL: test_mul_by_9:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -475,7 +475,7 @@ define i32 @test_mul_by_10(i32 %x) {
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25] ; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25]
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_10: ; X64-JAG-LABEL: test_mul_by_10:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -491,8 +491,8 @@ define i32 @test_mul_by_10(i32 %x) {
; ;
; HSW-NOOPT-LABEL: test_mul_by_10: ; HSW-NOOPT-LABEL: test_mul_by_10:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $10, %edi, %eax # sched: [4:1.00] ; HSW-NOOPT-NEXT: imull $10, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_10: ; JAG-NOOPT-LABEL: test_mul_by_10:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -527,7 +527,7 @@ define i32 @test_mul_by_11(i32 %x) {
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: leal (%rdi,%rax,2), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rax,2), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_11: ; X64-JAG-LABEL: test_mul_by_11:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -543,8 +543,8 @@ define i32 @test_mul_by_11(i32 %x) {
; ;
; HSW-NOOPT-LABEL: test_mul_by_11: ; HSW-NOOPT-LABEL: test_mul_by_11:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $11, %edi, %eax # sched: [4:1.00] ; HSW-NOOPT-NEXT: imull $11, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_11: ; JAG-NOOPT-LABEL: test_mul_by_11:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -575,9 +575,9 @@ define i32 @test_mul_by_12(i32 %x) {
; X64-HSW-LABEL: test_mul_by_12: ; X64-HSW-LABEL: test_mul_by_12:
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50] ; X64-HSW-NEXT: shll $2, %edi # sched: [1:1.00]
; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_12: ; X64-JAG-LABEL: test_mul_by_12:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -593,8 +593,8 @@ define i32 @test_mul_by_12(i32 %x) {
; ;
; HSW-NOOPT-LABEL: test_mul_by_12: ; HSW-NOOPT-LABEL: test_mul_by_12:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $12, %edi, %eax # sched: [4:1.00] ; HSW-NOOPT-NEXT: imull $12, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_12: ; JAG-NOOPT-LABEL: test_mul_by_12:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -629,7 +629,7 @@ define i32 @test_mul_by_13(i32 %x) {
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_13: ; X64-JAG-LABEL: test_mul_by_13:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -645,8 +645,8 @@ define i32 @test_mul_by_13(i32 %x) {
; ;
; HSW-NOOPT-LABEL: test_mul_by_13: ; HSW-NOOPT-LABEL: test_mul_by_13:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $13, %edi, %eax # sched: [4:1.00] ; HSW-NOOPT-NEXT: imull $13, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_13: ; JAG-NOOPT-LABEL: test_mul_by_13:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -681,7 +681,7 @@ define i32 @test_mul_by_14(i32 %x) {
; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] ; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_14: ; X64-JAG-LABEL: test_mul_by_14:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -698,8 +698,8 @@ define i32 @test_mul_by_14(i32 %x) {
; ;
; HSW-NOOPT-LABEL: test_mul_by_14: ; HSW-NOOPT-LABEL: test_mul_by_14:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $14, %edi, %eax # sched: [4:1.00] ; HSW-NOOPT-NEXT: imull $14, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_14: ; JAG-NOOPT-LABEL: test_mul_by_14:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -732,7 +732,7 @@ define i32 @test_mul_by_15(i32 %x) {
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_15: ; X64-JAG-LABEL: test_mul_by_15:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -748,8 +748,8 @@ define i32 @test_mul_by_15(i32 %x) {
; ;
; HSW-NOOPT-LABEL: test_mul_by_15: ; HSW-NOOPT-LABEL: test_mul_by_15:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $15, %edi, %eax # sched: [4:1.00] ; HSW-NOOPT-NEXT: imull $15, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_15: ; JAG-NOOPT-LABEL: test_mul_by_15:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -780,9 +780,9 @@ define i32 @test_mul_by_16(i32 %x) {
; ;
; X64-HSW-LABEL: test_mul_by_16: ; X64-HSW-LABEL: test_mul_by_16:
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: shll $4, %edi # sched: [1:0.50] ; X64-HSW-NEXT: shll $4, %edi # sched: [1:1.00]
; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] ; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_16: ; X64-JAG-LABEL: test_mul_by_16:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -798,9 +798,9 @@ define i32 @test_mul_by_16(i32 %x) {
; ;
; HSW-NOOPT-LABEL: test_mul_by_16: ; HSW-NOOPT-LABEL: test_mul_by_16:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: shll $4, %edi # sched: [1:0.50] ; HSW-NOOPT-NEXT: shll $4, %edi # sched: [1:1.00]
; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25] ; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_16: ; JAG-NOOPT-LABEL: test_mul_by_16:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -836,9 +836,9 @@ define i32 @test_mul_by_17(i32 %x) {
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] ; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: shll $4, %eax # sched: [1:0.50] ; X64-HSW-NEXT: shll $4, %eax # sched: [1:1.00]
; X64-HSW-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_17: ; X64-JAG-LABEL: test_mul_by_17:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -855,8 +855,8 @@ define i32 @test_mul_by_17(i32 %x) {
; ;
; HSW-NOOPT-LABEL: test_mul_by_17: ; HSW-NOOPT-LABEL: test_mul_by_17:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $17, %edi, %eax # sched: [4:1.00] ; HSW-NOOPT-NEXT: imull $17, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_17: ; JAG-NOOPT-LABEL: test_mul_by_17:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -892,7 +892,7 @@ define i32 @test_mul_by_18(i32 %x) {
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25] ; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25]
; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_18: ; X64-JAG-LABEL: test_mul_by_18:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -908,8 +908,8 @@ define i32 @test_mul_by_18(i32 %x) {
; ;
; HSW-NOOPT-LABEL: test_mul_by_18: ; HSW-NOOPT-LABEL: test_mul_by_18:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $18, %edi, %eax # sched: [4:1.00] ; HSW-NOOPT-NEXT: imull $18, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_18: ; JAG-NOOPT-LABEL: test_mul_by_18:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -944,9 +944,9 @@ define i32 @test_mul_by_19(i32 %x) {
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: shll $2, %eax # sched: [1:0.50] ; X64-HSW-NEXT: shll $2, %eax # sched: [1:1.00]
; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] ; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_19: ; X64-JAG-LABEL: test_mul_by_19:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -963,8 +963,8 @@ define i32 @test_mul_by_19(i32 %x) {
; ;
; HSW-NOOPT-LABEL: test_mul_by_19: ; HSW-NOOPT-LABEL: test_mul_by_19:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $19, %edi, %eax # sched: [4:1.00] ; HSW-NOOPT-NEXT: imull $19, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_19: ; JAG-NOOPT-LABEL: test_mul_by_19:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -995,9 +995,9 @@ define i32 @test_mul_by_20(i32 %x) {
; X64-HSW-LABEL: test_mul_by_20: ; X64-HSW-LABEL: test_mul_by_20:
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50] ; X64-HSW-NEXT: shll $2, %edi # sched: [1:1.00]
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_20: ; X64-JAG-LABEL: test_mul_by_20:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -1013,8 +1013,8 @@ define i32 @test_mul_by_20(i32 %x) {
; ;
; HSW-NOOPT-LABEL: test_mul_by_20: ; HSW-NOOPT-LABEL: test_mul_by_20:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $20, %edi, %eax # sched: [4:1.00] ; HSW-NOOPT-NEXT: imull $20, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_20: ; JAG-NOOPT-LABEL: test_mul_by_20:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -1049,7 +1049,7 @@ define i32 @test_mul_by_21(i32 %x) {
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_21: ; X64-JAG-LABEL: test_mul_by_21:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -1065,8 +1065,8 @@ define i32 @test_mul_by_21(i32 %x) {
; ;
; HSW-NOOPT-LABEL: test_mul_by_21: ; HSW-NOOPT-LABEL: test_mul_by_21:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $21, %edi, %eax # sched: [4:1.00] ; HSW-NOOPT-NEXT: imull $21, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_21: ; JAG-NOOPT-LABEL: test_mul_by_21:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -1101,7 +1101,7 @@ define i32 @test_mul_by_22(i32 %x) {
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] ; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_22: ; X64-JAG-LABEL: test_mul_by_22:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -1118,8 +1118,8 @@ define i32 @test_mul_by_22(i32 %x) {
; ;
; HSW-NOOPT-LABEL: test_mul_by_22: ; HSW-NOOPT-LABEL: test_mul_by_22:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $22, %edi, %eax # sched: [4:1.00] ; HSW-NOOPT-NEXT: imull $22, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_22: ; JAG-NOOPT-LABEL: test_mul_by_22:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -1152,9 +1152,9 @@ define i32 @test_mul_by_23(i32 %x) {
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; X64-HSW-NEXT: shll $3, %eax # sched: [1:0.50] ; X64-HSW-NEXT: shll $3, %eax # sched: [1:1.00]
; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] ; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_23: ; X64-JAG-LABEL: test_mul_by_23:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -1171,8 +1171,8 @@ define i32 @test_mul_by_23(i32 %x) {
; ;
; HSW-NOOPT-LABEL: test_mul_by_23: ; HSW-NOOPT-LABEL: test_mul_by_23:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $23, %edi, %eax # sched: [4:1.00] ; HSW-NOOPT-NEXT: imull $23, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_23: ; JAG-NOOPT-LABEL: test_mul_by_23:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -1203,9 +1203,9 @@ define i32 @test_mul_by_24(i32 %x) {
; X64-HSW-LABEL: test_mul_by_24: ; X64-HSW-LABEL: test_mul_by_24:
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: shll $3, %edi # sched: [1:0.50] ; X64-HSW-NEXT: shll $3, %edi # sched: [1:1.00]
; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_24: ; X64-JAG-LABEL: test_mul_by_24:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -1221,8 +1221,8 @@ define i32 @test_mul_by_24(i32 %x) {
; ;
; HSW-NOOPT-LABEL: test_mul_by_24: ; HSW-NOOPT-LABEL: test_mul_by_24:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $24, %edi, %eax # sched: [4:1.00] ; HSW-NOOPT-NEXT: imull $24, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_24: ; JAG-NOOPT-LABEL: test_mul_by_24:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -1257,7 +1257,7 @@ define i32 @test_mul_by_25(i32 %x) {
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: leal (%rax,%rax,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rax,%rax,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_25: ; X64-JAG-LABEL: test_mul_by_25:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -1273,8 +1273,8 @@ define i32 @test_mul_by_25(i32 %x) {
; ;
; HSW-NOOPT-LABEL: test_mul_by_25: ; HSW-NOOPT-LABEL: test_mul_by_25:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $25, %edi, %eax # sched: [4:1.00] ; HSW-NOOPT-NEXT: imull $25, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_25: ; JAG-NOOPT-LABEL: test_mul_by_25:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -1311,7 +1311,7 @@ define i32 @test_mul_by_26(i32 %x) {
; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] ; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_26: ; X64-JAG-LABEL: test_mul_by_26:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -1328,8 +1328,8 @@ define i32 @test_mul_by_26(i32 %x) {
; ;
; HSW-NOOPT-LABEL: test_mul_by_26: ; HSW-NOOPT-LABEL: test_mul_by_26:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $26, %edi, %eax # sched: [4:1.00] ; HSW-NOOPT-NEXT: imull $26, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_26: ; JAG-NOOPT-LABEL: test_mul_by_26:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -1362,7 +1362,7 @@ define i32 @test_mul_by_27(i32 %x) {
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> ; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_27: ; X64-JAG-LABEL: test_mul_by_27:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -1378,8 +1378,8 @@ define i32 @test_mul_by_27(i32 %x) {
; ;
; HSW-NOOPT-LABEL: test_mul_by_27: ; HSW-NOOPT-LABEL: test_mul_by_27:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $27, %edi, %eax # sched: [4:1.00] ; HSW-NOOPT-NEXT: imull $27, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_27: ; JAG-NOOPT-LABEL: test_mul_by_27:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -1416,7 +1416,7 @@ define i32 @test_mul_by_28(i32 %x) {
; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] ; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_28: ; X64-JAG-LABEL: test_mul_by_28:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -1433,8 +1433,8 @@ define i32 @test_mul_by_28(i32 %x) {
; ;
; HSW-NOOPT-LABEL: test_mul_by_28: ; HSW-NOOPT-LABEL: test_mul_by_28:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $28, %edi, %eax # sched: [4:1.00] ; HSW-NOOPT-NEXT: imull $28, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_28: ; JAG-NOOPT-LABEL: test_mul_by_28:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -1471,7 +1471,7 @@ define i32 @test_mul_by_29(i32 %x) {
; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] ; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] ; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_29: ; X64-JAG-LABEL: test_mul_by_29:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -1489,8 +1489,8 @@ define i32 @test_mul_by_29(i32 %x) {
; ;
; HSW-NOOPT-LABEL: test_mul_by_29: ; HSW-NOOPT-LABEL: test_mul_by_29:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $29, %edi, %eax # sched: [4:1.00] ; HSW-NOOPT-NEXT: imull $29, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_29: ; JAG-NOOPT-LABEL: test_mul_by_29:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -1523,10 +1523,10 @@ define i32 @test_mul_by_30(i32 %x) {
; X64-HSW-LABEL: test_mul_by_30: ; X64-HSW-LABEL: test_mul_by_30:
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] ; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50] ; X64-HSW-NEXT: shll $5, %eax # sched: [1:1.00]
; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] ; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] ; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_30: ; X64-JAG-LABEL: test_mul_by_30:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -1543,8 +1543,8 @@ define i32 @test_mul_by_30(i32 %x) {
; ;
; HSW-NOOPT-LABEL: test_mul_by_30: ; HSW-NOOPT-LABEL: test_mul_by_30:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $30, %edi, %eax # sched: [4:1.00] ; HSW-NOOPT-NEXT: imull $30, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_30: ; JAG-NOOPT-LABEL: test_mul_by_30:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -1576,9 +1576,9 @@ define i32 @test_mul_by_31(i32 %x) {
; X64-HSW-LABEL: test_mul_by_31: ; X64-HSW-LABEL: test_mul_by_31:
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] ; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50] ; X64-HSW-NEXT: shll $5, %eax # sched: [1:1.00]
; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] ; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_31: ; X64-JAG-LABEL: test_mul_by_31:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -1594,8 +1594,8 @@ define i32 @test_mul_by_31(i32 %x) {
; ;
; HSW-NOOPT-LABEL: test_mul_by_31: ; HSW-NOOPT-LABEL: test_mul_by_31:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imull $31, %edi, %eax # sched: [4:1.00] ; HSW-NOOPT-NEXT: imull $31, %edi, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_31: ; JAG-NOOPT-LABEL: test_mul_by_31:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -1626,9 +1626,9 @@ define i32 @test_mul_by_32(i32 %x) {
; ;
; X64-HSW-LABEL: test_mul_by_32: ; X64-HSW-LABEL: test_mul_by_32:
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: shll $5, %edi # sched: [1:0.50] ; X64-HSW-NEXT: shll $5, %edi # sched: [1:1.00]
; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] ; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_32: ; X64-JAG-LABEL: test_mul_by_32:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -1644,9 +1644,9 @@ define i32 @test_mul_by_32(i32 %x) {
; ;
; HSW-NOOPT-LABEL: test_mul_by_32: ; HSW-NOOPT-LABEL: test_mul_by_32:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: shll $5, %edi # sched: [1:0.50] ; HSW-NOOPT-NEXT: shll $5, %edi # sched: [1:1.00]
; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25] ; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_32: ; JAG-NOOPT-LABEL: test_mul_by_32:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -1686,8 +1686,8 @@ define i32 @test_mul_spec(i32 %x) nounwind {
; X64-HSW-NEXT: addl $42, %ecx # sched: [1:0.25] ; X64-HSW-NEXT: addl $42, %ecx # sched: [1:0.25]
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
; X64-HSW-NEXT: addl $2, %eax # sched: [1:0.25] ; X64-HSW-NEXT: addl $2, %eax # sched: [1:0.25]
; X64-HSW-NEXT: imull %ecx, %eax # sched: [4:1.00] ; X64-HSW-NEXT: imull %ecx, %eax # sched: [3:1.00]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_spec: ; X64-JAG-LABEL: test_mul_spec:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -1712,8 +1712,8 @@ define i32 @test_mul_spec(i32 %x) nounwind {
; HSW-NOOPT-NEXT: addl $42, %ecx # sched: [1:0.25] ; HSW-NOOPT-NEXT: addl $42, %ecx # sched: [1:0.25]
; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] ; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
; HSW-NOOPT-NEXT: addl $2, %eax # sched: [1:0.25] ; HSW-NOOPT-NEXT: addl $2, %eax # sched: [1:0.25]
; HSW-NOOPT-NEXT: imull %ecx, %eax # sched: [4:1.00] ; HSW-NOOPT-NEXT: imull %ecx, %eax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_spec: ; JAG-NOOPT-LABEL: test_mul_spec:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:

View File

@ -18,7 +18,7 @@ define i64 @test_mul_by_1(i64 %x) nounwind {
; X64-HSW-LABEL: test_mul_by_1: ; X64-HSW-LABEL: test_mul_by_1:
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] ; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_1: ; X64-JAG-LABEL: test_mul_by_1:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -34,7 +34,7 @@ define i64 @test_mul_by_1(i64 %x) nounwind {
; HSW-NOOPT-LABEL: test_mul_by_1: ; HSW-NOOPT-LABEL: test_mul_by_1:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25] ; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_1: ; JAG-NOOPT-LABEL: test_mul_by_1:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -66,7 +66,7 @@ define i64 @test_mul_by_2(i64 %x) {
; X64-HSW-LABEL: test_mul_by_2: ; X64-HSW-LABEL: test_mul_by_2:
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_2: ; X64-JAG-LABEL: test_mul_by_2:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -84,7 +84,7 @@ define i64 @test_mul_by_2(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_2: ; HSW-NOOPT-LABEL: test_mul_by_2:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50] ; HSW-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_2: ; JAG-NOOPT-LABEL: test_mul_by_2:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -116,7 +116,7 @@ define i64 @test_mul_by_3(i64 %x) {
; X64-HSW-LABEL: test_mul_by_3: ; X64-HSW-LABEL: test_mul_by_3:
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_3: ; X64-JAG-LABEL: test_mul_by_3:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -134,7 +134,7 @@ define i64 @test_mul_by_3(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_3: ; HSW-NOOPT-LABEL: test_mul_by_3:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] ; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_3: ; JAG-NOOPT-LABEL: test_mul_by_3:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -166,7 +166,7 @@ define i64 @test_mul_by_4(i64 %x) {
; X64-HSW-LABEL: test_mul_by_4: ; X64-HSW-LABEL: test_mul_by_4:
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_4: ; X64-JAG-LABEL: test_mul_by_4:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -184,7 +184,7 @@ define i64 @test_mul_by_4(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_4: ; HSW-NOOPT-LABEL: test_mul_by_4:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50] ; HSW-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_4: ; JAG-NOOPT-LABEL: test_mul_by_4:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -216,7 +216,7 @@ define i64 @test_mul_by_5(i64 %x) {
; X64-HSW-LABEL: test_mul_by_5: ; X64-HSW-LABEL: test_mul_by_5:
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_5: ; X64-JAG-LABEL: test_mul_by_5:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -234,7 +234,7 @@ define i64 @test_mul_by_5(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_5: ; HSW-NOOPT-LABEL: test_mul_by_5:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] ; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_5: ; JAG-NOOPT-LABEL: test_mul_by_5:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -268,7 +268,7 @@ define i64 @test_mul_by_6(i64 %x) {
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25] ; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25]
; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_6: ; X64-JAG-LABEL: test_mul_by_6:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -287,7 +287,7 @@ define i64 @test_mul_by_6(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_6: ; HSW-NOOPT-LABEL: test_mul_by_6:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00] ; HSW-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_6: ; JAG-NOOPT-LABEL: test_mul_by_6:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -323,7 +323,7 @@ define i64 @test_mul_by_7(i64 %x) {
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] ; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_7: ; X64-JAG-LABEL: test_mul_by_7:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -342,7 +342,7 @@ define i64 @test_mul_by_7(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_7: ; HSW-NOOPT-LABEL: test_mul_by_7:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00] ; HSW-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_7: ; JAG-NOOPT-LABEL: test_mul_by_7:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -375,7 +375,7 @@ define i64 @test_mul_by_8(i64 %x) {
; X64-HSW-LABEL: test_mul_by_8: ; X64-HSW-LABEL: test_mul_by_8:
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_8: ; X64-JAG-LABEL: test_mul_by_8:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -393,7 +393,7 @@ define i64 @test_mul_by_8(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_8: ; HSW-NOOPT-LABEL: test_mul_by_8:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] ; HSW-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_8: ; JAG-NOOPT-LABEL: test_mul_by_8:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -425,7 +425,7 @@ define i64 @test_mul_by_9(i64 %x) {
; X64-HSW-LABEL: test_mul_by_9: ; X64-HSW-LABEL: test_mul_by_9:
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_9: ; X64-JAG-LABEL: test_mul_by_9:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -443,7 +443,7 @@ define i64 @test_mul_by_9(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_9: ; HSW-NOOPT-LABEL: test_mul_by_9:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] ; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_9: ; JAG-NOOPT-LABEL: test_mul_by_9:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -477,7 +477,7 @@ define i64 @test_mul_by_10(i64 %x) {
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25] ; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25]
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_10: ; X64-JAG-LABEL: test_mul_by_10:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -496,7 +496,7 @@ define i64 @test_mul_by_10(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_10: ; HSW-NOOPT-LABEL: test_mul_by_10:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00] ; HSW-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_10: ; JAG-NOOPT-LABEL: test_mul_by_10:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -532,7 +532,7 @@ define i64 @test_mul_by_11(i64 %x) {
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rdi,%rax,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rax,2), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_11: ; X64-JAG-LABEL: test_mul_by_11:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -551,7 +551,7 @@ define i64 @test_mul_by_11(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_11: ; HSW-NOOPT-LABEL: test_mul_by_11:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00] ; HSW-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_11: ; JAG-NOOPT-LABEL: test_mul_by_11:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -585,7 +585,7 @@ define i64 @test_mul_by_12(i64 %x) {
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50] ; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_12: ; X64-JAG-LABEL: test_mul_by_12:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -604,7 +604,7 @@ define i64 @test_mul_by_12(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_12: ; HSW-NOOPT-LABEL: test_mul_by_12:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00] ; HSW-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_12: ; JAG-NOOPT-LABEL: test_mul_by_12:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -640,7 +640,7 @@ define i64 @test_mul_by_13(i64 %x) {
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_13: ; X64-JAG-LABEL: test_mul_by_13:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -659,7 +659,7 @@ define i64 @test_mul_by_13(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_13: ; HSW-NOOPT-LABEL: test_mul_by_13:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00] ; HSW-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_13: ; JAG-NOOPT-LABEL: test_mul_by_13:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -696,7 +696,7 @@ define i64 @test_mul_by_14(i64 %x) {
; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] ; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_14: ; X64-JAG-LABEL: test_mul_by_14:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -716,7 +716,7 @@ define i64 @test_mul_by_14(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_14: ; HSW-NOOPT-LABEL: test_mul_by_14:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00] ; HSW-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_14: ; JAG-NOOPT-LABEL: test_mul_by_14:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -751,7 +751,7 @@ define i64 @test_mul_by_15(i64 %x) {
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_15: ; X64-JAG-LABEL: test_mul_by_15:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -770,7 +770,7 @@ define i64 @test_mul_by_15(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_15: ; HSW-NOOPT-LABEL: test_mul_by_15:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00] ; HSW-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_15: ; JAG-NOOPT-LABEL: test_mul_by_15:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -804,7 +804,7 @@ define i64 @test_mul_by_16(i64 %x) {
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: shlq $4, %rdi # sched: [1:0.50] ; X64-HSW-NEXT: shlq $4, %rdi # sched: [1:0.50]
; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] ; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_16: ; X64-JAG-LABEL: test_mul_by_16:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -824,7 +824,7 @@ define i64 @test_mul_by_16(i64 %x) {
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: shlq $4, %rdi # sched: [1:0.50] ; HSW-NOOPT-NEXT: shlq $4, %rdi # sched: [1:0.50]
; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25] ; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_16: ; JAG-NOOPT-LABEL: test_mul_by_16:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -864,7 +864,7 @@ define i64 @test_mul_by_17(i64 %x) {
; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] ; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: shlq $4, %rax # sched: [1:0.50] ; X64-HSW-NEXT: shlq $4, %rax # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_17: ; X64-JAG-LABEL: test_mul_by_17:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -884,7 +884,7 @@ define i64 @test_mul_by_17(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_17: ; HSW-NOOPT-LABEL: test_mul_by_17:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00] ; HSW-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_17: ; JAG-NOOPT-LABEL: test_mul_by_17:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -920,7 +920,7 @@ define i64 @test_mul_by_18(i64 %x) {
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25] ; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25]
; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_18: ; X64-JAG-LABEL: test_mul_by_18:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -939,7 +939,7 @@ define i64 @test_mul_by_18(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_18: ; HSW-NOOPT-LABEL: test_mul_by_18:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00] ; HSW-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_18: ; JAG-NOOPT-LABEL: test_mul_by_18:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -977,7 +977,7 @@ define i64 @test_mul_by_19(i64 %x) {
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: shlq $2, %rax # sched: [1:0.50] ; X64-HSW-NEXT: shlq $2, %rax # sched: [1:0.50]
; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] ; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_19: ; X64-JAG-LABEL: test_mul_by_19:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -997,7 +997,7 @@ define i64 @test_mul_by_19(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_19: ; HSW-NOOPT-LABEL: test_mul_by_19:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00] ; HSW-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_19: ; JAG-NOOPT-LABEL: test_mul_by_19:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -1031,7 +1031,7 @@ define i64 @test_mul_by_20(i64 %x) {
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50] ; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_20: ; X64-JAG-LABEL: test_mul_by_20:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -1050,7 +1050,7 @@ define i64 @test_mul_by_20(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_20: ; HSW-NOOPT-LABEL: test_mul_by_20:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00] ; HSW-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_20: ; JAG-NOOPT-LABEL: test_mul_by_20:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -1086,7 +1086,7 @@ define i64 @test_mul_by_21(i64 %x) {
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_21: ; X64-JAG-LABEL: test_mul_by_21:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -1105,7 +1105,7 @@ define i64 @test_mul_by_21(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_21: ; HSW-NOOPT-LABEL: test_mul_by_21:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00] ; HSW-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_21: ; JAG-NOOPT-LABEL: test_mul_by_21:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -1142,7 +1142,7 @@ define i64 @test_mul_by_22(i64 %x) {
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] ; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_22: ; X64-JAG-LABEL: test_mul_by_22:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -1162,7 +1162,7 @@ define i64 @test_mul_by_22(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_22: ; HSW-NOOPT-LABEL: test_mul_by_22:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00] ; HSW-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_22: ; JAG-NOOPT-LABEL: test_mul_by_22:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -1199,7 +1199,7 @@ define i64 @test_mul_by_23(i64 %x) {
; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; X64-HSW-NEXT: shlq $3, %rax # sched: [1:0.50] ; X64-HSW-NEXT: shlq $3, %rax # sched: [1:0.50]
; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] ; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_23: ; X64-JAG-LABEL: test_mul_by_23:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -1219,7 +1219,7 @@ define i64 @test_mul_by_23(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_23: ; HSW-NOOPT-LABEL: test_mul_by_23:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00] ; HSW-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_23: ; JAG-NOOPT-LABEL: test_mul_by_23:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -1253,7 +1253,7 @@ define i64 @test_mul_by_24(i64 %x) {
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: shlq $3, %rdi # sched: [1:0.50] ; X64-HSW-NEXT: shlq $3, %rdi # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_24: ; X64-JAG-LABEL: test_mul_by_24:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -1272,7 +1272,7 @@ define i64 @test_mul_by_24(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_24: ; HSW-NOOPT-LABEL: test_mul_by_24:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00] ; HSW-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_24: ; JAG-NOOPT-LABEL: test_mul_by_24:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -1308,7 +1308,7 @@ define i64 @test_mul_by_25(i64 %x) {
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_25: ; X64-JAG-LABEL: test_mul_by_25:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -1327,7 +1327,7 @@ define i64 @test_mul_by_25(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_25: ; HSW-NOOPT-LABEL: test_mul_by_25:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00] ; HSW-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_25: ; JAG-NOOPT-LABEL: test_mul_by_25:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -1365,7 +1365,7 @@ define i64 @test_mul_by_26(i64 %x) {
; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] ; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_26: ; X64-JAG-LABEL: test_mul_by_26:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -1385,7 +1385,7 @@ define i64 @test_mul_by_26(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_26: ; HSW-NOOPT-LABEL: test_mul_by_26:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00] ; HSW-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_26: ; JAG-NOOPT-LABEL: test_mul_by_26:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -1420,7 +1420,7 @@ define i64 @test_mul_by_27(i64 %x) {
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_27: ; X64-JAG-LABEL: test_mul_by_27:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -1439,7 +1439,7 @@ define i64 @test_mul_by_27(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_27: ; HSW-NOOPT-LABEL: test_mul_by_27:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00] ; HSW-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_27: ; JAG-NOOPT-LABEL: test_mul_by_27:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -1477,7 +1477,7 @@ define i64 @test_mul_by_28(i64 %x) {
; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] ; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_28: ; X64-JAG-LABEL: test_mul_by_28:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -1497,7 +1497,7 @@ define i64 @test_mul_by_28(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_28: ; HSW-NOOPT-LABEL: test_mul_by_28:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00] ; HSW-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_28: ; JAG-NOOPT-LABEL: test_mul_by_28:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -1536,7 +1536,7 @@ define i64 @test_mul_by_29(i64 %x) {
; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] ; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] ; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_29: ; X64-JAG-LABEL: test_mul_by_29:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -1557,7 +1557,7 @@ define i64 @test_mul_by_29(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_29: ; HSW-NOOPT-LABEL: test_mul_by_29:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00] ; HSW-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_29: ; JAG-NOOPT-LABEL: test_mul_by_29:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -1596,7 +1596,7 @@ define i64 @test_mul_by_30(i64 %x) {
; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50] ; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50]
; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] ; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] ; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_30: ; X64-JAG-LABEL: test_mul_by_30:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -1617,7 +1617,7 @@ define i64 @test_mul_by_30(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_30: ; HSW-NOOPT-LABEL: test_mul_by_30:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00] ; HSW-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_30: ; JAG-NOOPT-LABEL: test_mul_by_30:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -1654,7 +1654,7 @@ define i64 @test_mul_by_31(i64 %x) {
; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] ; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50] ; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50]
; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] ; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_31: ; X64-JAG-LABEL: test_mul_by_31:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -1674,7 +1674,7 @@ define i64 @test_mul_by_31(i64 %x) {
; HSW-NOOPT-LABEL: test_mul_by_31: ; HSW-NOOPT-LABEL: test_mul_by_31:
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00] ; HSW-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_31: ; JAG-NOOPT-LABEL: test_mul_by_31:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -1709,7 +1709,7 @@ define i64 @test_mul_by_32(i64 %x) {
; X64-HSW: # BB#0: ; X64-HSW: # BB#0:
; X64-HSW-NEXT: shlq $5, %rdi # sched: [1:0.50] ; X64-HSW-NEXT: shlq $5, %rdi # sched: [1:0.50]
; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] ; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_by_32: ; X64-JAG-LABEL: test_mul_by_32:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -1729,7 +1729,7 @@ define i64 @test_mul_by_32(i64 %x) {
; HSW-NOOPT: # BB#0: ; HSW-NOOPT: # BB#0:
; HSW-NOOPT-NEXT: shlq $5, %rdi # sched: [1:0.50] ; HSW-NOOPT-NEXT: shlq $5, %rdi # sched: [1:0.50]
; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25] ; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_by_32: ; JAG-NOOPT-LABEL: test_mul_by_32:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:
@ -1793,7 +1793,7 @@ define i64 @test_mul_spec(i64 %x) nounwind {
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
; X64-HSW-NEXT: addq $2, %rax # sched: [1:0.25] ; X64-HSW-NEXT: addq $2, %rax # sched: [1:0.25]
; X64-HSW-NEXT: imulq %rcx, %rax # sched: [3:1.00] ; X64-HSW-NEXT: imulq %rcx, %rax # sched: [3:1.00]
; X64-HSW-NEXT: retq # sched: [1:1.00] ; X64-HSW-NEXT: retq # sched: [2:1.00]
; ;
; X64-JAG-LABEL: test_mul_spec: ; X64-JAG-LABEL: test_mul_spec:
; X64-JAG: # BB#0: ; X64-JAG: # BB#0:
@ -1841,7 +1841,7 @@ define i64 @test_mul_spec(i64 %x) nounwind {
; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] ; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
; HSW-NOOPT-NEXT: addq $2, %rax # sched: [1:0.25] ; HSW-NOOPT-NEXT: addq $2, %rax # sched: [1:0.25]
; HSW-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00] ; HSW-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00]
; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
; ;
; JAG-NOOPT-LABEL: test_mul_spec: ; JAG-NOOPT-LABEL: test_mul_spec:
; JAG-NOOPT: # BB#0: ; JAG-NOOPT: # BB#0:

View File

@ -59,8 +59,8 @@ define void @foo() local_unnamed_addr {
; X86-NEXT: cmovnel %ecx, %esi ; X86-NEXT: cmovnel %ecx, %esi
; X86-NEXT: cmpl %edx, %edi ; X86-NEXT: cmpl %edx, %edi
; X86-NEXT: movl %ebp, var_50+4 ; X86-NEXT: movl %ebp, var_50+4
; X86-NEXT: movl %esi, var_50
; X86-NEXT: setge var_205 ; X86-NEXT: setge var_205
; X86-NEXT: movl %esi, var_50
; X86-NEXT: imull %eax, %ebx ; X86-NEXT: imull %eax, %ebx
; X86-NEXT: movb %bl, var_218 ; X86-NEXT: movb %bl, var_218
; X86-NEXT: popl %esi ; X86-NEXT: popl %esi

View File

@ -45,15 +45,15 @@ define float @f32_no_estimate(float %x) #0 {
; ;
; SANDY-LABEL: f32_no_estimate: ; SANDY-LABEL: f32_no_estimate:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] ; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
; SANDY-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00] ; SANDY-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [14:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: f32_no_estimate: ; HASWELL-LABEL: f32_no_estimate:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] ; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [?:5.000000e-01]
; HASWELL-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00] ; HASWELL-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [13:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; HASWELL-NO-FMA-LABEL: f32_no_estimate: ; HASWELL-NO-FMA-LABEL: f32_no_estimate:
; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA: # BB#0:
@ -63,9 +63,9 @@ define float @f32_no_estimate(float %x) #0 {
; ;
; AVX512-LABEL: f32_no_estimate: ; AVX512-LABEL: f32_no_estimate:
; AVX512: # BB#0: ; AVX512: # BB#0:
; AVX512-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] ; AVX512-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [?:5.000000e-01]
; AVX512-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00] ; AVX512-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [13:1.00]
; AVX512-NEXT: retq # sched: [1:1.00] ; AVX512-NEXT: retq # sched: [2:1.00]
%div = fdiv fast float 1.0, %x %div = fdiv fast float 1.0, %x
ret float %div ret float %div
} }
@ -113,18 +113,18 @@ define float @f32_one_step(float %x) #1 {
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] ; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: f32_one_step: ; HASWELL-LABEL: f32_one_step:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 ; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 ; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; HASWELL-NO-FMA-LABEL: f32_one_step: ; HASWELL-NO-FMA-LABEL: f32_one_step:
; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA: # BB#0:
@ -139,9 +139,9 @@ define float @f32_one_step(float %x) #1 {
; AVX512-LABEL: f32_one_step: ; AVX512-LABEL: f32_one_step:
; AVX512: # BB#0: ; AVX512: # BB#0:
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 ; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 ; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 ; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; AVX512-NEXT: retq # sched: [1:1.00] ; AVX512-NEXT: retq # sched: [2:1.00]
%div = fdiv fast float 1.0, %x %div = fdiv fast float 1.0, %x
ret float %div ret float %div
} }
@ -207,7 +207,7 @@ define float @f32_two_step(float %x) #2 {
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [4:0.50] ; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [6:0.50]
; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00] ; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00] ; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
@ -215,18 +215,18 @@ define float @f32_two_step(float %x) #2 {
; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: f32_two_step: ; HASWELL-LABEL: f32_two_step:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] ; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [?:5.000000e-01]
; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] ; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 ; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 ; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 ; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 ; HASWELL-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; HASWELL-NO-FMA-LABEL: f32_two_step: ; HASWELL-NO-FMA-LABEL: f32_two_step:
; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA: # BB#0:
@ -245,13 +245,13 @@ define float @f32_two_step(float %x) #2 {
; AVX512-LABEL: f32_two_step: ; AVX512-LABEL: f32_two_step:
; AVX512: # BB#0: ; AVX512: # BB#0:
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 ; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] ; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [?:5.000000e-01]
; AVX512-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] ; AVX512-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 ; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 ; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 ; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
; AVX512-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 ; AVX512-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
; AVX512-NEXT: retq # sched: [1:1.00] ; AVX512-NEXT: retq # sched: [2:1.00]
%div = fdiv fast float 1.0, %x %div = fdiv fast float 1.0, %x
ret float %div ret float %div
} }
@ -284,15 +284,15 @@ define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
; ;
; SANDY-LABEL: v4f32_no_estimate: ; SANDY-LABEL: v4f32_no_estimate:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00] ; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [14:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: v4f32_no_estimate: ; HASWELL-LABEL: v4f32_no_estimate:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [4:0.50] ; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [?:5.000000e-01]
; HASWELL-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00] ; HASWELL-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [13:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; HASWELL-NO-FMA-LABEL: v4f32_no_estimate: ; HASWELL-NO-FMA-LABEL: v4f32_no_estimate:
; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA: # BB#0:
@ -302,9 +302,9 @@ define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
; ;
; AVX512-LABEL: v4f32_no_estimate: ; AVX512-LABEL: v4f32_no_estimate:
; AVX512: # BB#0: ; AVX512: # BB#0:
; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [4:0.50] ; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [?:5.000000e-01]
; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00] ; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [13:1.00]
; AVX512-NEXT: retq # sched: [1:1.00] ; AVX512-NEXT: retq # sched: [2:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <4 x float> %div ret <4 x float> %div
} }
@ -350,21 +350,21 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
; ;
; SANDY-LABEL: v4f32_one_step: ; SANDY-LABEL: v4f32_one_step:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00]
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: v4f32_one_step: ; HASWELL-LABEL: v4f32_one_step:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] ; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 ; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 ; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; HASWELL-NO-FMA-LABEL: v4f32_one_step: ; HASWELL-NO-FMA-LABEL: v4f32_one_step:
; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA: # BB#0:
@ -379,17 +379,17 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
; KNL-LABEL: v4f32_one_step: ; KNL-LABEL: v4f32_one_step:
; KNL: # BB#0: ; KNL: # BB#0:
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] ; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 ; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 ; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [1:1.00] ; KNL-NEXT: retq # sched: [2:1.00]
; ;
; SKX-LABEL: v4f32_one_step: ; SKX-LABEL: v4f32_one_step:
; SKX: # BB#0: ; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %xmm0, %xmm1 ; SKX-NEXT: vrcp14ps %xmm0, %xmm1
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0 ; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 ; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; SKX-NEXT: retq # sched: [1:1.00] ; SKX-NEXT: retq # sched: [2:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <4 x float> %div ret <4 x float> %div
} }
@ -453,9 +453,9 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
; ;
; SANDY-LABEL: v4f32_two_step: ; SANDY-LABEL: v4f32_two_step:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00]
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00] ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00] ; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00] ; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00] ; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
@ -463,18 +463,18 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: v4f32_two_step: ; HASWELL-LABEL: v4f32_two_step:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] ; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01]
; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] ; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 ; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 ; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 ; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 ; HASWELL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; HASWELL-NO-FMA-LABEL: v4f32_two_step: ; HASWELL-NO-FMA-LABEL: v4f32_two_step:
; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA: # BB#0:
@ -493,24 +493,24 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
; KNL-LABEL: v4f32_two_step: ; KNL-LABEL: v4f32_two_step:
; KNL: # BB#0: ; KNL: # BB#0:
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] ; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01]
; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] ; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 ; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 ; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 ; KNL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 ; KNL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [1:1.00] ; KNL-NEXT: retq # sched: [2:1.00]
; ;
; SKX-LABEL: v4f32_two_step: ; SKX-LABEL: v4f32_two_step:
; SKX: # BB#0: ; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %xmm0, %xmm1 ; SKX-NEXT: vrcp14ps %xmm0, %xmm1
; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] ; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01]
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] ; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 ; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 ; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 ; SKX-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
; SKX-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 ; SKX-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
; SKX-NEXT: retq # sched: [1:1.00] ; SKX-NEXT: retq # sched: [2:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <4 x float> %div ret <4 x float> %div
} }
@ -546,15 +546,15 @@ define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
; ;
; SANDY-LABEL: v8f32_no_estimate: ; SANDY-LABEL: v8f32_no_estimate:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [12:1.00] ; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [29:3.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: v8f32_no_estimate: ; HASWELL-LABEL: v8f32_no_estimate:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 # sched: [5:1.00] ; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 # sched: [5:1.00]
; HASWELL-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [19:2.00] ; HASWELL-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [21:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; HASWELL-NO-FMA-LABEL: v8f32_no_estimate: ; HASWELL-NO-FMA-LABEL: v8f32_no_estimate:
; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA: # BB#0:
@ -565,8 +565,8 @@ define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
; AVX512-LABEL: v8f32_no_estimate: ; AVX512-LABEL: v8f32_no_estimate:
; AVX512: # BB#0: ; AVX512: # BB#0:
; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 # sched: [5:1.00] ; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 # sched: [5:1.00]
; AVX512-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [19:2.00] ; AVX512-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [21:2.00]
; AVX512-NEXT: retq # sched: [1:1.00] ; AVX512-NEXT: retq # sched: [2:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <8 x float> %div ret <8 x float> %div
} }
@ -621,19 +621,19 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00] ; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: v8f32_one_step: ; HASWELL-LABEL: v8f32_one_step:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] ; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 ; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 ; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; HASWELL-NO-FMA-LABEL: v8f32_one_step: ; HASWELL-NO-FMA-LABEL: v8f32_one_step:
; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA: # BB#0:
@ -647,18 +647,18 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
; ;
; KNL-LABEL: v8f32_one_step: ; KNL-LABEL: v8f32_one_step:
; KNL: # BB#0: ; KNL: # BB#0:
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] ; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 ; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 ; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [1:1.00] ; KNL-NEXT: retq # sched: [2:1.00]
; ;
; SKX-LABEL: v8f32_one_step: ; SKX-LABEL: v8f32_one_step:
; SKX: # BB#0: ; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %ymm0, %ymm1 ; SKX-NEXT: vrcp14ps %ymm0, %ymm1
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0 ; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 ; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
; SKX-NEXT: retq # sched: [1:1.00] ; SKX-NEXT: retq # sched: [2:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <8 x float> %div ret <8 x float> %div
} }
@ -737,7 +737,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00] ; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00] ; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00] ; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
@ -745,18 +745,18 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: v8f32_two_step: ; HASWELL-LABEL: v8f32_two_step:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] ; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] ; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 ; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 ; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 ; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 ; HASWELL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; HASWELL-NO-FMA-LABEL: v8f32_two_step: ; HASWELL-NO-FMA-LABEL: v8f32_two_step:
; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA: # BB#0:
@ -774,25 +774,25 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
; ;
; KNL-LABEL: v8f32_two_step: ; KNL-LABEL: v8f32_two_step:
; KNL: # BB#0: ; KNL: # BB#0:
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] ; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] ; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 ; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 ; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 ; KNL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 ; KNL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [1:1.00] ; KNL-NEXT: retq # sched: [2:1.00]
; ;
; SKX-LABEL: v8f32_two_step: ; SKX-LABEL: v8f32_two_step:
; SKX: # BB#0: ; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %ymm0, %ymm1 ; SKX-NEXT: vrcp14ps %ymm0, %ymm1
; SKX-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; SKX-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] ; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 ; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50]
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 ; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50]
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 ; SKX-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50]
; SKX-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 ; SKX-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50]
; SKX-NEXT: retq # sched: [1:1.00] ; SKX-NEXT: retq # sched: [2:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <8 x float> %div ret <8 x float> %div
} }

View File

@ -39,26 +39,26 @@ define float @f32_no_step_2(float %x) #3 {
; SANDY-LABEL: f32_no_step_2: ; SANDY-LABEL: f32_no_step_2:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: f32_no_step_2: ; HASWELL-LABEL: f32_no_step_2:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] ; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; HASWELL-NO-FMA-LABEL: f32_no_step_2: ; HASWELL-NO-FMA-LABEL: f32_no_step_2:
; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] ; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] ; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
; ;
; AVX512-LABEL: f32_no_step_2: ; AVX512-LABEL: f32_no_step_2:
; AVX512: # BB#0: ; AVX512: # BB#0:
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] ; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; AVX512-NEXT: retq # sched: [1:1.00] ; AVX512-NEXT: retq # sched: [2:1.00]
%div = fdiv fast float 1234.0, %x %div = fdiv fast float 1234.0, %x
ret float %div ret float %div
} }
@ -110,39 +110,39 @@ define float @f32_one_step_2(float %x) #1 {
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] ; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: f32_one_step_2: ; HASWELL-LABEL: f32_one_step_2:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 ; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 ; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] ; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; HASWELL-NO-FMA-LABEL: f32_one_step_2: ; HASWELL-NO-FMA-LABEL: f32_one_step_2:
; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] ; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [?:5.000000e-01]
; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] ; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] ; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
; ;
; AVX512-LABEL: f32_one_step_2: ; AVX512-LABEL: f32_one_step_2:
; AVX512: # BB#0: ; AVX512: # BB#0:
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 ; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 ; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 ; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] ; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; AVX512-NEXT: retq # sched: [1:1.00] ; AVX512-NEXT: retq # sched: [2:1.00]
%div = fdiv fast float 3456.0, %x %div = fdiv fast float 3456.0, %x
ret float %div ret float %div
} }
@ -198,43 +198,43 @@ define float @f32_one_step_2_divs(float %x) #1 {
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] ; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00]
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: f32_one_step_2_divs: ; HASWELL-LABEL: f32_one_step_2_divs:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 ; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 ; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50] ; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
; HASWELL-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; HASWELL-NO-FMA-LABEL: f32_one_step_2_divs: ; HASWELL-NO-FMA-LABEL: f32_one_step_2_divs:
; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] ; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [?:5.000000e-01]
; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50] ; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] ; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
; ;
; AVX512-LABEL: f32_one_step_2_divs: ; AVX512-LABEL: f32_one_step_2_divs:
; AVX512: # BB#0: ; AVX512: # BB#0:
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 ; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 ; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 ; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50] ; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50] ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; AVX512-NEXT: retq # sched: [1:1.00] ; AVX512-NEXT: retq # sched: [2:1.00]
%div = fdiv fast float 3456.0, %x %div = fdiv fast float 3456.0, %x
%div2 = fdiv fast float %div, %x %div2 = fdiv fast float %div, %x
ret float %div2 ret float %div2
@ -305,7 +305,7 @@ define float @f32_two_step_2(float %x) #2 {
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [4:0.50] ; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [6:0.50]
; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00] ; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00] ; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
@ -313,26 +313,26 @@ define float @f32_two_step_2(float %x) #2 {
; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: f32_two_step_2: ; HASWELL-LABEL: f32_two_step_2:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] ; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [?:5.000000e-01]
; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] ; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 ; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 ; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 ; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 ; HASWELL-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] ; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; HASWELL-NO-FMA-LABEL: f32_two_step_2: ; HASWELL-NO-FMA-LABEL: f32_two_step_2:
; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [4:0.50] ; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [?:5.000000e-01]
; HASWELL-NO-FMA-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
@ -340,20 +340,20 @@ define float @f32_two_step_2(float %x) #2 {
; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] ; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] ; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
; ;
; AVX512-LABEL: f32_two_step_2: ; AVX512-LABEL: f32_two_step_2:
; AVX512: # BB#0: ; AVX512: # BB#0:
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 ; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] ; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [?:5.000000e-01]
; AVX512-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] ; AVX512-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 ; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 ; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 ; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
; AVX512-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 ; AVX512-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] ; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; AVX512-NEXT: retq # sched: [1:1.00] ; AVX512-NEXT: retq # sched: [2:1.00]
%div = fdiv fast float 6789.0, %x %div = fdiv fast float 6789.0, %x
ret float %div ret float %div
} }
@ -403,51 +403,51 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 {
; ;
; SANDY-LABEL: v4f32_one_step2: ; SANDY-LABEL: v4f32_one_step2:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00]
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: v4f32_one_step2: ; HASWELL-LABEL: v4f32_one_step2:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] ; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 ; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 ; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] ; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; HASWELL-NO-FMA-LABEL: v4f32_one_step2: ; HASWELL-NO-FMA-LABEL: v4f32_one_step2:
; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] ; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01]
; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] ; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] ; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
; ;
; KNL-LABEL: v4f32_one_step2: ; KNL-LABEL: v4f32_one_step2:
; KNL: # BB#0: ; KNL: # BB#0:
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] ; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 ; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 ; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] ; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [1:1.00] ; KNL-NEXT: retq # sched: [2:1.00]
; ;
; SKX-LABEL: v4f32_one_step2: ; SKX-LABEL: v4f32_one_step2:
; SKX: # BB#0: ; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %xmm0, %xmm1 ; SKX-NEXT: vrcp14ps %xmm0, %xmm1
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0 ; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 ; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; SKX-NEXT: retq # sched: [1:1.00] ; SKX-NEXT: retq # sched: [2:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x %div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
ret <4 x float> %div ret <4 x float> %div
} }
@ -501,56 +501,56 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 {
; ;
; SANDY-LABEL: v4f32_one_step_2_divs: ; SANDY-LABEL: v4f32_one_step_2_divs:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00]
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00]
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: v4f32_one_step_2_divs: ; HASWELL-LABEL: v4f32_one_step_2_divs:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] ; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 ; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 ; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50] ; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
; HASWELL-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; HASWELL-NO-FMA-LABEL: v4f32_one_step_2_divs: ; HASWELL-NO-FMA-LABEL: v4f32_one_step_2_divs:
; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] ; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01]
; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50] ; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] ; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
; ;
; KNL-LABEL: v4f32_one_step_2_divs: ; KNL-LABEL: v4f32_one_step_2_divs:
; KNL: # BB#0: ; KNL: # BB#0:
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] ; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 ; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 ; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50] ; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
; KNL-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50] ; KNL-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [1:1.00] ; KNL-NEXT: retq # sched: [2:1.00]
; ;
; SKX-LABEL: v4f32_one_step_2_divs: ; SKX-LABEL: v4f32_one_step_2_divs:
; SKX: # BB#0: ; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %xmm0, %xmm1 ; SKX-NEXT: vrcp14ps %xmm0, %xmm1
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0 ; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 ; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50] ; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
; SKX-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50] ; SKX-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; SKX-NEXT: retq # sched: [1:1.00] ; SKX-NEXT: retq # sched: [2:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x %div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
%div2 = fdiv fast <4 x float> %div, %x %div2 = fdiv fast <4 x float> %div, %x
ret <4 x float> %div2 ret <4 x float> %div2
@ -619,9 +619,9 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
; ;
; SANDY-LABEL: v4f32_two_step2: ; SANDY-LABEL: v4f32_two_step2:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00]
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00] ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00] ; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00] ; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00] ; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
@ -629,26 +629,26 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: v4f32_two_step2: ; HASWELL-LABEL: v4f32_two_step2:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] ; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01]
; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] ; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 ; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 ; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 ; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 ; HASWELL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] ; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; HASWELL-NO-FMA-LABEL: v4f32_two_step2: ; HASWELL-NO-FMA-LABEL: v4f32_two_step2:
; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm3 # sched: [4:0.50] ; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm3 # sched: [?:5.000000e-01]
; HASWELL-NO-FMA-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
@ -656,32 +656,32 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] ; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] ; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
; ;
; KNL-LABEL: v4f32_two_step2: ; KNL-LABEL: v4f32_two_step2:
; KNL: # BB#0: ; KNL: # BB#0:
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] ; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01]
; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] ; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 ; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 ; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 ; KNL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 ; KNL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] ; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [1:1.00] ; KNL-NEXT: retq # sched: [2:1.00]
; ;
; SKX-LABEL: v4f32_two_step2: ; SKX-LABEL: v4f32_two_step2:
; SKX: # BB#0: ; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %xmm0, %xmm1 ; SKX-NEXT: vrcp14ps %xmm0, %xmm1
; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] ; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01]
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] ; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 ; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 ; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 ; SKX-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
; SKX-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 ; SKX-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
; SKX-NEXT: retq # sched: [1:1.00] ; SKX-NEXT: retq # sched: [2:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x %div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
ret <4 x float> %div ret <4 x float> %div
} }
@ -741,49 +741,49 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 {
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00] ; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] ; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:2.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: v8f32_one_step2: ; HASWELL-LABEL: v8f32_one_step2:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] ; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 ; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 ; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] ; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; HASWELL-NO-FMA-LABEL: v8f32_one_step2: ; HASWELL-NO-FMA-LABEL: v8f32_one_step2:
; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] ; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] ; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] ; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
; ;
; KNL-LABEL: v8f32_one_step2: ; KNL-LABEL: v8f32_one_step2:
; KNL: # BB#0: ; KNL: # BB#0:
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] ; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 ; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 ; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] ; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [1:1.00] ; KNL-NEXT: retq # sched: [2:1.00]
; ;
; SKX-LABEL: v8f32_one_step2: ; SKX-LABEL: v8f32_one_step2:
; SKX: # BB#0: ; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %ymm0, %ymm1 ; SKX-NEXT: vrcp14ps %ymm0, %ymm1
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0 ; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 ; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] ; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
; SKX-NEXT: retq # sched: [1:1.00] ; SKX-NEXT: retq # sched: [2:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x %div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
ret <8 x float> %div ret <8 x float> %div
} }
@ -848,54 +848,54 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 {
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00] ; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00] ; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:2.00]
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: v8f32_one_step_2_divs: ; HASWELL-LABEL: v8f32_one_step_2_divs:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] ; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 ; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 ; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00] ; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [5:0.50]
; HASWELL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; HASWELL-NO-FMA-LABEL: v8f32_one_step_2_divs: ; HASWELL-NO-FMA-LABEL: v8f32_one_step_2_divs:
; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] ; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00] ; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] ; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
; ;
; KNL-LABEL: v8f32_one_step_2_divs: ; KNL-LABEL: v8f32_one_step_2_divs:
; KNL: # BB#0: ; KNL: # BB#0:
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] ; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 ; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 ; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00] ; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [5:0.50]
; KNL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; KNL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [1:1.00] ; KNL-NEXT: retq # sched: [2:1.00]
; ;
; SKX-LABEL: v8f32_one_step_2_divs: ; SKX-LABEL: v8f32_one_step_2_divs:
; SKX: # BB#0: ; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %ymm0, %ymm1 ; SKX-NEXT: vrcp14ps %ymm0, %ymm1
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0 ; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 ; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00] ; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [5:0.50]
; SKX-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; SKX-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
; SKX-NEXT: retq # sched: [1:1.00] ; SKX-NEXT: retq # sched: [2:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x %div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
%div2 = fdiv fast <8 x float> %div, %x %div2 = fdiv fast <8 x float> %div, %x
ret <8 x float> %div2 ret <8 x float> %div2
@ -980,7 +980,7 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00] ; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00] ; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00] ; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
@ -988,59 +988,59 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] ; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:2.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: v8f32_two_step2: ; HASWELL-LABEL: v8f32_two_step2:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] ; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] ; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 ; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 ; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 ; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 ; HASWELL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] ; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; HASWELL-NO-FMA-LABEL: v8f32_two_step2: ; HASWELL-NO-FMA-LABEL: v8f32_two_step2:
; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] ; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm3 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm3 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] ; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] ; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
; ;
; KNL-LABEL: v8f32_two_step2: ; KNL-LABEL: v8f32_two_step2:
; KNL: # BB#0: ; KNL: # BB#0:
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] ; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] ; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 ; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 ; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 ; KNL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50]
; KNL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 ; KNL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50]
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] ; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [1:1.00] ; KNL-NEXT: retq # sched: [2:1.00]
; ;
; SKX-LABEL: v8f32_two_step2: ; SKX-LABEL: v8f32_two_step2:
; SKX: # BB#0: ; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %ymm0, %ymm1 ; SKX-NEXT: vrcp14ps %ymm0, %ymm1
; SKX-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; SKX-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] ; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 ; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50]
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 ; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50]
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 ; SKX-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50]
; SKX-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 ; SKX-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50]
; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] ; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
; SKX-NEXT: retq # sched: [1:1.00] ; SKX-NEXT: retq # sched: [2:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x %div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
ret <8 x float> %div ret <8 x float> %div
} }
@ -1070,27 +1070,27 @@ define <8 x float> @v8f32_no_step(<8 x float> %x) #3 {
; SANDY-LABEL: v8f32_no_step: ; SANDY-LABEL: v8f32_no_step:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: v8f32_no_step: ; HASWELL-LABEL: v8f32_no_step:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] ; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; HASWELL-NO-FMA-LABEL: v8f32_no_step: ; HASWELL-NO-FMA-LABEL: v8f32_no_step:
; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] ; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] ; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
; ;
; KNL-LABEL: v8f32_no_step: ; KNL-LABEL: v8f32_no_step:
; KNL: # BB#0: ; KNL: # BB#0:
; KNL-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] ; KNL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
; KNL-NEXT: retq # sched: [1:1.00] ; KNL-NEXT: retq # sched: [2:1.00]
; ;
; SKX-LABEL: v8f32_no_step: ; SKX-LABEL: v8f32_no_step:
; SKX: # BB#0: ; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %ymm0, %ymm0 ; SKX-NEXT: vrcp14ps %ymm0, %ymm0
; SKX-NEXT: retq # sched: [1:1.00] ; SKX-NEXT: retq # sched: [2:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <8 x float> %div ret <8 x float> %div
} }
@ -1125,32 +1125,32 @@ define <8 x float> @v8f32_no_step2(<8 x float> %x) #3 {
; SANDY-LABEL: v8f32_no_step2: ; SANDY-LABEL: v8f32_no_step2:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] ; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:2.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: v8f32_no_step2: ; HASWELL-LABEL: v8f32_no_step2:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] ; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] ; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; HASWELL-NO-FMA-LABEL: v8f32_no_step2: ; HASWELL-NO-FMA-LABEL: v8f32_no_step2:
; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] ; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] ; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] ; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
; ;
; KNL-LABEL: v8f32_no_step2: ; KNL-LABEL: v8f32_no_step2:
; KNL: # BB#0: ; KNL: # BB#0:
; KNL-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] ; KNL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] ; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
; KNL-NEXT: retq # sched: [1:1.00] ; KNL-NEXT: retq # sched: [2:1.00]
; ;
; SKX-LABEL: v8f32_no_step2: ; SKX-LABEL: v8f32_no_step2:
; SKX: # BB#0: ; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %ymm0, %ymm0 ; SKX-NEXT: vrcp14ps %ymm0, %ymm0
; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] ; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
; SKX-NEXT: retq # sched: [1:1.00] ; SKX-NEXT: retq # sched: [2:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x %div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
ret <8 x float> %div ret <8 x float> %div
} }

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -31,14 +31,14 @@ define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; SANDY-LABEL: test_addsubpd: ; SANDY-LABEL: test_addsubpd:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_addsubpd: ; HASWELL-LABEL: test_addsubpd:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; HASWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_addsubpd: ; BTVER2-LABEL: test_addsubpd:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -74,14 +74,14 @@ define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float>
; SANDY-LABEL: test_addsubps: ; SANDY-LABEL: test_addsubps:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_addsubps: ; HASWELL-LABEL: test_addsubps:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; HASWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_addsubps: ; BTVER2-LABEL: test_addsubps:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -116,15 +116,15 @@ define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double
; ;
; SANDY-LABEL: test_haddpd: ; SANDY-LABEL: test_haddpd:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; SANDY-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_haddpd: ; HASWELL-LABEL: test_haddpd:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] ; HASWELL-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] ; HASWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_haddpd: ; BTVER2-LABEL: test_haddpd:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -159,15 +159,15 @@ define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%
; ;
; SANDY-LABEL: test_haddps: ; SANDY-LABEL: test_haddps:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; SANDY-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_haddps: ; HASWELL-LABEL: test_haddps:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] ; HASWELL-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [9:2.00] ; HASWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_haddps: ; BTVER2-LABEL: test_haddps:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -202,15 +202,15 @@ define <2 x double> @test_hsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double
; ;
; SANDY-LABEL: test_hsubpd: ; SANDY-LABEL: test_hsubpd:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; SANDY-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_hsubpd: ; HASWELL-LABEL: test_hsubpd:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] ; HASWELL-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] ; HASWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_hsubpd: ; BTVER2-LABEL: test_hsubpd:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -245,15 +245,15 @@ define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%
; ;
; SANDY-LABEL: test_hsubps: ; SANDY-LABEL: test_hsubps:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; SANDY-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_hsubps: ; HASWELL-LABEL: test_hsubps:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] ; HASWELL-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [9:2.00] ; HASWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_hsubps: ; BTVER2-LABEL: test_hsubps:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -287,13 +287,13 @@ define <16 x i8> @test_lddqu(i8* %a0) {
; ;
; SANDY-LABEL: test_lddqu: ; SANDY-LABEL: test_lddqu:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vlddqu (%rdi), %xmm0 # sched: [4:0.50] ; SANDY-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_lddqu: ; HASWELL-LABEL: test_lddqu:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [4:0.50] ; HASWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [?:5.000000e-01]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_lddqu: ; BTVER2-LABEL: test_lddqu:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -330,16 +330,16 @@ define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) {
; SANDY-LABEL: test_movddup: ; SANDY-LABEL: test_movddup:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] ; SANDY-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
; SANDY-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [4:0.50] ; SANDY-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:0.50]
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_movddup: ; HASWELL-LABEL: test_movddup:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] ; HASWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
; HASWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [4:0.50] ; HASWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [?:5.000000e-01]
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_movddup: ; BTVER2-LABEL: test_movddup:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -380,16 +380,16 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) {
; SANDY-LABEL: test_movshdup: ; SANDY-LABEL: test_movshdup:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] ; SANDY-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
; SANDY-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [4:0.50] ; SANDY-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_movshdup: ; HASWELL-LABEL: test_movshdup:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] ; HASWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
; HASWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [4:0.50] ; HASWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [?:5.000000e-01]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_movshdup: ; BTVER2-LABEL: test_movshdup:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -430,16 +430,16 @@ define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) {
; SANDY-LABEL: test_movsldup: ; SANDY-LABEL: test_movsldup:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] ; SANDY-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
; SANDY-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [4:0.50] ; SANDY-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_movsldup: ; HASWELL-LABEL: test_movsldup:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] ; HASWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
; HASWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [4:0.50] ; HASWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [?:5.000000e-01]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_movsldup: ; BTVER2-LABEL: test_movsldup:
; BTVER2: # BB#0: ; BTVER2: # BB#0:

View File

@ -25,17 +25,17 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
; ;
; SANDY-LABEL: test_blendpd: ; SANDY-LABEL: test_blendpd:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] ; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:1.00]
; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [5:0.50] ; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_blendpd: ; HASWELL-LABEL: test_blendpd:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] ; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [5:0.50] ; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_blendpd: ; BTVER2-LABEL: test_blendpd:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -65,15 +65,15 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> *
; ;
; SANDY-LABEL: test_blendps: ; SANDY-LABEL: test_blendps:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] ; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:1.00]
; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [5:0.50] ; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_blendps: ; HASWELL-LABEL: test_blendps:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] ; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [5:0.50] ; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_blendps: ; BTVER2-LABEL: test_blendps:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -107,15 +107,15 @@ define <2 x double> @test_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; ;
; SANDY-LABEL: test_blendvpd: ; SANDY-LABEL: test_blendvpd:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; SANDY-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; SANDY-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; SANDY-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_blendvpd: ; HASWELL-LABEL: test_blendvpd:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] ; HASWELL-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:2.00] ; HASWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_blendvpd: ; BTVER2-LABEL: test_blendvpd:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -150,15 +150,15 @@ define <4 x float> @test_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float>
; ;
; SANDY-LABEL: test_blendvps: ; SANDY-LABEL: test_blendvps:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; SANDY-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; SANDY-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; SANDY-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_blendvps: ; HASWELL-LABEL: test_blendvps:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] ; HASWELL-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:2.00] ; HASWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_blendvps: ; BTVER2-LABEL: test_blendvps:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -187,15 +187,15 @@ define <2 x double> @test_dppd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; ;
; SANDY-LABEL: test_dppd: ; SANDY-LABEL: test_dppd:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_dppd: ; HASWELL-LABEL: test_dppd:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] ; HASWELL-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [13:1.00] ; HASWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_dppd: ; BTVER2-LABEL: test_dppd:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -224,15 +224,15 @@ define <4 x float> @test_dpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2
; ;
; SANDY-LABEL: test_dpps: ; SANDY-LABEL: test_dpps:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [12:2.00]
; SANDY-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_dpps: ; HASWELL-LABEL: test_dpps:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00] ; HASWELL-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00]
; HASWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [18:2.00] ; HASWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [14:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_dpps: ; BTVER2-LABEL: test_dpps:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -262,14 +262,14 @@ define <4 x float> @test_insertps(<4 x float> %a0, <4 x float> %a1, float *%a2)
; SANDY-LABEL: test_insertps: ; SANDY-LABEL: test_insertps:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] ; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [5:1.00] ; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_insertps: ; HASWELL-LABEL: test_insertps:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] ; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [5:1.00] ; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_insertps: ; BTVER2-LABEL: test_insertps:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -296,13 +296,13 @@ define <2 x i64> @test_movntdqa(i8* %a0) {
; ;
; SANDY-LABEL: test_movntdqa: ; SANDY-LABEL: test_movntdqa:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [4:0.50] ; SANDY-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_movntdqa: ; HASWELL-LABEL: test_movntdqa:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [4:0.50] ; HASWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [?:5.000000e-01]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_movntdqa: ; BTVER2-LABEL: test_movntdqa:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -328,15 +328,15 @@ define <8 x i16> @test_mpsadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; ;
; SANDY-LABEL: test_mpsadbw: ; SANDY-LABEL: test_mpsadbw:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [6:1.00] ; SANDY-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; SANDY-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_mpsadbw: ; HASWELL-LABEL: test_mpsadbw:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [6:2.00] ; HASWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00]
; HASWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [6:2.00] ; HASWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_mpsadbw: ; BTVER2-LABEL: test_mpsadbw:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -367,14 +367,14 @@ define <8 x i16> @test_packusdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SANDY-LABEL: test_packusdw: ; SANDY-LABEL: test_packusdw:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_packusdw: ; HASWELL-LABEL: test_packusdw:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_packusdw: ; BTVER2-LABEL: test_packusdw:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -411,14 +411,14 @@ define <16 x i8> @test_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2, <16
; SANDY-LABEL: test_pblendvb: ; SANDY-LABEL: test_pblendvb:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; SANDY-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SANDY-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; SANDY-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pblendvb: ; HASWELL-LABEL: test_pblendvb:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] ; HASWELL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:2.00] ; HASWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pblendvb: ; BTVER2-LABEL: test_pblendvb:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -448,14 +448,14 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SANDY-LABEL: test_pblendw: ; SANDY-LABEL: test_pblendw:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50] ; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [5:0.50] ; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [7:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pblendw: ; HASWELL-LABEL: test_pblendw:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] ; HASWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
; HASWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [4:1.00] ; HASWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pblendw: ; BTVER2-LABEL: test_pblendw:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -484,14 +484,14 @@ define <2 x i64> @test_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SANDY-LABEL: test_pcmpeqq: ; SANDY-LABEL: test_pcmpeqq:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pcmpeqq: ; HASWELL-LABEL: test_pcmpeqq:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pcmpeqq: ; BTVER2-LABEL: test_pcmpeqq:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -521,15 +521,15 @@ define i32 @test_pextrb(<16 x i8> %a0, i8 *%a1) {
; ;
; SANDY-LABEL: test_pextrb: ; SANDY-LABEL: test_pextrb:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpextrb $3, %xmm0, %eax # sched: [1:0.50] ; SANDY-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00]
; SANDY-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pextrb: ; HASWELL-LABEL: test_pextrb:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [1:1.00] ; HASWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:1.00]
; HASWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00] ; HASWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pextrb: ; BTVER2-LABEL: test_pextrb:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -558,15 +558,15 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) {
; ;
; SANDY-LABEL: test_pextrd: ; SANDY-LABEL: test_pextrd:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpextrd $3, %xmm0, %eax # sched: [1:0.50] ; SANDY-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00]
; SANDY-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pextrd: ; HASWELL-LABEL: test_pextrd:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [1:1.00] ; HASWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [2:1.00]
; HASWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00] ; HASWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pextrd: ; BTVER2-LABEL: test_pextrd:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -594,15 +594,15 @@ define i64 @test_pextrq(<2 x i64> %a0, <2 x i64> %a1, i64 *%a2) {
; ;
; SANDY-LABEL: test_pextrq: ; SANDY-LABEL: test_pextrq:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpextrq $1, %xmm0, %rax # sched: [1:0.50] ; SANDY-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00]
; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pextrq: ; HASWELL-LABEL: test_pextrq:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [1:1.00] ; HASWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:1.00]
; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00] ; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pextrq: ; BTVER2-LABEL: test_pextrq:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -630,15 +630,15 @@ define i32 @test_pextrw(<8 x i16> %a0, i16 *%a1) {
; ;
; SANDY-LABEL: test_pextrw: ; SANDY-LABEL: test_pextrw:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpextrw $3, %xmm0, %eax # sched: [1:0.50] ; SANDY-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00]
; SANDY-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pextrw: ; HASWELL-LABEL: test_pextrw:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [1:1.00] ; HASWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:1.00]
; HASWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00] ; HASWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pextrw: ; BTVER2-LABEL: test_pextrw:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -667,15 +667,15 @@ define <8 x i16> @test_phminposuw(<8 x i16> *%a0) {
; ;
; SANDY-LABEL: test_phminposuw: ; SANDY-LABEL: test_phminposuw:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vphminposuw (%rdi), %xmm0 # sched: [9:1.00] ; SANDY-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00]
; SANDY-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_phminposuw: ; HASWELL-LABEL: test_phminposuw:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [9:1.00] ; HASWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_phminposuw: ; BTVER2-LABEL: test_phminposuw:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -704,15 +704,15 @@ define <16 x i8> @test_pinsrb(<16 x i8> %a0, i8 %a1, i8 *%a2) {
; ;
; SANDY-LABEL: test_pinsrb: ; SANDY-LABEL: test_pinsrb:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
; SANDY-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pinsrb: ; HASWELL-LABEL: test_pinsrb:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pinsrb: ; BTVER2-LABEL: test_pinsrb:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -740,15 +740,15 @@ define <4 x i32> @test_pinsrd(<4 x i32> %a0, i32 %a1, i32 *%a2) {
; ;
; SANDY-LABEL: test_pinsrd: ; SANDY-LABEL: test_pinsrd:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
; SANDY-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pinsrd: ; HASWELL-LABEL: test_pinsrd:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pinsrd: ; BTVER2-LABEL: test_pinsrd:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -778,17 +778,17 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) {
; ;
; SANDY-LABEL: test_pinsrq: ; SANDY-LABEL: test_pinsrq:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:1.00]
; SANDY-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [5:0.50] ; SANDY-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [7:0.50]
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pinsrq: ; HASWELL-LABEL: test_pinsrq:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [1:1.00]
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pinsrq: ; BTVER2-LABEL: test_pinsrq:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -819,14 +819,14 @@ define <16 x i8> @test_pmaxsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SANDY-LABEL: test_pmaxsb: ; SANDY-LABEL: test_pmaxsb:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmaxsb: ; HASWELL-LABEL: test_pmaxsb:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pmaxsb: ; BTVER2-LABEL: test_pmaxsb:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -856,14 +856,14 @@ define <4 x i32> @test_pmaxsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SANDY-LABEL: test_pmaxsd: ; SANDY-LABEL: test_pmaxsd:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmaxsd: ; HASWELL-LABEL: test_pmaxsd:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pmaxsd: ; BTVER2-LABEL: test_pmaxsd:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -893,14 +893,14 @@ define <4 x i32> @test_pmaxud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SANDY-LABEL: test_pmaxud: ; SANDY-LABEL: test_pmaxud:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmaxud: ; HASWELL-LABEL: test_pmaxud:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pmaxud: ; BTVER2-LABEL: test_pmaxud:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -930,14 +930,14 @@ define <8 x i16> @test_pmaxuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SANDY-LABEL: test_pmaxuw: ; SANDY-LABEL: test_pmaxuw:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmaxuw: ; HASWELL-LABEL: test_pmaxuw:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pmaxuw: ; BTVER2-LABEL: test_pmaxuw:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -967,14 +967,14 @@ define <16 x i8> @test_pminsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SANDY-LABEL: test_pminsb: ; SANDY-LABEL: test_pminsb:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pminsb: ; HASWELL-LABEL: test_pminsb:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pminsb: ; BTVER2-LABEL: test_pminsb:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -1004,14 +1004,14 @@ define <4 x i32> @test_pminsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SANDY-LABEL: test_pminsd: ; SANDY-LABEL: test_pminsd:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pminsd: ; HASWELL-LABEL: test_pminsd:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pminsd: ; BTVER2-LABEL: test_pminsd:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -1041,14 +1041,14 @@ define <4 x i32> @test_pminud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SANDY-LABEL: test_pminud: ; SANDY-LABEL: test_pminud:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pminud: ; HASWELL-LABEL: test_pminud:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pminud: ; BTVER2-LABEL: test_pminud:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -1078,14 +1078,14 @@ define <8 x i16> @test_pminuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SANDY-LABEL: test_pminuw: ; SANDY-LABEL: test_pminuw:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pminuw: ; HASWELL-LABEL: test_pminuw:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pminuw: ; BTVER2-LABEL: test_pminuw:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -1118,16 +1118,16 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) {
; SANDY-LABEL: test_pmovsxbw: ; SANDY-LABEL: test_pmovsxbw:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [5:0.50] ; SANDY-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [7:0.50]
; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmovsxbw: ; HASWELL-LABEL: test_pmovsxbw:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [1:1.00]
; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pmovsxbw: ; BTVER2-LABEL: test_pmovsxbw:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -1162,16 +1162,16 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) {
; SANDY-LABEL: test_pmovsxbd: ; SANDY-LABEL: test_pmovsxbd:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [5:0.50] ; SANDY-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [7:0.50]
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmovsxbd: ; HASWELL-LABEL: test_pmovsxbd:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [1:1.00]
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pmovsxbd: ; BTVER2-LABEL: test_pmovsxbd:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -1206,16 +1206,16 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) {
; SANDY-LABEL: test_pmovsxbq: ; SANDY-LABEL: test_pmovsxbq:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [5:0.50] ; SANDY-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [7:0.50]
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmovsxbq: ; HASWELL-LABEL: test_pmovsxbq:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [1:1.00]
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pmovsxbq: ; BTVER2-LABEL: test_pmovsxbq:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -1250,16 +1250,16 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) {
; SANDY-LABEL: test_pmovsxdq: ; SANDY-LABEL: test_pmovsxdq:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [5:0.50] ; SANDY-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [7:0.50]
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmovsxdq: ; HASWELL-LABEL: test_pmovsxdq:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [1:1.00]
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pmovsxdq: ; BTVER2-LABEL: test_pmovsxdq:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -1294,16 +1294,16 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) {
; SANDY-LABEL: test_pmovsxwd: ; SANDY-LABEL: test_pmovsxwd:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [5:0.50] ; SANDY-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [7:0.50]
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmovsxwd: ; HASWELL-LABEL: test_pmovsxwd:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [1:1.00]
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pmovsxwd: ; BTVER2-LABEL: test_pmovsxwd:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -1338,16 +1338,16 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) {
; SANDY-LABEL: test_pmovsxwq: ; SANDY-LABEL: test_pmovsxwq:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [5:0.50] ; SANDY-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [7:0.50]
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmovsxwq: ; HASWELL-LABEL: test_pmovsxwq:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [1:1.00]
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pmovsxwq: ; BTVER2-LABEL: test_pmovsxwq:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -1382,16 +1382,16 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) {
; SANDY-LABEL: test_pmovzxbw: ; SANDY-LABEL: test_pmovzxbw:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] ; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:0.50] ; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50]
; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmovzxbw: ; HASWELL-LABEL: test_pmovzxbw:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] ; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00] ; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [1:1.00]
; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pmovzxbw: ; BTVER2-LABEL: test_pmovzxbw:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -1426,16 +1426,16 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) {
; SANDY-LABEL: test_pmovzxbd: ; SANDY-LABEL: test_pmovzxbd:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] ; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:0.50] ; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50]
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmovzxbd: ; HASWELL-LABEL: test_pmovzxbd:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] ; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:1.00] ; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [1:1.00]
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pmovzxbd: ; BTVER2-LABEL: test_pmovzxbd:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -1470,16 +1470,16 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) {
; SANDY-LABEL: test_pmovzxbq: ; SANDY-LABEL: test_pmovzxbq:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50] ; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [5:0.50] ; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50]
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmovzxbq: ; HASWELL-LABEL: test_pmovzxbq:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] ; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00] ; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pmovzxbq: ; BTVER2-LABEL: test_pmovzxbq:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -1514,16 +1514,16 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) {
; SANDY-LABEL: test_pmovzxdq: ; SANDY-LABEL: test_pmovzxdq:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50] ; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [5:0.50] ; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [7:0.50]
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmovzxdq: ; HASWELL-LABEL: test_pmovzxdq:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] ; HASWELL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
; HASWELL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [5:1.00] ; HASWELL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [1:1.00]
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pmovzxdq: ; BTVER2-LABEL: test_pmovzxdq:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -1558,16 +1558,16 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) {
; SANDY-LABEL: test_pmovzxwd: ; SANDY-LABEL: test_pmovzxwd:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50] ; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:0.50] ; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50]
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmovzxwd: ; HASWELL-LABEL: test_pmovzxwd:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] ; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:1.00] ; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [1:1.00]
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pmovzxwd: ; BTVER2-LABEL: test_pmovzxwd:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -1602,16 +1602,16 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) {
; SANDY-LABEL: test_pmovzxwq: ; SANDY-LABEL: test_pmovzxwq:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50] ; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [5:0.50] ; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50]
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmovzxwq: ; HASWELL-LABEL: test_pmovzxwq:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] ; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [5:1.00] ; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [1:1.00]
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pmovzxwq: ; BTVER2-LABEL: test_pmovzxwq:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -1642,15 +1642,15 @@ define <2 x i64> @test_pmuldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; ;
; SANDY-LABEL: test_pmuldq: ; SANDY-LABEL: test_pmuldq:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmuldq: ; HASWELL-LABEL: test_pmuldq:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; HASWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pmuldq: ; BTVER2-LABEL: test_pmuldq:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -1680,15 +1680,15 @@ define <4 x i32> @test_pmulld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; ;
; SANDY-LABEL: test_pmulld: ; SANDY-LABEL: test_pmulld:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmulld: ; HASWELL-LABEL: test_pmulld:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00] ; HASWELL-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00]
; HASWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [10:2.00] ; HASWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pmulld: ; BTVER2-LABEL: test_pmulld:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -1724,23 +1724,23 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; ;
; SANDY-LABEL: test_ptest: ; SANDY-LABEL: test_ptest:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vptest %xmm1, %xmm0 # sched: [1:0.33] ; SANDY-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00]
; SANDY-NEXT: setb %al # sched: [1:0.33] ; SANDY-NEXT: setb %al # sched: [1:1.00]
; SANDY-NEXT: vptest (%rdi), %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vptest (%rdi), %xmm0 # sched: [8:1.00]
; SANDY-NEXT: setb %cl # sched: [1:0.33] ; SANDY-NEXT: setb %cl # sched: [1:1.00]
; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] ; SANDY-NEXT: andb %al, %cl # sched: [1:0.33]
; SANDY-NEXT: movzbl %cl, %eax # sched: [1:0.33] ; SANDY-NEXT: movzbl %cl, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_ptest: ; HASWELL-LABEL: test_ptest:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00] ; HASWELL-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00]
; HASWELL-NEXT: setb %al # sched: [1:0.50] ; HASWELL-NEXT: setb %al # sched: [1:1.00]
; HASWELL-NEXT: vptest (%rdi), %xmm0 # sched: [2:1.00] ; HASWELL-NEXT: vptest (%rdi), %xmm0 # sched: [2:1.00]
; HASWELL-NEXT: setb %cl # sched: [1:0.50] ; HASWELL-NEXT: setb %cl # sched: [1:1.00]
; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25] ; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25]
; HASWELL-NEXT: movzbl %cl, %eax # sched: [1:0.25] ; HASWELL-NEXT: movzbl %cl, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_ptest: ; BTVER2-LABEL: test_ptest:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -1778,16 +1778,16 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) {
; SANDY-LABEL: test_roundpd: ; SANDY-LABEL: test_roundpd:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [7:1.00] ; SANDY-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [9:1.00]
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_roundpd: ; HASWELL-LABEL: test_roundpd:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [6:2.00] ; HASWELL-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [10:2.00] ; HASWELL-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [6:1.00]
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_roundpd: ; BTVER2-LABEL: test_roundpd:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -1822,16 +1822,16 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) {
; SANDY-LABEL: test_roundps: ; SANDY-LABEL: test_roundps:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [7:1.00] ; SANDY-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [9:1.00]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_roundps: ; HASWELL-LABEL: test_roundps:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [6:2.00] ; HASWELL-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [10:2.00] ; HASWELL-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [6:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_roundps: ; BTVER2-LABEL: test_roundps:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -1867,16 +1867,16 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
; SANDY-LABEL: test_roundsd: ; SANDY-LABEL: test_roundsd:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00] ; SANDY-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; SANDY-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_roundsd: ; HASWELL-LABEL: test_roundsd:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [6:2.00] ; HASWELL-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [5:2.00]
; HASWELL-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00] ; HASWELL-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_roundsd: ; BTVER2-LABEL: test_roundsd:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -1912,16 +1912,16 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> *
; SANDY-LABEL: test_roundss: ; SANDY-LABEL: test_roundss:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00] ; SANDY-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; SANDY-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_roundss: ; HASWELL-LABEL: test_roundss:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [6:2.00] ; HASWELL-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; HASWELL-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00] ; HASWELL-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_roundss: ; BTVER2-LABEL: test_roundss:
; BTVER2: # BB#0: ; BTVER2: # BB#0:

View File

@ -26,16 +26,16 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) {
; SANDY-LABEL: crc32_32_8: ; SANDY-LABEL: crc32_32_8:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00] ; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00]
; SANDY-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] ; SANDY-NEXT: crc32b (%rdx), %edi # sched: [8:1.00]
; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: crc32_32_8: ; HASWELL-LABEL: crc32_32_8:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] ; HASWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00]
; HASWELL-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] ; HASWELL-NEXT: crc32b (%rdx), %edi # sched: [7:1.00]
; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: crc32_32_8: ; BTVER2-LABEL: crc32_32_8:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -68,16 +68,16 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) {
; SANDY-LABEL: crc32_32_16: ; SANDY-LABEL: crc32_32_16:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: crc32w %si, %edi # sched: [3:1.00] ; SANDY-NEXT: crc32w %si, %edi # sched: [3:1.00]
; SANDY-NEXT: crc32w (%rdx), %edi # sched: [7:1.00] ; SANDY-NEXT: crc32w (%rdx), %edi # sched: [8:1.00]
; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: crc32_32_16: ; HASWELL-LABEL: crc32_32_16:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: crc32w %si, %edi # sched: [3:1.00] ; HASWELL-NEXT: crc32w %si, %edi # sched: [3:1.00]
; HASWELL-NEXT: crc32w (%rdx), %edi # sched: [7:1.00] ; HASWELL-NEXT: crc32w (%rdx), %edi # sched: [7:1.00]
; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: crc32_32_16: ; BTVER2-LABEL: crc32_32_16:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -112,14 +112,14 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) {
; SANDY-NEXT: crc32l %esi, %edi # sched: [3:1.00] ; SANDY-NEXT: crc32l %esi, %edi # sched: [3:1.00]
; SANDY-NEXT: crc32l (%rdx), %edi # sched: [7:1.00] ; SANDY-NEXT: crc32l (%rdx), %edi # sched: [7:1.00]
; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: crc32_32_32: ; HASWELL-LABEL: crc32_32_32:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: crc32l %esi, %edi # sched: [3:1.00] ; HASWELL-NEXT: crc32l %esi, %edi # sched: [3:1.00]
; HASWELL-NEXT: crc32l (%rdx), %edi # sched: [7:1.00] ; HASWELL-NEXT: crc32l (%rdx), %edi # sched: [7:1.00]
; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: crc32_32_32: ; BTVER2-LABEL: crc32_32_32:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -152,16 +152,16 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind {
; SANDY-LABEL: crc32_64_8: ; SANDY-LABEL: crc32_64_8:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00] ; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00]
; SANDY-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] ; SANDY-NEXT: crc32b (%rdx), %edi # sched: [8:1.00]
; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33] ; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: crc32_64_8: ; HASWELL-LABEL: crc32_64_8:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] ; HASWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00]
; HASWELL-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] ; HASWELL-NEXT: crc32b (%rdx), %edi # sched: [7:1.00]
; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] ; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: crc32_64_8: ; BTVER2-LABEL: crc32_64_8:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -196,14 +196,14 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) {
; SANDY-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] ; SANDY-NEXT: crc32q %rsi, %rdi # sched: [3:1.00]
; SANDY-NEXT: crc32q (%rdx), %rdi # sched: [7:1.00] ; SANDY-NEXT: crc32q (%rdx), %rdi # sched: [7:1.00]
; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33] ; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: crc32_64_64: ; HASWELL-LABEL: crc32_64_64:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] ; HASWELL-NEXT: crc32q %rsi, %rdi # sched: [3:1.00]
; HASWELL-NEXT: crc32q (%rdx), %rdi # sched: [7:1.00] ; HASWELL-NEXT: crc32q (%rdx), %rdi # sched: [7:1.00]
; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] ; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: crc32_64_64: ; BTVER2-LABEL: crc32_64_64:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -256,20 +256,20 @@ define i32 @test_pcmpestri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SANDY-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33] ; SANDY-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33]
; SANDY-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def> ; SANDY-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; SANDY-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] ; SANDY-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pcmpestri: ; HASWELL-LABEL: test_pcmpestri:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25] ; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25]
; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25] ; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25]
; HASWELL-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [11:3.00] ; HASWELL-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00]
; HASWELL-NEXT: movl %ecx, %esi # sched: [1:0.25] ; HASWELL-NEXT: movl %ecx, %esi # sched: [1:0.25]
; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25] ; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25]
; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25] ; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25]
; HASWELL-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [11:3.00] ; HASWELL-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [18:4.00]
; HASWELL-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def> ; HASWELL-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; HASWELL-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] ; HASWELL-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pcmpestri: ; BTVER2-LABEL: test_pcmpestri:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -320,17 +320,17 @@ define <16 x i8> @test_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SANDY-NEXT: movl $7, %eax # sched: [1:0.33] ; SANDY-NEXT: movl $7, %eax # sched: [1:0.33]
; SANDY-NEXT: movl $7, %edx # sched: [1:0.33] ; SANDY-NEXT: movl $7, %edx # sched: [1:0.33]
; SANDY-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33] ; SANDY-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pcmpestrm: ; HASWELL-LABEL: test_pcmpestrm:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25] ; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25]
; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25] ; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25]
; HASWELL-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [10:4.00] ; HASWELL-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00]
; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25] ; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25]
; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25] ; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25]
; HASWELL-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [10:3.00] ; HASWELL-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [19:4.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pcmpestrm: ; BTVER2-LABEL: test_pcmpestrm:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -369,12 +369,12 @@ define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; ;
; SANDY-LABEL: test_pcmpistri: ; SANDY-LABEL: test_pcmpistri:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
; SANDY-NEXT: movl %ecx, %eax # sched: [1:0.33] ; SANDY-NEXT: movl %ecx, %eax # sched: [1:0.33]
; SANDY-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00]
; SANDY-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def> ; SANDY-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; SANDY-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] ; SANDY-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pcmpistri: ; HASWELL-LABEL: test_pcmpistri:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
@ -383,7 +383,7 @@ define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; HASWELL-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [11:3.00] ; HASWELL-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [11:3.00]
; HASWELL-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def> ; HASWELL-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; HASWELL-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] ; HASWELL-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pcmpistri: ; BTVER2-LABEL: test_pcmpistri:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -416,15 +416,15 @@ define <16 x i8> @test_pcmpistrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; ;
; SANDY-LABEL: test_pcmpistrm: ; SANDY-LABEL: test_pcmpistrm:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:1.00] ; SANDY-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
; SANDY-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [11:1.00] ; SANDY-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pcmpistrm: ; HASWELL-LABEL: test_pcmpistrm:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00] ; HASWELL-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
; HASWELL-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [10:3.00] ; HASWELL-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [11:3.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pcmpistrm: ; BTVER2-LABEL: test_pcmpistrm:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -453,15 +453,15 @@ define <2 x i64> @test_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; ;
; SANDY-LABEL: test_pcmpgtq: ; SANDY-LABEL: test_pcmpgtq:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pcmpgtq: ; HASWELL-LABEL: test_pcmpgtq:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pcmpgtq: ; BTVER2-LABEL: test_pcmpgtq:
; BTVER2: # BB#0: ; BTVER2: # BB#0:

View File

@ -35,16 +35,16 @@ define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) {
; SANDY-LABEL: test_pabsb: ; SANDY-LABEL: test_pabsb:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpabsb (%rdi), %xmm1 # sched: [5:0.50] ; SANDY-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50]
; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pabsb: ; HASWELL-LABEL: test_pabsb:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpabsb (%rdi), %xmm1 # sched: [5:0.50] ; HASWELL-NEXT: vpabsb (%rdi), %xmm1 # sched: [1:0.50]
; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pabsb: ; BTVER2-LABEL: test_pabsb:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -86,16 +86,16 @@ define <4 x i32> @test_pabsd(<4 x i32> %a0, <4 x i32> *%a1) {
; SANDY-LABEL: test_pabsd: ; SANDY-LABEL: test_pabsd:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpabsd (%rdi), %xmm1 # sched: [5:0.50] ; SANDY-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50]
; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pabsd: ; HASWELL-LABEL: test_pabsd:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpabsd (%rdi), %xmm1 # sched: [5:0.50] ; HASWELL-NEXT: vpabsd (%rdi), %xmm1 # sched: [1:0.50]
; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pabsd: ; BTVER2-LABEL: test_pabsd:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -136,12 +136,12 @@ define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) {
; SANDY-LABEL: test_pabsw: ; SANDY-LABEL: test_pabsw:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pabsw: ; HASWELL-LABEL: test_pabsw:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pabsw: ; BTVER2-LABEL: test_pabsw:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -182,14 +182,14 @@ define <8 x i16> @test_palignr(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SANDY-LABEL: test_palignr: ; SANDY-LABEL: test_palignr:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50] ; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [5:0.50] ; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_palignr: ; HASWELL-LABEL: test_palignr:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] ; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [5:1.00] ; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_palignr: ; BTVER2-LABEL: test_palignr:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -223,15 +223,15 @@ define <4 x i32> @test_phaddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; ;
; SANDY-LABEL: test_phaddd: ; SANDY-LABEL: test_phaddd:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
; SANDY-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_phaddd: ; HASWELL-LABEL: test_phaddd:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [6:2.00] ; HASWELL-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_phaddd: ; BTVER2-LABEL: test_phaddd:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -274,15 +274,15 @@ define <8 x i16> @test_phaddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; ;
; SANDY-LABEL: test_phaddsw: ; SANDY-LABEL: test_phaddsw:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
; SANDY-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_phaddsw: ; HASWELL-LABEL: test_phaddsw:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [6:2.00] ; HASWELL-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_phaddsw: ; BTVER2-LABEL: test_phaddsw:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -317,15 +317,15 @@ define <8 x i16> @test_phaddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; ;
; SANDY-LABEL: test_phaddw: ; SANDY-LABEL: test_phaddw:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
; SANDY-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_phaddw: ; HASWELL-LABEL: test_phaddw:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [6:2.00] ; HASWELL-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_phaddw: ; BTVER2-LABEL: test_phaddw:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -360,15 +360,15 @@ define <4 x i32> @test_phsubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; ;
; SANDY-LABEL: test_phsubd: ; SANDY-LABEL: test_phsubd:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
; SANDY-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_phsubd: ; HASWELL-LABEL: test_phsubd:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [6:2.00] ; HASWELL-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_phsubd: ; BTVER2-LABEL: test_phsubd:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -411,15 +411,15 @@ define <8 x i16> @test_phsubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; ;
; SANDY-LABEL: test_phsubsw: ; SANDY-LABEL: test_phsubsw:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
; SANDY-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_phsubsw: ; HASWELL-LABEL: test_phsubsw:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [6:2.00] ; HASWELL-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_phsubsw: ; BTVER2-LABEL: test_phsubsw:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -454,15 +454,15 @@ define <8 x i16> @test_phsubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; ;
; SANDY-LABEL: test_phsubw: ; SANDY-LABEL: test_phsubw:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
; SANDY-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_phsubw: ; HASWELL-LABEL: test_phsubw:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [6:2.00] ; HASWELL-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_phsubw: ; BTVER2-LABEL: test_phsubw:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -497,15 +497,15 @@ define <8 x i16> @test_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; ;
; SANDY-LABEL: test_pmaddubsw: ; SANDY-LABEL: test_pmaddubsw:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmaddubsw: ; HASWELL-LABEL: test_pmaddubsw:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; HASWELL-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pmaddubsw: ; BTVER2-LABEL: test_pmaddubsw:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -538,13 +538,13 @@ define <8 x i16> @test_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; ;
; SANDY-LABEL: test_pmulhrsw: ; SANDY-LABEL: test_pmulhrsw:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pmulhrsw: ; HASWELL-LABEL: test_pmulhrsw:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pmulhrsw: ; BTVER2-LABEL: test_pmulhrsw:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -579,14 +579,14 @@ define <16 x i8> @test_pshufb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SANDY-LABEL: test_pshufb: ; SANDY-LABEL: test_pshufb:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_pshufb: ; HASWELL-LABEL: test_pshufb:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_pshufb: ; BTVER2-LABEL: test_pshufb:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -630,14 +630,14 @@ define <16 x i8> @test_psignb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SANDY-LABEL: test_psignb: ; SANDY-LABEL: test_psignb:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_psignb: ; HASWELL-LABEL: test_psignb:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_psignb: ; BTVER2-LABEL: test_psignb:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -681,14 +681,14 @@ define <4 x i32> @test_psignd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SANDY-LABEL: test_psignd: ; SANDY-LABEL: test_psignd:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_psignd: ; HASWELL-LABEL: test_psignd:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_psignd: ; BTVER2-LABEL: test_psignd:
; BTVER2: # BB#0: ; BTVER2: # BB#0:
@ -732,14 +732,14 @@ define <8 x i16> @test_psignw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SANDY-LABEL: test_psignw: ; SANDY-LABEL: test_psignw:
; SANDY: # BB#0: ; SANDY: # BB#0:
; SANDY-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00]
; ;
; HASWELL-LABEL: test_psignw: ; HASWELL-LABEL: test_psignw:
; HASWELL: # BB#0: ; HASWELL: # BB#0:
; HASWELL-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00]
; ;
; BTVER2-LABEL: test_psignw: ; BTVER2-LABEL: test_psignw:
; BTVER2: # BB#0: ; BTVER2: # BB#0:

View File

@ -201,14 +201,14 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-NEXT: vpsraw $2, %ymm0, %ymm5 ; AVX512DQ-NEXT: vpsraw $2, %ymm0, %ymm5
; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm8 ; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm8
; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm5, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX512DQ-NEXT: vpsraw $4, %ymm5, %ymm9
; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm9, %ymm5, %ymm3
; AVX512DQ-NEXT: vpsraw $1, %ymm0, %ymm5 ; AVX512DQ-NEXT: vpsraw $1, %ymm0, %ymm5
; AVX512DQ-NEXT: vpaddw %ymm8, %ymm8, %ymm9 ; AVX512DQ-NEXT: vpaddw %ymm8, %ymm8, %ymm9
; AVX512DQ-NEXT: vpblendvb %ymm9, %ymm5, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpblendvb %ymm9, %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512DQ-NEXT: vpackuswb %ymm4, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpackuswb %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX512DQ-NEXT: vpsraw $4, %ymm4, %ymm5
; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm3
; AVX512DQ-NEXT: vpsraw $2, %ymm3, %ymm4 ; AVX512DQ-NEXT: vpsraw $2, %ymm3, %ymm4
; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3 ; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT: vpsraw $1, %ymm3, %ymm4 ; AVX512DQ-NEXT: vpsraw $1, %ymm3, %ymm4
@ -328,14 +328,14 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-NEXT: vpsraw $2, %ymm0, %ymm5 ; AVX512DQ-NEXT: vpsraw $2, %ymm0, %ymm5
; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm8 ; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm8
; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm5, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX512DQ-NEXT: vpsraw $4, %ymm5, %ymm9
; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm9, %ymm5, %ymm3
; AVX512DQ-NEXT: vpsraw $1, %ymm0, %ymm5 ; AVX512DQ-NEXT: vpsraw $1, %ymm0, %ymm5
; AVX512DQ-NEXT: vpaddw %ymm8, %ymm8, %ymm9 ; AVX512DQ-NEXT: vpaddw %ymm8, %ymm8, %ymm9
; AVX512DQ-NEXT: vpblendvb %ymm9, %ymm5, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpblendvb %ymm9, %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512DQ-NEXT: vpackuswb %ymm4, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpackuswb %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; AVX512DQ-NEXT: vpsraw $4, %ymm4, %ymm5
; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm3
; AVX512DQ-NEXT: vpsraw $2, %ymm3, %ymm4 ; AVX512DQ-NEXT: vpsraw $2, %ymm3, %ymm4
; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3 ; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT: vpsraw $1, %ymm3, %ymm4 ; AVX512DQ-NEXT: vpsraw $1, %ymm3, %ymm4

View File

@ -68,13 +68,13 @@ define <32 x i16> @shuffle_v32i16_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_1
; KNL-NEXT: vpshufb {{.*#+}} xmm4 = xmm1[8,9,12,13,12,13,10,11,0,1,4,5,4,5,0,1] ; KNL-NEXT: vpshufb {{.*#+}} xmm4 = xmm1[8,9,12,13,12,13,10,11,0,1,4,5,4,5,0,1]
; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,1,0,3] ; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,1,0,3]
; KNL-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[0,3,2,2,4,5,6,7] ; KNL-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[0,3,2,2,4,5,6,7]
; KNL-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm1 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm5 ; KNL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; KNL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,10,11,8,9,14,15,4,5,2,3,2,3,6,7] ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,10,11,8,9,14,15,4,5,2,3,2,3,6,7]
; KNL-NEXT: vpshufb {{.*#+}} xmm5 = xmm5[6,7,2,3,4,5,6,7,2,3,2,3,0,1,14,15] ; KNL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[6,7,2,3,4,5,6,7,2,3,2,3,0,1,14,15]
; KNL-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0 ; KNL-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm5
; KNL-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm5[1],ymm0[2],ymm5[3],ymm0[4],ymm5[5],ymm0[6],ymm5[7],ymm0[8],ymm5[9],ymm0[10],ymm5[11],ymm0[12],ymm5[13],ymm0[14],ymm5[15]
; KNL-NEXT: vextracti128 $1, %ymm3, %xmm3 ; KNL-NEXT: vextracti128 $1, %ymm3, %xmm3
; KNL-NEXT: vpbroadcastw %xmm3, %ymm3 ; KNL-NEXT: vpbroadcastw %xmm3, %ymm3
; KNL-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0] ; KNL-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0]