forked from OSchip/llvm-project
Updated and extended the information about each instruction in HSW and SNB to include the following data:
• static latency • number of uOps the instruction consists of • all ports used by the instruction. Reviewers: RKSimon, zvi, aymanmus, m_zuckerman. Differential Revision: https://reviews.llvm.org/D33897 llvm-svn: 306414
This commit is contained in:
parent
a179d25b99
commit
13759a7ed6
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1619,10 +1619,10 @@ define <8 x float> @test_gather_mask(<8 x float> %a0, float* %a, <8 x i32> %idx
|
||||||
;
|
;
|
||||||
; AVX512VL-LABEL: test_gather_mask:
|
; AVX512VL-LABEL: test_gather_mask:
|
||||||
; AVX512VL: ## BB#0:
|
; AVX512VL: ## BB#0:
|
||||||
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
|
|
||||||
; AVX512VL-NEXT: vmovaps %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xda]
|
|
||||||
; AVX512VL-NEXT: vgatherdps %ymm3, (%eax,%ymm1,4), %ymm0 ## encoding: [0xc4,0xe2,0x65,0x92,0x04,0x88]
|
|
||||||
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
|
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
|
||||||
|
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
|
||||||
|
; AVX512VL-NEXT: vmovaps %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xda]
|
||||||
|
; AVX512VL-NEXT: vgatherdps %ymm3, (%ecx,%ymm1,4), %ymm0 ## encoding: [0xc4,0xe2,0x65,0x92,0x04,0x89]
|
||||||
; AVX512VL-NEXT: vmovups %ymm2, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x10]
|
; AVX512VL-NEXT: vmovups %ymm2, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x10]
|
||||||
; AVX512VL-NEXT: retl ## encoding: [0xc3]
|
; AVX512VL-NEXT: retl ## encoding: [0xc3]
|
||||||
%a_i8 = bitcast float* %a to i8*
|
%a_i8 = bitcast float* %a to i8*
|
||||||
|
|
|
@ -9,7 +9,7 @@ define <32 x i8> @test_pabsb(<32 x i8> %a0, <32 x i8> *%a1) {
|
||||||
; HASWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [5:0.50]
|
; HASWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
|
; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; ZNVER1-LABEL: test_pabsb:
|
; ZNVER1-LABEL: test_pabsb:
|
||||||
; ZNVER1: # BB#0:
|
; ZNVER1: # BB#0:
|
||||||
|
@ -29,9 +29,9 @@ define <8 x i32> @test_pabsd(<8 x i32> %a0, <8 x i32> *%a1) {
|
||||||
; HASWELL-LABEL: test_pabsd:
|
; HASWELL-LABEL: test_pabsd:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [5:0.50]
|
; HASWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
|
; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; ZNVER1-LABEL: test_pabsd:
|
; ZNVER1-LABEL: test_pabsd:
|
||||||
; ZNVER1: # BB#0:
|
; ZNVER1: # BB#0:
|
||||||
|
@ -51,9 +51,9 @@ define <16 x i16> @test_pabsw(<16 x i16> %a0, <16 x i16> *%a1) {
|
||||||
; HASWELL-LABEL: test_pabsw:
|
; HASWELL-LABEL: test_pabsw:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [5:0.50]
|
; HASWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
|
; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; ZNVER1-LABEL: test_pabsw:
|
; ZNVER1-LABEL: test_pabsw:
|
||||||
; ZNVER1: # BB#0:
|
; ZNVER1: # BB#0:
|
||||||
|
@ -74,7 +74,7 @@ define <32 x i8> @test_paddb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
|
; HASWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; ZNVER1-LABEL: test_paddb:
|
; ZNVER1-LABEL: test_paddb:
|
||||||
; ZNVER1: # BB#0:
|
; ZNVER1: # BB#0:
|
||||||
|
@ -92,7 +92,7 @@ define <8 x i32> @test_paddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
|
; HASWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; ZNVER1-LABEL: test_paddd:
|
; ZNVER1-LABEL: test_paddd:
|
||||||
; ZNVER1: # BB#0:
|
; ZNVER1: # BB#0:
|
||||||
|
@ -109,8 +109,8 @@ define <4 x i64> @test_paddq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
|
||||||
; HASWELL-LABEL: test_paddq:
|
; HASWELL-LABEL: test_paddq:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
|
; HASWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; ZNVER1-LABEL: test_paddq:
|
; ZNVER1-LABEL: test_paddq:
|
||||||
; ZNVER1: # BB#0:
|
; ZNVER1: # BB#0:
|
||||||
|
@ -128,7 +128,7 @@ define <16 x i16> @test_paddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
|
; HASWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; ZNVER1-LABEL: test_paddw:
|
; ZNVER1-LABEL: test_paddw:
|
||||||
; ZNVER1: # BB#0:
|
; ZNVER1: # BB#0:
|
||||||
|
@ -145,9 +145,9 @@ define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
|
||||||
; HASWELL-LABEL: test_pand:
|
; HASWELL-LABEL: test_pand:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
|
; HASWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
|
||||||
; HASWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
|
; HASWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; ZNVER1-LABEL: test_pand:
|
; ZNVER1-LABEL: test_pand:
|
||||||
; ZNVER1: # BB#0:
|
; ZNVER1: # BB#0:
|
||||||
|
@ -166,9 +166,9 @@ define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
|
||||||
; HASWELL-LABEL: test_pandn:
|
; HASWELL-LABEL: test_pandn:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
|
; HASWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
|
||||||
; HASWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [5:0.50]
|
; HASWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; ZNVER1-LABEL: test_pandn:
|
; ZNVER1-LABEL: test_pandn:
|
||||||
; ZNVER1: # BB#0:
|
; ZNVER1: # BB#0:
|
||||||
|
@ -190,7 +190,7 @@ define <8 x i32> @test_pmulld(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00]
|
; HASWELL-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00]
|
||||||
; HASWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
|
; HASWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; ZNVER1-LABEL: test_pmulld:
|
; ZNVER1-LABEL: test_pmulld:
|
||||||
; ZNVER1: # BB#0:
|
; ZNVER1: # BB#0:
|
||||||
|
@ -207,8 +207,8 @@ define <16 x i16> @test_pmullw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2)
|
||||||
; HASWELL-LABEL: test_pmullw:
|
; HASWELL-LABEL: test_pmullw:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
|
; HASWELL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
|
||||||
; HASWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
|
; HASWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; ZNVER1-LABEL: test_pmullw:
|
; ZNVER1-LABEL: test_pmullw:
|
||||||
; ZNVER1: # BB#0:
|
; ZNVER1: # BB#0:
|
||||||
|
@ -225,9 +225,9 @@ define <4 x i64> @test_por(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
|
||||||
; HASWELL-LABEL: test_por:
|
; HASWELL-LABEL: test_por:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
|
; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
|
||||||
; HASWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
|
; HASWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; ZNVER1-LABEL: test_por:
|
; ZNVER1-LABEL: test_por:
|
||||||
; ZNVER1: # BB#0:
|
; ZNVER1: # BB#0:
|
||||||
|
@ -246,8 +246,8 @@ define <32 x i8> @test_psubb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
|
||||||
; HASWELL-LABEL: test_psubb:
|
; HASWELL-LABEL: test_psubb:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
|
; HASWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; ZNVER1-LABEL: test_psubb:
|
; ZNVER1-LABEL: test_psubb:
|
||||||
; ZNVER1: # BB#0:
|
; ZNVER1: # BB#0:
|
||||||
|
@ -264,8 +264,8 @@ define <8 x i32> @test_psubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
|
||||||
; HASWELL-LABEL: test_psubd:
|
; HASWELL-LABEL: test_psubd:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
|
; HASWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; ZNVER1-LABEL: test_psubd:
|
; ZNVER1-LABEL: test_psubd:
|
||||||
; ZNVER1: # BB#0:
|
; ZNVER1: # BB#0:
|
||||||
|
@ -282,8 +282,8 @@ define <4 x i64> @test_psubq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
|
||||||
; HASWELL-LABEL: test_psubq:
|
; HASWELL-LABEL: test_psubq:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
|
; HASWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; ZNVER1-LABEL: test_psubq:
|
; ZNVER1-LABEL: test_psubq:
|
||||||
; ZNVER1: # BB#0:
|
; ZNVER1: # BB#0:
|
||||||
|
@ -300,8 +300,8 @@ define <16 x i16> @test_psubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
|
||||||
; HASWELL-LABEL: test_psubw:
|
; HASWELL-LABEL: test_psubw:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
|
; HASWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; ZNVER1-LABEL: test_psubw:
|
; ZNVER1-LABEL: test_psubw:
|
||||||
; ZNVER1: # BB#0:
|
; ZNVER1: # BB#0:
|
||||||
|
@ -318,9 +318,9 @@ define <4 x i64> @test_pxor(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
|
||||||
; HASWELL-LABEL: test_pxor:
|
; HASWELL-LABEL: test_pxor:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
|
; HASWELL-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
|
||||||
; HASWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
|
; HASWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; ZNVER1-LABEL: test_pxor:
|
; ZNVER1-LABEL: test_pxor:
|
||||||
; ZNVER1: # BB#0:
|
; ZNVER1: # BB#0:
|
||||||
|
|
|
@ -381,6 +381,7 @@ define <4 x i32> @srl_trunc_and_v4i64(<4 x i32> %x, <4 x i64> %y) nounwind {
|
||||||
; X32-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
|
; X32-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
|
||||||
; X32-NEXT: vzeroupper
|
; X32-NEXT: vzeroupper
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
|
; X32-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; X64-LABEL: srl_trunc_and_v4i64:
|
; X64-LABEL: srl_trunc_and_v4i64:
|
||||||
; X64: ## BB#0:
|
; X64: ## BB#0:
|
||||||
|
@ -391,6 +392,7 @@ define <4 x i32> @srl_trunc_and_v4i64(<4 x i32> %x, <4 x i64> %y) nounwind {
|
||||||
; X64-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
|
; X64-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
|
||||||
; X64-NEXT: vzeroupper
|
; X64-NEXT: vzeroupper
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
|
; X64-NEXT: ## -- End function
|
||||||
%and = and <4 x i64> %y, <i64 8, i64 8, i64 8, i64 8>
|
%and = and <4 x i64> %y, <i64 8, i64 8, i64 8, i64 8>
|
||||||
%trunc = trunc <4 x i64> %and to <4 x i32>
|
%trunc = trunc <4 x i64> %and to <4 x i32>
|
||||||
%sra = lshr <4 x i32> %x, %trunc
|
%sra = lshr <4 x i32> %x, %trunc
|
||||||
|
@ -412,6 +414,7 @@ define <8 x i16> @shl_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
|
||||||
; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||||
; X32-NEXT: vzeroupper
|
; X32-NEXT: vzeroupper
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
|
; X32-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; X64-LABEL: shl_8i16:
|
; X64-LABEL: shl_8i16:
|
||||||
; X64: ## BB#0:
|
; X64: ## BB#0:
|
||||||
|
@ -423,6 +426,7 @@ define <8 x i16> @shl_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
|
||||||
; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||||
; X64-NEXT: vzeroupper
|
; X64-NEXT: vzeroupper
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
|
; X64-NEXT: ## -- End function
|
||||||
%shl = shl <8 x i16> %r, %a
|
%shl = shl <8 x i16> %r, %a
|
||||||
ret <8 x i16> %shl
|
ret <8 x i16> %shl
|
||||||
}
|
}
|
||||||
|
@ -434,13 +438,14 @@ define <16 x i16> @shl_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
|
||||||
; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
|
; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
|
||||||
; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
|
; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
|
||||||
; X32-NEXT: vpsllvd %ymm3, %ymm4, %ymm3
|
; X32-NEXT: vpsllvd %ymm3, %ymm4, %ymm3
|
||||||
; X32-NEXT: vpsrld $16, %ymm3, %ymm3
|
|
||||||
; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
|
; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
|
||||||
; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
|
; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
|
||||||
; X32-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
|
; X32-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
|
||||||
|
; X32-NEXT: vpsrld $16, %ymm3, %ymm1
|
||||||
; X32-NEXT: vpsrld $16, %ymm0, %ymm0
|
; X32-NEXT: vpsrld $16, %ymm0, %ymm0
|
||||||
; X32-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
|
; X32-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
|
; X32-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; X64-LABEL: shl_16i16:
|
; X64-LABEL: shl_16i16:
|
||||||
; X64: ## BB#0:
|
; X64: ## BB#0:
|
||||||
|
@ -448,13 +453,14 @@ define <16 x i16> @shl_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
|
||||||
; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
|
; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
|
||||||
; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
|
; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
|
||||||
; X64-NEXT: vpsllvd %ymm3, %ymm4, %ymm3
|
; X64-NEXT: vpsllvd %ymm3, %ymm4, %ymm3
|
||||||
; X64-NEXT: vpsrld $16, %ymm3, %ymm3
|
|
||||||
; X64-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
|
; X64-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
|
||||||
; X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
|
; X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
|
||||||
; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
|
; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
|
||||||
|
; X64-NEXT: vpsrld $16, %ymm3, %ymm1
|
||||||
; X64-NEXT: vpsrld $16, %ymm0, %ymm0
|
; X64-NEXT: vpsrld $16, %ymm0, %ymm0
|
||||||
; X64-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
|
; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
|
; X64-NEXT: ## -- End function
|
||||||
%shl = shl <16 x i16> %r, %a
|
%shl = shl <16 x i16> %r, %a
|
||||||
ret <16 x i16> %shl
|
ret <16 x i16> %shl
|
||||||
}
|
}
|
||||||
|
@ -474,6 +480,7 @@ define <32 x i8> @shl_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
|
||||||
; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||||
; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
|
; X32-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; X64-LABEL: shl_32i8:
|
; X64-LABEL: shl_32i8:
|
||||||
; X64: ## BB#0:
|
; X64: ## BB#0:
|
||||||
|
@ -489,6 +496,7 @@ define <32 x i8> @shl_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
|
||||||
; X64-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
; X64-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||||
; X64-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
; X64-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
|
; X64-NEXT: ## -- End function
|
||||||
%shl = shl <32 x i8> %r, %a
|
%shl = shl <32 x i8> %r, %a
|
||||||
ret <32 x i8> %shl
|
ret <32 x i8> %shl
|
||||||
}
|
}
|
||||||
|
@ -504,6 +512,7 @@ define <8 x i16> @ashr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
|
||||||
; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||||
; X32-NEXT: vzeroupper
|
; X32-NEXT: vzeroupper
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
|
; X32-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; X64-LABEL: ashr_8i16:
|
; X64-LABEL: ashr_8i16:
|
||||||
; X64: ## BB#0:
|
; X64: ## BB#0:
|
||||||
|
@ -515,6 +524,7 @@ define <8 x i16> @ashr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
|
||||||
; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||||
; X64-NEXT: vzeroupper
|
; X64-NEXT: vzeroupper
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
|
; X64-NEXT: ## -- End function
|
||||||
%ashr = ashr <8 x i16> %r, %a
|
%ashr = ashr <8 x i16> %r, %a
|
||||||
ret <8 x i16> %ashr
|
ret <8 x i16> %ashr
|
||||||
}
|
}
|
||||||
|
@ -526,13 +536,14 @@ define <16 x i16> @ashr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
|
||||||
; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
|
; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
|
||||||
; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
|
; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
|
||||||
; X32-NEXT: vpsravd %ymm3, %ymm4, %ymm3
|
; X32-NEXT: vpsravd %ymm3, %ymm4, %ymm3
|
||||||
; X32-NEXT: vpsrld $16, %ymm3, %ymm3
|
|
||||||
; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
|
; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
|
||||||
; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
|
; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
|
||||||
; X32-NEXT: vpsravd %ymm1, %ymm0, %ymm0
|
; X32-NEXT: vpsravd %ymm1, %ymm0, %ymm0
|
||||||
|
; X32-NEXT: vpsrld $16, %ymm3, %ymm1
|
||||||
; X32-NEXT: vpsrld $16, %ymm0, %ymm0
|
; X32-NEXT: vpsrld $16, %ymm0, %ymm0
|
||||||
; X32-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
|
; X32-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
|
; X32-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; X64-LABEL: ashr_16i16:
|
; X64-LABEL: ashr_16i16:
|
||||||
; X64: ## BB#0:
|
; X64: ## BB#0:
|
||||||
|
@ -540,13 +551,14 @@ define <16 x i16> @ashr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
|
||||||
; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
|
; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
|
||||||
; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
|
; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
|
||||||
; X64-NEXT: vpsravd %ymm3, %ymm4, %ymm3
|
; X64-NEXT: vpsravd %ymm3, %ymm4, %ymm3
|
||||||
; X64-NEXT: vpsrld $16, %ymm3, %ymm3
|
|
||||||
; X64-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
|
; X64-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
|
||||||
; X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
|
; X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
|
||||||
; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm0
|
; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm0
|
||||||
|
; X64-NEXT: vpsrld $16, %ymm3, %ymm1
|
||||||
; X64-NEXT: vpsrld $16, %ymm0, %ymm0
|
; X64-NEXT: vpsrld $16, %ymm0, %ymm0
|
||||||
; X64-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
|
; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
|
; X64-NEXT: ## -- End function
|
||||||
%ashr = ashr <16 x i16> %r, %a
|
%ashr = ashr <16 x i16> %r, %a
|
||||||
ret <16 x i16> %ashr
|
ret <16 x i16> %ashr
|
||||||
}
|
}
|
||||||
|
@ -579,6 +591,7 @@ define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
|
||||||
; X32-NEXT: vpsrlw $8, %ymm0, %ymm0
|
; X32-NEXT: vpsrlw $8, %ymm0, %ymm0
|
||||||
; X32-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
; X32-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
|
; X32-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; X64-LABEL: ashr_32i8:
|
; X64-LABEL: ashr_32i8:
|
||||||
; X64: ## BB#0:
|
; X64: ## BB#0:
|
||||||
|
@ -607,6 +620,7 @@ define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
|
||||||
; X64-NEXT: vpsrlw $8, %ymm0, %ymm0
|
; X64-NEXT: vpsrlw $8, %ymm0, %ymm0
|
||||||
; X64-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
; X64-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
|
; X64-NEXT: ## -- End function
|
||||||
%ashr = ashr <32 x i8> %r, %a
|
%ashr = ashr <32 x i8> %r, %a
|
||||||
ret <32 x i8> %ashr
|
ret <32 x i8> %ashr
|
||||||
}
|
}
|
||||||
|
@ -622,6 +636,7 @@ define <8 x i16> @lshr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
|
||||||
; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
; X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||||
; X32-NEXT: vzeroupper
|
; X32-NEXT: vzeroupper
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
|
; X32-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; X64-LABEL: lshr_8i16:
|
; X64-LABEL: lshr_8i16:
|
||||||
; X64: ## BB#0:
|
; X64: ## BB#0:
|
||||||
|
@ -633,6 +648,7 @@ define <8 x i16> @lshr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
|
||||||
; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
; X64-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
|
||||||
; X64-NEXT: vzeroupper
|
; X64-NEXT: vzeroupper
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
|
; X64-NEXT: ## -- End function
|
||||||
%lshr = lshr <8 x i16> %r, %a
|
%lshr = lshr <8 x i16> %r, %a
|
||||||
ret <8 x i16> %lshr
|
ret <8 x i16> %lshr
|
||||||
}
|
}
|
||||||
|
@ -644,13 +660,14 @@ define <16 x i16> @lshr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
|
||||||
; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
|
; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
|
||||||
; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
|
; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
|
||||||
; X32-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
|
; X32-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
|
||||||
; X32-NEXT: vpsrld $16, %ymm3, %ymm3
|
|
||||||
; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
|
; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
|
||||||
; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
|
; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
|
||||||
; X32-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
|
; X32-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
|
||||||
|
; X32-NEXT: vpsrld $16, %ymm3, %ymm1
|
||||||
; X32-NEXT: vpsrld $16, %ymm0, %ymm0
|
; X32-NEXT: vpsrld $16, %ymm0, %ymm0
|
||||||
; X32-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
|
; X32-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
|
; X32-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; X64-LABEL: lshr_16i16:
|
; X64-LABEL: lshr_16i16:
|
||||||
; X64: ## BB#0:
|
; X64: ## BB#0:
|
||||||
|
@ -658,13 +675,14 @@ define <16 x i16> @lshr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
|
||||||
; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
|
; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
|
||||||
; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
|
; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
|
||||||
; X64-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
|
; X64-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
|
||||||
; X64-NEXT: vpsrld $16, %ymm3, %ymm3
|
|
||||||
; X64-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
|
; X64-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
|
||||||
; X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
|
; X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
|
||||||
; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
|
; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
|
||||||
|
; X64-NEXT: vpsrld $16, %ymm3, %ymm1
|
||||||
; X64-NEXT: vpsrld $16, %ymm0, %ymm0
|
; X64-NEXT: vpsrld $16, %ymm0, %ymm0
|
||||||
; X64-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
|
; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
|
; X64-NEXT: ## -- End function
|
||||||
%lshr = lshr <16 x i16> %r, %a
|
%lshr = lshr <16 x i16> %r, %a
|
||||||
ret <16 x i16> %lshr
|
ret <16 x i16> %lshr
|
||||||
}
|
}
|
||||||
|
@ -685,6 +703,7 @@ define <32 x i8> @lshr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
|
||||||
; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||||
; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
|
; X32-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; X64-LABEL: lshr_32i8:
|
; X64-LABEL: lshr_32i8:
|
||||||
; X64: ## BB#0:
|
; X64: ## BB#0:
|
||||||
|
@ -701,6 +720,7 @@ define <32 x i8> @lshr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
|
||||||
; X64-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
; X64-NEXT: vpaddb %ymm1, %ymm1, %ymm1
|
||||||
; X64-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
; X64-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
|
; X64-NEXT: ## -- End function
|
||||||
%lshr = lshr <32 x i8> %r, %a
|
%lshr = lshr <32 x i8> %r, %a
|
||||||
ret <32 x i8> %lshr
|
ret <32 x i8> %lshr
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,6 +14,7 @@ define double @test1(double %a, double %b) nounwind {
|
||||||
; ALL-NEXT: LBB0_2: ## %l2
|
; ALL-NEXT: LBB0_2: ## %l2
|
||||||
; ALL-NEXT: vaddsd %xmm1, %xmm0, %xmm0
|
; ALL-NEXT: vaddsd %xmm1, %xmm0, %xmm0
|
||||||
; ALL-NEXT: retq
|
; ALL-NEXT: retq
|
||||||
|
; ALL-NEXT: ## -- End function
|
||||||
%tobool = fcmp une double %a, %b
|
%tobool = fcmp une double %a, %b
|
||||||
br i1 %tobool, label %l1, label %l2
|
br i1 %tobool, label %l1, label %l2
|
||||||
|
|
||||||
|
@ -36,6 +37,7 @@ define float @test2(float %a, float %b) nounwind {
|
||||||
; ALL-NEXT: LBB1_2: ## %l2
|
; ALL-NEXT: LBB1_2: ## %l2
|
||||||
; ALL-NEXT: vaddss %xmm1, %xmm0, %xmm0
|
; ALL-NEXT: vaddss %xmm1, %xmm0, %xmm0
|
||||||
; ALL-NEXT: retq
|
; ALL-NEXT: retq
|
||||||
|
; ALL-NEXT: ## -- End function
|
||||||
%tobool = fcmp olt float %a, %b
|
%tobool = fcmp olt float %a, %b
|
||||||
br i1 %tobool, label %l1, label %l2
|
br i1 %tobool, label %l1, label %l2
|
||||||
|
|
||||||
|
@ -124,11 +126,11 @@ entry:
|
||||||
define i32 @test8(i32 %a1, i32 %a2, i32 %a3) {
|
define i32 @test8(i32 %a1, i32 %a2, i32 %a3) {
|
||||||
; ALL-LABEL: test8:
|
; ALL-LABEL: test8:
|
||||||
; ALL: ## BB#0:
|
; ALL: ## BB#0:
|
||||||
; ALL-NEXT: notl %edi
|
|
||||||
; ALL-NEXT: xorl $-2147483648, %esi ## imm = 0x80000000
|
; ALL-NEXT: xorl $-2147483648, %esi ## imm = 0x80000000
|
||||||
; ALL-NEXT: testl %edx, %edx
|
; ALL-NEXT: testl %edx, %edx
|
||||||
; ALL-NEXT: movl $1, %eax
|
; ALL-NEXT: movl $1, %eax
|
||||||
; ALL-NEXT: cmovel %eax, %edx
|
; ALL-NEXT: cmovel %eax, %edx
|
||||||
|
; ALL-NEXT: notl %edi
|
||||||
; ALL-NEXT: orl %edi, %esi
|
; ALL-NEXT: orl %edi, %esi
|
||||||
; ALL-NEXT: cmovnel %edx, %eax
|
; ALL-NEXT: cmovnel %edx, %eax
|
||||||
; ALL-NEXT: retq
|
; ALL-NEXT: retq
|
||||||
|
|
|
@ -1545,19 +1545,19 @@ define <4 x double> @uitofp_4i1_double(<4 x i32> %a) {
|
||||||
}
|
}
|
||||||
|
|
||||||
define <2 x float> @uitofp_2i1_float(<2 x i32> %a) {
|
define <2 x float> @uitofp_2i1_float(<2 x i32> %a) {
|
||||||
; NOVL-LABEL: uitofp_2i1_float:
|
; KNL-LABEL: uitofp_2i1_float:
|
||||||
; NOVL: # BB#0:
|
; KNL: # BB#0:
|
||||||
; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||||
; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
|
; KNL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
|
||||||
; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
||||||
; NOVL-NEXT: vpextrb $8, %xmm0, %eax
|
; KNL-NEXT: vpextrb $8, %xmm0, %eax
|
||||||
; NOVL-NEXT: andl $1, %eax
|
; KNL-NEXT: andl $1, %eax
|
||||||
; NOVL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm1
|
; KNL-NEXT: vpextrb $0, %xmm0, %ecx
|
||||||
; NOVL-NEXT: vpextrb $0, %xmm0, %eax
|
; KNL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
|
||||||
; NOVL-NEXT: andl $1, %eax
|
; KNL-NEXT: andl $1, %ecx
|
||||||
; NOVL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
|
; KNL-NEXT: vcvtsi2ssl %ecx, %xmm2, %xmm1
|
||||||
; NOVL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
|
; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
|
||||||
; NOVL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
;
|
;
|
||||||
; VL-LABEL: uitofp_2i1_float:
|
; VL-LABEL: uitofp_2i1_float:
|
||||||
; VL: # BB#0:
|
; VL: # BB#0:
|
||||||
|
@ -1567,6 +1567,34 @@ define <2 x float> @uitofp_2i1_float(<2 x i32> %a) {
|
||||||
; VL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
|
; VL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
|
||||||
; VL-NEXT: vcvtudq2ps %xmm0, %xmm0
|
; VL-NEXT: vcvtudq2ps %xmm0, %xmm0
|
||||||
; VL-NEXT: retq
|
; VL-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512DQ-LABEL: uitofp_2i1_float:
|
||||||
|
; AVX512DQ: # BB#0:
|
||||||
|
; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||||
|
; AVX512DQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
|
||||||
|
; AVX512DQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
||||||
|
; AVX512DQ-NEXT: vpextrb $8, %xmm0, %eax
|
||||||
|
; AVX512DQ-NEXT: andl $1, %eax
|
||||||
|
; AVX512DQ-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm1
|
||||||
|
; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax
|
||||||
|
; AVX512DQ-NEXT: andl $1, %eax
|
||||||
|
; AVX512DQ-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
|
||||||
|
; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
|
||||||
|
; AVX512DQ-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX512BW-LABEL: uitofp_2i1_float:
|
||||||
|
; AVX512BW: # BB#0:
|
||||||
|
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||||
|
; AVX512BW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
|
||||||
|
; AVX512BW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
|
||||||
|
; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax
|
||||||
|
; AVX512BW-NEXT: andl $1, %eax
|
||||||
|
; AVX512BW-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm1
|
||||||
|
; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
|
||||||
|
; AVX512BW-NEXT: andl $1, %eax
|
||||||
|
; AVX512BW-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
|
||||||
|
; AVX512BW-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
|
||||||
|
; AVX512BW-NEXT: retq
|
||||||
%mask = icmp ult <2 x i32> %a, zeroinitializer
|
%mask = icmp ult <2 x i32> %a, zeroinitializer
|
||||||
%1 = uitofp <2 x i1> %mask to <2 x float>
|
%1 = uitofp <2 x i1> %mask to <2 x float>
|
||||||
ret <2 x float> %1
|
ret <2 x float> %1
|
||||||
|
|
|
@ -12,6 +12,7 @@ define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
|
||||||
; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
|
; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
|
||||||
; KNL-NEXT: vinsertf32x4 $3, %xmm0, %zmm2, %zmm0
|
; KNL-NEXT: vinsertf32x4 $3, %xmm0, %zmm2, %zmm0
|
||||||
; KNL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
|
; KNL-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; SKX-LABEL: test1:
|
; SKX-LABEL: test1:
|
||||||
; SKX: ## BB#0:
|
; SKX: ## BB#0:
|
||||||
|
@ -21,6 +22,7 @@ define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
|
||||||
; SKX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
|
; SKX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
|
||||||
; SKX-NEXT: vinsertf32x4 $3, %xmm0, %zmm2, %zmm0
|
; SKX-NEXT: vinsertf32x4 $3, %xmm0, %zmm2, %zmm0
|
||||||
; SKX-NEXT: retq
|
; SKX-NEXT: retq
|
||||||
|
; SKX-NEXT: ## -- End function
|
||||||
%rrr = load float, float* %br
|
%rrr = load float, float* %br
|
||||||
%rrr2 = insertelement <16 x float> %x, float %rrr, i32 1
|
%rrr2 = insertelement <16 x float> %x, float %rrr, i32 1
|
||||||
%rrr3 = insertelement <16 x float> %rrr2, float %y, i32 14
|
%rrr3 = insertelement <16 x float> %rrr2, float %y, i32 14
|
||||||
|
@ -36,6 +38,7 @@ define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
|
||||||
; KNL-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
; KNL-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||||
; KNL-NEXT: vinsertf32x4 $3, %xmm0, %zmm2, %zmm0
|
; KNL-NEXT: vinsertf32x4 $3, %xmm0, %zmm2, %zmm0
|
||||||
; KNL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
|
; KNL-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; SKX-LABEL: test2:
|
; SKX-LABEL: test2:
|
||||||
; SKX: ## BB#0:
|
; SKX: ## BB#0:
|
||||||
|
@ -45,6 +48,7 @@ define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
|
||||||
; SKX-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
; SKX-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
|
||||||
; SKX-NEXT: vinsertf64x2 $3, %xmm0, %zmm2, %zmm0
|
; SKX-NEXT: vinsertf64x2 $3, %xmm0, %zmm2, %zmm0
|
||||||
; SKX-NEXT: retq
|
; SKX-NEXT: retq
|
||||||
|
; SKX-NEXT: ## -- End function
|
||||||
%rrr = load double, double* %br
|
%rrr = load double, double* %br
|
||||||
%rrr2 = insertelement <8 x double> %x, double %rrr, i32 1
|
%rrr2 = insertelement <8 x double> %x, double %rrr, i32 1
|
||||||
%rrr3 = insertelement <8 x double> %rrr2, double %y, i32 6
|
%rrr3 = insertelement <8 x double> %rrr2, double %y, i32 6
|
||||||
|
@ -58,6 +62,7 @@ define <16 x float> @test3(<16 x float> %x) nounwind {
|
||||||
; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
|
; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
|
||||||
; KNL-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
|
; KNL-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
|
||||||
; KNL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
|
; KNL-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; SKX-LABEL: test3:
|
; SKX-LABEL: test3:
|
||||||
; SKX: ## BB#0:
|
; SKX: ## BB#0:
|
||||||
|
@ -65,6 +70,7 @@ define <16 x float> @test3(<16 x float> %x) nounwind {
|
||||||
; SKX-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
|
; SKX-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
|
||||||
; SKX-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
|
; SKX-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
|
||||||
; SKX-NEXT: retq
|
; SKX-NEXT: retq
|
||||||
|
; SKX-NEXT: ## -- End function
|
||||||
%eee = extractelement <16 x float> %x, i32 4
|
%eee = extractelement <16 x float> %x, i32 4
|
||||||
%rrr2 = insertelement <16 x float> %x, float %eee, i32 1
|
%rrr2 = insertelement <16 x float> %x, float %eee, i32 1
|
||||||
ret <16 x float> %rrr2
|
ret <16 x float> %rrr2
|
||||||
|
@ -78,6 +84,7 @@ define <8 x i64> @test4(<8 x i64> %x) nounwind {
|
||||||
; KNL-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1
|
; KNL-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1
|
||||||
; KNL-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
|
; KNL-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
|
||||||
; KNL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
|
; KNL-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; SKX-LABEL: test4:
|
; SKX-LABEL: test4:
|
||||||
; SKX: ## BB#0:
|
; SKX: ## BB#0:
|
||||||
|
@ -86,6 +93,7 @@ define <8 x i64> @test4(<8 x i64> %x) nounwind {
|
||||||
; SKX-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1
|
; SKX-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1
|
||||||
; SKX-NEXT: vinserti64x2 $0, %xmm1, %zmm0, %zmm0
|
; SKX-NEXT: vinserti64x2 $0, %xmm1, %zmm0, %zmm0
|
||||||
; SKX-NEXT: retq
|
; SKX-NEXT: retq
|
||||||
|
; SKX-NEXT: ## -- End function
|
||||||
%eee = extractelement <8 x i64> %x, i32 4
|
%eee = extractelement <8 x i64> %x, i32 4
|
||||||
%rrr2 = insertelement <8 x i64> %x, i64 %eee, i32 1
|
%rrr2 = insertelement <8 x i64> %x, i64 %eee, i32 1
|
||||||
ret <8 x i64> %rrr2
|
ret <8 x i64> %rrr2
|
||||||
|
@ -96,11 +104,13 @@ define i32 @test5(<4 x float> %x) nounwind {
|
||||||
; KNL: ## BB#0:
|
; KNL: ## BB#0:
|
||||||
; KNL-NEXT: vextractps $3, %xmm0, %eax
|
; KNL-NEXT: vextractps $3, %xmm0, %eax
|
||||||
; KNL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
|
; KNL-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; SKX-LABEL: test5:
|
; SKX-LABEL: test5:
|
||||||
; SKX: ## BB#0:
|
; SKX: ## BB#0:
|
||||||
; SKX-NEXT: vextractps $3, %xmm0, %eax
|
; SKX-NEXT: vextractps $3, %xmm0, %eax
|
||||||
; SKX-NEXT: retq
|
; SKX-NEXT: retq
|
||||||
|
; SKX-NEXT: ## -- End function
|
||||||
%ef = extractelement <4 x float> %x, i32 3
|
%ef = extractelement <4 x float> %x, i32 3
|
||||||
%ei = bitcast float %ef to i32
|
%ei = bitcast float %ef to i32
|
||||||
ret i32 %ei
|
ret i32 %ei
|
||||||
|
@ -111,11 +121,13 @@ define void @test6(<4 x float> %x, float* %out) nounwind {
|
||||||
; KNL: ## BB#0:
|
; KNL: ## BB#0:
|
||||||
; KNL-NEXT: vextractps $3, %xmm0, (%rdi)
|
; KNL-NEXT: vextractps $3, %xmm0, (%rdi)
|
||||||
; KNL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
|
; KNL-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; SKX-LABEL: test6:
|
; SKX-LABEL: test6:
|
||||||
; SKX: ## BB#0:
|
; SKX: ## BB#0:
|
||||||
; SKX-NEXT: vextractps $3, %xmm0, (%rdi)
|
; SKX-NEXT: vextractps $3, %xmm0, (%rdi)
|
||||||
; SKX-NEXT: retq
|
; SKX-NEXT: retq
|
||||||
|
; SKX-NEXT: ## -- End function
|
||||||
%ef = extractelement <4 x float> %x, i32 3
|
%ef = extractelement <4 x float> %x, i32 3
|
||||||
store float %ef, float* %out, align 4
|
store float %ef, float* %out, align 4
|
||||||
ret void
|
ret void
|
||||||
|
@ -135,6 +147,7 @@ define float @test7(<16 x float> %x, i32 %ind) nounwind {
|
||||||
; KNL-NEXT: movq %rbp, %rsp
|
; KNL-NEXT: movq %rbp, %rsp
|
||||||
; KNL-NEXT: popq %rbp
|
; KNL-NEXT: popq %rbp
|
||||||
; KNL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
|
; KNL-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; SKX-LABEL: test7:
|
; SKX-LABEL: test7:
|
||||||
; SKX: ## BB#0:
|
; SKX: ## BB#0:
|
||||||
|
@ -150,6 +163,7 @@ define float @test7(<16 x float> %x, i32 %ind) nounwind {
|
||||||
; SKX-NEXT: popq %rbp
|
; SKX-NEXT: popq %rbp
|
||||||
; SKX-NEXT: vzeroupper
|
; SKX-NEXT: vzeroupper
|
||||||
; SKX-NEXT: retq
|
; SKX-NEXT: retq
|
||||||
|
; SKX-NEXT: ## -- End function
|
||||||
%e = extractelement <16 x float> %x, i32 %ind
|
%e = extractelement <16 x float> %x, i32 %ind
|
||||||
ret float %e
|
ret float %e
|
||||||
}
|
}
|
||||||
|
@ -168,6 +182,7 @@ define double @test8(<8 x double> %x, i32 %ind) nounwind {
|
||||||
; KNL-NEXT: movq %rbp, %rsp
|
; KNL-NEXT: movq %rbp, %rsp
|
||||||
; KNL-NEXT: popq %rbp
|
; KNL-NEXT: popq %rbp
|
||||||
; KNL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
|
; KNL-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; SKX-LABEL: test8:
|
; SKX-LABEL: test8:
|
||||||
; SKX: ## BB#0:
|
; SKX: ## BB#0:
|
||||||
|
@ -183,6 +198,7 @@ define double @test8(<8 x double> %x, i32 %ind) nounwind {
|
||||||
; SKX-NEXT: popq %rbp
|
; SKX-NEXT: popq %rbp
|
||||||
; SKX-NEXT: vzeroupper
|
; SKX-NEXT: vzeroupper
|
||||||
; SKX-NEXT: retq
|
; SKX-NEXT: retq
|
||||||
|
; SKX-NEXT: ## -- End function
|
||||||
%e = extractelement <8 x double> %x, i32 %ind
|
%e = extractelement <8 x double> %x, i32 %ind
|
||||||
ret double %e
|
ret double %e
|
||||||
}
|
}
|
||||||
|
@ -201,6 +217,7 @@ define float @test9(<8 x float> %x, i32 %ind) nounwind {
|
||||||
; KNL-NEXT: movq %rbp, %rsp
|
; KNL-NEXT: movq %rbp, %rsp
|
||||||
; KNL-NEXT: popq %rbp
|
; KNL-NEXT: popq %rbp
|
||||||
; KNL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
|
; KNL-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; SKX-LABEL: test9:
|
; SKX-LABEL: test9:
|
||||||
; SKX: ## BB#0:
|
; SKX: ## BB#0:
|
||||||
|
@ -216,6 +233,7 @@ define float @test9(<8 x float> %x, i32 %ind) nounwind {
|
||||||
; SKX-NEXT: popq %rbp
|
; SKX-NEXT: popq %rbp
|
||||||
; SKX-NEXT: vzeroupper
|
; SKX-NEXT: vzeroupper
|
||||||
; SKX-NEXT: retq
|
; SKX-NEXT: retq
|
||||||
|
; SKX-NEXT: ## -- End function
|
||||||
%e = extractelement <8 x float> %x, i32 %ind
|
%e = extractelement <8 x float> %x, i32 %ind
|
||||||
ret float %e
|
ret float %e
|
||||||
}
|
}
|
||||||
|
@ -234,6 +252,7 @@ define i32 @test10(<16 x i32> %x, i32 %ind) nounwind {
|
||||||
; KNL-NEXT: movq %rbp, %rsp
|
; KNL-NEXT: movq %rbp, %rsp
|
||||||
; KNL-NEXT: popq %rbp
|
; KNL-NEXT: popq %rbp
|
||||||
; KNL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
|
; KNL-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; SKX-LABEL: test10:
|
; SKX-LABEL: test10:
|
||||||
; SKX: ## BB#0:
|
; SKX: ## BB#0:
|
||||||
|
@ -249,6 +268,7 @@ define i32 @test10(<16 x i32> %x, i32 %ind) nounwind {
|
||||||
; SKX-NEXT: popq %rbp
|
; SKX-NEXT: popq %rbp
|
||||||
; SKX-NEXT: vzeroupper
|
; SKX-NEXT: vzeroupper
|
||||||
; SKX-NEXT: retq
|
; SKX-NEXT: retq
|
||||||
|
; SKX-NEXT: ## -- End function
|
||||||
%e = extractelement <16 x i32> %x, i32 %ind
|
%e = extractelement <16 x i32> %x, i32 %ind
|
||||||
ret i32 %e
|
ret i32 %e
|
||||||
}
|
}
|
||||||
|
@ -1114,137 +1134,137 @@ define i32 @test_insertelement_v32i1(i32 %a, i32 %b, <32 x i32> %x , <32 x i32>
|
||||||
; KNL-NEXT: .cfi_def_cfa_register %rbp
|
; KNL-NEXT: .cfi_def_cfa_register %rbp
|
||||||
; KNL-NEXT: andq $-32, %rsp
|
; KNL-NEXT: andq $-32, %rsp
|
||||||
; KNL-NEXT: subq $32, %rsp
|
; KNL-NEXT: subq $32, %rsp
|
||||||
; KNL-NEXT: xorl %eax, %eax
|
|
||||||
; KNL-NEXT: cmpl %esi, %edi
|
|
||||||
; KNL-NEXT: setb %al
|
|
||||||
; KNL-NEXT: vpcmpltud %zmm3, %zmm1, %k0
|
; KNL-NEXT: vpcmpltud %zmm3, %zmm1, %k0
|
||||||
; KNL-NEXT: kshiftlw $14, %k0, %k1
|
; KNL-NEXT: kshiftlw $14, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: kshiftlw $15, %k0, %k1
|
; KNL-NEXT: kshiftlw $15, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %edx
|
; KNL-NEXT: kmovw %k1, %ecx
|
||||||
; KNL-NEXT: vmovd %edx, %xmm1
|
; KNL-NEXT: vmovd %ecx, %xmm1
|
||||||
; KNL-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1
|
; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
|
||||||
; KNL-NEXT: kshiftlw $13, %k0, %k1
|
; KNL-NEXT: kshiftlw $13, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
|
; KNL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
|
||||||
; KNL-NEXT: kshiftlw $12, %k0, %k1
|
; KNL-NEXT: kshiftlw $12, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1
|
; KNL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
|
||||||
; KNL-NEXT: kshiftlw $11, %k0, %k1
|
; KNL-NEXT: kshiftlw $11, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1
|
; KNL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
||||||
; KNL-NEXT: kshiftlw $10, %k0, %k1
|
; KNL-NEXT: kshiftlw $10, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm1
|
; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
|
||||||
; KNL-NEXT: kshiftlw $9, %k0, %k1
|
; KNL-NEXT: kshiftlw $9, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1
|
; KNL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
|
||||||
; KNL-NEXT: kshiftlw $8, %k0, %k1
|
; KNL-NEXT: kshiftlw $8, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1
|
; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
|
||||||
; KNL-NEXT: kshiftlw $7, %k0, %k1
|
; KNL-NEXT: kshiftlw $7, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
|
; KNL-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
||||||
; KNL-NEXT: kshiftlw $6, %k0, %k1
|
; KNL-NEXT: kshiftlw $6, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $9, %ecx, %xmm1, %xmm1
|
; KNL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
|
||||||
; KNL-NEXT: kshiftlw $5, %k0, %k1
|
; KNL-NEXT: kshiftlw $5, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1
|
; KNL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
|
||||||
; KNL-NEXT: kshiftlw $4, %k0, %k1
|
; KNL-NEXT: kshiftlw $4, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $11, %ecx, %xmm1, %xmm1
|
; KNL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
|
||||||
; KNL-NEXT: kshiftlw $3, %k0, %k1
|
; KNL-NEXT: kshiftlw $3, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1
|
; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
||||||
; KNL-NEXT: kshiftlw $2, %k0, %k1
|
; KNL-NEXT: kshiftlw $2, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1
|
; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
|
||||||
; KNL-NEXT: kshiftlw $1, %k0, %k1
|
; KNL-NEXT: kshiftlw $1, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1
|
; KNL-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
; KNL-NEXT: kmovw %k0, %ecx
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
; KNL-NEXT: vpinsrb $15, %ecx, %xmm1, %xmm1
|
; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
|
||||||
; KNL-NEXT: vpcmpltud %zmm2, %zmm0, %k0
|
; KNL-NEXT: vpcmpltud %zmm2, %zmm0, %k0
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k0, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: kshiftlw $14, %k0, %k1
|
; KNL-NEXT: kshiftlw $14, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: vmovd %eax, %xmm0
|
||||||
; KNL-NEXT: kshiftlw $15, %k0, %k1
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
|
||||||
; KNL-NEXT: kmovw %k1, %edx
|
|
||||||
; KNL-NEXT: vmovd %edx, %xmm0
|
|
||||||
; KNL-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
|
|
||||||
; KNL-NEXT: kshiftlw $13, %k0, %k1
|
; KNL-NEXT: kshiftlw $13, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
|
||||||
; KNL-NEXT: kshiftlw $12, %k0, %k1
|
; KNL-NEXT: kshiftlw $12, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
|
||||||
; KNL-NEXT: kshiftlw $11, %k0, %k1
|
; KNL-NEXT: kshiftlw $11, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
|
||||||
; KNL-NEXT: kshiftlw $10, %k0, %k1
|
; KNL-NEXT: kshiftlw $10, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
||||||
; KNL-NEXT: kshiftlw $9, %k0, %k1
|
; KNL-NEXT: kshiftlw $9, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
|
||||||
; KNL-NEXT: kshiftlw $8, %k0, %k1
|
; KNL-NEXT: kshiftlw $8, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
||||||
; KNL-NEXT: kshiftlw $7, %k0, %k1
|
; KNL-NEXT: kshiftlw $7, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
|
||||||
; KNL-NEXT: kshiftlw $6, %k0, %k1
|
; KNL-NEXT: kshiftlw $6, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
|
||||||
; KNL-NEXT: kshiftlw $5, %k0, %k1
|
; KNL-NEXT: kshiftlw $5, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
|
||||||
; KNL-NEXT: kshiftlw $4, %k0, %k1
|
; KNL-NEXT: kshiftlw $4, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
||||||
; KNL-NEXT: kshiftlw $3, %k0, %k1
|
; KNL-NEXT: kshiftlw $3, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
|
||||||
; KNL-NEXT: kshiftlw $2, %k0, %k1
|
; KNL-NEXT: kshiftlw $2, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
|
||||||
; KNL-NEXT: kshiftlw $1, %k0, %k1
|
; KNL-NEXT: kshiftlw $1, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
|
||||||
|
; KNL-NEXT: xorl %eax, %eax
|
||||||
|
; KNL-NEXT: cmpl %esi, %edi
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
; KNL-NEXT: kmovw %k0, %ecx
|
; KNL-NEXT: kmovw %k0, %ecx
|
||||||
; KNL-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0
|
||||||
|
; KNL-NEXT: setb %al
|
||||||
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||||
; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
|
; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
|
||||||
; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||||
|
@ -1299,8 +1319,8 @@ define i8 @test_iinsertelement_v4i1(i32 %a, i32 %b, <4 x i32> %x , <4 x i32> %y)
|
||||||
; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
|
||||||
; KNL-NEXT: vpextrb $4, %xmm0, %ecx
|
; KNL-NEXT: vpextrb $4, %xmm0, %ecx
|
||||||
; KNL-NEXT: kmovw %ecx, %k1
|
; KNL-NEXT: kmovw %ecx, %k1
|
||||||
; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
|
||||||
; KNL-NEXT: vpextrb $0, %xmm0, %ecx
|
; KNL-NEXT: vpextrb $0, %xmm0, %ecx
|
||||||
|
; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
|
||||||
; KNL-NEXT: kmovw %ecx, %k1
|
; KNL-NEXT: kmovw %ecx, %k1
|
||||||
; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
|
||||||
; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
|
||||||
|
@ -2124,8 +2144,8 @@ define i16 @test_extractelement_variable_v32i16(<32 x i16> %t1, i32 %index) {
|
||||||
define i8 @test_extractelement_variable_v16i8(<16 x i8> %t1, i32 %index) {
|
define i8 @test_extractelement_variable_v16i8(<16 x i8> %t1, i32 %index) {
|
||||||
; KNL-LABEL: test_extractelement_variable_v16i8:
|
; KNL-LABEL: test_extractelement_variable_v16i8:
|
||||||
; KNL: ## BB#0:
|
; KNL: ## BB#0:
|
||||||
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
|
|
||||||
; KNL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
; KNL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||||
|
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; KNL-NEXT: andl $15, %edi
|
; KNL-NEXT: andl $15, %edi
|
||||||
; KNL-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
|
; KNL-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
|
||||||
; KNL-NEXT: movb (%rdi,%rax), %al
|
; KNL-NEXT: movb (%rdi,%rax), %al
|
||||||
|
@ -2156,8 +2176,8 @@ define i8 @test_extractelement_variable_v32i8(<32 x i8> %t1, i32 %index) {
|
||||||
; KNL-NEXT: .cfi_def_cfa_register %rbp
|
; KNL-NEXT: .cfi_def_cfa_register %rbp
|
||||||
; KNL-NEXT: andq $-32, %rsp
|
; KNL-NEXT: andq $-32, %rsp
|
||||||
; KNL-NEXT: subq $64, %rsp
|
; KNL-NEXT: subq $64, %rsp
|
||||||
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
|
|
||||||
; KNL-NEXT: vmovaps %ymm0, (%rsp)
|
; KNL-NEXT: vmovaps %ymm0, (%rsp)
|
||||||
|
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; KNL-NEXT: andl $31, %edi
|
; KNL-NEXT: andl $31, %edi
|
||||||
; KNL-NEXT: movq %rsp, %rax
|
; KNL-NEXT: movq %rsp, %rax
|
||||||
; KNL-NEXT: movb (%rdi,%rax), %al
|
; KNL-NEXT: movb (%rdi,%rax), %al
|
||||||
|
@ -2204,9 +2224,9 @@ define i8 @test_extractelement_variable_v64i8(<64 x i8> %t1, i32 %index) {
|
||||||
; KNL-NEXT: .cfi_def_cfa_register %rbp
|
; KNL-NEXT: .cfi_def_cfa_register %rbp
|
||||||
; KNL-NEXT: andq $-64, %rsp
|
; KNL-NEXT: andq $-64, %rsp
|
||||||
; KNL-NEXT: subq $128, %rsp
|
; KNL-NEXT: subq $128, %rsp
|
||||||
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
|
|
||||||
; KNL-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
|
; KNL-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
|
||||||
; KNL-NEXT: vmovaps %ymm0, (%rsp)
|
; KNL-NEXT: vmovaps %ymm0, (%rsp)
|
||||||
|
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; KNL-NEXT: andl $63, %edi
|
; KNL-NEXT: andl $63, %edi
|
||||||
; KNL-NEXT: movq %rsp, %rax
|
; KNL-NEXT: movq %rsp, %rax
|
||||||
; KNL-NEXT: movb (%rdi,%rax), %al
|
; KNL-NEXT: movb (%rdi,%rax), %al
|
||||||
|
@ -2295,12 +2315,12 @@ define i8 @test_extractelement_variable_v64i8_indexi8(<64 x i8> %t1, i8 %index)
|
||||||
define zeroext i8 @test_extractelement_varible_v2i1(<2 x i64> %a, <2 x i64> %b, i32 %index) {
|
define zeroext i8 @test_extractelement_varible_v2i1(<2 x i64> %a, <2 x i64> %b, i32 %index) {
|
||||||
; KNL-LABEL: test_extractelement_varible_v2i1:
|
; KNL-LABEL: test_extractelement_varible_v2i1:
|
||||||
; KNL: ## BB#0:
|
; KNL: ## BB#0:
|
||||||
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
|
|
||||||
; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
|
||||||
; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
||||||
; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
||||||
; KNL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
; KNL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
||||||
; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
|
; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
|
||||||
|
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; KNL-NEXT: andl $1, %edi
|
; KNL-NEXT: andl $1, %edi
|
||||||
; KNL-NEXT: movl -24(%rsp,%rdi,8), %eax
|
; KNL-NEXT: movl -24(%rsp,%rdi,8), %eax
|
||||||
; KNL-NEXT: andl $1, %eax
|
; KNL-NEXT: andl $1, %eax
|
||||||
|
@ -2325,12 +2345,12 @@ define zeroext i8 @test_extractelement_varible_v2i1(<2 x i64> %a, <2 x i64> %b,
|
||||||
define zeroext i8 @test_extractelement_varible_v4i1(<4 x i32> %a, <4 x i32> %b, i32 %index) {
|
define zeroext i8 @test_extractelement_varible_v4i1(<4 x i32> %a, <4 x i32> %b, i32 %index) {
|
||||||
; KNL-LABEL: test_extractelement_varible_v4i1:
|
; KNL-LABEL: test_extractelement_varible_v4i1:
|
||||||
; KNL: ## BB#0:
|
; KNL: ## BB#0:
|
||||||
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
|
|
||||||
; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
|
; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
|
||||||
; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
|
||||||
; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
||||||
; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
|
; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
|
||||||
; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
|
; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
|
||||||
|
; KNL-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; KNL-NEXT: andl $3, %edi
|
; KNL-NEXT: andl $3, %edi
|
||||||
; KNL-NEXT: movl -24(%rsp,%rdi,4), %eax
|
; KNL-NEXT: movl -24(%rsp,%rdi,4), %eax
|
||||||
; KNL-NEXT: andl $1, %eax
|
; KNL-NEXT: andl $1, %eax
|
||||||
|
|
|
@ -2880,7 +2880,6 @@ declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8
|
||||||
define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
|
define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
|
||||||
; CHECK-LABEL: test_mask_vextractf32x4:
|
; CHECK-LABEL: test_mask_vextractf32x4:
|
||||||
; CHECK: ## BB#0:
|
; CHECK: ## BB#0:
|
||||||
; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm1
|
|
||||||
; CHECK-NEXT: kmovw %edi, %k0
|
; CHECK-NEXT: kmovw %edi, %k0
|
||||||
; CHECK-NEXT: kshiftlw $12, %k0, %k1
|
; CHECK-NEXT: kshiftlw $12, %k0, %k1
|
||||||
; CHECK-NEXT: kshiftrw $15, %k1, %k1
|
; CHECK-NEXT: kshiftrw $15, %k1, %k1
|
||||||
|
@ -2898,6 +2897,7 @@ define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8
|
||||||
; CHECK-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
|
; CHECK-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
|
||||||
; CHECK-NEXT: kmovw %k1, %eax
|
; CHECK-NEXT: kmovw %k1, %eax
|
||||||
; CHECK-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
|
; CHECK-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
|
||||||
|
; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm1
|
||||||
; CHECK-NEXT: vpslld $31, %xmm2, %xmm2
|
; CHECK-NEXT: vpslld $31, %xmm2, %xmm2
|
||||||
; CHECK-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
|
; CHECK-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
|
@ -2941,7 +2941,6 @@ declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i32, <4 x i
|
||||||
define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
|
define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
|
||||||
; CHECK-LABEL: test_maskz_vextracti32x4:
|
; CHECK-LABEL: test_maskz_vextracti32x4:
|
||||||
; CHECK: ## BB#0:
|
; CHECK: ## BB#0:
|
||||||
; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm0
|
|
||||||
; CHECK-NEXT: kmovw %edi, %k0
|
; CHECK-NEXT: kmovw %edi, %k0
|
||||||
; CHECK-NEXT: kshiftlw $12, %k0, %k1
|
; CHECK-NEXT: kshiftlw $12, %k0, %k1
|
||||||
; CHECK-NEXT: kshiftrw $15, %k1, %k1
|
; CHECK-NEXT: kshiftrw $15, %k1, %k1
|
||||||
|
@ -2959,6 +2958,7 @@ define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
|
||||||
; CHECK-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
; CHECK-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
||||||
; CHECK-NEXT: kmovw %k1, %eax
|
; CHECK-NEXT: kmovw %k1, %eax
|
||||||
; CHECK-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
; CHECK-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
||||||
|
; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm0
|
||||||
; CHECK-NEXT: vpslld $31, %xmm1, %xmm1
|
; CHECK-NEXT: vpslld $31, %xmm1, %xmm1
|
||||||
; CHECK-NEXT: vpsrad $31, %xmm1, %xmm1
|
; CHECK-NEXT: vpsrad $31, %xmm1, %xmm1
|
||||||
; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0
|
; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0
|
||||||
|
|
|
@ -1837,73 +1837,8 @@ define void @ktest_2(<32 x float> %in, float * %base) {
|
||||||
; KNL-NEXT: .cfi_def_cfa_register %rbp
|
; KNL-NEXT: .cfi_def_cfa_register %rbp
|
||||||
; KNL-NEXT: andq $-32, %rsp
|
; KNL-NEXT: andq $-32, %rsp
|
||||||
; KNL-NEXT: subq $32, %rsp
|
; KNL-NEXT: subq $32, %rsp
|
||||||
; KNL-NEXT: vmovups (%rdi), %zmm2
|
; KNL-NEXT: vmovups 64(%rdi), %zmm2
|
||||||
; KNL-NEXT: vmovups 64(%rdi), %zmm3
|
; KNL-NEXT: vcmpltps %zmm1, %zmm2, %k2
|
||||||
; KNL-NEXT: vcmpltps %zmm1, %zmm3, %k1
|
|
||||||
; KNL-NEXT: kshiftlw $14, %k1, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %eax
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %ecx
|
|
||||||
; KNL-NEXT: vmovd %ecx, %xmm3
|
|
||||||
; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: kshiftlw $13, %k1, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: kshiftlw $12, %k1, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: kshiftlw $11, %k1, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: kshiftlw $10, %k1, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: kshiftlw $9, %k1, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: kshiftlw $8, %k1, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: kshiftlw $7, %k1, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: kshiftlw $6, %k1, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: kshiftlw $5, %k1, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: kshiftlw $4, %k1, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: kshiftlw $3, %k1, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: kshiftlw $2, %k1, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: kshiftlw $1, %k1, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: vcmpltps %zmm0, %zmm2, %k2
|
|
||||||
; KNL-NEXT: kshiftlw $14, %k2, %k0
|
; KNL-NEXT: kshiftlw $14, %k2, %k0
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
; KNL-NEXT: kmovw %k0, %eax
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
|
@ -1967,10 +1902,139 @@ define void @ktest_2(<32 x float> %in, float * %base) {
|
||||||
; KNL-NEXT: kshiftrw $15, %k2, %k0
|
; KNL-NEXT: kshiftrw $15, %k2, %k0
|
||||||
; KNL-NEXT: kmovw %k0, %eax
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
|
; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
|
||||||
; KNL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
|
; KNL-NEXT: vmovups (%rdi), %zmm3
|
||||||
; KNL-NEXT: vmovups 4(%rdi), %zmm3 {%k2} {z}
|
; KNL-NEXT: vcmpltps %zmm0, %zmm3, %k1
|
||||||
; KNL-NEXT: vmovups 68(%rdi), %zmm4 {%k1} {z}
|
; KNL-NEXT: kshiftlw $14, %k1, %k0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k1, %k0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %ecx
|
||||||
|
; KNL-NEXT: vmovd %ecx, %xmm3
|
||||||
|
; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kshiftlw $13, %k1, %k0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
|
; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kshiftlw $12, %k1, %k0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
|
; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kshiftlw $11, %k1, %k0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
|
; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kshiftlw $10, %k1, %k0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
|
; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kshiftlw $9, %k1, %k0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
|
; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kshiftlw $8, %k1, %k0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
|
; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kshiftlw $7, %k1, %k0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
|
; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kshiftlw $6, %k1, %k0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
|
; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kshiftlw $5, %k1, %k0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
|
; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kshiftlw $4, %k1, %k0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
|
; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kshiftlw $3, %k1, %k0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
|
; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kshiftlw $2, %k1, %k0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
|
; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kshiftlw $1, %k1, %k0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
|
; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k1, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
|
; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: vmovups 68(%rdi), %zmm4 {%k2} {z}
|
||||||
; KNL-NEXT: vcmpltps %zmm4, %zmm1, %k0
|
; KNL-NEXT: vcmpltps %zmm4, %zmm1, %k0
|
||||||
|
; KNL-NEXT: kshiftlw $14, %k0, %k2
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k2, %k2
|
||||||
|
; KNL-NEXT: kmovw %k2, %eax
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k0, %k2
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k2, %k2
|
||||||
|
; KNL-NEXT: kmovw %k2, %ecx
|
||||||
|
; KNL-NEXT: vmovd %ecx, %xmm4
|
||||||
|
; KNL-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
|
||||||
|
; KNL-NEXT: kshiftlw $13, %k0, %k2
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k2, %k2
|
||||||
|
; KNL-NEXT: kmovw %k2, %eax
|
||||||
|
; KNL-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
|
||||||
|
; KNL-NEXT: kshiftlw $12, %k0, %k2
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k2, %k2
|
||||||
|
; KNL-NEXT: kmovw %k2, %eax
|
||||||
|
; KNL-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
|
||||||
|
; KNL-NEXT: kshiftlw $11, %k0, %k2
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k2, %k2
|
||||||
|
; KNL-NEXT: kmovw %k2, %eax
|
||||||
|
; KNL-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
|
||||||
|
; KNL-NEXT: kshiftlw $10, %k0, %k2
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k2, %k2
|
||||||
|
; KNL-NEXT: kmovw %k2, %eax
|
||||||
|
; KNL-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
|
||||||
|
; KNL-NEXT: kshiftlw $9, %k0, %k2
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k2, %k2
|
||||||
|
; KNL-NEXT: kmovw %k2, %eax
|
||||||
|
; KNL-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
|
||||||
|
; KNL-NEXT: kshiftlw $8, %k0, %k2
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k2, %k2
|
||||||
|
; KNL-NEXT: kmovw %k2, %eax
|
||||||
|
; KNL-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
|
||||||
|
; KNL-NEXT: kshiftlw $7, %k0, %k2
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k2, %k2
|
||||||
|
; KNL-NEXT: kmovw %k2, %eax
|
||||||
|
; KNL-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
|
||||||
|
; KNL-NEXT: kshiftlw $6, %k0, %k2
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k2, %k2
|
||||||
|
; KNL-NEXT: kmovw %k2, %eax
|
||||||
|
; KNL-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
|
||||||
|
; KNL-NEXT: kshiftlw $5, %k0, %k2
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k2, %k2
|
||||||
|
; KNL-NEXT: kmovw %k2, %eax
|
||||||
|
; KNL-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
|
||||||
|
; KNL-NEXT: kshiftlw $4, %k0, %k2
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k2, %k2
|
||||||
|
; KNL-NEXT: kmovw %k2, %eax
|
||||||
|
; KNL-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
|
||||||
|
; KNL-NEXT: kshiftlw $3, %k0, %k2
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k2, %k2
|
||||||
|
; KNL-NEXT: kmovw %k2, %eax
|
||||||
|
; KNL-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
|
||||||
|
; KNL-NEXT: kshiftlw $2, %k0, %k2
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k2, %k2
|
||||||
|
; KNL-NEXT: kmovw %k2, %eax
|
||||||
|
; KNL-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
|
||||||
|
; KNL-NEXT: kshiftlw $1, %k0, %k2
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k2, %k2
|
||||||
|
; KNL-NEXT: kmovw %k2, %eax
|
||||||
|
; KNL-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4
|
||||||
|
; KNL-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
|
; KNL-NEXT: vpinsrb $15, %eax, %xmm4, %xmm3
|
||||||
|
; KNL-NEXT: vmovups 4(%rdi), %zmm4 {%k1} {z}
|
||||||
|
; KNL-NEXT: vcmpltps %zmm4, %zmm0, %k0
|
||||||
; KNL-NEXT: kshiftlw $14, %k0, %k1
|
; KNL-NEXT: kshiftlw $14, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
|
@ -2034,71 +2098,7 @@ define void @ktest_2(<32 x float> %in, float * %base) {
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
; KNL-NEXT: kmovw %k0, %eax
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
; KNL-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4
|
; KNL-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4
|
||||||
; KNL-NEXT: vcmpltps %zmm3, %zmm0, %k0
|
; KNL-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3
|
||||||
; KNL-NEXT: kshiftlw $14, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
|
||||||
; KNL-NEXT: vmovd %ecx, %xmm3
|
|
||||||
; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: kshiftlw $13, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: kshiftlw $12, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: kshiftlw $11, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: kshiftlw $10, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: kshiftlw $9, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: kshiftlw $8, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: kshiftlw $7, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: kshiftlw $6, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: kshiftlw $5, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: kshiftlw $4, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: kshiftlw $3, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: kshiftlw $2, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: kshiftlw $1, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
|
|
||||||
; KNL-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
|
|
||||||
; KNL-NEXT: vpor %ymm3, %ymm2, %ymm2
|
; KNL-NEXT: vpor %ymm3, %ymm2, %ymm2
|
||||||
; KNL-NEXT: vextracti128 $1, %ymm2, %xmm3
|
; KNL-NEXT: vextracti128 $1, %ymm2, %xmm3
|
||||||
; KNL-NEXT: vpmovsxbd %xmm3, %zmm3
|
; KNL-NEXT: vpmovsxbd %xmm3, %zmm3
|
||||||
|
@ -2943,36 +2943,6 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
|
||||||
;
|
;
|
||||||
; KNL-LABEL: store_64i1:
|
; KNL-LABEL: store_64i1:
|
||||||
; KNL: ## BB#0:
|
; KNL: ## BB#0:
|
||||||
; KNL-NEXT: pushq %rbp
|
|
||||||
; KNL-NEXT: Lcfi9:
|
|
||||||
; KNL-NEXT: .cfi_def_cfa_offset 16
|
|
||||||
; KNL-NEXT: pushq %r15
|
|
||||||
; KNL-NEXT: Lcfi10:
|
|
||||||
; KNL-NEXT: .cfi_def_cfa_offset 24
|
|
||||||
; KNL-NEXT: pushq %r14
|
|
||||||
; KNL-NEXT: Lcfi11:
|
|
||||||
; KNL-NEXT: .cfi_def_cfa_offset 32
|
|
||||||
; KNL-NEXT: pushq %r13
|
|
||||||
; KNL-NEXT: Lcfi12:
|
|
||||||
; KNL-NEXT: .cfi_def_cfa_offset 40
|
|
||||||
; KNL-NEXT: pushq %r12
|
|
||||||
; KNL-NEXT: Lcfi13:
|
|
||||||
; KNL-NEXT: .cfi_def_cfa_offset 48
|
|
||||||
; KNL-NEXT: pushq %rbx
|
|
||||||
; KNL-NEXT: Lcfi14:
|
|
||||||
; KNL-NEXT: .cfi_def_cfa_offset 56
|
|
||||||
; KNL-NEXT: Lcfi15:
|
|
||||||
; KNL-NEXT: .cfi_offset %rbx, -56
|
|
||||||
; KNL-NEXT: Lcfi16:
|
|
||||||
; KNL-NEXT: .cfi_offset %r12, -48
|
|
||||||
; KNL-NEXT: Lcfi17:
|
|
||||||
; KNL-NEXT: .cfi_offset %r13, -40
|
|
||||||
; KNL-NEXT: Lcfi18:
|
|
||||||
; KNL-NEXT: .cfi_offset %r14, -32
|
|
||||||
; KNL-NEXT: Lcfi19:
|
|
||||||
; KNL-NEXT: .cfi_offset %r15, -24
|
|
||||||
; KNL-NEXT: Lcfi20:
|
|
||||||
; KNL-NEXT: .cfi_offset %rbp, -16
|
|
||||||
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
|
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||||
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
||||||
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
|
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
|
||||||
|
@ -2984,281 +2954,275 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
|
||||||
; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
|
; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
|
||||||
; KNL-NEXT: kshiftlw $14, %k0, %k1
|
; KNL-NEXT: kshiftlw $14, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %r8d
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: kshiftlw $15, %k0, %k1
|
; KNL-NEXT: kshiftlw $15, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %r9d
|
; KNL-NEXT: kmovw %k1, %ecx
|
||||||
; KNL-NEXT: kshiftlw $13, %k0, %k1
|
; KNL-NEXT: kshiftlw $13, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %r10d
|
; KNL-NEXT: vmovd %ecx, %xmm3
|
||||||
|
; KNL-NEXT: kmovw %k1, %ecx
|
||||||
; KNL-NEXT: kshiftlw $12, %k0, %k1
|
; KNL-NEXT: kshiftlw $12, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %r11d
|
; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: kshiftlw $11, %k0, %k1
|
; KNL-NEXT: kshiftlw $11, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %r14d
|
; KNL-NEXT: vpinsrb $2, %ecx, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kmovw %k1, %ecx
|
||||||
; KNL-NEXT: kshiftlw $10, %k0, %k1
|
; KNL-NEXT: kshiftlw $10, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %r15d
|
; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: kshiftlw $9, %k0, %k1
|
; KNL-NEXT: kshiftlw $9, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %r12d
|
; KNL-NEXT: vpinsrb $4, %ecx, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kmovw %k1, %ecx
|
||||||
; KNL-NEXT: kshiftlw $8, %k0, %k1
|
; KNL-NEXT: kshiftlw $8, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %r13d
|
; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: kshiftlw $7, %k0, %k1
|
; KNL-NEXT: kshiftlw $7, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ebx
|
; KNL-NEXT: vpinsrb $6, %ecx, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kmovw %k1, %ecx
|
||||||
; KNL-NEXT: kshiftlw $6, %k0, %k1
|
; KNL-NEXT: kshiftlw $6, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ebp
|
; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: kshiftlw $5, %k0, %k1
|
; KNL-NEXT: kshiftlw $5, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
; KNL-NEXT: vpinsrb $8, %ecx, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kmovw %k1, %ecx
|
||||||
; KNL-NEXT: kshiftlw $4, %k0, %k1
|
; KNL-NEXT: kshiftlw $4, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: kshiftlw $3, %k0, %k1
|
; KNL-NEXT: kshiftlw $3, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %edx
|
; KNL-NEXT: vpinsrb $10, %ecx, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kmovw %k1, %ecx
|
||||||
; KNL-NEXT: kshiftlw $2, %k0, %k1
|
; KNL-NEXT: kshiftlw $2, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %esi
|
; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: kshiftlw $1, %k0, %k1
|
; KNL-NEXT: kshiftlw $1, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: vmovd %r9d, %xmm3
|
; KNL-NEXT: vpinsrb $12, %ecx, %xmm3, %xmm3
|
||||||
; KNL-NEXT: kmovw %k1, %r9d
|
; KNL-NEXT: kmovw %k1, %ecx
|
||||||
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k2
|
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
; KNL-NEXT: vpinsrb $1, %r8d, %xmm3, %xmm2
|
; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm2
|
||||||
; KNL-NEXT: vpinsrb $2, %r10d, %xmm2, %xmm2
|
|
||||||
; KNL-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2
|
|
||||||
; KNL-NEXT: vpinsrb $4, %r14d, %xmm2, %xmm2
|
|
||||||
; KNL-NEXT: vpinsrb $5, %r15d, %xmm2, %xmm2
|
|
||||||
; KNL-NEXT: vpinsrb $6, %r12d, %xmm2, %xmm2
|
|
||||||
; KNL-NEXT: vpinsrb $7, %r13d, %xmm2, %xmm2
|
|
||||||
; KNL-NEXT: vpinsrb $8, %ebx, %xmm2, %xmm2
|
|
||||||
; KNL-NEXT: vpinsrb $9, %ebp, %xmm2, %xmm2
|
|
||||||
; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
|
|
||||||
; KNL-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2
|
|
||||||
; KNL-NEXT: vpinsrb $12, %edx, %xmm2, %xmm2
|
|
||||||
; KNL-NEXT: vpinsrb $13, %esi, %xmm2, %xmm2
|
|
||||||
; KNL-NEXT: vpinsrb $14, %r9d, %xmm2, %xmm2
|
|
||||||
; KNL-NEXT: kmovw %k0, %eax
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
|
|
||||||
; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
|
|
||||||
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
|
|
||||||
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, 6(%rdi)
|
|
||||||
; KNL-NEXT: kshiftlw $14, %k2, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %r8d
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k2, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %r10d
|
|
||||||
; KNL-NEXT: kshiftlw $13, %k2, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %r9d
|
|
||||||
; KNL-NEXT: kshiftlw $12, %k2, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %r11d
|
|
||||||
; KNL-NEXT: kshiftlw $11, %k2, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %r14d
|
|
||||||
; KNL-NEXT: kshiftlw $10, %k2, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %r15d
|
|
||||||
; KNL-NEXT: kshiftlw $9, %k2, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %r12d
|
|
||||||
; KNL-NEXT: kshiftlw $8, %k2, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %r13d
|
|
||||||
; KNL-NEXT: kshiftlw $7, %k2, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %ecx
|
|
||||||
; KNL-NEXT: kshiftlw $6, %k2, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %esi
|
|
||||||
; KNL-NEXT: kshiftlw $5, %k2, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %ebp
|
|
||||||
; KNL-NEXT: kshiftlw $4, %k2, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %ebx
|
|
||||||
; KNL-NEXT: kshiftlw $3, %k2, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %eax
|
|
||||||
; KNL-NEXT: kshiftlw $2, %k2, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %edx
|
|
||||||
; KNL-NEXT: kshiftlw $1, %k2, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: vmovd %r10d, %xmm2
|
|
||||||
; KNL-NEXT: kmovw %k0, %r10d
|
|
||||||
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k2, %k0
|
|
||||||
; KNL-NEXT: vpinsrb $1, %r8d, %xmm2, %xmm1
|
|
||||||
; KNL-NEXT: vpinsrb $2, %r9d, %xmm1, %xmm1
|
|
||||||
; KNL-NEXT: vpinsrb $3, %r11d, %xmm1, %xmm1
|
|
||||||
; KNL-NEXT: vpinsrb $4, %r14d, %xmm1, %xmm1
|
|
||||||
; KNL-NEXT: vpinsrb $5, %r15d, %xmm1, %xmm1
|
|
||||||
; KNL-NEXT: vpinsrb $6, %r12d, %xmm1, %xmm1
|
|
||||||
; KNL-NEXT: vpinsrb $7, %r13d, %xmm1, %xmm1
|
|
||||||
; KNL-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
|
|
||||||
; KNL-NEXT: vpinsrb $9, %esi, %xmm1, %xmm1
|
|
||||||
; KNL-NEXT: vpinsrb $10, %ebp, %xmm1, %xmm1
|
|
||||||
; KNL-NEXT: vpinsrb $11, %ebx, %xmm1, %xmm1
|
|
||||||
; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
|
||||||
; KNL-NEXT: vpinsrb $13, %edx, %xmm1, %xmm1
|
|
||||||
; KNL-NEXT: vpinsrb $14, %r10d, %xmm1, %xmm1
|
|
||||||
; KNL-NEXT: kmovw %k0, %eax
|
|
||||||
; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
|
|
||||||
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
|
|
||||||
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
|
|
||||||
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, 4(%rdi)
|
|
||||||
; KNL-NEXT: kshiftlw $14, %k1, %k0
|
; KNL-NEXT: kshiftlw $14, %k1, %k0
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
; KNL-NEXT: kmovw %k0, %r8d
|
; KNL-NEXT: vpinsrb $14, %ecx, %xmm2, %xmm2
|
||||||
|
; KNL-NEXT: kmovw %k0, %ecx
|
||||||
; KNL-NEXT: kshiftlw $15, %k1, %k0
|
; KNL-NEXT: kshiftlw $15, %k1, %k0
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
; KNL-NEXT: kmovw %k0, %r10d
|
; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
|
||||||
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
; KNL-NEXT: kshiftlw $13, %k1, %k0
|
; KNL-NEXT: kshiftlw $13, %k1, %k0
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
; KNL-NEXT: kmovw %k0, %r9d
|
; KNL-NEXT: vmovd %eax, %xmm3
|
||||||
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
; KNL-NEXT: kshiftlw $12, %k1, %k0
|
; KNL-NEXT: kshiftlw $12, %k1, %k0
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
; KNL-NEXT: vpinsrb $1, %ecx, %xmm3, %xmm3
|
||||||
; KNL-NEXT: kmovw %k0, %r11d
|
|
||||||
; KNL-NEXT: kshiftlw $11, %k1, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %r14d
|
|
||||||
; KNL-NEXT: kshiftlw $10, %k1, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %r15d
|
|
||||||
; KNL-NEXT: kshiftlw $9, %k1, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %r12d
|
|
||||||
; KNL-NEXT: kshiftlw $8, %k1, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
|
||||||
; KNL-NEXT: kmovw %k0, %r13d
|
|
||||||
; KNL-NEXT: kshiftlw $7, %k1, %k0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
; KNL-NEXT: kmovw %k0, %ecx
|
; KNL-NEXT: kmovw %k0, %ecx
|
||||||
|
; KNL-NEXT: kshiftlw $11, %k1, %k0
|
||||||
|
; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
|
; KNL-NEXT: kshiftlw $10, %k1, %k0
|
||||||
|
; KNL-NEXT: vpinsrb $3, %ecx, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %ecx
|
||||||
|
; KNL-NEXT: kshiftlw $9, %k1, %k0
|
||||||
|
; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
|
; KNL-NEXT: kshiftlw $8, %k1, %k0
|
||||||
|
; KNL-NEXT: vpinsrb $5, %ecx, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %ecx
|
||||||
|
; KNL-NEXT: kshiftlw $7, %k1, %k0
|
||||||
|
; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
; KNL-NEXT: kshiftlw $6, %k1, %k0
|
; KNL-NEXT: kshiftlw $6, %k1, %k0
|
||||||
|
; KNL-NEXT: vpinsrb $7, %ecx, %xmm3, %xmm3
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
; KNL-NEXT: kmovw %k0, %esi
|
; KNL-NEXT: kmovw %k0, %ecx
|
||||||
; KNL-NEXT: kshiftlw $5, %k1, %k0
|
; KNL-NEXT: kshiftlw $5, %k1, %k0
|
||||||
|
; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
; KNL-NEXT: kmovw %k0, %ebp
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
; KNL-NEXT: kshiftlw $4, %k1, %k0
|
; KNL-NEXT: kshiftlw $4, %k1, %k0
|
||||||
|
; KNL-NEXT: vpinsrb $9, %ecx, %xmm3, %xmm3
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
; KNL-NEXT: kmovw %k0, %ebx
|
; KNL-NEXT: kmovw %k0, %ecx
|
||||||
; KNL-NEXT: kshiftlw $3, %k1, %k0
|
; KNL-NEXT: kshiftlw $3, %k1, %k0
|
||||||
|
; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
; KNL-NEXT: kmovw %k0, %eax
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
; KNL-NEXT: kshiftlw $2, %k1, %k0
|
; KNL-NEXT: kshiftlw $2, %k1, %k0
|
||||||
|
; KNL-NEXT: vpinsrb $11, %ecx, %xmm3, %xmm3
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
; KNL-NEXT: kmovw %k0, %edx
|
; KNL-NEXT: kmovw %k0, %ecx
|
||||||
; KNL-NEXT: kshiftlw $1, %k1, %k0
|
; KNL-NEXT: kshiftlw $1, %k1, %k0
|
||||||
|
; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
; KNL-NEXT: vmovd %r10d, %xmm1
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
; KNL-NEXT: kmovw %k0, %r10d
|
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
|
||||||
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
|
; KNL-NEXT: vpmovsxbd %xmm2, %zmm1
|
||||||
|
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
|
||||||
|
; KNL-NEXT: vpinsrb $13, %ecx, %xmm3, %xmm2
|
||||||
|
; KNL-NEXT: kmovw %k1, %ecx
|
||||||
|
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
|
||||||
|
; KNL-NEXT: vpinsrb $14, %eax, %xmm2, %xmm1
|
||||||
|
; KNL-NEXT: vpinsrb $15, %ecx, %xmm1, %xmm1
|
||||||
|
; KNL-NEXT: kmovw %k1, 6(%rdi)
|
||||||
|
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
|
||||||
|
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
|
||||||
|
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
|
||||||
|
; KNL-NEXT: kmovw %k1, 4(%rdi)
|
||||||
|
; KNL-NEXT: kshiftlw $14, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm0
|
|
||||||
; KNL-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
|
|
||||||
; KNL-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
||||||
; KNL-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
||||||
; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
||||||
; KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
||||||
; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
||||||
; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
|
|
||||||
; KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
|
|
||||||
; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0
|
|
||||||
; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0
|
|
||||||
; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
|
|
||||||
; KNL-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0
|
|
||||||
; KNL-NEXT: vpinsrb $14, %r10d, %xmm0, %xmm0
|
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
|
; KNL-NEXT: kshiftlw $15, %k0, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
|
; KNL-NEXT: kmovw %k1, %ecx
|
||||||
|
; KNL-NEXT: kshiftlw $13, %k0, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
|
; KNL-NEXT: vmovd %ecx, %xmm1
|
||||||
|
; KNL-NEXT: kmovw %k1, %ecx
|
||||||
|
; KNL-NEXT: kshiftlw $12, %k0, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
|
; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
|
||||||
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
|
; KNL-NEXT: kshiftlw $11, %k0, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
|
; KNL-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
|
||||||
|
; KNL-NEXT: kmovw %k1, %ecx
|
||||||
|
; KNL-NEXT: kshiftlw $10, %k0, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
|
; KNL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
|
||||||
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
|
; KNL-NEXT: kshiftlw $9, %k0, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
|
; KNL-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1
|
||||||
|
; KNL-NEXT: kmovw %k1, %ecx
|
||||||
|
; KNL-NEXT: kshiftlw $8, %k0, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
|
; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
|
||||||
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
|
; KNL-NEXT: kshiftlw $7, %k0, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
|
; KNL-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1
|
||||||
|
; KNL-NEXT: kmovw %k1, %ecx
|
||||||
|
; KNL-NEXT: kshiftlw $6, %k0, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
|
; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
|
||||||
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
|
; KNL-NEXT: kshiftlw $5, %k0, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
|
; KNL-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
|
||||||
|
; KNL-NEXT: kmovw %k1, %ecx
|
||||||
|
; KNL-NEXT: kshiftlw $4, %k0, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
|
; KNL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
|
||||||
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
|
; KNL-NEXT: kshiftlw $3, %k0, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
|
; KNL-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1
|
||||||
|
; KNL-NEXT: kmovw %k1, %ecx
|
||||||
|
; KNL-NEXT: kshiftlw $2, %k0, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
|
; KNL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
|
||||||
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
|
; KNL-NEXT: kshiftlw $1, %k0, %k1
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
|
; KNL-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1
|
||||||
|
; KNL-NEXT: kmovw %k1, %ecx
|
||||||
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
|
||||||
|
; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm0
|
||||||
|
; KNL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
||||||
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
|
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||||
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
||||||
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||||
; KNL-NEXT: kmovw %k1, 2(%rdi)
|
; KNL-NEXT: kmovw %k0, 2(%rdi)
|
||||||
; KNL-NEXT: kshiftlw $14, %k0, %k1
|
; KNL-NEXT: kshiftlw $14, %k1, %k0
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %r8d
|
|
||||||
; KNL-NEXT: kshiftlw $15, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %r9d
|
|
||||||
; KNL-NEXT: kshiftlw $13, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %r10d
|
|
||||||
; KNL-NEXT: kshiftlw $12, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %r11d
|
|
||||||
; KNL-NEXT: kshiftlw $11, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %r14d
|
|
||||||
; KNL-NEXT: kshiftlw $10, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %r15d
|
|
||||||
; KNL-NEXT: kshiftlw $9, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %r12d
|
|
||||||
; KNL-NEXT: kshiftlw $8, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %r13d
|
|
||||||
; KNL-NEXT: kshiftlw $7, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %edx
|
|
||||||
; KNL-NEXT: kshiftlw $6, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %esi
|
|
||||||
; KNL-NEXT: kshiftlw $5, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %ebp
|
|
||||||
; KNL-NEXT: kshiftlw $4, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %ebx
|
|
||||||
; KNL-NEXT: kshiftlw $3, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
|
||||||
; KNL-NEXT: kshiftlw $2, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
|
||||||
; KNL-NEXT: kshiftlw $1, %k0, %k1
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
|
||||||
; KNL-NEXT: vmovd %r9d, %xmm0
|
|
||||||
; KNL-NEXT: kmovw %k1, %r9d
|
|
||||||
; KNL-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
|
|
||||||
; KNL-NEXT: vpinsrb $2, %r10d, %xmm0, %xmm0
|
|
||||||
; KNL-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
|
|
||||||
; KNL-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
|
|
||||||
; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
|
||||||
; KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
|
||||||
; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
|
||||||
; KNL-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0
|
|
||||||
; KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
|
|
||||||
; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0
|
|
||||||
; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0
|
|
||||||
; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
|
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
; KNL-NEXT: vpinsrb $14, %r9d, %xmm0, %xmm0
|
; KNL-NEXT: kshiftlw $15, %k1, %k0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %ecx
|
||||||
|
; KNL-NEXT: kshiftlw $13, %k1, %k0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: vmovd %ecx, %xmm0
|
||||||
|
; KNL-NEXT: kmovw %k0, %ecx
|
||||||
|
; KNL-NEXT: kshiftlw $12, %k1, %k0
|
||||||
|
; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
|
; KNL-NEXT: kshiftlw $11, %k1, %k0
|
||||||
|
; KNL-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %ecx
|
||||||
|
; KNL-NEXT: kshiftlw $10, %k1, %k0
|
||||||
|
; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
|
; KNL-NEXT: kshiftlw $9, %k1, %k0
|
||||||
|
; KNL-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %ecx
|
||||||
|
; KNL-NEXT: kshiftlw $8, %k1, %k0
|
||||||
|
; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
|
; KNL-NEXT: kshiftlw $7, %k1, %k0
|
||||||
|
; KNL-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %ecx
|
||||||
|
; KNL-NEXT: kshiftlw $6, %k1, %k0
|
||||||
|
; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
|
; KNL-NEXT: kshiftlw $5, %k1, %k0
|
||||||
|
; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %ecx
|
||||||
|
; KNL-NEXT: kshiftlw $4, %k1, %k0
|
||||||
|
; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
|
; KNL-NEXT: kshiftlw $3, %k1, %k0
|
||||||
|
; KNL-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %ecx
|
||||||
|
; KNL-NEXT: kshiftlw $2, %k1, %k0
|
||||||
|
; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
|
; KNL-NEXT: kshiftlw $1, %k1, %k0
|
||||||
|
; KNL-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
|
; KNL-NEXT: kmovw %k0, %ecx
|
||||||
|
; KNL-NEXT: kshiftrw $15, %k1, %k0
|
||||||
|
; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
|
||||||
|
; KNL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
||||||
; KNL-NEXT: kmovw %k0, %eax
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
||||||
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
|
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||||
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
||||||
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||||
; KNL-NEXT: kmovw %k0, (%rdi)
|
; KNL-NEXT: kmovw %k0, (%rdi)
|
||||||
; KNL-NEXT: popq %rbx
|
|
||||||
; KNL-NEXT: popq %r12
|
|
||||||
; KNL-NEXT: popq %r13
|
|
||||||
; KNL-NEXT: popq %r14
|
|
||||||
; KNL-NEXT: popq %r15
|
|
||||||
; KNL-NEXT: popq %rbp
|
|
||||||
; KNL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
;
|
;
|
||||||
; SKX-LABEL: store_64i1:
|
; SKX-LABEL: store_64i1:
|
||||||
|
|
|
@ -8,6 +8,7 @@ define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
|
||||||
; CHECK-NEXT: vcmpleps %zmm1, %zmm0, %k1
|
; CHECK-NEXT: vcmpleps %zmm1, %zmm0, %k1
|
||||||
; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
|
; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
|
; CHECK-NEXT: ## -- End function
|
||||||
%mask = fcmp ole <16 x float> %x, %y
|
%mask = fcmp ole <16 x float> %x, %y
|
||||||
%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
|
%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
|
||||||
ret <16 x float> %max
|
ret <16 x float> %max
|
||||||
|
@ -19,6 +20,7 @@ define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
|
||||||
; CHECK-NEXT: vcmplepd %zmm1, %zmm0, %k1
|
; CHECK-NEXT: vcmplepd %zmm1, %zmm0, %k1
|
||||||
; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
|
; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
|
; CHECK-NEXT: ## -- End function
|
||||||
%mask = fcmp ole <8 x double> %x, %y
|
%mask = fcmp ole <8 x double> %x, %y
|
||||||
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
|
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
|
||||||
ret <8 x double> %max
|
ret <8 x double> %max
|
||||||
|
@ -30,6 +32,7 @@ define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwin
|
||||||
; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k1
|
; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k1
|
||||||
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
|
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
|
; CHECK-NEXT: ## -- End function
|
||||||
%y = load <16 x i32>, <16 x i32>* %yp, align 4
|
%y = load <16 x i32>, <16 x i32>* %yp, align 4
|
||||||
%mask = icmp eq <16 x i32> %x, %y
|
%mask = icmp eq <16 x i32> %x, %y
|
||||||
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
|
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
|
||||||
|
@ -42,6 +45,7 @@ define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1)
|
||||||
; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k1
|
; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k1
|
||||||
; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1}
|
; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1}
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
|
; CHECK-NEXT: ## -- End function
|
||||||
%mask = icmp uge <16 x i32> %x, %y
|
%mask = icmp uge <16 x i32> %x, %y
|
||||||
%max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
|
%max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
|
||||||
ret <16 x i32> %max
|
ret <16 x i32> %max
|
||||||
|
@ -53,6 +57,7 @@ define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
|
||||||
; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
|
; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
|
||||||
; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
|
; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
|
; CHECK-NEXT: ## -- End function
|
||||||
%mask = icmp eq <8 x i64> %x, %y
|
%mask = icmp eq <8 x i64> %x, %y
|
||||||
%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
|
%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
|
||||||
ret <8 x i64> %max
|
ret <8 x i64> %max
|
||||||
|
@ -64,6 +69,7 @@ define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) noun
|
||||||
; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1
|
; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1
|
||||||
; CHECK-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1}
|
; CHECK-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1}
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
|
; CHECK-NEXT: ## -- End function
|
||||||
%mask = icmp ugt <8 x i64> %x, %y
|
%mask = icmp ugt <8 x i64> %x, %y
|
||||||
%max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
|
%max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
|
||||||
ret <8 x i64> %max
|
ret <8 x i64> %max
|
||||||
|
@ -117,12 +123,14 @@ define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
|
||||||
; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
|
; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
|
||||||
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
|
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
|
||||||
; KNL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
|
; KNL-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; SKX-LABEL: test9:
|
; SKX-LABEL: test9:
|
||||||
; SKX: ## BB#0:
|
; SKX: ## BB#0:
|
||||||
; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %k1
|
; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %k1
|
||||||
; SKX-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
|
; SKX-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
|
||||||
; SKX-NEXT: retq
|
; SKX-NEXT: retq
|
||||||
|
; SKX-NEXT: ## -- End function
|
||||||
%mask = icmp eq <8 x i32> %x, %y
|
%mask = icmp eq <8 x i32> %x, %y
|
||||||
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
|
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
|
||||||
ret <8 x i32> %max
|
ret <8 x i32> %max
|
||||||
|
@ -137,12 +145,14 @@ define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
|
||||||
; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
|
; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
|
||||||
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
|
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
|
||||||
; KNL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
|
; KNL-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; SKX-LABEL: test10:
|
; SKX-LABEL: test10:
|
||||||
; SKX: ## BB#0:
|
; SKX: ## BB#0:
|
||||||
; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %k1
|
; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %k1
|
||||||
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1}
|
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1}
|
||||||
; SKX-NEXT: retq
|
; SKX-NEXT: retq
|
||||||
|
; SKX-NEXT: ## -- End function
|
||||||
|
|
||||||
%mask = fcmp oeq <8 x float> %x, %y
|
%mask = fcmp oeq <8 x float> %x, %y
|
||||||
%max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
|
%max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
|
||||||
|
@ -154,6 +164,7 @@ define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
|
||||||
; CHECK: ## BB#0:
|
; CHECK: ## BB#0:
|
||||||
; CHECK-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
|
; CHECK-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
|
; CHECK-NEXT: ## -- End function
|
||||||
%mask = icmp ugt <8 x i32> %x, %y
|
%mask = icmp ugt <8 x i32> %x, %y
|
||||||
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
|
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
|
||||||
ret <8 x i32> %max
|
ret <8 x i32> %max
|
||||||
|
@ -168,6 +179,7 @@ define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
|
||||||
; KNL-NEXT: kmovw %k0, %eax
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
; KNL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
|
; KNL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
|
||||||
; KNL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
|
; KNL-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; SKX-LABEL: test12:
|
; SKX-LABEL: test12:
|
||||||
; SKX: ## BB#0:
|
; SKX: ## BB#0:
|
||||||
|
@ -178,6 +190,7 @@ define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
|
||||||
; SKX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
|
; SKX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
|
||||||
; SKX-NEXT: vzeroupper
|
; SKX-NEXT: vzeroupper
|
||||||
; SKX-NEXT: retq
|
; SKX-NEXT: retq
|
||||||
|
; SKX-NEXT: ## -- End function
|
||||||
%res = icmp eq <16 x i64> %a, %b
|
%res = icmp eq <16 x i64> %a, %b
|
||||||
%res1 = bitcast <16 x i1> %res to i16
|
%res1 = bitcast <16 x i1> %res to i16
|
||||||
ret i16 %res1
|
ret i16 %res1
|
||||||
|
@ -330,6 +343,7 @@ define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
|
||||||
; KNL-NEXT: movq %rbp, %rsp
|
; KNL-NEXT: movq %rbp, %rsp
|
||||||
; KNL-NEXT: popq %rbp
|
; KNL-NEXT: popq %rbp
|
||||||
; KNL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
|
; KNL-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; SKX-LABEL: test12_v32i32:
|
; SKX-LABEL: test12_v32i32:
|
||||||
; SKX: ## BB#0:
|
; SKX: ## BB#0:
|
||||||
|
@ -339,6 +353,7 @@ define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
|
||||||
; SKX-NEXT: kmovd %k0, %eax
|
; SKX-NEXT: kmovd %k0, %eax
|
||||||
; SKX-NEXT: vzeroupper
|
; SKX-NEXT: vzeroupper
|
||||||
; SKX-NEXT: retq
|
; SKX-NEXT: retq
|
||||||
|
; SKX-NEXT: ## -- End function
|
||||||
%res = icmp eq <32 x i32> %a, %b
|
%res = icmp eq <32 x i32> %a, %b
|
||||||
%res1 = bitcast <32 x i1> %res to i32
|
%res1 = bitcast <32 x i1> %res to i32
|
||||||
ret i32 %res1
|
ret i32 %res1
|
||||||
|
@ -562,72 +577,72 @@ define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
|
||||||
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
||||||
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
|
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||||
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
||||||
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
; KNL-NEXT: vpcmpeqw %ymm6, %ymm2, %ymm1
|
||||||
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
|
; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
|
||||||
; KNL-NEXT: vpcmpeqw %ymm6, %ymm2, %ymm0
|
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
|
||||||
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
|
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
|
||||||
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
|
||||||
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
|
||||||
; KNL-NEXT: kshiftlw $14, %k0, %k1
|
; KNL-NEXT: kshiftlw $14, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: kshiftlw $15, %k0, %k1
|
; KNL-NEXT: kshiftlw $15, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %ecx
|
; KNL-NEXT: kmovw %k1, %ecx
|
||||||
; KNL-NEXT: vmovd %ecx, %xmm0
|
; KNL-NEXT: vmovd %ecx, %xmm1
|
||||||
; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
|
||||||
; KNL-NEXT: kshiftlw $13, %k0, %k1
|
; KNL-NEXT: kshiftlw $13, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
|
||||||
; KNL-NEXT: kshiftlw $12, %k0, %k1
|
; KNL-NEXT: kshiftlw $12, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
|
||||||
; KNL-NEXT: kshiftlw $11, %k0, %k1
|
; KNL-NEXT: kshiftlw $11, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
|
||||||
; KNL-NEXT: kshiftlw $10, %k0, %k1
|
; KNL-NEXT: kshiftlw $10, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
|
||||||
; KNL-NEXT: kshiftlw $9, %k0, %k1
|
; KNL-NEXT: kshiftlw $9, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
|
||||||
; KNL-NEXT: kshiftlw $8, %k0, %k1
|
; KNL-NEXT: kshiftlw $8, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
|
||||||
; KNL-NEXT: kshiftlw $7, %k0, %k1
|
; KNL-NEXT: kshiftlw $7, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
|
||||||
; KNL-NEXT: kshiftlw $6, %k0, %k1
|
; KNL-NEXT: kshiftlw $6, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
|
||||||
; KNL-NEXT: kshiftlw $5, %k0, %k1
|
; KNL-NEXT: kshiftlw $5, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
|
||||||
; KNL-NEXT: kshiftlw $4, %k0, %k1
|
; KNL-NEXT: kshiftlw $4, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
|
||||||
; KNL-NEXT: kshiftlw $3, %k0, %k1
|
; KNL-NEXT: kshiftlw $3, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
||||||
; KNL-NEXT: kshiftlw $2, %k0, %k1
|
; KNL-NEXT: kshiftlw $2, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
; KNL-NEXT: kmovw %k1, %eax
|
||||||
; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
|
|
||||||
; KNL-NEXT: kshiftlw $1, %k0, %k1
|
; KNL-NEXT: kshiftlw $1, %k0, %k1
|
||||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||||
; KNL-NEXT: kmovw %k1, %eax
|
; KNL-NEXT: kmovw %k1, %ecx
|
||||||
; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
|
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
|
||||||
|
; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm0
|
||||||
|
; KNL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
|
||||||
|
; KNL-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
|
||||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||||
; KNL-NEXT: kmovw %k0, %eax
|
; KNL-NEXT: kmovw %k0, %eax
|
||||||
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
||||||
|
@ -642,6 +657,7 @@ define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
|
||||||
; KNL-NEXT: movq %rbp, %rsp
|
; KNL-NEXT: movq %rbp, %rsp
|
||||||
; KNL-NEXT: popq %rbp
|
; KNL-NEXT: popq %rbp
|
||||||
; KNL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
|
; KNL-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; SKX-LABEL: test12_v64i16:
|
; SKX-LABEL: test12_v64i16:
|
||||||
; SKX: ## BB#0:
|
; SKX: ## BB#0:
|
||||||
|
@ -651,6 +667,7 @@ define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
|
||||||
; SKX-NEXT: kmovq %k0, %rax
|
; SKX-NEXT: kmovq %k0, %rax
|
||||||
; SKX-NEXT: vzeroupper
|
; SKX-NEXT: vzeroupper
|
||||||
; SKX-NEXT: retq
|
; SKX-NEXT: retq
|
||||||
|
; SKX-NEXT: ## -- End function
|
||||||
%res = icmp eq <64 x i16> %a, %b
|
%res = icmp eq <64 x i16> %a, %b
|
||||||
%res1 = bitcast <64 x i1> %res to i64
|
%res1 = bitcast <64 x i1> %res to i64
|
||||||
ret i64 %res1
|
ret i64 %res1
|
||||||
|
@ -704,6 +721,7 @@ define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind
|
||||||
; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k1
|
; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k1
|
||||||
; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1}
|
; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1}
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
|
; CHECK-NEXT: ## -- End function
|
||||||
%mask = icmp sge <16 x i32> %x, %y
|
%mask = icmp sge <16 x i32> %x, %y
|
||||||
%max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
|
%max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
|
||||||
ret <16 x i32> %max
|
ret <16 x i32> %max
|
||||||
|
@ -715,6 +733,7 @@ define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nou
|
||||||
; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k1
|
; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k1
|
||||||
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
|
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
|
; CHECK-NEXT: ## -- End function
|
||||||
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
|
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
|
||||||
%mask = icmp sgt <16 x i32> %x, %y
|
%mask = icmp sgt <16 x i32> %x, %y
|
||||||
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
|
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
|
||||||
|
@ -727,6 +746,7 @@ define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nou
|
||||||
; CHECK-NEXT: vpcmpled (%rdi), %zmm0, %k1
|
; CHECK-NEXT: vpcmpled (%rdi), %zmm0, %k1
|
||||||
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
|
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
|
; CHECK-NEXT: ## -- End function
|
||||||
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
|
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
|
||||||
%mask = icmp sle <16 x i32> %x, %y
|
%mask = icmp sle <16 x i32> %x, %y
|
||||||
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
|
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
|
||||||
|
@ -739,6 +759,7 @@ define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nou
|
||||||
; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1
|
; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1
|
||||||
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
|
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
|
; CHECK-NEXT: ## -- End function
|
||||||
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
|
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
|
||||||
%mask = icmp ule <16 x i32> %x, %y
|
%mask = icmp ule <16 x i32> %x, %y
|
||||||
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
|
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
|
||||||
|
@ -752,6 +773,7 @@ define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i3
|
||||||
; CHECK-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
|
; CHECK-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
|
||||||
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
|
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
|
; CHECK-NEXT: ## -- End function
|
||||||
%mask1 = icmp eq <16 x i32> %x1, %y1
|
%mask1 = icmp eq <16 x i32> %x1, %y1
|
||||||
%mask0 = icmp eq <16 x i32> %x, %y
|
%mask0 = icmp eq <16 x i32> %x, %y
|
||||||
%mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
|
%mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
|
||||||
|
@ -766,6 +788,7 @@ define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y
|
||||||
; CHECK-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1}
|
; CHECK-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1}
|
||||||
; CHECK-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1}
|
; CHECK-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1}
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
|
; CHECK-NEXT: ## -- End function
|
||||||
%mask1 = icmp sge <8 x i64> %x1, %y1
|
%mask1 = icmp sge <8 x i64> %x1, %y1
|
||||||
%mask0 = icmp sle <8 x i64> %x, %y
|
%mask0 = icmp sle <8 x i64> %x, %y
|
||||||
%mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
|
%mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
|
||||||
|
@ -780,6 +803,7 @@ define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i6
|
||||||
; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1}
|
; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1}
|
||||||
; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
|
; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
|
; CHECK-NEXT: ## -- End function
|
||||||
%mask1 = icmp sgt <8 x i64> %x1, %y1
|
%mask1 = icmp sgt <8 x i64> %x1, %y1
|
||||||
%y = load <8 x i64>, <8 x i64>* %y.ptr, align 4
|
%y = load <8 x i64>, <8 x i64>* %y.ptr, align 4
|
||||||
%mask0 = icmp sgt <8 x i64> %x, %y
|
%mask0 = icmp sgt <8 x i64> %x, %y
|
||||||
|
@ -795,6 +819,7 @@ define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16
|
||||||
; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1}
|
; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1}
|
||||||
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
|
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
|
; CHECK-NEXT: ## -- End function
|
||||||
%mask1 = icmp sge <16 x i32> %x1, %y1
|
%mask1 = icmp sge <16 x i32> %x1, %y1
|
||||||
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
|
%y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
|
||||||
%mask0 = icmp ule <16 x i32> %x, %y
|
%mask0 = icmp ule <16 x i32> %x, %y
|
||||||
|
@ -809,6 +834,7 @@ define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
|
||||||
; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1
|
; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1
|
||||||
; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
|
; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
|
; CHECK-NEXT: ## -- End function
|
||||||
%yb = load i64, i64* %yb.ptr, align 4
|
%yb = load i64, i64* %yb.ptr, align 4
|
||||||
%y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
|
%y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
|
||||||
%y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
|
%y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
|
||||||
|
@ -823,6 +849,7 @@ define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind
|
||||||
; CHECK-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1
|
; CHECK-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1
|
||||||
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
|
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
|
; CHECK-NEXT: ## -- End function
|
||||||
%yb = load i32, i32* %yb.ptr, align 4
|
%yb = load i32, i32* %yb.ptr, align 4
|
||||||
%y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
|
%y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
|
||||||
%y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
|
%y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
|
||||||
|
@ -838,6 +865,7 @@ define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32
|
||||||
; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
|
; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
|
||||||
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
|
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
|
; CHECK-NEXT: ## -- End function
|
||||||
%mask1 = icmp sge <16 x i32> %x1, %y1
|
%mask1 = icmp sge <16 x i32> %x1, %y1
|
||||||
%yb = load i32, i32* %yb.ptr, align 4
|
%yb = load i32, i32* %yb.ptr, align 4
|
||||||
%y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
|
%y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
|
||||||
|
@ -855,6 +883,7 @@ define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y
|
||||||
; CHECK-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1}
|
; CHECK-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1}
|
||||||
; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
|
; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
|
; CHECK-NEXT: ## -- End function
|
||||||
%mask1 = icmp sge <8 x i64> %x1, %y1
|
%mask1 = icmp sge <8 x i64> %x1, %y1
|
||||||
%yb = load i64, i64* %yb.ptr, align 4
|
%yb = load i64, i64* %yb.ptr, align 4
|
||||||
%y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
|
%y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
|
||||||
|
@ -920,12 +949,14 @@ define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind {
|
||||||
; KNL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm2
|
; KNL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm2
|
||||||
; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
|
; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
|
||||||
; KNL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
|
; KNL-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; SKX-LABEL: test30:
|
; SKX-LABEL: test30:
|
||||||
; SKX: ## BB#0:
|
; SKX: ## BB#0:
|
||||||
; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
|
; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
|
||||||
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
|
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
|
||||||
; SKX-NEXT: retq
|
; SKX-NEXT: retq
|
||||||
|
; SKX-NEXT: ## -- End function
|
||||||
|
|
||||||
%mask = fcmp oeq <4 x double> %x, %y
|
%mask = fcmp oeq <4 x double> %x, %y
|
||||||
%max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %y
|
%max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %y
|
||||||
|
@ -938,12 +969,14 @@ define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp
|
||||||
; KNL-NEXT: vcmpltpd (%rdi), %xmm0, %xmm2
|
; KNL-NEXT: vcmpltpd (%rdi), %xmm0, %xmm2
|
||||||
; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||||
; KNL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
|
; KNL-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; SKX-LABEL: test31:
|
; SKX-LABEL: test31:
|
||||||
; SKX: ## BB#0:
|
; SKX: ## BB#0:
|
||||||
; SKX-NEXT: vcmpltpd (%rdi), %xmm0, %k1
|
; SKX-NEXT: vcmpltpd (%rdi), %xmm0, %k1
|
||||||
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
|
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
|
||||||
; SKX-NEXT: retq
|
; SKX-NEXT: retq
|
||||||
|
; SKX-NEXT: ## -- End function
|
||||||
|
|
||||||
%y = load <2 x double>, <2 x double>* %yp, align 4
|
%y = load <2 x double>, <2 x double>* %yp, align 4
|
||||||
%mask = fcmp olt <2 x double> %x, %y
|
%mask = fcmp olt <2 x double> %x, %y
|
||||||
|
@ -957,12 +990,14 @@ define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp
|
||||||
; KNL-NEXT: vcmpltpd (%rdi), %ymm0, %ymm2
|
; KNL-NEXT: vcmpltpd (%rdi), %ymm0, %ymm2
|
||||||
; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
|
; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
|
||||||
; KNL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
|
; KNL-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; SKX-LABEL: test32:
|
; SKX-LABEL: test32:
|
||||||
; SKX: ## BB#0:
|
; SKX: ## BB#0:
|
||||||
; SKX-NEXT: vcmpltpd (%rdi), %ymm0, %k1
|
; SKX-NEXT: vcmpltpd (%rdi), %ymm0, %k1
|
||||||
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
|
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
|
||||||
; SKX-NEXT: retq
|
; SKX-NEXT: retq
|
||||||
|
; SKX-NEXT: ## -- End function
|
||||||
|
|
||||||
%y = load <4 x double>, <4 x double>* %yp, align 4
|
%y = load <4 x double>, <4 x double>* %yp, align 4
|
||||||
%mask = fcmp ogt <4 x double> %y, %x
|
%mask = fcmp ogt <4 x double> %y, %x
|
||||||
|
@ -976,6 +1011,7 @@ define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp
|
||||||
; CHECK-NEXT: vcmpltpd (%rdi), %zmm0, %k1
|
; CHECK-NEXT: vcmpltpd (%rdi), %zmm0, %k1
|
||||||
; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
|
; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
|
; CHECK-NEXT: ## -- End function
|
||||||
%y = load <8 x double>, <8 x double>* %yp, align 4
|
%y = load <8 x double>, <8 x double>* %yp, align 4
|
||||||
%mask = fcmp olt <8 x double> %x, %y
|
%mask = fcmp olt <8 x double> %x, %y
|
||||||
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
|
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
|
||||||
|
@ -988,12 +1024,14 @@ define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) no
|
||||||
; KNL-NEXT: vcmpltps (%rdi), %xmm0, %xmm2
|
; KNL-NEXT: vcmpltps (%rdi), %xmm0, %xmm2
|
||||||
; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
|
; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
|
||||||
; KNL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
|
; KNL-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; SKX-LABEL: test34:
|
; SKX-LABEL: test34:
|
||||||
; SKX: ## BB#0:
|
; SKX: ## BB#0:
|
||||||
; SKX-NEXT: vcmpltps (%rdi), %xmm0, %k1
|
; SKX-NEXT: vcmpltps (%rdi), %xmm0, %k1
|
||||||
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
|
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
|
||||||
; SKX-NEXT: retq
|
; SKX-NEXT: retq
|
||||||
|
; SKX-NEXT: ## -- End function
|
||||||
%y = load <4 x float>, <4 x float>* %yp, align 4
|
%y = load <4 x float>, <4 x float>* %yp, align 4
|
||||||
%mask = fcmp olt <4 x float> %x, %y
|
%mask = fcmp olt <4 x float> %x, %y
|
||||||
%max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
|
%max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
|
||||||
|
@ -1010,12 +1048,14 @@ define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) no
|
||||||
; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
|
; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
|
||||||
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
|
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
|
||||||
; KNL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
|
; KNL-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; SKX-LABEL: test35:
|
; SKX-LABEL: test35:
|
||||||
; SKX: ## BB#0:
|
; SKX: ## BB#0:
|
||||||
; SKX-NEXT: vcmpltps (%rdi), %ymm0, %k1
|
; SKX-NEXT: vcmpltps (%rdi), %ymm0, %k1
|
||||||
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1}
|
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1}
|
||||||
; SKX-NEXT: retq
|
; SKX-NEXT: retq
|
||||||
|
; SKX-NEXT: ## -- End function
|
||||||
|
|
||||||
%y = load <8 x float>, <8 x float>* %yp, align 4
|
%y = load <8 x float>, <8 x float>* %yp, align 4
|
||||||
%mask = fcmp ogt <8 x float> %y, %x
|
%mask = fcmp ogt <8 x float> %y, %x
|
||||||
|
@ -1029,6 +1069,7 @@ define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp
|
||||||
; CHECK-NEXT: vcmpltps (%rdi), %zmm0, %k1
|
; CHECK-NEXT: vcmpltps (%rdi), %zmm0, %k1
|
||||||
; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
|
; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
|
; CHECK-NEXT: ## -- End function
|
||||||
%y = load <16 x float>, <16 x float>* %yp, align 4
|
%y = load <16 x float>, <16 x float>* %yp, align 4
|
||||||
%mask = fcmp olt <16 x float> %x, %y
|
%mask = fcmp olt <16 x float> %x, %y
|
||||||
%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
|
%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
|
||||||
|
@ -1041,6 +1082,7 @@ define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, double* %ptr) nou
|
||||||
; CHECK-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1
|
; CHECK-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1
|
||||||
; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
|
; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
|
; CHECK-NEXT: ## -- End function
|
||||||
|
|
||||||
%a = load double, double* %ptr
|
%a = load double, double* %ptr
|
||||||
%v = insertelement <8 x double> undef, double %a, i32 0
|
%v = insertelement <8 x double> undef, double %a, i32 0
|
||||||
|
@ -1058,12 +1100,14 @@ define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nou
|
||||||
; KNL-NEXT: vcmpltpd %ymm2, %ymm0, %ymm2
|
; KNL-NEXT: vcmpltpd %ymm2, %ymm0, %ymm2
|
||||||
; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
|
; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
|
||||||
; KNL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
|
; KNL-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; SKX-LABEL: test38:
|
; SKX-LABEL: test38:
|
||||||
; SKX: ## BB#0:
|
; SKX: ## BB#0:
|
||||||
; SKX-NEXT: vcmpltpd (%rdi){1to4}, %ymm0, %k1
|
; SKX-NEXT: vcmpltpd (%rdi){1to4}, %ymm0, %k1
|
||||||
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
|
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
|
||||||
; SKX-NEXT: retq
|
; SKX-NEXT: retq
|
||||||
|
; SKX-NEXT: ## -- End function
|
||||||
|
|
||||||
%a = load double, double* %ptr
|
%a = load double, double* %ptr
|
||||||
%v = insertelement <4 x double> undef, double %a, i32 0
|
%v = insertelement <4 x double> undef, double %a, i32 0
|
||||||
|
@ -1081,12 +1125,14 @@ define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nou
|
||||||
; KNL-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2
|
; KNL-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2
|
||||||
; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
|
||||||
; KNL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
|
; KNL-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; SKX-LABEL: test39:
|
; SKX-LABEL: test39:
|
||||||
; SKX: ## BB#0:
|
; SKX: ## BB#0:
|
||||||
; SKX-NEXT: vcmpltpd (%rdi){1to2}, %xmm0, %k1
|
; SKX-NEXT: vcmpltpd (%rdi){1to2}, %xmm0, %k1
|
||||||
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
|
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
|
||||||
; SKX-NEXT: retq
|
; SKX-NEXT: retq
|
||||||
|
; SKX-NEXT: ## -- End function
|
||||||
|
|
||||||
%a = load double, double* %ptr
|
%a = load double, double* %ptr
|
||||||
%v = insertelement <2 x double> undef, double %a, i32 0
|
%v = insertelement <2 x double> undef, double %a, i32 0
|
||||||
|
@ -1104,6 +1150,7 @@ define <16 x float> @test40(<16 x float> %x, <16 x float> %x1, float* %ptr) n
|
||||||
; CHECK-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1
|
; CHECK-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1
|
||||||
; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
|
; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
|
; CHECK-NEXT: ## -- End function
|
||||||
|
|
||||||
%a = load float, float* %ptr
|
%a = load float, float* %ptr
|
||||||
%v = insertelement <16 x float> undef, float %a, i32 0
|
%v = insertelement <16 x float> undef, float %a, i32 0
|
||||||
|
@ -1124,12 +1171,14 @@ define <8 x float> @test41(<8 x float> %x, <8 x float> %x1, float* %ptr) noun
|
||||||
; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
|
; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
|
||||||
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
|
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
|
||||||
; KNL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
|
; KNL-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; SKX-LABEL: test41:
|
; SKX-LABEL: test41:
|
||||||
; SKX: ## BB#0:
|
; SKX: ## BB#0:
|
||||||
; SKX-NEXT: vcmpltps (%rdi){1to8}, %ymm0, %k1
|
; SKX-NEXT: vcmpltps (%rdi){1to8}, %ymm0, %k1
|
||||||
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1}
|
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1}
|
||||||
; SKX-NEXT: retq
|
; SKX-NEXT: retq
|
||||||
|
; SKX-NEXT: ## -- End function
|
||||||
|
|
||||||
%a = load float, float* %ptr
|
%a = load float, float* %ptr
|
||||||
%v = insertelement <8 x float> undef, float %a, i32 0
|
%v = insertelement <8 x float> undef, float %a, i32 0
|
||||||
|
@ -1147,12 +1196,14 @@ define <4 x float> @test42(<4 x float> %x, <4 x float> %x1, float* %ptr) noun
|
||||||
; KNL-NEXT: vcmpltps %xmm2, %xmm0, %xmm2
|
; KNL-NEXT: vcmpltps %xmm2, %xmm0, %xmm2
|
||||||
; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
|
; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
|
||||||
; KNL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
|
; KNL-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; SKX-LABEL: test42:
|
; SKX-LABEL: test42:
|
||||||
; SKX: ## BB#0:
|
; SKX: ## BB#0:
|
||||||
; SKX-NEXT: vcmpltps (%rdi){1to4}, %xmm0, %k1
|
; SKX-NEXT: vcmpltps (%rdi){1to4}, %xmm0, %k1
|
||||||
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
|
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
|
||||||
; SKX-NEXT: retq
|
; SKX-NEXT: retq
|
||||||
|
; SKX-NEXT: ## -- End function
|
||||||
|
|
||||||
%a = load float, float* %ptr
|
%a = load float, float* %ptr
|
||||||
%v = insertelement <4 x float> undef, float %a, i32 0
|
%v = insertelement <4 x float> undef, float %a, i32 0
|
||||||
|
@ -1172,6 +1223,7 @@ define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x
|
||||||
; KNL-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1}
|
; KNL-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1}
|
||||||
; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
|
; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
|
||||||
; KNL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
|
; KNL-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; SKX-LABEL: test43:
|
; SKX-LABEL: test43:
|
||||||
; SKX: ## BB#0:
|
; SKX: ## BB#0:
|
||||||
|
@ -1180,6 +1232,7 @@ define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x
|
||||||
; SKX-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1}
|
; SKX-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1}
|
||||||
; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
|
; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
|
||||||
; SKX-NEXT: retq
|
; SKX-NEXT: retq
|
||||||
|
; SKX-NEXT: ## -- End function
|
||||||
|
|
||||||
%a = load double, double* %ptr
|
%a = load double, double* %ptr
|
||||||
%v = insertelement <8 x double> undef, double %a, i32 0
|
%v = insertelement <8 x double> undef, double %a, i32 0
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -2695,32 +2695,32 @@ declare <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16>, <16 x i16>, <32
|
||||||
define <8 x i32> @test_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
|
define <8 x i32> @test_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
|
||||||
; CHECK-LABEL: test_cmp_b_256:
|
; CHECK-LABEL: test_cmp_b_256:
|
||||||
; CHECK: ## BB#0:
|
; CHECK: ## BB#0:
|
||||||
|
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
|
||||||
|
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
||||||
|
; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0 ## encoding: [0x62,0xf3,0x75,0x28,0x3f,0xc0,0x02]
|
||||||
|
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
|
||||||
|
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
|
||||||
|
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
|
||||||
|
; CHECK-NEXT: vmovd %eax, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
|
||||||
|
; CHECK-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd1,0x01]
|
||||||
|
; CHECK-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd2,0x02]
|
||||||
|
; CHECK-NEXT: kxnord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x46,0xc0]
|
||||||
|
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
||||||
|
; CHECK-NEXT: vpinsrd $3, %eax, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd0,0x03]
|
||||||
; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
|
; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
|
||||||
; CHECK-NEXT: kmovd %k0, %r8d ## encoding: [0xc5,0x7b,0x93,0xc0]
|
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
||||||
; CHECK-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 ## encoding: [0x62,0xf1,0x75,0x28,0x64,0xc0]
|
; CHECK-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 ## encoding: [0x62,0xf1,0x75,0x28,0x64,0xc0]
|
||||||
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
|
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
|
||||||
; CHECK-NEXT: vpcmpleb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x02]
|
; CHECK-NEXT: vpcmpleb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x02]
|
||||||
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
|
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
|
||||||
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
|
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||||
; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
|
; CHECK-NEXT: vmovd %eax, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc8]
|
||||||
; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0 ## encoding: [0x62,0xf3,0x75,0x28,0x3f,0xc0,0x02]
|
; CHECK-NEXT: vpunpckldq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x62,0xc0]
|
||||||
; CHECK-NEXT: kmovd %k0, %edi ## encoding: [0xc5,0xfb,0x93,0xf8]
|
; CHECK-NEXT: ## xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
||||||
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
|
; CHECK-NEXT: vmovd %edx, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca]
|
||||||
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
; CHECK-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6c,0xc1]
|
||||||
; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
|
; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0]
|
||||||
; CHECK-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01]
|
; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xc2,0x01]
|
||||||
; CHECK-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x02]
|
|
||||||
; CHECK-NEXT: kxnord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x46,0xc0]
|
|
||||||
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
|
||||||
; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03]
|
|
||||||
; CHECK-NEXT: vmovd %ecx, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
|
|
||||||
; CHECK-NEXT: vmovd %r8d, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0]
|
|
||||||
; CHECK-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
|
|
||||||
; CHECK-NEXT: ## xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
|
||||||
; CHECK-NEXT: vmovd %edx, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2]
|
|
||||||
; CHECK-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
|
|
||||||
; CHECK-NEXT: ## xmm1 = xmm1[0],xmm2[0]
|
|
||||||
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
|
|
||||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||||
%res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
|
%res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
|
||||||
%vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
|
%vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
|
||||||
|
@ -2750,23 +2750,23 @@ define <8 x i32> @test_mask_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
|
||||||
; CHECK-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x64,0xc0]
|
; CHECK-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x64,0xc0]
|
||||||
; CHECK-NEXT: kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8]
|
; CHECK-NEXT: kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8]
|
||||||
; CHECK-NEXT: vpcmpleb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x02]
|
; CHECK-NEXT: vpcmpleb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x02]
|
||||||
; CHECK-NEXT: kmovd %k0, %r10d ## encoding: [0xc5,0x7b,0x93,0xd0]
|
|
||||||
; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0]
|
|
||||||
; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
|
|
||||||
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
|
|
||||||
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
|
||||||
; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x3f,0xc0,0x02]
|
|
||||||
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
|
|
||||||
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1]
|
|
||||||
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
|
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
|
||||||
; CHECK-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
|
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
|
||||||
; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x01]
|
; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
|
||||||
; CHECK-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
|
; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x3f,0xc0,0x02]
|
||||||
|
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
||||||
|
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1]
|
||||||
|
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
|
||||||
|
; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
|
||||||
|
; CHECK-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01]
|
||||||
|
; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02]
|
||||||
; CHECK-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
|
; CHECK-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
|
||||||
; CHECK-NEXT: vmovd %r8d, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc8]
|
; CHECK-NEXT: vmovd %r8d, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc8]
|
||||||
; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01]
|
; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01]
|
||||||
; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02]
|
; CHECK-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xca,0x02]
|
||||||
; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03]
|
; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0]
|
||||||
|
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
||||||
|
; CHECK-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xc8,0x03]
|
||||||
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
|
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
|
||||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||||
%res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
|
%res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
|
||||||
|
@ -2793,32 +2793,32 @@ declare i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8>, <32 x i8>, i32, i32) noun
|
||||||
define <8 x i32> @test_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
|
define <8 x i32> @test_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
|
||||||
; CHECK-LABEL: test_ucmp_b_256:
|
; CHECK-LABEL: test_ucmp_b_256:
|
||||||
; CHECK: ## BB#0:
|
; CHECK: ## BB#0:
|
||||||
|
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
|
||||||
|
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
||||||
|
; CHECK-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x05]
|
||||||
|
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
|
||||||
|
; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x06]
|
||||||
|
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
|
||||||
|
; CHECK-NEXT: vmovd %eax, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
|
||||||
|
; CHECK-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd1,0x01]
|
||||||
|
; CHECK-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd2,0x02]
|
||||||
|
; CHECK-NEXT: kxnord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x46,0xc0]
|
||||||
|
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
||||||
|
; CHECK-NEXT: vpinsrd $3, %eax, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd0,0x03]
|
||||||
; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
|
; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
|
||||||
; CHECK-NEXT: kmovd %k0, %r8d ## encoding: [0xc5,0x7b,0x93,0xc0]
|
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
||||||
; CHECK-NEXT: vpcmpltub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x01]
|
; CHECK-NEXT: vpcmpltub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x01]
|
||||||
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
|
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
|
||||||
; CHECK-NEXT: vpcmpleub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x02]
|
; CHECK-NEXT: vpcmpleub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x02]
|
||||||
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
|
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
|
||||||
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
|
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||||
; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
|
; CHECK-NEXT: vmovd %eax, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc8]
|
||||||
; CHECK-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x05]
|
; CHECK-NEXT: vpunpckldq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x62,0xc0]
|
||||||
; CHECK-NEXT: kmovd %k0, %edi ## encoding: [0xc5,0xfb,0x93,0xf8]
|
; CHECK-NEXT: ## xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
||||||
; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x06]
|
; CHECK-NEXT: vmovd %edx, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca]
|
||||||
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
; CHECK-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6c,0xc1]
|
||||||
; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
|
; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0]
|
||||||
; CHECK-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01]
|
; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xc2,0x01]
|
||||||
; CHECK-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x02]
|
|
||||||
; CHECK-NEXT: kxnord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x46,0xc0]
|
|
||||||
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
|
||||||
; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03]
|
|
||||||
; CHECK-NEXT: vmovd %ecx, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
|
|
||||||
; CHECK-NEXT: vmovd %r8d, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0]
|
|
||||||
; CHECK-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
|
|
||||||
; CHECK-NEXT: ## xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
|
|
||||||
; CHECK-NEXT: vmovd %edx, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2]
|
|
||||||
; CHECK-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
|
|
||||||
; CHECK-NEXT: ## xmm1 = xmm1[0],xmm2[0]
|
|
||||||
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
|
|
||||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||||
%res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
|
%res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
|
||||||
%vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
|
%vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
|
||||||
|
@ -2848,23 +2848,23 @@ define <8 x i32> @test_mask_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask)
|
||||||
; CHECK-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x01]
|
; CHECK-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x01]
|
||||||
; CHECK-NEXT: kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8]
|
; CHECK-NEXT: kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8]
|
||||||
; CHECK-NEXT: vpcmpleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x02]
|
; CHECK-NEXT: vpcmpleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x02]
|
||||||
; CHECK-NEXT: kmovd %k0, %r10d ## encoding: [0xc5,0x7b,0x93,0xd0]
|
|
||||||
; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0]
|
|
||||||
; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
|
|
||||||
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
|
|
||||||
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
|
||||||
; CHECK-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05]
|
|
||||||
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
|
|
||||||
; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06]
|
|
||||||
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
|
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
|
||||||
; CHECK-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
|
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
|
||||||
; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x01]
|
; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
|
||||||
; CHECK-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
|
; CHECK-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05]
|
||||||
|
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
||||||
|
; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06]
|
||||||
|
; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
|
||||||
|
; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
|
||||||
|
; CHECK-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01]
|
||||||
|
; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02]
|
||||||
; CHECK-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
|
; CHECK-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
|
||||||
; CHECK-NEXT: vmovd %r8d, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc8]
|
; CHECK-NEXT: vmovd %r8d, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc8]
|
||||||
; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01]
|
; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01]
|
||||||
; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02]
|
; CHECK-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xca,0x02]
|
||||||
; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03]
|
; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0]
|
||||||
|
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
|
||||||
|
; CHECK-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xc8,0x03]
|
||||||
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
|
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
|
||||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||||
%res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
|
%res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
|
||||||
|
|
|
@ -453,10 +453,10 @@ define i32 @v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i8> %d) {
|
||||||
; SSE2-SSSE3-NEXT: pcmpgtb %xmm2, %xmm0
|
; SSE2-SSSE3-NEXT: pcmpgtb %xmm2, %xmm0
|
||||||
; SSE2-SSSE3-NEXT: pcmpgtb %xmm3, %xmm1
|
; SSE2-SSSE3-NEXT: pcmpgtb %xmm3, %xmm1
|
||||||
; SSE2-SSSE3-NEXT: pcmpgtb %xmm6, %xmm4
|
; SSE2-SSSE3-NEXT: pcmpgtb %xmm6, %xmm4
|
||||||
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4
|
|
||||||
; SSE2-SSSE3-NEXT: pcmpgtb %xmm7, %xmm5
|
; SSE2-SSSE3-NEXT: pcmpgtb %xmm7, %xmm5
|
||||||
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm5
|
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm5
|
||||||
; SSE2-SSSE3-NEXT: movdqa %xmm5, -{{[0-9]+}}(%rsp)
|
; SSE2-SSSE3-NEXT: movdqa %xmm5, -{{[0-9]+}}(%rsp)
|
||||||
|
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4
|
||||||
; SSE2-SSSE3-NEXT: movdqa %xmm4, -{{[0-9]+}}(%rsp)
|
; SSE2-SSSE3-NEXT: movdqa %xmm4, -{{[0-9]+}}(%rsp)
|
||||||
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
|
; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al
|
||||||
; SSE2-SSSE3-NEXT: andb $1, %al
|
; SSE2-SSSE3-NEXT: andb $1, %al
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; RUN: llc < %s -mtriple i386-apple-darwin -mcpu=yonah | FileCheck %s
|
; RUN: llc < %s -mtriple i386-apple-darwin -mcpu=yonah | FileCheck %s
|
||||||
|
|
||||||
target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
|
target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
|
||||||
|
@ -6,31 +7,32 @@ target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
|
||||||
; into loads, off the stack or a previous store.
|
; into loads, off the stack or a previous store.
|
||||||
; Be very explicit about the ordering/stack offsets.
|
; Be very explicit about the ordering/stack offsets.
|
||||||
|
|
||||||
; CHECK-LABEL: test_extractelement_legalization_storereuse:
|
|
||||||
; CHECK: # BB#0
|
|
||||||
; CHECK-NEXT: pushl %ebx
|
|
||||||
; CHECK-NEXT: pushl %edi
|
|
||||||
; CHECK-NEXT: pushl %esi
|
|
||||||
; CHECK-NEXT: movl 16(%esp), %eax
|
|
||||||
; CHECK-NEXT: movl 24(%esp), %ecx
|
|
||||||
; CHECK-NEXT: movl 20(%esp), %edx
|
|
||||||
; CHECK-NEXT: paddd (%edx), %xmm0
|
|
||||||
; CHECK-NEXT: movdqa %xmm0, (%edx)
|
|
||||||
; CHECK-NEXT: movl (%edx), %esi
|
|
||||||
; CHECK-NEXT: movl 4(%edx), %edi
|
|
||||||
; CHECK-NEXT: shll $4, %ecx
|
|
||||||
; CHECK-NEXT: movl 8(%edx), %ebx
|
|
||||||
; CHECK-NEXT: movl 12(%edx), %edx
|
|
||||||
; CHECK-NEXT: movl %esi, 12(%eax,%ecx)
|
|
||||||
; CHECK-NEXT: movl %edi, (%eax,%ecx)
|
|
||||||
; CHECK-NEXT: movl %ebx, 8(%eax,%ecx)
|
|
||||||
; CHECK-NEXT: movl %edx, 4(%eax,%ecx)
|
|
||||||
; CHECK-NEXT: popl %esi
|
|
||||||
; CHECK-NEXT: popl %edi
|
|
||||||
; CHECK-NEXT: popl %ebx
|
|
||||||
; CHECK-NEXT: retl
|
|
||||||
|
|
||||||
define void @test_extractelement_legalization_storereuse(<4 x i32> %a, i32* nocapture %x, i32* nocapture readonly %y, i32 %i) #0 {
|
define void @test_extractelement_legalization_storereuse(<4 x i32> %a, i32* nocapture %x, i32* nocapture readonly %y, i32 %i) #0 {
|
||||||
|
; CHECK-LABEL: _test_extractelement_legalization_storereuse: ## @test_extractelement_legalization_storereuse
|
||||||
|
; CHECK: ## BB#0: ## %entry
|
||||||
|
; CHECK-NEXT: pushl %ebx
|
||||||
|
; CHECK-NEXT: pushl %edi
|
||||||
|
; CHECK-NEXT: pushl %esi
|
||||||
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||||
|
; CHECK-NEXT: paddd (%ecx), %xmm0
|
||||||
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||||
|
; CHECK-NEXT: movdqa %xmm0, (%ecx)
|
||||||
|
; CHECK-NEXT: movl (%ecx), %esi
|
||||||
|
; CHECK-NEXT: movl 4(%ecx), %edi
|
||||||
|
; CHECK-NEXT: shll $4, %edx
|
||||||
|
; CHECK-NEXT: movl 8(%ecx), %ebx
|
||||||
|
; CHECK-NEXT: movl 12(%ecx), %ecx
|
||||||
|
; CHECK-NEXT: movl %esi, 12(%eax,%edx)
|
||||||
|
; CHECK-NEXT: movl %edi, (%eax,%edx)
|
||||||
|
; CHECK-NEXT: movl %ebx, 8(%eax,%edx)
|
||||||
|
; CHECK-NEXT: movl %ecx, 4(%eax,%edx)
|
||||||
|
; CHECK-NEXT: popl %esi
|
||||||
|
; CHECK-NEXT: popl %edi
|
||||||
|
; CHECK-NEXT: popl %ebx
|
||||||
|
; CHECK-NEXT: retl
|
||||||
|
; CHECK-NEXT: ## -- End function
|
||||||
entry:
|
entry:
|
||||||
%0 = bitcast i32* %y to <4 x i32>*
|
%0 = bitcast i32* %y to <4 x i32>*
|
||||||
%1 = load <4 x i32>, <4 x i32>* %0, align 16
|
%1 = load <4 x i32>, <4 x i32>* %0, align 16
|
||||||
|
|
|
@ -50,8 +50,8 @@ define void @TestUnionLD1(fp128 %s, i64 %n) #0 {
|
||||||
; CHECK-NEXT: andq %rdi, %rcx
|
; CHECK-NEXT: andq %rdi, %rcx
|
||||||
; CHECK-NEXT: movabsq $-281474976710656, %rdx # imm = 0xFFFF000000000000
|
; CHECK-NEXT: movabsq $-281474976710656, %rdx # imm = 0xFFFF000000000000
|
||||||
; CHECK-NEXT: andq -{{[0-9]+}}(%rsp), %rdx
|
; CHECK-NEXT: andq -{{[0-9]+}}(%rsp), %rdx
|
||||||
; CHECK-NEXT: orq %rcx, %rdx
|
|
||||||
; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
|
; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
|
||||||
|
; CHECK-NEXT: orq %rcx, %rdx
|
||||||
; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
|
; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
|
||||||
; CHECK-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
|
; CHECK-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0
|
||||||
; CHECK-NEXT: jmp foo # TAILCALL
|
; CHECK-NEXT: jmp foo # TAILCALL
|
||||||
|
|
|
@ -16,11 +16,10 @@
|
||||||
; LIN: sarq $32, %r[[REG2]]
|
; LIN: sarq $32, %r[[REG2]]
|
||||||
; LIN: movslq %e[[REG4]], %r[[REG3:.+]]
|
; LIN: movslq %e[[REG4]], %r[[REG3:.+]]
|
||||||
; LIN: sarq $32, %r[[REG4]]
|
; LIN: sarq $32, %r[[REG4]]
|
||||||
; LIN: movsd (%rdi,%r[[REG1]],8), %xmm0
|
; LIN: movsd (%rdi,%rsi,8), %xmm1
|
||||||
; LIN: movhpd (%rdi,%r[[REG2]],8), %xmm0
|
; LIN: movhpd (%rdi,%rax,8), %xmm1
|
||||||
; LIN: movsd (%rdi,%r[[REG3]],8), %xmm1
|
; LIN: movdqa (%rsi), %xmm0
|
||||||
; LIN: movhpd (%rdi,%r[[REG4]],8), %xmm1
|
; LIN: movq %rdi, %xmm1
|
||||||
|
|
||||||
; WIN: movdqa (%rdx), %xmm0
|
; WIN: movdqa (%rdx), %xmm0
|
||||||
; WIN: pand (%r8), %xmm0
|
; WIN: pand (%r8), %xmm0
|
||||||
; WIN: pextrq $1, %xmm0, %r[[REG4:.+]]
|
; WIN: pextrq $1, %xmm0, %r[[REG4:.+]]
|
||||||
|
@ -29,10 +28,10 @@
|
||||||
; WIN: sarq $32, %r[[REG2]]
|
; WIN: sarq $32, %r[[REG2]]
|
||||||
; WIN: movslq %e[[REG4]], %r[[REG3:.+]]
|
; WIN: movslq %e[[REG4]], %r[[REG3:.+]]
|
||||||
; WIN: sarq $32, %r[[REG4]]
|
; WIN: sarq $32, %r[[REG4]]
|
||||||
; WIN: movsd (%rcx,%r[[REG1]],8), %xmm0
|
; WIN: movsd (%rcx,%r9,8), %xmm1
|
||||||
; WIN: movhpd (%rcx,%r[[REG2]],8), %xmm0
|
; WIN: movhpd (%rcx,%rax,8), %xmm1
|
||||||
; WIN: movsd (%rcx,%r[[REG3]],8), %xmm1
|
; WIN: movdqa (%rdx), %xmm0
|
||||||
; WIN: movhpd (%rcx,%r[[REG4]],8), %xmm1
|
; WIN: movq %rdx, %xmm1
|
||||||
|
|
||||||
define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
|
define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
|
||||||
%a = load <4 x i32>, <4 x i32>* %i
|
%a = load <4 x i32>, <4 x i32>* %i
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -112,23 +112,23 @@ define void @i56_and_or(i56* %a) {
|
||||||
define void @i56_insert_bit(i56* %a, i1 zeroext %bit) {
|
define void @i56_insert_bit(i56* %a, i1 zeroext %bit) {
|
||||||
; CHECK-LABEL: i56_insert_bit:
|
; CHECK-LABEL: i56_insert_bit:
|
||||||
; CHECK: # BB#0:
|
; CHECK: # BB#0:
|
||||||
; CHECK-NEXT: movzbl %sil, %eax
|
; CHECK-NEXT: movzwl 4(%rdi), %eax
|
||||||
; CHECK-NEXT: movzwl 4(%rdi), %ecx
|
; CHECK-NEXT: movzbl 6(%rdi), %ecx
|
||||||
; CHECK-NEXT: movzbl 6(%rdi), %edx
|
; CHECK-NEXT: movl (%rdi), %edx
|
||||||
; CHECK-NEXT: movl (%rdi), %esi
|
; CHECK-NEXT: movb %cl, 6(%rdi)
|
||||||
; CHECK-NEXT: movb %dl, 6(%rdi)
|
; CHECK-NEXT: movzbl %sil, %esi
|
||||||
; CHECK-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<kill> %RDX<def>
|
; CHECK-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<kill> %RCX<def>
|
||||||
; CHECK-NEXT: shll $16, %edx
|
; CHECK-NEXT: shll $16, %ecx
|
||||||
; CHECK-NEXT: orl %ecx, %edx
|
; CHECK-NEXT: orl %eax, %ecx
|
||||||
; CHECK-NEXT: shlq $32, %rdx
|
; CHECK-NEXT: shlq $32, %rcx
|
||||||
; CHECK-NEXT: orq %rdx, %rsi
|
; CHECK-NEXT: orq %rcx, %rdx
|
||||||
; CHECK-NEXT: shlq $13, %rax
|
; CHECK-NEXT: shlq $13, %rsi
|
||||||
; CHECK-NEXT: movabsq $72057594037919743, %rcx # imm = 0xFFFFFFFFFFDFFF
|
; CHECK-NEXT: movabsq $72057594037919743, %rax # imm = 0xFFFFFFFFFFDFFF
|
||||||
; CHECK-NEXT: andq %rsi, %rcx
|
; CHECK-NEXT: andq %rdx, %rax
|
||||||
; CHECK-NEXT: orq %rax, %rcx
|
; CHECK-NEXT: orq %rsi, %rax
|
||||||
; CHECK-NEXT: movl %ecx, (%rdi)
|
; CHECK-NEXT: movl %eax, (%rdi)
|
||||||
; CHECK-NEXT: shrq $32, %rcx
|
; CHECK-NEXT: shrq $32, %rax
|
||||||
; CHECK-NEXT: movw %cx, 4(%rdi)
|
; CHECK-NEXT: movw %ax, 4(%rdi)
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%extbit = zext i1 %bit to i56
|
%extbit = zext i1 %bit to i56
|
||||||
%b = load i56, i56* %a, align 1
|
%b = load i56, i56* %a, align 1
|
||||||
|
|
|
@ -17,7 +17,7 @@ define i32 @test_mul_by_1(i32 %x) {
|
||||||
; X64-HSW-LABEL: test_mul_by_1:
|
; X64-HSW-LABEL: test_mul_by_1:
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
|
; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_1:
|
; X64-JAG-LABEL: test_mul_by_1:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -32,7 +32,7 @@ define i32 @test_mul_by_1(i32 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_1:
|
; HSW-NOOPT-LABEL: test_mul_by_1:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25]
|
; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_1:
|
; JAG-NOOPT-LABEL: test_mul_by_1:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -63,7 +63,7 @@ define i32 @test_mul_by_2(i32 %x) {
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; X64-HSW-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_2:
|
; X64-JAG-LABEL: test_mul_by_2:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -81,7 +81,7 @@ define i32 @test_mul_by_2(i32 %x) {
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; HSW-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50]
|
; HSW-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_2:
|
; JAG-NOOPT-LABEL: test_mul_by_2:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -114,7 +114,7 @@ define i32 @test_mul_by_3(i32 %x) {
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_3:
|
; X64-JAG-LABEL: test_mul_by_3:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -131,7 +131,7 @@ define i32 @test_mul_by_3(i32 %x) {
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; HSW-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
|
; HSW-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_3:
|
; JAG-NOOPT-LABEL: test_mul_by_3:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -165,7 +165,7 @@ define i32 @test_mul_by_4(i32 %x) {
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; X64-HSW-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_4:
|
; X64-JAG-LABEL: test_mul_by_4:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -183,7 +183,7 @@ define i32 @test_mul_by_4(i32 %x) {
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; HSW-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50]
|
; HSW-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_4:
|
; JAG-NOOPT-LABEL: test_mul_by_4:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -216,7 +216,7 @@ define i32 @test_mul_by_5(i32 %x) {
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_5:
|
; X64-JAG-LABEL: test_mul_by_5:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -233,7 +233,7 @@ define i32 @test_mul_by_5(i32 %x) {
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
|
; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_5:
|
; JAG-NOOPT-LABEL: test_mul_by_5:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -269,7 +269,7 @@ define i32 @test_mul_by_6(i32 %x) {
|
||||||
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25]
|
; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_6:
|
; X64-JAG-LABEL: test_mul_by_6:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -285,8 +285,8 @@ define i32 @test_mul_by_6(i32 %x) {
|
||||||
;
|
;
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_6:
|
; HSW-NOOPT-LABEL: test_mul_by_6:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imull $6, %edi, %eax # sched: [4:1.00]
|
; HSW-NOOPT-NEXT: imull $6, %edi, %eax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_6:
|
; JAG-NOOPT-LABEL: test_mul_by_6:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -321,7 +321,7 @@ define i32 @test_mul_by_7(i32 %x) {
|
||||||
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
|
; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_7:
|
; X64-JAG-LABEL: test_mul_by_7:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -337,8 +337,8 @@ define i32 @test_mul_by_7(i32 %x) {
|
||||||
;
|
;
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_7:
|
; HSW-NOOPT-LABEL: test_mul_by_7:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imull $7, %edi, %eax # sched: [4:1.00]
|
; HSW-NOOPT-NEXT: imull $7, %edi, %eax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_7:
|
; JAG-NOOPT-LABEL: test_mul_by_7:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -371,7 +371,7 @@ define i32 @test_mul_by_8(i32 %x) {
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_8:
|
; X64-JAG-LABEL: test_mul_by_8:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -389,7 +389,7 @@ define i32 @test_mul_by_8(i32 %x) {
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; HSW-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
|
; HSW-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_8:
|
; JAG-NOOPT-LABEL: test_mul_by_8:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -422,7 +422,7 @@ define i32 @test_mul_by_9(i32 %x) {
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_9:
|
; X64-JAG-LABEL: test_mul_by_9:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -439,7 +439,7 @@ define i32 @test_mul_by_9(i32 %x) {
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; HSW-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
|
; HSW-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_9:
|
; JAG-NOOPT-LABEL: test_mul_by_9:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -475,7 +475,7 @@ define i32 @test_mul_by_10(i32 %x) {
|
||||||
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25]
|
; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_10:
|
; X64-JAG-LABEL: test_mul_by_10:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -491,8 +491,8 @@ define i32 @test_mul_by_10(i32 %x) {
|
||||||
;
|
;
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_10:
|
; HSW-NOOPT-LABEL: test_mul_by_10:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imull $10, %edi, %eax # sched: [4:1.00]
|
; HSW-NOOPT-NEXT: imull $10, %edi, %eax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_10:
|
; JAG-NOOPT-LABEL: test_mul_by_10:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -527,7 +527,7 @@ define i32 @test_mul_by_11(i32 %x) {
|
||||||
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: leal (%rdi,%rax,2), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rdi,%rax,2), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_11:
|
; X64-JAG-LABEL: test_mul_by_11:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -543,8 +543,8 @@ define i32 @test_mul_by_11(i32 %x) {
|
||||||
;
|
;
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_11:
|
; HSW-NOOPT-LABEL: test_mul_by_11:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imull $11, %edi, %eax # sched: [4:1.00]
|
; HSW-NOOPT-NEXT: imull $11, %edi, %eax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_11:
|
; JAG-NOOPT-LABEL: test_mul_by_11:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -575,9 +575,9 @@ define i32 @test_mul_by_12(i32 %x) {
|
||||||
; X64-HSW-LABEL: test_mul_by_12:
|
; X64-HSW-LABEL: test_mul_by_12:
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50]
|
; X64-HSW-NEXT: shll $2, %edi # sched: [1:1.00]
|
||||||
; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_12:
|
; X64-JAG-LABEL: test_mul_by_12:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -593,8 +593,8 @@ define i32 @test_mul_by_12(i32 %x) {
|
||||||
;
|
;
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_12:
|
; HSW-NOOPT-LABEL: test_mul_by_12:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imull $12, %edi, %eax # sched: [4:1.00]
|
; HSW-NOOPT-NEXT: imull $12, %edi, %eax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_12:
|
; JAG-NOOPT-LABEL: test_mul_by_12:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -629,7 +629,7 @@ define i32 @test_mul_by_13(i32 %x) {
|
||||||
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_13:
|
; X64-JAG-LABEL: test_mul_by_13:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -645,8 +645,8 @@ define i32 @test_mul_by_13(i32 %x) {
|
||||||
;
|
;
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_13:
|
; HSW-NOOPT-LABEL: test_mul_by_13:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imull $13, %edi, %eax # sched: [4:1.00]
|
; HSW-NOOPT-NEXT: imull $13, %edi, %eax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_13:
|
; JAG-NOOPT-LABEL: test_mul_by_13:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -681,7 +681,7 @@ define i32 @test_mul_by_14(i32 %x) {
|
||||||
; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
|
; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_14:
|
; X64-JAG-LABEL: test_mul_by_14:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -698,8 +698,8 @@ define i32 @test_mul_by_14(i32 %x) {
|
||||||
;
|
;
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_14:
|
; HSW-NOOPT-LABEL: test_mul_by_14:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imull $14, %edi, %eax # sched: [4:1.00]
|
; HSW-NOOPT-NEXT: imull $14, %edi, %eax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_14:
|
; JAG-NOOPT-LABEL: test_mul_by_14:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -732,7 +732,7 @@ define i32 @test_mul_by_15(i32 %x) {
|
||||||
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_15:
|
; X64-JAG-LABEL: test_mul_by_15:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -748,8 +748,8 @@ define i32 @test_mul_by_15(i32 %x) {
|
||||||
;
|
;
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_15:
|
; HSW-NOOPT-LABEL: test_mul_by_15:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imull $15, %edi, %eax # sched: [4:1.00]
|
; HSW-NOOPT-NEXT: imull $15, %edi, %eax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_15:
|
; JAG-NOOPT-LABEL: test_mul_by_15:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -780,9 +780,9 @@ define i32 @test_mul_by_16(i32 %x) {
|
||||||
;
|
;
|
||||||
; X64-HSW-LABEL: test_mul_by_16:
|
; X64-HSW-LABEL: test_mul_by_16:
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: shll $4, %edi # sched: [1:0.50]
|
; X64-HSW-NEXT: shll $4, %edi # sched: [1:1.00]
|
||||||
; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
|
; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_16:
|
; X64-JAG-LABEL: test_mul_by_16:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -798,9 +798,9 @@ define i32 @test_mul_by_16(i32 %x) {
|
||||||
;
|
;
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_16:
|
; HSW-NOOPT-LABEL: test_mul_by_16:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: shll $4, %edi # sched: [1:0.50]
|
; HSW-NOOPT-NEXT: shll $4, %edi # sched: [1:1.00]
|
||||||
; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25]
|
; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_16:
|
; JAG-NOOPT-LABEL: test_mul_by_16:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -836,9 +836,9 @@ define i32 @test_mul_by_17(i32 %x) {
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
|
; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: shll $4, %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: shll $4, %eax # sched: [1:1.00]
|
||||||
; X64-HSW-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_17:
|
; X64-JAG-LABEL: test_mul_by_17:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -855,8 +855,8 @@ define i32 @test_mul_by_17(i32 %x) {
|
||||||
;
|
;
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_17:
|
; HSW-NOOPT-LABEL: test_mul_by_17:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imull $17, %edi, %eax # sched: [4:1.00]
|
; HSW-NOOPT-NEXT: imull $17, %edi, %eax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_17:
|
; JAG-NOOPT-LABEL: test_mul_by_17:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -892,7 +892,7 @@ define i32 @test_mul_by_18(i32 %x) {
|
||||||
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25]
|
; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_18:
|
; X64-JAG-LABEL: test_mul_by_18:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -908,8 +908,8 @@ define i32 @test_mul_by_18(i32 %x) {
|
||||||
;
|
;
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_18:
|
; HSW-NOOPT-LABEL: test_mul_by_18:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imull $18, %edi, %eax # sched: [4:1.00]
|
; HSW-NOOPT-NEXT: imull $18, %edi, %eax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_18:
|
; JAG-NOOPT-LABEL: test_mul_by_18:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -944,9 +944,9 @@ define i32 @test_mul_by_19(i32 %x) {
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: shll $2, %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: shll $2, %eax # sched: [1:1.00]
|
||||||
; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
|
; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_19:
|
; X64-JAG-LABEL: test_mul_by_19:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -963,8 +963,8 @@ define i32 @test_mul_by_19(i32 %x) {
|
||||||
;
|
;
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_19:
|
; HSW-NOOPT-LABEL: test_mul_by_19:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imull $19, %edi, %eax # sched: [4:1.00]
|
; HSW-NOOPT-NEXT: imull $19, %edi, %eax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_19:
|
; JAG-NOOPT-LABEL: test_mul_by_19:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -995,9 +995,9 @@ define i32 @test_mul_by_20(i32 %x) {
|
||||||
; X64-HSW-LABEL: test_mul_by_20:
|
; X64-HSW-LABEL: test_mul_by_20:
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50]
|
; X64-HSW-NEXT: shll $2, %edi # sched: [1:1.00]
|
||||||
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_20:
|
; X64-JAG-LABEL: test_mul_by_20:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -1013,8 +1013,8 @@ define i32 @test_mul_by_20(i32 %x) {
|
||||||
;
|
;
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_20:
|
; HSW-NOOPT-LABEL: test_mul_by_20:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imull $20, %edi, %eax # sched: [4:1.00]
|
; HSW-NOOPT-NEXT: imull $20, %edi, %eax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_20:
|
; JAG-NOOPT-LABEL: test_mul_by_20:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -1049,7 +1049,7 @@ define i32 @test_mul_by_21(i32 %x) {
|
||||||
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_21:
|
; X64-JAG-LABEL: test_mul_by_21:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -1065,8 +1065,8 @@ define i32 @test_mul_by_21(i32 %x) {
|
||||||
;
|
;
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_21:
|
; HSW-NOOPT-LABEL: test_mul_by_21:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imull $21, %edi, %eax # sched: [4:1.00]
|
; HSW-NOOPT-NEXT: imull $21, %edi, %eax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_21:
|
; JAG-NOOPT-LABEL: test_mul_by_21:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -1101,7 +1101,7 @@ define i32 @test_mul_by_22(i32 %x) {
|
||||||
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
|
; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_22:
|
; X64-JAG-LABEL: test_mul_by_22:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -1118,8 +1118,8 @@ define i32 @test_mul_by_22(i32 %x) {
|
||||||
;
|
;
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_22:
|
; HSW-NOOPT-LABEL: test_mul_by_22:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imull $22, %edi, %eax # sched: [4:1.00]
|
; HSW-NOOPT-NEXT: imull $22, %edi, %eax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_22:
|
; JAG-NOOPT-LABEL: test_mul_by_22:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -1152,9 +1152,9 @@ define i32 @test_mul_by_23(i32 %x) {
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: shll $3, %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: shll $3, %eax # sched: [1:1.00]
|
||||||
; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
|
; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_23:
|
; X64-JAG-LABEL: test_mul_by_23:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -1171,8 +1171,8 @@ define i32 @test_mul_by_23(i32 %x) {
|
||||||
;
|
;
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_23:
|
; HSW-NOOPT-LABEL: test_mul_by_23:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imull $23, %edi, %eax # sched: [4:1.00]
|
; HSW-NOOPT-NEXT: imull $23, %edi, %eax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_23:
|
; JAG-NOOPT-LABEL: test_mul_by_23:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -1203,9 +1203,9 @@ define i32 @test_mul_by_24(i32 %x) {
|
||||||
; X64-HSW-LABEL: test_mul_by_24:
|
; X64-HSW-LABEL: test_mul_by_24:
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; X64-HSW-NEXT: shll $3, %edi # sched: [1:0.50]
|
; X64-HSW-NEXT: shll $3, %edi # sched: [1:1.00]
|
||||||
; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_24:
|
; X64-JAG-LABEL: test_mul_by_24:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -1221,8 +1221,8 @@ define i32 @test_mul_by_24(i32 %x) {
|
||||||
;
|
;
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_24:
|
; HSW-NOOPT-LABEL: test_mul_by_24:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imull $24, %edi, %eax # sched: [4:1.00]
|
; HSW-NOOPT-NEXT: imull $24, %edi, %eax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_24:
|
; JAG-NOOPT-LABEL: test_mul_by_24:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -1257,7 +1257,7 @@ define i32 @test_mul_by_25(i32 %x) {
|
||||||
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: leal (%rax,%rax,4), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rax,%rax,4), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_25:
|
; X64-JAG-LABEL: test_mul_by_25:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -1273,8 +1273,8 @@ define i32 @test_mul_by_25(i32 %x) {
|
||||||
;
|
;
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_25:
|
; HSW-NOOPT-LABEL: test_mul_by_25:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imull $25, %edi, %eax # sched: [4:1.00]
|
; HSW-NOOPT-NEXT: imull $25, %edi, %eax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_25:
|
; JAG-NOOPT-LABEL: test_mul_by_25:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -1311,7 +1311,7 @@ define i32 @test_mul_by_26(i32 %x) {
|
||||||
; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
|
; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_26:
|
; X64-JAG-LABEL: test_mul_by_26:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -1328,8 +1328,8 @@ define i32 @test_mul_by_26(i32 %x) {
|
||||||
;
|
;
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_26:
|
; HSW-NOOPT-LABEL: test_mul_by_26:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imull $26, %edi, %eax # sched: [4:1.00]
|
; HSW-NOOPT-NEXT: imull $26, %edi, %eax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_26:
|
; JAG-NOOPT-LABEL: test_mul_by_26:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -1362,7 +1362,7 @@ define i32 @test_mul_by_27(i32 %x) {
|
||||||
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
|
||||||
; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_27:
|
; X64-JAG-LABEL: test_mul_by_27:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -1378,8 +1378,8 @@ define i32 @test_mul_by_27(i32 %x) {
|
||||||
;
|
;
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_27:
|
; HSW-NOOPT-LABEL: test_mul_by_27:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imull $27, %edi, %eax # sched: [4:1.00]
|
; HSW-NOOPT-NEXT: imull $27, %edi, %eax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_27:
|
; JAG-NOOPT-LABEL: test_mul_by_27:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -1416,7 +1416,7 @@ define i32 @test_mul_by_28(i32 %x) {
|
||||||
; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
|
; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_28:
|
; X64-JAG-LABEL: test_mul_by_28:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -1433,8 +1433,8 @@ define i32 @test_mul_by_28(i32 %x) {
|
||||||
;
|
;
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_28:
|
; HSW-NOOPT-LABEL: test_mul_by_28:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imull $28, %edi, %eax # sched: [4:1.00]
|
; HSW-NOOPT-NEXT: imull $28, %edi, %eax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_28:
|
; JAG-NOOPT-LABEL: test_mul_by_28:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -1471,7 +1471,7 @@ define i32 @test_mul_by_29(i32 %x) {
|
||||||
; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
|
; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
|
; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_29:
|
; X64-JAG-LABEL: test_mul_by_29:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -1489,8 +1489,8 @@ define i32 @test_mul_by_29(i32 %x) {
|
||||||
;
|
;
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_29:
|
; HSW-NOOPT-LABEL: test_mul_by_29:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imull $29, %edi, %eax # sched: [4:1.00]
|
; HSW-NOOPT-NEXT: imull $29, %edi, %eax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_29:
|
; JAG-NOOPT-LABEL: test_mul_by_29:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -1523,10 +1523,10 @@ define i32 @test_mul_by_30(i32 %x) {
|
||||||
; X64-HSW-LABEL: test_mul_by_30:
|
; X64-HSW-LABEL: test_mul_by_30:
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
|
; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: shll $5, %eax # sched: [1:1.00]
|
||||||
; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
|
; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
|
; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_30:
|
; X64-JAG-LABEL: test_mul_by_30:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -1543,8 +1543,8 @@ define i32 @test_mul_by_30(i32 %x) {
|
||||||
;
|
;
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_30:
|
; HSW-NOOPT-LABEL: test_mul_by_30:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imull $30, %edi, %eax # sched: [4:1.00]
|
; HSW-NOOPT-NEXT: imull $30, %edi, %eax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_30:
|
; JAG-NOOPT-LABEL: test_mul_by_30:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -1576,9 +1576,9 @@ define i32 @test_mul_by_31(i32 %x) {
|
||||||
; X64-HSW-LABEL: test_mul_by_31:
|
; X64-HSW-LABEL: test_mul_by_31:
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
|
; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: shll $5, %eax # sched: [1:1.00]
|
||||||
; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
|
; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_31:
|
; X64-JAG-LABEL: test_mul_by_31:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -1594,8 +1594,8 @@ define i32 @test_mul_by_31(i32 %x) {
|
||||||
;
|
;
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_31:
|
; HSW-NOOPT-LABEL: test_mul_by_31:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imull $31, %edi, %eax # sched: [4:1.00]
|
; HSW-NOOPT-NEXT: imull $31, %edi, %eax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_31:
|
; JAG-NOOPT-LABEL: test_mul_by_31:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -1626,9 +1626,9 @@ define i32 @test_mul_by_32(i32 %x) {
|
||||||
;
|
;
|
||||||
; X64-HSW-LABEL: test_mul_by_32:
|
; X64-HSW-LABEL: test_mul_by_32:
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: shll $5, %edi # sched: [1:0.50]
|
; X64-HSW-NEXT: shll $5, %edi # sched: [1:1.00]
|
||||||
; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
|
; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_32:
|
; X64-JAG-LABEL: test_mul_by_32:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -1644,9 +1644,9 @@ define i32 @test_mul_by_32(i32 %x) {
|
||||||
;
|
;
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_32:
|
; HSW-NOOPT-LABEL: test_mul_by_32:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: shll $5, %edi # sched: [1:0.50]
|
; HSW-NOOPT-NEXT: shll $5, %edi # sched: [1:1.00]
|
||||||
; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25]
|
; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_32:
|
; JAG-NOOPT-LABEL: test_mul_by_32:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -1686,8 +1686,8 @@ define i32 @test_mul_spec(i32 %x) nounwind {
|
||||||
; X64-HSW-NEXT: addl $42, %ecx # sched: [1:0.25]
|
; X64-HSW-NEXT: addl $42, %ecx # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
|
; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: addl $2, %eax # sched: [1:0.25]
|
; X64-HSW-NEXT: addl $2, %eax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: imull %ecx, %eax # sched: [4:1.00]
|
; X64-HSW-NEXT: imull %ecx, %eax # sched: [3:1.00]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_spec:
|
; X64-JAG-LABEL: test_mul_spec:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -1712,8 +1712,8 @@ define i32 @test_mul_spec(i32 %x) nounwind {
|
||||||
; HSW-NOOPT-NEXT: addl $42, %ecx # sched: [1:0.25]
|
; HSW-NOOPT-NEXT: addl $42, %ecx # sched: [1:0.25]
|
||||||
; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
|
; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
|
||||||
; HSW-NOOPT-NEXT: addl $2, %eax # sched: [1:0.25]
|
; HSW-NOOPT-NEXT: addl $2, %eax # sched: [1:0.25]
|
||||||
; HSW-NOOPT-NEXT: imull %ecx, %eax # sched: [4:1.00]
|
; HSW-NOOPT-NEXT: imull %ecx, %eax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_spec:
|
; JAG-NOOPT-LABEL: test_mul_spec:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
|
|
@ -18,7 +18,7 @@ define i64 @test_mul_by_1(i64 %x) nounwind {
|
||||||
; X64-HSW-LABEL: test_mul_by_1:
|
; X64-HSW-LABEL: test_mul_by_1:
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
|
; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_1:
|
; X64-JAG-LABEL: test_mul_by_1:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -34,7 +34,7 @@ define i64 @test_mul_by_1(i64 %x) nounwind {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_1:
|
; HSW-NOOPT-LABEL: test_mul_by_1:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25]
|
; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_1:
|
; JAG-NOOPT-LABEL: test_mul_by_1:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -66,7 +66,7 @@ define i64 @test_mul_by_2(i64 %x) {
|
||||||
; X64-HSW-LABEL: test_mul_by_2:
|
; X64-HSW-LABEL: test_mul_by_2:
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_2:
|
; X64-JAG-LABEL: test_mul_by_2:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -84,7 +84,7 @@ define i64 @test_mul_by_2(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_2:
|
; HSW-NOOPT-LABEL: test_mul_by_2:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50]
|
; HSW-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_2:
|
; JAG-NOOPT-LABEL: test_mul_by_2:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -116,7 +116,7 @@ define i64 @test_mul_by_3(i64 %x) {
|
||||||
; X64-HSW-LABEL: test_mul_by_3:
|
; X64-HSW-LABEL: test_mul_by_3:
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_3:
|
; X64-JAG-LABEL: test_mul_by_3:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -134,7 +134,7 @@ define i64 @test_mul_by_3(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_3:
|
; HSW-NOOPT-LABEL: test_mul_by_3:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
|
; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_3:
|
; JAG-NOOPT-LABEL: test_mul_by_3:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -166,7 +166,7 @@ define i64 @test_mul_by_4(i64 %x) {
|
||||||
; X64-HSW-LABEL: test_mul_by_4:
|
; X64-HSW-LABEL: test_mul_by_4:
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_4:
|
; X64-JAG-LABEL: test_mul_by_4:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -184,7 +184,7 @@ define i64 @test_mul_by_4(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_4:
|
; HSW-NOOPT-LABEL: test_mul_by_4:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50]
|
; HSW-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_4:
|
; JAG-NOOPT-LABEL: test_mul_by_4:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -216,7 +216,7 @@ define i64 @test_mul_by_5(i64 %x) {
|
||||||
; X64-HSW-LABEL: test_mul_by_5:
|
; X64-HSW-LABEL: test_mul_by_5:
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_5:
|
; X64-JAG-LABEL: test_mul_by_5:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -234,7 +234,7 @@ define i64 @test_mul_by_5(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_5:
|
; HSW-NOOPT-LABEL: test_mul_by_5:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
|
; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_5:
|
; JAG-NOOPT-LABEL: test_mul_by_5:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -268,7 +268,7 @@ define i64 @test_mul_by_6(i64 %x) {
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25]
|
; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_6:
|
; X64-JAG-LABEL: test_mul_by_6:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -287,7 +287,7 @@ define i64 @test_mul_by_6(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_6:
|
; HSW-NOOPT-LABEL: test_mul_by_6:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00]
|
; HSW-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_6:
|
; JAG-NOOPT-LABEL: test_mul_by_6:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -323,7 +323,7 @@ define i64 @test_mul_by_7(i64 %x) {
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
|
; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_7:
|
; X64-JAG-LABEL: test_mul_by_7:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -342,7 +342,7 @@ define i64 @test_mul_by_7(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_7:
|
; HSW-NOOPT-LABEL: test_mul_by_7:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00]
|
; HSW-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_7:
|
; JAG-NOOPT-LABEL: test_mul_by_7:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -375,7 +375,7 @@ define i64 @test_mul_by_8(i64 %x) {
|
||||||
; X64-HSW-LABEL: test_mul_by_8:
|
; X64-HSW-LABEL: test_mul_by_8:
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_8:
|
; X64-JAG-LABEL: test_mul_by_8:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -393,7 +393,7 @@ define i64 @test_mul_by_8(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_8:
|
; HSW-NOOPT-LABEL: test_mul_by_8:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
|
; HSW-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_8:
|
; JAG-NOOPT-LABEL: test_mul_by_8:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -425,7 +425,7 @@ define i64 @test_mul_by_9(i64 %x) {
|
||||||
; X64-HSW-LABEL: test_mul_by_9:
|
; X64-HSW-LABEL: test_mul_by_9:
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_9:
|
; X64-JAG-LABEL: test_mul_by_9:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -443,7 +443,7 @@ define i64 @test_mul_by_9(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_9:
|
; HSW-NOOPT-LABEL: test_mul_by_9:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
|
; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_9:
|
; JAG-NOOPT-LABEL: test_mul_by_9:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -477,7 +477,7 @@ define i64 @test_mul_by_10(i64 %x) {
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25]
|
; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_10:
|
; X64-JAG-LABEL: test_mul_by_10:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -496,7 +496,7 @@ define i64 @test_mul_by_10(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_10:
|
; HSW-NOOPT-LABEL: test_mul_by_10:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00]
|
; HSW-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_10:
|
; JAG-NOOPT-LABEL: test_mul_by_10:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -532,7 +532,7 @@ define i64 @test_mul_by_11(i64 %x) {
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rax,2), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rax,2), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_11:
|
; X64-JAG-LABEL: test_mul_by_11:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -551,7 +551,7 @@ define i64 @test_mul_by_11(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_11:
|
; HSW-NOOPT-LABEL: test_mul_by_11:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00]
|
; HSW-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_11:
|
; JAG-NOOPT-LABEL: test_mul_by_11:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -585,7 +585,7 @@ define i64 @test_mul_by_12(i64 %x) {
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50]
|
; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_12:
|
; X64-JAG-LABEL: test_mul_by_12:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -604,7 +604,7 @@ define i64 @test_mul_by_12(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_12:
|
; HSW-NOOPT-LABEL: test_mul_by_12:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00]
|
; HSW-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_12:
|
; JAG-NOOPT-LABEL: test_mul_by_12:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -640,7 +640,7 @@ define i64 @test_mul_by_13(i64 %x) {
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_13:
|
; X64-JAG-LABEL: test_mul_by_13:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -659,7 +659,7 @@ define i64 @test_mul_by_13(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_13:
|
; HSW-NOOPT-LABEL: test_mul_by_13:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00]
|
; HSW-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_13:
|
; JAG-NOOPT-LABEL: test_mul_by_13:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -696,7 +696,7 @@ define i64 @test_mul_by_14(i64 %x) {
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
|
; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_14:
|
; X64-JAG-LABEL: test_mul_by_14:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -716,7 +716,7 @@ define i64 @test_mul_by_14(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_14:
|
; HSW-NOOPT-LABEL: test_mul_by_14:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00]
|
; HSW-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_14:
|
; JAG-NOOPT-LABEL: test_mul_by_14:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -751,7 +751,7 @@ define i64 @test_mul_by_15(i64 %x) {
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_15:
|
; X64-JAG-LABEL: test_mul_by_15:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -770,7 +770,7 @@ define i64 @test_mul_by_15(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_15:
|
; HSW-NOOPT-LABEL: test_mul_by_15:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00]
|
; HSW-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_15:
|
; JAG-NOOPT-LABEL: test_mul_by_15:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -804,7 +804,7 @@ define i64 @test_mul_by_16(i64 %x) {
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: shlq $4, %rdi # sched: [1:0.50]
|
; X64-HSW-NEXT: shlq $4, %rdi # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
|
; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_16:
|
; X64-JAG-LABEL: test_mul_by_16:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -824,7 +824,7 @@ define i64 @test_mul_by_16(i64 %x) {
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: shlq $4, %rdi # sched: [1:0.50]
|
; HSW-NOOPT-NEXT: shlq $4, %rdi # sched: [1:0.50]
|
||||||
; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25]
|
; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_16:
|
; JAG-NOOPT-LABEL: test_mul_by_16:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -864,7 +864,7 @@ define i64 @test_mul_by_17(i64 %x) {
|
||||||
; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
|
; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: shlq $4, %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: shlq $4, %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_17:
|
; X64-JAG-LABEL: test_mul_by_17:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -884,7 +884,7 @@ define i64 @test_mul_by_17(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_17:
|
; HSW-NOOPT-LABEL: test_mul_by_17:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00]
|
; HSW-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_17:
|
; JAG-NOOPT-LABEL: test_mul_by_17:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -920,7 +920,7 @@ define i64 @test_mul_by_18(i64 %x) {
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25]
|
; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_18:
|
; X64-JAG-LABEL: test_mul_by_18:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -939,7 +939,7 @@ define i64 @test_mul_by_18(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_18:
|
; HSW-NOOPT-LABEL: test_mul_by_18:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00]
|
; HSW-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_18:
|
; JAG-NOOPT-LABEL: test_mul_by_18:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -977,7 +977,7 @@ define i64 @test_mul_by_19(i64 %x) {
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: shlq $2, %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: shlq $2, %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
|
; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_19:
|
; X64-JAG-LABEL: test_mul_by_19:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -997,7 +997,7 @@ define i64 @test_mul_by_19(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_19:
|
; HSW-NOOPT-LABEL: test_mul_by_19:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00]
|
; HSW-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_19:
|
; JAG-NOOPT-LABEL: test_mul_by_19:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -1031,7 +1031,7 @@ define i64 @test_mul_by_20(i64 %x) {
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50]
|
; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_20:
|
; X64-JAG-LABEL: test_mul_by_20:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -1050,7 +1050,7 @@ define i64 @test_mul_by_20(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_20:
|
; HSW-NOOPT-LABEL: test_mul_by_20:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00]
|
; HSW-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_20:
|
; JAG-NOOPT-LABEL: test_mul_by_20:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -1086,7 +1086,7 @@ define i64 @test_mul_by_21(i64 %x) {
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_21:
|
; X64-JAG-LABEL: test_mul_by_21:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -1105,7 +1105,7 @@ define i64 @test_mul_by_21(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_21:
|
; HSW-NOOPT-LABEL: test_mul_by_21:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00]
|
; HSW-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_21:
|
; JAG-NOOPT-LABEL: test_mul_by_21:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -1142,7 +1142,7 @@ define i64 @test_mul_by_22(i64 %x) {
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
|
; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_22:
|
; X64-JAG-LABEL: test_mul_by_22:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -1162,7 +1162,7 @@ define i64 @test_mul_by_22(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_22:
|
; HSW-NOOPT-LABEL: test_mul_by_22:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00]
|
; HSW-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_22:
|
; JAG-NOOPT-LABEL: test_mul_by_22:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -1199,7 +1199,7 @@ define i64 @test_mul_by_23(i64 %x) {
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: shlq $3, %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: shlq $3, %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
|
; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_23:
|
; X64-JAG-LABEL: test_mul_by_23:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -1219,7 +1219,7 @@ define i64 @test_mul_by_23(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_23:
|
; HSW-NOOPT-LABEL: test_mul_by_23:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00]
|
; HSW-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_23:
|
; JAG-NOOPT-LABEL: test_mul_by_23:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -1253,7 +1253,7 @@ define i64 @test_mul_by_24(i64 %x) {
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: shlq $3, %rdi # sched: [1:0.50]
|
; X64-HSW-NEXT: shlq $3, %rdi # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_24:
|
; X64-JAG-LABEL: test_mul_by_24:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -1272,7 +1272,7 @@ define i64 @test_mul_by_24(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_24:
|
; HSW-NOOPT-LABEL: test_mul_by_24:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00]
|
; HSW-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_24:
|
; JAG-NOOPT-LABEL: test_mul_by_24:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -1308,7 +1308,7 @@ define i64 @test_mul_by_25(i64 %x) {
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_25:
|
; X64-JAG-LABEL: test_mul_by_25:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -1327,7 +1327,7 @@ define i64 @test_mul_by_25(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_25:
|
; HSW-NOOPT-LABEL: test_mul_by_25:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00]
|
; HSW-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_25:
|
; JAG-NOOPT-LABEL: test_mul_by_25:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -1365,7 +1365,7 @@ define i64 @test_mul_by_26(i64 %x) {
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
|
; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_26:
|
; X64-JAG-LABEL: test_mul_by_26:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -1385,7 +1385,7 @@ define i64 @test_mul_by_26(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_26:
|
; HSW-NOOPT-LABEL: test_mul_by_26:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00]
|
; HSW-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_26:
|
; JAG-NOOPT-LABEL: test_mul_by_26:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -1420,7 +1420,7 @@ define i64 @test_mul_by_27(i64 %x) {
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_27:
|
; X64-JAG-LABEL: test_mul_by_27:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -1439,7 +1439,7 @@ define i64 @test_mul_by_27(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_27:
|
; HSW-NOOPT-LABEL: test_mul_by_27:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00]
|
; HSW-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_27:
|
; JAG-NOOPT-LABEL: test_mul_by_27:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -1477,7 +1477,7 @@ define i64 @test_mul_by_28(i64 %x) {
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
|
; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_28:
|
; X64-JAG-LABEL: test_mul_by_28:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -1497,7 +1497,7 @@ define i64 @test_mul_by_28(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_28:
|
; HSW-NOOPT-LABEL: test_mul_by_28:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00]
|
; HSW-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_28:
|
; JAG-NOOPT-LABEL: test_mul_by_28:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -1536,7 +1536,7 @@ define i64 @test_mul_by_29(i64 %x) {
|
||||||
; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
|
; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
|
; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_29:
|
; X64-JAG-LABEL: test_mul_by_29:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -1557,7 +1557,7 @@ define i64 @test_mul_by_29(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_29:
|
; HSW-NOOPT-LABEL: test_mul_by_29:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00]
|
; HSW-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_29:
|
; JAG-NOOPT-LABEL: test_mul_by_29:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -1596,7 +1596,7 @@ define i64 @test_mul_by_30(i64 %x) {
|
||||||
; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
|
; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
|
; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_30:
|
; X64-JAG-LABEL: test_mul_by_30:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -1617,7 +1617,7 @@ define i64 @test_mul_by_30(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_30:
|
; HSW-NOOPT-LABEL: test_mul_by_30:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00]
|
; HSW-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_30:
|
; JAG-NOOPT-LABEL: test_mul_by_30:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -1654,7 +1654,7 @@ define i64 @test_mul_by_31(i64 %x) {
|
||||||
; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
|
; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
|
; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_31:
|
; X64-JAG-LABEL: test_mul_by_31:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -1674,7 +1674,7 @@ define i64 @test_mul_by_31(i64 %x) {
|
||||||
; HSW-NOOPT-LABEL: test_mul_by_31:
|
; HSW-NOOPT-LABEL: test_mul_by_31:
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00]
|
; HSW-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_31:
|
; JAG-NOOPT-LABEL: test_mul_by_31:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -1709,7 +1709,7 @@ define i64 @test_mul_by_32(i64 %x) {
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: shlq $5, %rdi # sched: [1:0.50]
|
; X64-HSW-NEXT: shlq $5, %rdi # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
|
; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_32:
|
; X64-JAG-LABEL: test_mul_by_32:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -1729,7 +1729,7 @@ define i64 @test_mul_by_32(i64 %x) {
|
||||||
; HSW-NOOPT: # BB#0:
|
; HSW-NOOPT: # BB#0:
|
||||||
; HSW-NOOPT-NEXT: shlq $5, %rdi # sched: [1:0.50]
|
; HSW-NOOPT-NEXT: shlq $5, %rdi # sched: [1:0.50]
|
||||||
; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25]
|
; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_by_32:
|
; JAG-NOOPT-LABEL: test_mul_by_32:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
@ -1793,7 +1793,7 @@ define i64 @test_mul_spec(i64 %x) nounwind {
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: addq $2, %rax # sched: [1:0.25]
|
; X64-HSW-NEXT: addq $2, %rax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: imulq %rcx, %rax # sched: [3:1.00]
|
; X64-HSW-NEXT: imulq %rcx, %rax # sched: [3:1.00]
|
||||||
; X64-HSW-NEXT: retq # sched: [1:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_spec:
|
; X64-JAG-LABEL: test_mul_spec:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
|
@ -1841,7 +1841,7 @@ define i64 @test_mul_spec(i64 %x) nounwind {
|
||||||
; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
|
; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
|
||||||
; HSW-NOOPT-NEXT: addq $2, %rax # sched: [1:0.25]
|
; HSW-NOOPT-NEXT: addq $2, %rax # sched: [1:0.25]
|
||||||
; HSW-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00]
|
; HSW-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00]
|
||||||
; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
|
; HSW-NOOPT-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; JAG-NOOPT-LABEL: test_mul_spec:
|
; JAG-NOOPT-LABEL: test_mul_spec:
|
||||||
; JAG-NOOPT: # BB#0:
|
; JAG-NOOPT: # BB#0:
|
||||||
|
|
|
@ -59,8 +59,8 @@ define void @foo() local_unnamed_addr {
|
||||||
; X86-NEXT: cmovnel %ecx, %esi
|
; X86-NEXT: cmovnel %ecx, %esi
|
||||||
; X86-NEXT: cmpl %edx, %edi
|
; X86-NEXT: cmpl %edx, %edi
|
||||||
; X86-NEXT: movl %ebp, var_50+4
|
; X86-NEXT: movl %ebp, var_50+4
|
||||||
; X86-NEXT: movl %esi, var_50
|
|
||||||
; X86-NEXT: setge var_205
|
; X86-NEXT: setge var_205
|
||||||
|
; X86-NEXT: movl %esi, var_50
|
||||||
; X86-NEXT: imull %eax, %ebx
|
; X86-NEXT: imull %eax, %ebx
|
||||||
; X86-NEXT: movb %bl, var_218
|
; X86-NEXT: movb %bl, var_218
|
||||||
; X86-NEXT: popl %esi
|
; X86-NEXT: popl %esi
|
||||||
|
|
|
@ -45,15 +45,15 @@ define float @f32_no_estimate(float %x) #0 {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: f32_no_estimate:
|
; SANDY-LABEL: f32_no_estimate:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
|
; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
|
||||||
; SANDY-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
|
; SANDY-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [14:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: f32_no_estimate:
|
; HASWELL-LABEL: f32_no_estimate:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
|
; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [?:5.000000e-01]
|
||||||
; HASWELL-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
|
; HASWELL-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [13:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-NO-FMA-LABEL: f32_no_estimate:
|
; HASWELL-NO-FMA-LABEL: f32_no_estimate:
|
||||||
; HASWELL-NO-FMA: # BB#0:
|
; HASWELL-NO-FMA: # BB#0:
|
||||||
|
@ -63,9 +63,9 @@ define float @f32_no_estimate(float %x) #0 {
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: f32_no_estimate:
|
; AVX512-LABEL: f32_no_estimate:
|
||||||
; AVX512: # BB#0:
|
; AVX512: # BB#0:
|
||||||
; AVX512-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
|
; AVX512-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [?:5.000000e-01]
|
||||||
; AVX512-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
|
; AVX512-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [13:1.00]
|
||||||
; AVX512-NEXT: retq # sched: [1:1.00]
|
; AVX512-NEXT: retq # sched: [2:1.00]
|
||||||
%div = fdiv fast float 1.0, %x
|
%div = fdiv fast float 1.0, %x
|
||||||
ret float %div
|
ret float %div
|
||||||
}
|
}
|
||||||
|
@ -113,18 +113,18 @@ define float @f32_one_step(float %x) #1 {
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
|
; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
|
||||||
; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: f32_one_step:
|
; HASWELL-LABEL: f32_one_step:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0
|
; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0
|
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-NO-FMA-LABEL: f32_one_step:
|
; HASWELL-NO-FMA-LABEL: f32_one_step:
|
||||||
; HASWELL-NO-FMA: # BB#0:
|
; HASWELL-NO-FMA: # BB#0:
|
||||||
|
@ -139,9 +139,9 @@ define float @f32_one_step(float %x) #1 {
|
||||||
; AVX512-LABEL: f32_one_step:
|
; AVX512-LABEL: f32_one_step:
|
||||||
; AVX512: # BB#0:
|
; AVX512: # BB#0:
|
||||||
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
|
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
|
||||||
; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0
|
; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0
|
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; AVX512-NEXT: retq # sched: [1:1.00]
|
; AVX512-NEXT: retq # sched: [2:1.00]
|
||||||
%div = fdiv fast float 1.0, %x
|
%div = fdiv fast float 1.0, %x
|
||||||
ret float %div
|
ret float %div
|
||||||
}
|
}
|
||||||
|
@ -207,7 +207,7 @@ define float @f32_two_step(float %x) #2 {
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
|
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [4:0.50]
|
; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [6:0.50]
|
||||||
; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
|
; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
|
; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
|
; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
|
||||||
|
@ -215,18 +215,18 @@ define float @f32_two_step(float %x) #2 {
|
||||||
; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: f32_two_step:
|
; HASWELL-LABEL: f32_two_step:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
|
; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [?:5.000000e-01]
|
||||||
; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3
|
; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3
|
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0
|
; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0
|
; HASWELL-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-NO-FMA-LABEL: f32_two_step:
|
; HASWELL-NO-FMA-LABEL: f32_two_step:
|
||||||
; HASWELL-NO-FMA: # BB#0:
|
; HASWELL-NO-FMA: # BB#0:
|
||||||
|
@ -245,13 +245,13 @@ define float @f32_two_step(float %x) #2 {
|
||||||
; AVX512-LABEL: f32_two_step:
|
; AVX512-LABEL: f32_two_step:
|
||||||
; AVX512: # BB#0:
|
; AVX512: # BB#0:
|
||||||
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
|
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
|
||||||
; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
|
; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [?:5.000000e-01]
|
||||||
; AVX512-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
; AVX512-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
||||||
; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3
|
; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
|
||||||
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3
|
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
|
||||||
; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0
|
; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
|
||||||
; AVX512-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0
|
; AVX512-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
|
||||||
; AVX512-NEXT: retq # sched: [1:1.00]
|
; AVX512-NEXT: retq # sched: [2:1.00]
|
||||||
%div = fdiv fast float 1.0, %x
|
%div = fdiv fast float 1.0, %x
|
||||||
ret float %div
|
ret float %div
|
||||||
}
|
}
|
||||||
|
@ -284,15 +284,15 @@ define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: v4f32_no_estimate:
|
; SANDY-LABEL: v4f32_no_estimate:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
|
||||||
; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
|
; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [14:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: v4f32_no_estimate:
|
; HASWELL-LABEL: v4f32_no_estimate:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [4:0.50]
|
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [?:5.000000e-01]
|
||||||
; HASWELL-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
|
; HASWELL-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [13:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-NO-FMA-LABEL: v4f32_no_estimate:
|
; HASWELL-NO-FMA-LABEL: v4f32_no_estimate:
|
||||||
; HASWELL-NO-FMA: # BB#0:
|
; HASWELL-NO-FMA: # BB#0:
|
||||||
|
@ -302,9 +302,9 @@ define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: v4f32_no_estimate:
|
; AVX512-LABEL: v4f32_no_estimate:
|
||||||
; AVX512: # BB#0:
|
; AVX512: # BB#0:
|
||||||
; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [4:0.50]
|
; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [?:5.000000e-01]
|
||||||
; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
|
; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [13:1.00]
|
||||||
; AVX512-NEXT: retq # sched: [1:1.00]
|
; AVX512-NEXT: retq # sched: [2:1.00]
|
||||||
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
|
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
|
||||||
ret <4 x float> %div
|
ret <4 x float> %div
|
||||||
}
|
}
|
||||||
|
@ -350,21 +350,21 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: v4f32_one_step:
|
; SANDY-LABEL: v4f32_one_step:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00]
|
||||||
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
|
||||||
; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: v4f32_one_step:
|
; HASWELL-LABEL: v4f32_one_step:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01]
|
||||||
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
|
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
|
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-NO-FMA-LABEL: v4f32_one_step:
|
; HASWELL-NO-FMA-LABEL: v4f32_one_step:
|
||||||
; HASWELL-NO-FMA: # BB#0:
|
; HASWELL-NO-FMA: # BB#0:
|
||||||
|
@ -379,17 +379,17 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
|
||||||
; KNL-LABEL: v4f32_one_step:
|
; KNL-LABEL: v4f32_one_step:
|
||||||
; KNL: # BB#0:
|
; KNL: # BB#0:
|
||||||
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01]
|
||||||
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
|
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
|
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; KNL-NEXT: retq # sched: [1:1.00]
|
; KNL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; SKX-LABEL: v4f32_one_step:
|
; SKX-LABEL: v4f32_one_step:
|
||||||
; SKX: # BB#0:
|
; SKX: # BB#0:
|
||||||
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
|
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
|
||||||
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0
|
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0
|
||||||
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
|
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; SKX-NEXT: retq # sched: [1:1.00]
|
; SKX-NEXT: retq # sched: [2:1.00]
|
||||||
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
|
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
|
||||||
ret <4 x float> %div
|
ret <4 x float> %div
|
||||||
}
|
}
|
||||||
|
@ -453,9 +453,9 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: v4f32_two_step:
|
; SANDY-LABEL: v4f32_two_step:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00]
|
||||||
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
|
||||||
; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
|
||||||
|
@ -463,18 +463,18 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
|
||||||
; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: v4f32_two_step:
|
; HASWELL-LABEL: v4f32_two_step:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01]
|
||||||
; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
|
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
|
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0
|
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0
|
; HASWELL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-NO-FMA-LABEL: v4f32_two_step:
|
; HASWELL-NO-FMA-LABEL: v4f32_two_step:
|
||||||
; HASWELL-NO-FMA: # BB#0:
|
; HASWELL-NO-FMA: # BB#0:
|
||||||
|
@ -493,24 +493,24 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
|
||||||
; KNL-LABEL: v4f32_two_step:
|
; KNL-LABEL: v4f32_two_step:
|
||||||
; KNL: # BB#0:
|
; KNL: # BB#0:
|
||||||
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01]
|
||||||
; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
||||||
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
|
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
|
||||||
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
|
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
|
||||||
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0
|
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
|
||||||
; KNL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0
|
; KNL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
|
||||||
; KNL-NEXT: retq # sched: [1:1.00]
|
; KNL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; SKX-LABEL: v4f32_two_step:
|
; SKX-LABEL: v4f32_two_step:
|
||||||
; SKX: # BB#0:
|
; SKX: # BB#0:
|
||||||
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
|
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
|
||||||
; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01]
|
||||||
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
||||||
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
|
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
|
||||||
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
|
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
|
||||||
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0
|
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
|
||||||
; SKX-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0
|
; SKX-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
|
||||||
; SKX-NEXT: retq # sched: [1:1.00]
|
; SKX-NEXT: retq # sched: [2:1.00]
|
||||||
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
|
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
|
||||||
ret <4 x float> %div
|
ret <4 x float> %div
|
||||||
}
|
}
|
||||||
|
@ -546,15 +546,15 @@ define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: v8f32_no_estimate:
|
; SANDY-LABEL: v8f32_no_estimate:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
|
||||||
; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [12:1.00]
|
; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [29:3.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: v8f32_no_estimate:
|
; HASWELL-LABEL: v8f32_no_estimate:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 # sched: [5:1.00]
|
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 # sched: [5:1.00]
|
||||||
; HASWELL-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [19:2.00]
|
; HASWELL-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [21:2.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-NO-FMA-LABEL: v8f32_no_estimate:
|
; HASWELL-NO-FMA-LABEL: v8f32_no_estimate:
|
||||||
; HASWELL-NO-FMA: # BB#0:
|
; HASWELL-NO-FMA: # BB#0:
|
||||||
|
@ -565,8 +565,8 @@ define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
|
||||||
; AVX512-LABEL: v8f32_no_estimate:
|
; AVX512-LABEL: v8f32_no_estimate:
|
||||||
; AVX512: # BB#0:
|
; AVX512: # BB#0:
|
||||||
; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 # sched: [5:1.00]
|
; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 # sched: [5:1.00]
|
||||||
; AVX512-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [19:2.00]
|
; AVX512-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [21:2.00]
|
||||||
; AVX512-NEXT: retq # sched: [1:1.00]
|
; AVX512-NEXT: retq # sched: [2:1.00]
|
||||||
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
|
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
|
||||||
ret <8 x float> %div
|
ret <8 x float> %div
|
||||||
}
|
}
|
||||||
|
@ -621,19 +621,19 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
|
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
|
||||||
; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: v8f32_one_step:
|
; HASWELL-LABEL: v8f32_one_step:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
|
||||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||||
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
|
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
|
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-NO-FMA-LABEL: v8f32_one_step:
|
; HASWELL-NO-FMA-LABEL: v8f32_one_step:
|
||||||
; HASWELL-NO-FMA: # BB#0:
|
; HASWELL-NO-FMA: # BB#0:
|
||||||
|
@ -647,18 +647,18 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
|
||||||
;
|
;
|
||||||
; KNL-LABEL: v8f32_one_step:
|
; KNL-LABEL: v8f32_one_step:
|
||||||
; KNL: # BB#0:
|
; KNL: # BB#0:
|
||||||
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
|
||||||
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||||
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
|
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
|
||||||
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
|
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
|
||||||
; KNL-NEXT: retq # sched: [1:1.00]
|
; KNL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; SKX-LABEL: v8f32_one_step:
|
; SKX-LABEL: v8f32_one_step:
|
||||||
; SKX: # BB#0:
|
; SKX: # BB#0:
|
||||||
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
|
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
|
||||||
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0
|
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0
|
||||||
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
|
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
|
||||||
; SKX-NEXT: retq # sched: [1:1.00]
|
; SKX-NEXT: retq # sched: [2:1.00]
|
||||||
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
|
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
|
||||||
ret <8 x float> %div
|
ret <8 x float> %div
|
||||||
}
|
}
|
||||||
|
@ -737,7 +737,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
|
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
|
||||||
; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
|
||||||
|
@ -745,18 +745,18 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
|
||||||
; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: v8f32_two_step:
|
; HASWELL-LABEL: v8f32_two_step:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
|
||||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||||
; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
|
; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
|
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
|
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0
|
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0
|
; HASWELL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-NO-FMA-LABEL: v8f32_two_step:
|
; HASWELL-NO-FMA-LABEL: v8f32_two_step:
|
||||||
; HASWELL-NO-FMA: # BB#0:
|
; HASWELL-NO-FMA: # BB#0:
|
||||||
|
@ -774,25 +774,25 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
|
||||||
;
|
;
|
||||||
; KNL-LABEL: v8f32_two_step:
|
; KNL-LABEL: v8f32_two_step:
|
||||||
; KNL: # BB#0:
|
; KNL: # BB#0:
|
||||||
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
|
||||||
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||||
; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
|
; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
|
||||||
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
|
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50]
|
||||||
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
|
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50]
|
||||||
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0
|
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50]
|
||||||
; KNL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0
|
; KNL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50]
|
||||||
; KNL-NEXT: retq # sched: [1:1.00]
|
; KNL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; SKX-LABEL: v8f32_two_step:
|
; SKX-LABEL: v8f32_two_step:
|
||||||
; SKX: # BB#0:
|
; SKX: # BB#0:
|
||||||
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
|
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
|
||||||
; SKX-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
; SKX-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||||
; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
|
; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
|
||||||
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
|
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50]
|
||||||
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
|
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50]
|
||||||
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0
|
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50]
|
||||||
; SKX-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0
|
; SKX-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50]
|
||||||
; SKX-NEXT: retq # sched: [1:1.00]
|
; SKX-NEXT: retq # sched: [2:1.00]
|
||||||
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
|
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
|
||||||
ret <8 x float> %div
|
ret <8 x float> %div
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,26 +39,26 @@ define float @f32_no_step_2(float %x) #3 {
|
||||||
; SANDY-LABEL: f32_no_step_2:
|
; SANDY-LABEL: f32_no_step_2:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00]
|
; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: f32_no_step_2:
|
; HASWELL-LABEL: f32_no_step_2:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
|
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
|
||||||
; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
|
; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-NO-FMA-LABEL: f32_no_step_2:
|
; HASWELL-NO-FMA-LABEL: f32_no_step_2:
|
||||||
; HASWELL-NO-FMA: # BB#0:
|
; HASWELL-NO-FMA: # BB#0:
|
||||||
; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
|
; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
|
; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: f32_no_step_2:
|
; AVX512-LABEL: f32_no_step_2:
|
||||||
; AVX512: # BB#0:
|
; AVX512: # BB#0:
|
||||||
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0
|
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0
|
||||||
; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
|
; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
|
||||||
; AVX512-NEXT: retq # sched: [1:1.00]
|
; AVX512-NEXT: retq # sched: [2:1.00]
|
||||||
%div = fdiv fast float 1234.0, %x
|
%div = fdiv fast float 1234.0, %x
|
||||||
ret float %div
|
ret float %div
|
||||||
}
|
}
|
||||||
|
@ -110,39 +110,39 @@ define float @f32_one_step_2(float %x) #1 {
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
|
; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
|
||||||
; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00]
|
; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: f32_one_step_2:
|
; HASWELL-LABEL: f32_one_step_2:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0
|
; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0
|
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
|
; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-NO-FMA-LABEL: f32_one_step_2:
|
; HASWELL-NO-FMA-LABEL: f32_one_step_2:
|
||||||
; HASWELL-NO-FMA: # BB#0:
|
; HASWELL-NO-FMA: # BB#0:
|
||||||
; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
|
; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
|
; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [?:5.000000e-01]
|
||||||
; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
|
; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
|
; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: f32_one_step_2:
|
; AVX512-LABEL: f32_one_step_2:
|
||||||
; AVX512: # BB#0:
|
; AVX512: # BB#0:
|
||||||
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
|
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
|
||||||
; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0
|
; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0
|
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
|
; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
|
||||||
; AVX512-NEXT: retq # sched: [1:1.00]
|
; AVX512-NEXT: retq # sched: [2:1.00]
|
||||||
%div = fdiv fast float 3456.0, %x
|
%div = fdiv fast float 3456.0, %x
|
||||||
ret float %div
|
ret float %div
|
||||||
}
|
}
|
||||||
|
@ -198,43 +198,43 @@ define float @f32_one_step_2_divs(float %x) #1 {
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
|
; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
|
||||||
; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:1.00]
|
; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00]
|
||||||
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: f32_one_step_2_divs:
|
; HASWELL-LABEL: f32_one_step_2_divs:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0
|
; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0
|
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
|
; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
|
; HASWELL-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-NO-FMA-LABEL: f32_one_step_2_divs:
|
; HASWELL-NO-FMA-LABEL: f32_one_step_2_divs:
|
||||||
; HASWELL-NO-FMA: # BB#0:
|
; HASWELL-NO-FMA: # BB#0:
|
||||||
; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
|
; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
|
; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [?:5.000000e-01]
|
||||||
; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
|
; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
|
; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
|
; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: f32_one_step_2_divs:
|
; AVX512-LABEL: f32_one_step_2_divs:
|
||||||
; AVX512: # BB#0:
|
; AVX512: # BB#0:
|
||||||
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
|
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
|
||||||
; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0
|
; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0
|
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
|
; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
|
||||||
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
|
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; AVX512-NEXT: retq # sched: [1:1.00]
|
; AVX512-NEXT: retq # sched: [2:1.00]
|
||||||
%div = fdiv fast float 3456.0, %x
|
%div = fdiv fast float 3456.0, %x
|
||||||
%div2 = fdiv fast float %div, %x
|
%div2 = fdiv fast float %div, %x
|
||||||
ret float %div2
|
ret float %div2
|
||||||
|
@ -305,7 +305,7 @@ define float @f32_two_step_2(float %x) #2 {
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
|
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [4:0.50]
|
; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [6:0.50]
|
||||||
; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
|
; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
|
; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
|
; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
|
||||||
|
@ -313,26 +313,26 @@ define float @f32_two_step_2(float %x) #2 {
|
||||||
; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00]
|
; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: f32_two_step_2:
|
; HASWELL-LABEL: f32_two_step_2:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
|
; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [?:5.000000e-01]
|
||||||
; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3
|
; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3
|
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0
|
; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0
|
; HASWELL-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
|
; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-NO-FMA-LABEL: f32_two_step_2:
|
; HASWELL-NO-FMA-LABEL: f32_two_step_2:
|
||||||
; HASWELL-NO-FMA: # BB#0:
|
; HASWELL-NO-FMA: # BB#0:
|
||||||
; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:0.50]
|
; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [4:0.50]
|
; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [?:5.000000e-01]
|
||||||
; HASWELL-NO-FMA-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
|
; HASWELL-NO-FMA-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:0.50]
|
; HASWELL-NO-FMA-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
|
; HASWELL-NO-FMA-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
|
||||||
|
@ -340,20 +340,20 @@ define float @f32_two_step_2(float %x) #2 {
|
||||||
; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
|
; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
|
; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
|
; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: f32_two_step_2:
|
; AVX512-LABEL: f32_two_step_2:
|
||||||
; AVX512: # BB#0:
|
; AVX512: # BB#0:
|
||||||
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
|
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
|
||||||
; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
|
; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [?:5.000000e-01]
|
||||||
; AVX512-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
; AVX512-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
||||||
; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3
|
; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
|
||||||
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3
|
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
|
||||||
; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0
|
; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
|
||||||
; AVX512-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0
|
; AVX512-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
|
||||||
; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
|
; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
|
||||||
; AVX512-NEXT: retq # sched: [1:1.00]
|
; AVX512-NEXT: retq # sched: [2:1.00]
|
||||||
%div = fdiv fast float 6789.0, %x
|
%div = fdiv fast float 6789.0, %x
|
||||||
ret float %div
|
ret float %div
|
||||||
}
|
}
|
||||||
|
@ -403,51 +403,51 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: v4f32_one_step2:
|
; SANDY-LABEL: v4f32_one_step2:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00]
|
||||||
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
|
||||||
; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00]
|
; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: v4f32_one_step2:
|
; HASWELL-LABEL: v4f32_one_step2:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01]
|
||||||
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
|
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
|
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
|
; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-NO-FMA-LABEL: v4f32_one_step2:
|
; HASWELL-NO-FMA-LABEL: v4f32_one_step2:
|
||||||
; HASWELL-NO-FMA: # BB#0:
|
; HASWELL-NO-FMA: # BB#0:
|
||||||
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
|
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01]
|
||||||
; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
|
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
|
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; KNL-LABEL: v4f32_one_step2:
|
; KNL-LABEL: v4f32_one_step2:
|
||||||
; KNL: # BB#0:
|
; KNL: # BB#0:
|
||||||
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01]
|
||||||
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
|
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
|
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
|
; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
|
||||||
; KNL-NEXT: retq # sched: [1:1.00]
|
; KNL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; SKX-LABEL: v4f32_one_step2:
|
; SKX-LABEL: v4f32_one_step2:
|
||||||
; SKX: # BB#0:
|
; SKX: # BB#0:
|
||||||
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
|
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
|
||||||
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0
|
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0
|
||||||
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
|
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
|
; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
|
||||||
; SKX-NEXT: retq # sched: [1:1.00]
|
; SKX-NEXT: retq # sched: [2:1.00]
|
||||||
%div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
|
%div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
|
||||||
ret <4 x float> %div
|
ret <4 x float> %div
|
||||||
}
|
}
|
||||||
|
@ -501,56 +501,56 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: v4f32_one_step_2_divs:
|
; SANDY-LABEL: v4f32_one_step_2_divs:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00]
|
||||||
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
|
||||||
; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:1.00]
|
; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00]
|
||||||
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: v4f32_one_step_2_divs:
|
; HASWELL-LABEL: v4f32_one_step_2_divs:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01]
|
||||||
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
|
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
|
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
|
; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
|
; HASWELL-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-NO-FMA-LABEL: v4f32_one_step_2_divs:
|
; HASWELL-NO-FMA-LABEL: v4f32_one_step_2_divs:
|
||||||
; HASWELL-NO-FMA: # BB#0:
|
; HASWELL-NO-FMA: # BB#0:
|
||||||
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
|
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01]
|
||||||
; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
|
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
|
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
|
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; KNL-LABEL: v4f32_one_step_2_divs:
|
; KNL-LABEL: v4f32_one_step_2_divs:
|
||||||
; KNL: # BB#0:
|
; KNL: # BB#0:
|
||||||
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01]
|
||||||
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
|
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
|
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
|
; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
|
||||||
; KNL-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
|
; KNL-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; KNL-NEXT: retq # sched: [1:1.00]
|
; KNL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; SKX-LABEL: v4f32_one_step_2_divs:
|
; SKX-LABEL: v4f32_one_step_2_divs:
|
||||||
; SKX: # BB#0:
|
; SKX: # BB#0:
|
||||||
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
|
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
|
||||||
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0
|
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0
|
||||||
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
|
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
|
; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50]
|
||||||
; SKX-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
|
; SKX-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; SKX-NEXT: retq # sched: [1:1.00]
|
; SKX-NEXT: retq # sched: [2:1.00]
|
||||||
%div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
|
%div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
|
||||||
%div2 = fdiv fast <4 x float> %div, %x
|
%div2 = fdiv fast <4 x float> %div, %x
|
||||||
ret <4 x float> %div2
|
ret <4 x float> %div2
|
||||||
|
@ -619,9 +619,9 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: v4f32_two_step2:
|
; SANDY-LABEL: v4f32_two_step2:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00]
|
||||||
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
|
||||||
; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
|
||||||
|
@ -629,26 +629,26 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
|
||||||
; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00]
|
; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: v4f32_two_step2:
|
; HASWELL-LABEL: v4f32_two_step2:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01]
|
||||||
; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
|
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
|
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0
|
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0
|
; HASWELL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
|
; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-NO-FMA-LABEL: v4f32_two_step2:
|
; HASWELL-NO-FMA-LABEL: v4f32_two_step2:
|
||||||
; HASWELL-NO-FMA: # BB#0:
|
; HASWELL-NO-FMA: # BB#0:
|
||||||
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:0.50]
|
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm3 # sched: [4:0.50]
|
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm3 # sched: [?:5.000000e-01]
|
||||||
; HASWELL-NO-FMA-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
|
; HASWELL-NO-FMA-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:0.50]
|
; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
|
; HASWELL-NO-FMA-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
|
||||||
|
@ -656,32 +656,32 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
|
||||||
; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
|
; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
|
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
|
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; KNL-LABEL: v4f32_two_step2:
|
; KNL-LABEL: v4f32_two_step2:
|
||||||
; KNL: # BB#0:
|
; KNL: # BB#0:
|
||||||
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
|
||||||
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01]
|
||||||
; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
||||||
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
|
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
|
||||||
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
|
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
|
||||||
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0
|
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
|
||||||
; KNL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0
|
; KNL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
|
||||||
; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
|
; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
|
||||||
; KNL-NEXT: retq # sched: [1:1.00]
|
; KNL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; SKX-LABEL: v4f32_two_step2:
|
; SKX-LABEL: v4f32_two_step2:
|
||||||
; SKX: # BB#0:
|
; SKX: # BB#0:
|
||||||
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
|
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
|
||||||
; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
|
; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01]
|
||||||
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
|
||||||
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
|
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50]
|
||||||
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
|
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50]
|
||||||
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0
|
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50]
|
||||||
; SKX-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0
|
; SKX-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50]
|
||||||
; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
|
; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50]
|
||||||
; SKX-NEXT: retq # sched: [1:1.00]
|
; SKX-NEXT: retq # sched: [2:1.00]
|
||||||
%div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
|
%div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
|
||||||
ret <4 x float> %div
|
ret <4 x float> %div
|
||||||
}
|
}
|
||||||
|
@ -741,49 +741,49 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 {
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
|
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
|
||||||
; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
|
; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:2.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: v8f32_one_step2:
|
; HASWELL-LABEL: v8f32_one_step2:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
|
||||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||||
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
|
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
|
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
|
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-NO-FMA-LABEL: v8f32_one_step2:
|
; HASWELL-NO-FMA-LABEL: v8f32_one_step2:
|
||||||
; HASWELL-NO-FMA: # BB#0:
|
; HASWELL-NO-FMA: # BB#0:
|
||||||
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
|
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
|
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
|
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; KNL-LABEL: v8f32_one_step2:
|
; KNL-LABEL: v8f32_one_step2:
|
||||||
; KNL: # BB#0:
|
; KNL: # BB#0:
|
||||||
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
|
||||||
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||||
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
|
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
|
||||||
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
|
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
|
||||||
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
|
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
|
||||||
; KNL-NEXT: retq # sched: [1:1.00]
|
; KNL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; SKX-LABEL: v8f32_one_step2:
|
; SKX-LABEL: v8f32_one_step2:
|
||||||
; SKX: # BB#0:
|
; SKX: # BB#0:
|
||||||
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
|
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
|
||||||
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0
|
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0
|
||||||
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
|
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
|
||||||
; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
|
; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
|
||||||
; SKX-NEXT: retq # sched: [1:1.00]
|
; SKX-NEXT: retq # sched: [2:1.00]
|
||||||
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
|
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
|
||||||
ret <8 x float> %div
|
ret <8 x float> %div
|
||||||
}
|
}
|
||||||
|
@ -848,54 +848,54 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 {
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
|
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
|
||||||
; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
|
; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:2.00]
|
||||||
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: v8f32_one_step_2_divs:
|
; HASWELL-LABEL: v8f32_one_step_2_divs:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
|
||||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||||
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
|
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
|
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
|
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
; HASWELL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-NO-FMA-LABEL: v8f32_one_step_2_divs:
|
; HASWELL-NO-FMA-LABEL: v8f32_one_step_2_divs:
|
||||||
; HASWELL-NO-FMA: # BB#0:
|
; HASWELL-NO-FMA: # BB#0:
|
||||||
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
|
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
|
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
|
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; KNL-LABEL: v8f32_one_step_2_divs:
|
; KNL-LABEL: v8f32_one_step_2_divs:
|
||||||
; KNL: # BB#0:
|
; KNL: # BB#0:
|
||||||
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
|
||||||
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||||
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
|
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
|
||||||
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
|
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
|
||||||
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
|
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [5:0.50]
|
||||||
; KNL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
; KNL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
|
||||||
; KNL-NEXT: retq # sched: [1:1.00]
|
; KNL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; SKX-LABEL: v8f32_one_step_2_divs:
|
; SKX-LABEL: v8f32_one_step_2_divs:
|
||||||
; SKX: # BB#0:
|
; SKX: # BB#0:
|
||||||
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
|
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
|
||||||
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0
|
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0
|
||||||
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
|
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50]
|
||||||
; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
|
; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [5:0.50]
|
||||||
; SKX-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
; SKX-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
|
||||||
; SKX-NEXT: retq # sched: [1:1.00]
|
; SKX-NEXT: retq # sched: [2:1.00]
|
||||||
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
|
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
|
||||||
%div2 = fdiv fast <8 x float> %div, %x
|
%div2 = fdiv fast <8 x float> %div, %x
|
||||||
ret <8 x float> %div2
|
ret <8 x float> %div2
|
||||||
|
@ -980,7 +980,7 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
|
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
|
; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
|
||||||
; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
|
||||||
|
@ -988,59 +988,59 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
|
||||||
; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
|
; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
|
; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:2.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: v8f32_two_step2:
|
; HASWELL-LABEL: v8f32_two_step2:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
|
||||||
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||||
; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
|
; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
|
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
|
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0
|
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0
|
; HASWELL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
|
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-NO-FMA-LABEL: v8f32_two_step2:
|
; HASWELL-NO-FMA-LABEL: v8f32_two_step2:
|
||||||
; HASWELL-NO-FMA: # BB#0:
|
; HASWELL-NO-FMA: # BB#0:
|
||||||
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
|
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm3 # sched: [5:1.00]
|
; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm3 # sched: [5:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
|
; HASWELL-NO-FMA-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
|
; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
|
; HASWELL-NO-FMA-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
|
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
|
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
|
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
|
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; KNL-LABEL: v8f32_two_step2:
|
; KNL-LABEL: v8f32_two_step2:
|
||||||
; KNL: # BB#0:
|
; KNL: # BB#0:
|
||||||
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
|
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
|
||||||
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||||
; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
|
; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
|
||||||
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
|
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50]
|
||||||
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
|
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50]
|
||||||
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0
|
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50]
|
||||||
; KNL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0
|
; KNL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50]
|
||||||
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
|
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
|
||||||
; KNL-NEXT: retq # sched: [1:1.00]
|
; KNL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; SKX-LABEL: v8f32_two_step2:
|
; SKX-LABEL: v8f32_two_step2:
|
||||||
; SKX: # BB#0:
|
; SKX: # BB#0:
|
||||||
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
|
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
|
||||||
; SKX-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
; SKX-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
|
||||||
; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
|
; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
|
||||||
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
|
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50]
|
||||||
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
|
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50]
|
||||||
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0
|
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50]
|
||||||
; SKX-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0
|
; SKX-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50]
|
||||||
; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
|
; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
|
||||||
; SKX-NEXT: retq # sched: [1:1.00]
|
; SKX-NEXT: retq # sched: [2:1.00]
|
||||||
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
|
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
|
||||||
ret <8 x float> %div
|
ret <8 x float> %div
|
||||||
}
|
}
|
||||||
|
@ -1070,27 +1070,27 @@ define <8 x float> @v8f32_no_step(<8 x float> %x) #3 {
|
||||||
; SANDY-LABEL: v8f32_no_step:
|
; SANDY-LABEL: v8f32_no_step:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00]
|
; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: v8f32_no_step:
|
; HASWELL-LABEL: v8f32_no_step:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
|
; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-NO-FMA-LABEL: v8f32_no_step:
|
; HASWELL-NO-FMA-LABEL: v8f32_no_step:
|
||||||
; HASWELL-NO-FMA: # BB#0:
|
; HASWELL-NO-FMA: # BB#0:
|
||||||
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
|
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
|
||||||
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; KNL-LABEL: v8f32_no_step:
|
; KNL-LABEL: v8f32_no_step:
|
||||||
; KNL: # BB#0:
|
; KNL: # BB#0:
|
||||||
; KNL-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
|
; KNL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
|
||||||
; KNL-NEXT: retq # sched: [1:1.00]
|
; KNL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; SKX-LABEL: v8f32_no_step:
|
; SKX-LABEL: v8f32_no_step:
|
||||||
; SKX: # BB#0:
|
; SKX: # BB#0:
|
||||||
; SKX-NEXT: vrcp14ps %ymm0, %ymm0
|
; SKX-NEXT: vrcp14ps %ymm0, %ymm0
|
||||||
; SKX-NEXT: retq # sched: [1:1.00]
|
; SKX-NEXT: retq # sched: [2:1.00]
|
||||||
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
|
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
|
||||||
ret <8 x float> %div
|
ret <8 x float> %div
|
||||||
}
|
}
|
||||||
|
@ -1125,32 +1125,32 @@ define <8 x float> @v8f32_no_step2(<8 x float> %x) #3 {
|
||||||
; SANDY-LABEL: v8f32_no_step2:
|
; SANDY-LABEL: v8f32_no_step2:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00]
|
; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
|
; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:2.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: v8f32_no_step2:
|
; HASWELL-LABEL: v8f32_no_step2:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
|
; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
|
||||||
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
|
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-NO-FMA-LABEL: v8f32_no_step2:
|
; HASWELL-NO-FMA-LABEL: v8f32_no_step2:
|
||||||
; HASWELL-NO-FMA: # BB#0:
|
; HASWELL-NO-FMA: # BB#0:
|
||||||
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
|
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
|
||||||
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
|
; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
|
||||||
; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; KNL-LABEL: v8f32_no_step2:
|
; KNL-LABEL: v8f32_no_step2:
|
||||||
; KNL: # BB#0:
|
; KNL: # BB#0:
|
||||||
; KNL-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
|
; KNL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
|
||||||
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
|
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
|
||||||
; KNL-NEXT: retq # sched: [1:1.00]
|
; KNL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; SKX-LABEL: v8f32_no_step2:
|
; SKX-LABEL: v8f32_no_step2:
|
||||||
; SKX: # BB#0:
|
; SKX: # BB#0:
|
||||||
; SKX-NEXT: vrcp14ps %ymm0, %ymm0
|
; SKX-NEXT: vrcp14ps %ymm0, %ymm0
|
||||||
; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
|
; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50]
|
||||||
; SKX-NEXT: retq # sched: [1:1.00]
|
; SKX-NEXT: retq # sched: [2:1.00]
|
||||||
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
|
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
|
||||||
ret <8 x float> %div
|
ret <8 x float> %div
|
||||||
}
|
}
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -31,14 +31,14 @@ define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
|
||||||
; SANDY-LABEL: test_addsubpd:
|
; SANDY-LABEL: test_addsubpd:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
|
; SANDY-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_addsubpd:
|
; HASWELL-LABEL: test_addsubpd:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
; HASWELL-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
||||||
; HASWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
|
; HASWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_addsubpd:
|
; BTVER2-LABEL: test_addsubpd:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -74,14 +74,14 @@ define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float>
|
||||||
; SANDY-LABEL: test_addsubps:
|
; SANDY-LABEL: test_addsubps:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
|
; SANDY-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_addsubps:
|
; HASWELL-LABEL: test_addsubps:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
; HASWELL-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
||||||
; HASWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
|
; HASWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_addsubps:
|
; BTVER2-LABEL: test_addsubps:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -116,15 +116,15 @@ define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_haddpd:
|
; SANDY-LABEL: test_haddpd:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
|
||||||
; SANDY-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
|
; SANDY-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_haddpd:
|
; HASWELL-LABEL: test_haddpd:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
|
; HASWELL-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
|
||||||
; HASWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
|
; HASWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [5:2.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_haddpd:
|
; BTVER2-LABEL: test_haddpd:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -159,15 +159,15 @@ define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_haddps:
|
; SANDY-LABEL: test_haddps:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
|
||||||
; SANDY-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
|
; SANDY-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_haddps:
|
; HASWELL-LABEL: test_haddps:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
|
; HASWELL-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
|
||||||
; HASWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
|
; HASWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [5:2.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_haddps:
|
; BTVER2-LABEL: test_haddps:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -202,15 +202,15 @@ define <2 x double> @test_hsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_hsubpd:
|
; SANDY-LABEL: test_hsubpd:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
|
||||||
; SANDY-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
|
; SANDY-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_hsubpd:
|
; HASWELL-LABEL: test_hsubpd:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
|
; HASWELL-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
|
||||||
; HASWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
|
; HASWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [5:2.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_hsubpd:
|
; BTVER2-LABEL: test_hsubpd:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -245,15 +245,15 @@ define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_hsubps:
|
; SANDY-LABEL: test_hsubps:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
|
||||||
; SANDY-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
|
; SANDY-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_hsubps:
|
; HASWELL-LABEL: test_hsubps:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
|
; HASWELL-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
|
||||||
; HASWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
|
; HASWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [5:2.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_hsubps:
|
; BTVER2-LABEL: test_hsubps:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -287,13 +287,13 @@ define <16 x i8> @test_lddqu(i8* %a0) {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_lddqu:
|
; SANDY-LABEL: test_lddqu:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vlddqu (%rdi), %xmm0 # sched: [4:0.50]
|
; SANDY-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_lddqu:
|
; HASWELL-LABEL: test_lddqu:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [4:0.50]
|
; HASWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [?:5.000000e-01]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_lddqu:
|
; BTVER2-LABEL: test_lddqu:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -330,16 +330,16 @@ define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) {
|
||||||
; SANDY-LABEL: test_movddup:
|
; SANDY-LABEL: test_movddup:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
|
; SANDY-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
|
||||||
; SANDY-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [4:0.50]
|
; SANDY-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:0.50]
|
||||||
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_movddup:
|
; HASWELL-LABEL: test_movddup:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
|
; HASWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [4:0.50]
|
; HASWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [?:5.000000e-01]
|
||||||
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_movddup:
|
; BTVER2-LABEL: test_movddup:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -380,16 +380,16 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) {
|
||||||
; SANDY-LABEL: test_movshdup:
|
; SANDY-LABEL: test_movshdup:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
|
; SANDY-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
|
||||||
; SANDY-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [4:0.50]
|
; SANDY-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
|
||||||
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_movshdup:
|
; HASWELL-LABEL: test_movshdup:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
|
; HASWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [4:0.50]
|
; HASWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [?:5.000000e-01]
|
||||||
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_movshdup:
|
; BTVER2-LABEL: test_movshdup:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -430,16 +430,16 @@ define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) {
|
||||||
; SANDY-LABEL: test_movsldup:
|
; SANDY-LABEL: test_movsldup:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
|
; SANDY-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
|
||||||
; SANDY-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [4:0.50]
|
; SANDY-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
|
||||||
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_movsldup:
|
; HASWELL-LABEL: test_movsldup:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
|
; HASWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [4:0.50]
|
; HASWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [?:5.000000e-01]
|
||||||
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_movsldup:
|
; BTVER2-LABEL: test_movsldup:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
|
|
@ -25,17 +25,17 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_blendpd:
|
; SANDY-LABEL: test_blendpd:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
|
; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:1.00]
|
||||||
; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [5:0.50]
|
; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_blendpd:
|
; HASWELL-LABEL: test_blendpd:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
|
; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
|
||||||
; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||||
; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [5:0.50]
|
; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_blendpd:
|
; BTVER2-LABEL: test_blendpd:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -65,15 +65,15 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> *
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_blendps:
|
; SANDY-LABEL: test_blendps:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
|
; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:1.00]
|
||||||
; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [5:0.50]
|
; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [7:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_blendps:
|
; HASWELL-LABEL: test_blendps:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
|
; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
|
||||||
; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [5:0.50]
|
; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_blendps:
|
; BTVER2-LABEL: test_blendps:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -107,15 +107,15 @@ define <2 x double> @test_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_blendvpd:
|
; SANDY-LABEL: test_blendvpd:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
|
; SANDY-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
|
||||||
; SANDY-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
|
; SANDY-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_blendvpd:
|
; HASWELL-LABEL: test_blendvpd:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
|
; HASWELL-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
|
||||||
; HASWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
|
; HASWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_blendvpd:
|
; BTVER2-LABEL: test_blendvpd:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -150,15 +150,15 @@ define <4 x float> @test_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float>
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_blendvps:
|
; SANDY-LABEL: test_blendvps:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
|
; SANDY-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
|
||||||
; SANDY-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
|
; SANDY-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_blendvps:
|
; HASWELL-LABEL: test_blendvps:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
|
; HASWELL-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
|
||||||
; HASWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
|
; HASWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_blendvps:
|
; BTVER2-LABEL: test_blendvps:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -187,15 +187,15 @@ define <2 x double> @test_dppd(<2 x double> %a0, <2 x double> %a1, <2 x double>
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_dppd:
|
; SANDY-LABEL: test_dppd:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
|
||||||
; SANDY-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
|
; SANDY-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_dppd:
|
; HASWELL-LABEL: test_dppd:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
|
; HASWELL-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
|
||||||
; HASWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
|
; HASWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_dppd:
|
; BTVER2-LABEL: test_dppd:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -224,15 +224,15 @@ define <4 x float> @test_dpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_dpps:
|
; SANDY-LABEL: test_dpps:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [12:2.00]
|
||||||
; SANDY-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
|
; SANDY-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_dpps:
|
; HASWELL-LABEL: test_dpps:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00]
|
; HASWELL-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00]
|
||||||
; HASWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [18:2.00]
|
; HASWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [14:2.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_dpps:
|
; BTVER2-LABEL: test_dpps:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -262,14 +262,14 @@ define <4 x float> @test_insertps(<4 x float> %a0, <4 x float> %a1, float *%a2)
|
||||||
; SANDY-LABEL: test_insertps:
|
; SANDY-LABEL: test_insertps:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
|
; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
|
||||||
; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [5:1.00]
|
; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_insertps:
|
; HASWELL-LABEL: test_insertps:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
|
; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [5:1.00]
|
; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [1:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_insertps:
|
; BTVER2-LABEL: test_insertps:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -296,13 +296,13 @@ define <2 x i64> @test_movntdqa(i8* %a0) {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_movntdqa:
|
; SANDY-LABEL: test_movntdqa:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [4:0.50]
|
; SANDY-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_movntdqa:
|
; HASWELL-LABEL: test_movntdqa:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [4:0.50]
|
; HASWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [?:5.000000e-01]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_movntdqa:
|
; BTVER2-LABEL: test_movntdqa:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -328,15 +328,15 @@ define <8 x i16> @test_mpsadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_mpsadbw:
|
; SANDY-LABEL: test_mpsadbw:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [6:1.00]
|
; SANDY-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
|
; SANDY-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_mpsadbw:
|
; HASWELL-LABEL: test_mpsadbw:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
|
; HASWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00]
|
||||||
; HASWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
|
; HASWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_mpsadbw:
|
; BTVER2-LABEL: test_mpsadbw:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -367,14 +367,14 @@ define <8 x i16> @test_packusdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
|
||||||
; SANDY-LABEL: test_packusdw:
|
; SANDY-LABEL: test_packusdw:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; SANDY-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_packusdw:
|
; HASWELL-LABEL: test_packusdw:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
|
; HASWELL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
|
; HASWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_packusdw:
|
; BTVER2-LABEL: test_packusdw:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -411,14 +411,14 @@ define <16 x i8> @test_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2, <16
|
||||||
; SANDY-LABEL: test_pblendvb:
|
; SANDY-LABEL: test_pblendvb:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
|
; SANDY-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
|
||||||
; SANDY-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
|
; SANDY-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pblendvb:
|
; HASWELL-LABEL: test_pblendvb:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
|
; HASWELL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
|
||||||
; HASWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
|
; HASWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pblendvb:
|
; BTVER2-LABEL: test_pblendvb:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -448,14 +448,14 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
|
||||||
; SANDY-LABEL: test_pblendw:
|
; SANDY-LABEL: test_pblendw:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
|
; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [5:0.50]
|
; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [7:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pblendw:
|
; HASWELL-LABEL: test_pblendw:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
|
; HASWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [4:1.00]
|
; HASWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [1:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pblendw:
|
; BTVER2-LABEL: test_pblendw:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -484,14 +484,14 @@ define <2 x i64> @test_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
|
||||||
; SANDY-LABEL: test_pcmpeqq:
|
; SANDY-LABEL: test_pcmpeqq:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; SANDY-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pcmpeqq:
|
; HASWELL-LABEL: test_pcmpeqq:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; HASWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pcmpeqq:
|
; BTVER2-LABEL: test_pcmpeqq:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -521,15 +521,15 @@ define i32 @test_pextrb(<16 x i8> %a0, i8 *%a1) {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_pextrb:
|
; SANDY-LABEL: test_pextrb:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpextrb $3, %xmm0, %eax # sched: [1:0.50]
|
; SANDY-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
|
; SANDY-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pextrb:
|
; HASWELL-LABEL: test_pextrb:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [1:1.00]
|
; HASWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:1.00]
|
||||||
; HASWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
|
; HASWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pextrb:
|
; BTVER2-LABEL: test_pextrb:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -558,15 +558,15 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_pextrd:
|
; SANDY-LABEL: test_pextrd:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpextrd $3, %xmm0, %eax # sched: [1:0.50]
|
; SANDY-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
|
; SANDY-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pextrd:
|
; HASWELL-LABEL: test_pextrd:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [1:1.00]
|
; HASWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [2:1.00]
|
||||||
; HASWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
|
; HASWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pextrd:
|
; BTVER2-LABEL: test_pextrd:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -594,15 +594,15 @@ define i64 @test_pextrq(<2 x i64> %a0, <2 x i64> %a1, i64 *%a2) {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_pextrq:
|
; SANDY-LABEL: test_pextrq:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpextrq $1, %xmm0, %rax # sched: [1:0.50]
|
; SANDY-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
|
; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pextrq:
|
; HASWELL-LABEL: test_pextrq:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [1:1.00]
|
; HASWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:1.00]
|
||||||
; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
|
; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pextrq:
|
; BTVER2-LABEL: test_pextrq:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -630,15 +630,15 @@ define i32 @test_pextrw(<8 x i16> %a0, i16 *%a1) {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_pextrw:
|
; SANDY-LABEL: test_pextrw:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpextrw $3, %xmm0, %eax # sched: [1:0.50]
|
; SANDY-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
|
; SANDY-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pextrw:
|
; HASWELL-LABEL: test_pextrw:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [1:1.00]
|
; HASWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:1.00]
|
||||||
; HASWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
|
; HASWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pextrw:
|
; BTVER2-LABEL: test_pextrw:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -667,15 +667,15 @@ define <8 x i16> @test_phminposuw(<8 x i16> *%a0) {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_phminposuw:
|
; SANDY-LABEL: test_phminposuw:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vphminposuw (%rdi), %xmm0 # sched: [9:1.00]
|
; SANDY-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00]
|
||||||
; SANDY-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_phminposuw:
|
; HASWELL-LABEL: test_phminposuw:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [9:1.00]
|
; HASWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [5:1.00]
|
||||||
; HASWELL-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
|
; HASWELL-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_phminposuw:
|
; BTVER2-LABEL: test_phminposuw:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -704,15 +704,15 @@ define <16 x i8> @test_pinsrb(<16 x i8> %a0, i8 %a1, i8 *%a2) {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_pinsrb:
|
; SANDY-LABEL: test_pinsrb:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
|
||||||
; SANDY-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [5:0.50]
|
; SANDY-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pinsrb:
|
; HASWELL-LABEL: test_pinsrb:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:1.00]
|
; HASWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
|
||||||
; HASWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [5:1.00]
|
; HASWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pinsrb:
|
; BTVER2-LABEL: test_pinsrb:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -740,15 +740,15 @@ define <4 x i32> @test_pinsrd(<4 x i32> %a0, i32 %a1, i32 *%a2) {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_pinsrd:
|
; SANDY-LABEL: test_pinsrd:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
|
||||||
; SANDY-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [5:0.50]
|
; SANDY-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pinsrd:
|
; HASWELL-LABEL: test_pinsrd:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:1.00]
|
; HASWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
|
||||||
; HASWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [5:1.00]
|
; HASWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pinsrd:
|
; BTVER2-LABEL: test_pinsrd:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -778,17 +778,17 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_pinsrq:
|
; SANDY-LABEL: test_pinsrq:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:1.00]
|
||||||
; SANDY-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [5:0.50]
|
; SANDY-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [7:0.50]
|
||||||
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pinsrq:
|
; HASWELL-LABEL: test_pinsrq:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [1:1.00]
|
; HASWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
|
||||||
; HASWELL-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [5:1.00]
|
; HASWELL-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pinsrq:
|
; BTVER2-LABEL: test_pinsrq:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -819,14 +819,14 @@ define <16 x i8> @test_pmaxsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
|
||||||
; SANDY-LABEL: test_pmaxsb:
|
; SANDY-LABEL: test_pmaxsb:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; SANDY-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pmaxsb:
|
; HASWELL-LABEL: test_pmaxsb:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; HASWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pmaxsb:
|
; BTVER2-LABEL: test_pmaxsb:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -856,14 +856,14 @@ define <4 x i32> @test_pmaxsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
|
||||||
; SANDY-LABEL: test_pmaxsd:
|
; SANDY-LABEL: test_pmaxsd:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; SANDY-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pmaxsd:
|
; HASWELL-LABEL: test_pmaxsd:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; HASWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pmaxsd:
|
; BTVER2-LABEL: test_pmaxsd:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -893,14 +893,14 @@ define <4 x i32> @test_pmaxud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
|
||||||
; SANDY-LABEL: test_pmaxud:
|
; SANDY-LABEL: test_pmaxud:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; SANDY-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pmaxud:
|
; HASWELL-LABEL: test_pmaxud:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; HASWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pmaxud:
|
; BTVER2-LABEL: test_pmaxud:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -930,14 +930,14 @@ define <8 x i16> @test_pmaxuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
|
||||||
; SANDY-LABEL: test_pmaxuw:
|
; SANDY-LABEL: test_pmaxuw:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; SANDY-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pmaxuw:
|
; HASWELL-LABEL: test_pmaxuw:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; HASWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pmaxuw:
|
; BTVER2-LABEL: test_pmaxuw:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -967,14 +967,14 @@ define <16 x i8> @test_pminsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
|
||||||
; SANDY-LABEL: test_pminsb:
|
; SANDY-LABEL: test_pminsb:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; SANDY-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pminsb:
|
; HASWELL-LABEL: test_pminsb:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; HASWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pminsb:
|
; BTVER2-LABEL: test_pminsb:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -1004,14 +1004,14 @@ define <4 x i32> @test_pminsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
|
||||||
; SANDY-LABEL: test_pminsd:
|
; SANDY-LABEL: test_pminsd:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; SANDY-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pminsd:
|
; HASWELL-LABEL: test_pminsd:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; HASWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pminsd:
|
; BTVER2-LABEL: test_pminsd:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -1041,14 +1041,14 @@ define <4 x i32> @test_pminud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
|
||||||
; SANDY-LABEL: test_pminud:
|
; SANDY-LABEL: test_pminud:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; SANDY-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pminud:
|
; HASWELL-LABEL: test_pminud:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; HASWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pminud:
|
; BTVER2-LABEL: test_pminud:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -1078,14 +1078,14 @@ define <8 x i16> @test_pminuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
|
||||||
; SANDY-LABEL: test_pminuw:
|
; SANDY-LABEL: test_pminuw:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; SANDY-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pminuw:
|
; HASWELL-LABEL: test_pminuw:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; HASWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pminuw:
|
; BTVER2-LABEL: test_pminuw:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -1118,16 +1118,16 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) {
|
||||||
; SANDY-LABEL: test_pmovsxbw:
|
; SANDY-LABEL: test_pmovsxbw:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [5:0.50]
|
; SANDY-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [7:0.50]
|
||||||
; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pmovsxbw:
|
; HASWELL-LABEL: test_pmovsxbw:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
|
; HASWELL-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [5:1.00]
|
; HASWELL-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pmovsxbw:
|
; BTVER2-LABEL: test_pmovsxbw:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -1162,16 +1162,16 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) {
|
||||||
; SANDY-LABEL: test_pmovsxbd:
|
; SANDY-LABEL: test_pmovsxbd:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [5:0.50]
|
; SANDY-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [7:0.50]
|
||||||
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pmovsxbd:
|
; HASWELL-LABEL: test_pmovsxbd:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
|
; HASWELL-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [5:1.00]
|
; HASWELL-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pmovsxbd:
|
; BTVER2-LABEL: test_pmovsxbd:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -1206,16 +1206,16 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) {
|
||||||
; SANDY-LABEL: test_pmovsxbq:
|
; SANDY-LABEL: test_pmovsxbq:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [5:0.50]
|
; SANDY-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [7:0.50]
|
||||||
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pmovsxbq:
|
; HASWELL-LABEL: test_pmovsxbq:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
|
; HASWELL-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [5:1.00]
|
; HASWELL-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pmovsxbq:
|
; BTVER2-LABEL: test_pmovsxbq:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -1250,16 +1250,16 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) {
|
||||||
; SANDY-LABEL: test_pmovsxdq:
|
; SANDY-LABEL: test_pmovsxdq:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [5:0.50]
|
; SANDY-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [7:0.50]
|
||||||
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pmovsxdq:
|
; HASWELL-LABEL: test_pmovsxdq:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
|
; HASWELL-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [5:1.00]
|
; HASWELL-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pmovsxdq:
|
; BTVER2-LABEL: test_pmovsxdq:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -1294,16 +1294,16 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) {
|
||||||
; SANDY-LABEL: test_pmovsxwd:
|
; SANDY-LABEL: test_pmovsxwd:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [5:0.50]
|
; SANDY-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [7:0.50]
|
||||||
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pmovsxwd:
|
; HASWELL-LABEL: test_pmovsxwd:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
|
; HASWELL-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [5:1.00]
|
; HASWELL-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pmovsxwd:
|
; BTVER2-LABEL: test_pmovsxwd:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -1338,16 +1338,16 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) {
|
||||||
; SANDY-LABEL: test_pmovsxwq:
|
; SANDY-LABEL: test_pmovsxwq:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [5:0.50]
|
; SANDY-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [7:0.50]
|
||||||
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pmovsxwq:
|
; HASWELL-LABEL: test_pmovsxwq:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
|
; HASWELL-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [5:1.00]
|
; HASWELL-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pmovsxwq:
|
; BTVER2-LABEL: test_pmovsxwq:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -1382,16 +1382,16 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) {
|
||||||
; SANDY-LABEL: test_pmovzxbw:
|
; SANDY-LABEL: test_pmovzxbw:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
|
; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:0.50]
|
; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50]
|
||||||
; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pmovzxbw:
|
; HASWELL-LABEL: test_pmovzxbw:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
|
; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00]
|
; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pmovzxbw:
|
; BTVER2-LABEL: test_pmovzxbw:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -1426,16 +1426,16 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) {
|
||||||
; SANDY-LABEL: test_pmovzxbd:
|
; SANDY-LABEL: test_pmovzxbd:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
|
; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:0.50]
|
; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50]
|
||||||
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pmovzxbd:
|
; HASWELL-LABEL: test_pmovzxbd:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
|
; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:1.00]
|
; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pmovzxbd:
|
; BTVER2-LABEL: test_pmovzxbd:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -1470,16 +1470,16 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) {
|
||||||
; SANDY-LABEL: test_pmovzxbq:
|
; SANDY-LABEL: test_pmovzxbq:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
|
; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [5:0.50]
|
; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50]
|
||||||
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pmovzxbq:
|
; HASWELL-LABEL: test_pmovzxbq:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
|
; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00]
|
; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pmovzxbq:
|
; BTVER2-LABEL: test_pmovzxbq:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -1514,16 +1514,16 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) {
|
||||||
; SANDY-LABEL: test_pmovzxdq:
|
; SANDY-LABEL: test_pmovzxdq:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
|
; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [5:0.50]
|
; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [7:0.50]
|
||||||
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pmovzxdq:
|
; HASWELL-LABEL: test_pmovzxdq:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
|
; HASWELL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [5:1.00]
|
; HASWELL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pmovzxdq:
|
; BTVER2-LABEL: test_pmovzxdq:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -1558,16 +1558,16 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) {
|
||||||
; SANDY-LABEL: test_pmovzxwd:
|
; SANDY-LABEL: test_pmovzxwd:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
|
; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:0.50]
|
; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50]
|
||||||
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pmovzxwd:
|
; HASWELL-LABEL: test_pmovzxwd:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
|
; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:1.00]
|
; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pmovzxwd:
|
; BTVER2-LABEL: test_pmovzxwd:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -1602,16 +1602,16 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) {
|
||||||
; SANDY-LABEL: test_pmovzxwq:
|
; SANDY-LABEL: test_pmovzxwq:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
|
; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [5:0.50]
|
; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50]
|
||||||
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pmovzxwq:
|
; HASWELL-LABEL: test_pmovzxwq:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
|
; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [5:1.00]
|
; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pmovzxwq:
|
; BTVER2-LABEL: test_pmovzxwq:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -1642,15 +1642,15 @@ define <2 x i64> @test_pmuldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_pmuldq:
|
; SANDY-LABEL: test_pmuldq:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
|
; SANDY-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pmuldq:
|
; HASWELL-LABEL: test_pmuldq:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
; HASWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
||||||
; HASWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
|
; HASWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pmuldq:
|
; BTVER2-LABEL: test_pmuldq:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -1680,15 +1680,15 @@ define <4 x i32> @test_pmulld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_pmulld:
|
; SANDY-LABEL: test_pmulld:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
|
; SANDY-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pmulld:
|
; HASWELL-LABEL: test_pmulld:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00]
|
; HASWELL-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00]
|
||||||
; HASWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
|
; HASWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pmulld:
|
; BTVER2-LABEL: test_pmulld:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -1724,23 +1724,23 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_ptest:
|
; SANDY-LABEL: test_ptest:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vptest %xmm1, %xmm0 # sched: [1:0.33]
|
; SANDY-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00]
|
||||||
; SANDY-NEXT: setb %al # sched: [1:0.33]
|
; SANDY-NEXT: setb %al # sched: [1:1.00]
|
||||||
; SANDY-NEXT: vptest (%rdi), %xmm0 # sched: [5:0.50]
|
; SANDY-NEXT: vptest (%rdi), %xmm0 # sched: [8:1.00]
|
||||||
; SANDY-NEXT: setb %cl # sched: [1:0.33]
|
; SANDY-NEXT: setb %cl # sched: [1:1.00]
|
||||||
; SANDY-NEXT: andb %al, %cl # sched: [1:0.33]
|
; SANDY-NEXT: andb %al, %cl # sched: [1:0.33]
|
||||||
; SANDY-NEXT: movzbl %cl, %eax # sched: [1:0.33]
|
; SANDY-NEXT: movzbl %cl, %eax # sched: [1:0.33]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_ptest:
|
; HASWELL-LABEL: test_ptest:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00]
|
; HASWELL-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00]
|
||||||
; HASWELL-NEXT: setb %al # sched: [1:0.50]
|
; HASWELL-NEXT: setb %al # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vptest (%rdi), %xmm0 # sched: [2:1.00]
|
; HASWELL-NEXT: vptest (%rdi), %xmm0 # sched: [2:1.00]
|
||||||
; HASWELL-NEXT: setb %cl # sched: [1:0.50]
|
; HASWELL-NEXT: setb %cl # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25]
|
; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25]
|
||||||
; HASWELL-NEXT: movzbl %cl, %eax # sched: [1:0.25]
|
; HASWELL-NEXT: movzbl %cl, %eax # sched: [1:0.25]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_ptest:
|
; BTVER2-LABEL: test_ptest:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -1778,16 +1778,16 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) {
|
||||||
; SANDY-LABEL: test_roundpd:
|
; SANDY-LABEL: test_roundpd:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [7:1.00]
|
; SANDY-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [9:1.00]
|
||||||
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_roundpd:
|
; HASWELL-LABEL: test_roundpd:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [6:2.00]
|
; HASWELL-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [5:2.00]
|
||||||
; HASWELL-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [10:2.00]
|
; HASWELL-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [6:1.00]
|
||||||
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_roundpd:
|
; BTVER2-LABEL: test_roundpd:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -1822,16 +1822,16 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) {
|
||||||
; SANDY-LABEL: test_roundps:
|
; SANDY-LABEL: test_roundps:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [7:1.00]
|
; SANDY-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [9:1.00]
|
||||||
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_roundps:
|
; HASWELL-LABEL: test_roundps:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [6:2.00]
|
; HASWELL-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [5:2.00]
|
||||||
; HASWELL-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [10:2.00]
|
; HASWELL-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [6:1.00]
|
||||||
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_roundps:
|
; BTVER2-LABEL: test_roundps:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -1867,16 +1867,16 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
|
||||||
; SANDY-LABEL: test_roundsd:
|
; SANDY-LABEL: test_roundsd:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
|
; SANDY-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
|
; SANDY-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
|
||||||
; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_roundsd:
|
; HASWELL-LABEL: test_roundsd:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [6:2.00]
|
; HASWELL-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [5:2.00]
|
||||||
; HASWELL-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
|
; HASWELL-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
|
||||||
; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_roundsd:
|
; BTVER2-LABEL: test_roundsd:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -1912,16 +1912,16 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> *
|
||||||
; SANDY-LABEL: test_roundss:
|
; SANDY-LABEL: test_roundss:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
|
; SANDY-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
|
; SANDY-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
|
||||||
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_roundss:
|
; HASWELL-LABEL: test_roundss:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [6:2.00]
|
; HASWELL-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
|
||||||
; HASWELL-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
|
; HASWELL-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
|
||||||
; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_roundss:
|
; BTVER2-LABEL: test_roundss:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
|
|
@ -26,16 +26,16 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) {
|
||||||
; SANDY-LABEL: crc32_32_8:
|
; SANDY-LABEL: crc32_32_8:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00]
|
; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00]
|
||||||
; SANDY-NEXT: crc32b (%rdx), %edi # sched: [7:1.00]
|
; SANDY-NEXT: crc32b (%rdx), %edi # sched: [8:1.00]
|
||||||
; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33]
|
; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: crc32_32_8:
|
; HASWELL-LABEL: crc32_32_8:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00]
|
; HASWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00]
|
||||||
; HASWELL-NEXT: crc32b (%rdx), %edi # sched: [7:1.00]
|
; HASWELL-NEXT: crc32b (%rdx), %edi # sched: [7:1.00]
|
||||||
; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
|
; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: crc32_32_8:
|
; BTVER2-LABEL: crc32_32_8:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -68,16 +68,16 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) {
|
||||||
; SANDY-LABEL: crc32_32_16:
|
; SANDY-LABEL: crc32_32_16:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: crc32w %si, %edi # sched: [3:1.00]
|
; SANDY-NEXT: crc32w %si, %edi # sched: [3:1.00]
|
||||||
; SANDY-NEXT: crc32w (%rdx), %edi # sched: [7:1.00]
|
; SANDY-NEXT: crc32w (%rdx), %edi # sched: [8:1.00]
|
||||||
; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33]
|
; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: crc32_32_16:
|
; HASWELL-LABEL: crc32_32_16:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: crc32w %si, %edi # sched: [3:1.00]
|
; HASWELL-NEXT: crc32w %si, %edi # sched: [3:1.00]
|
||||||
; HASWELL-NEXT: crc32w (%rdx), %edi # sched: [7:1.00]
|
; HASWELL-NEXT: crc32w (%rdx), %edi # sched: [7:1.00]
|
||||||
; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
|
; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: crc32_32_16:
|
; BTVER2-LABEL: crc32_32_16:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -112,14 +112,14 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) {
|
||||||
; SANDY-NEXT: crc32l %esi, %edi # sched: [3:1.00]
|
; SANDY-NEXT: crc32l %esi, %edi # sched: [3:1.00]
|
||||||
; SANDY-NEXT: crc32l (%rdx), %edi # sched: [7:1.00]
|
; SANDY-NEXT: crc32l (%rdx), %edi # sched: [7:1.00]
|
||||||
; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33]
|
; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: crc32_32_32:
|
; HASWELL-LABEL: crc32_32_32:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: crc32l %esi, %edi # sched: [3:1.00]
|
; HASWELL-NEXT: crc32l %esi, %edi # sched: [3:1.00]
|
||||||
; HASWELL-NEXT: crc32l (%rdx), %edi # sched: [7:1.00]
|
; HASWELL-NEXT: crc32l (%rdx), %edi # sched: [7:1.00]
|
||||||
; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
|
; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: crc32_32_32:
|
; BTVER2-LABEL: crc32_32_32:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -152,16 +152,16 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind {
|
||||||
; SANDY-LABEL: crc32_64_8:
|
; SANDY-LABEL: crc32_64_8:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00]
|
; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00]
|
||||||
; SANDY-NEXT: crc32b (%rdx), %edi # sched: [7:1.00]
|
; SANDY-NEXT: crc32b (%rdx), %edi # sched: [8:1.00]
|
||||||
; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33]
|
; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: crc32_64_8:
|
; HASWELL-LABEL: crc32_64_8:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00]
|
; HASWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00]
|
||||||
; HASWELL-NEXT: crc32b (%rdx), %edi # sched: [7:1.00]
|
; HASWELL-NEXT: crc32b (%rdx), %edi # sched: [7:1.00]
|
||||||
; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25]
|
; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: crc32_64_8:
|
; BTVER2-LABEL: crc32_64_8:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -196,14 +196,14 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) {
|
||||||
; SANDY-NEXT: crc32q %rsi, %rdi # sched: [3:1.00]
|
; SANDY-NEXT: crc32q %rsi, %rdi # sched: [3:1.00]
|
||||||
; SANDY-NEXT: crc32q (%rdx), %rdi # sched: [7:1.00]
|
; SANDY-NEXT: crc32q (%rdx), %rdi # sched: [7:1.00]
|
||||||
; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33]
|
; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: crc32_64_64:
|
; HASWELL-LABEL: crc32_64_64:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: crc32q %rsi, %rdi # sched: [3:1.00]
|
; HASWELL-NEXT: crc32q %rsi, %rdi # sched: [3:1.00]
|
||||||
; HASWELL-NEXT: crc32q (%rdx), %rdi # sched: [7:1.00]
|
; HASWELL-NEXT: crc32q (%rdx), %rdi # sched: [7:1.00]
|
||||||
; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25]
|
; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: crc32_64_64:
|
; BTVER2-LABEL: crc32_64_64:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -256,20 +256,20 @@ define i32 @test_pcmpestri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
|
||||||
; SANDY-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33]
|
; SANDY-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33]
|
||||||
; SANDY-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
|
; SANDY-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
|
||||||
; SANDY-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
|
; SANDY-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pcmpestri:
|
; HASWELL-LABEL: test_pcmpestri:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25]
|
; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25]
|
||||||
; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25]
|
; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25]
|
||||||
; HASWELL-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [11:3.00]
|
; HASWELL-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00]
|
||||||
; HASWELL-NEXT: movl %ecx, %esi # sched: [1:0.25]
|
; HASWELL-NEXT: movl %ecx, %esi # sched: [1:0.25]
|
||||||
; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25]
|
; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25]
|
||||||
; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25]
|
; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25]
|
||||||
; HASWELL-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [11:3.00]
|
; HASWELL-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [18:4.00]
|
||||||
; HASWELL-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
|
; HASWELL-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
|
||||||
; HASWELL-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
|
; HASWELL-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pcmpestri:
|
; BTVER2-LABEL: test_pcmpestri:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -320,17 +320,17 @@ define <16 x i8> @test_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
|
||||||
; SANDY-NEXT: movl $7, %eax # sched: [1:0.33]
|
; SANDY-NEXT: movl $7, %eax # sched: [1:0.33]
|
||||||
; SANDY-NEXT: movl $7, %edx # sched: [1:0.33]
|
; SANDY-NEXT: movl $7, %edx # sched: [1:0.33]
|
||||||
; SANDY-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33]
|
; SANDY-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pcmpestrm:
|
; HASWELL-LABEL: test_pcmpestrm:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25]
|
; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25]
|
||||||
; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25]
|
; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25]
|
||||||
; HASWELL-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [10:4.00]
|
; HASWELL-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00]
|
||||||
; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25]
|
; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25]
|
||||||
; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25]
|
; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25]
|
||||||
; HASWELL-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [10:3.00]
|
; HASWELL-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [19:4.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pcmpestrm:
|
; BTVER2-LABEL: test_pcmpestrm:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -369,12 +369,12 @@ define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_pcmpistri:
|
; SANDY-LABEL: test_pcmpistri:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
|
||||||
; SANDY-NEXT: movl %ecx, %eax # sched: [1:0.33]
|
; SANDY-NEXT: movl %ecx, %eax # sched: [1:0.33]
|
||||||
; SANDY-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [3:1.00]
|
; SANDY-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00]
|
||||||
; SANDY-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
|
; SANDY-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
|
||||||
; SANDY-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
|
; SANDY-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pcmpistri:
|
; HASWELL-LABEL: test_pcmpistri:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
|
@ -383,7 +383,7 @@ define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
|
||||||
; HASWELL-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [11:3.00]
|
; HASWELL-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [11:3.00]
|
||||||
; HASWELL-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
|
; HASWELL-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
|
||||||
; HASWELL-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
|
; HASWELL-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pcmpistri:
|
; BTVER2-LABEL: test_pcmpistri:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -416,15 +416,15 @@ define <16 x i8> @test_pcmpistrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_pcmpistrm:
|
; SANDY-LABEL: test_pcmpistrm:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:1.00]
|
; SANDY-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
|
||||||
; SANDY-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [11:1.00]
|
; SANDY-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pcmpistrm:
|
; HASWELL-LABEL: test_pcmpistrm:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00]
|
; HASWELL-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
|
||||||
; HASWELL-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [10:3.00]
|
; HASWELL-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [11:3.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pcmpistrm:
|
; BTVER2-LABEL: test_pcmpistrm:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -453,15 +453,15 @@ define <2 x i64> @test_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_pcmpgtq:
|
; SANDY-LABEL: test_pcmpgtq:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
||||||
; SANDY-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; SANDY-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pcmpgtq:
|
; HASWELL-LABEL: test_pcmpgtq:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
; HASWELL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
||||||
; HASWELL-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
|
; HASWELL-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pcmpgtq:
|
; BTVER2-LABEL: test_pcmpgtq:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
|
|
@ -35,16 +35,16 @@ define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) {
|
||||||
; SANDY-LABEL: test_pabsb:
|
; SANDY-LABEL: test_pabsb:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpabsb (%rdi), %xmm1 # sched: [5:0.50]
|
; SANDY-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50]
|
||||||
; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
|
; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pabsb:
|
; HASWELL-LABEL: test_pabsb:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpabsb (%rdi), %xmm1 # sched: [5:0.50]
|
; HASWELL-NEXT: vpabsb (%rdi), %xmm1 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
|
; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pabsb:
|
; BTVER2-LABEL: test_pabsb:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -86,16 +86,16 @@ define <4 x i32> @test_pabsd(<4 x i32> %a0, <4 x i32> *%a1) {
|
||||||
; SANDY-LABEL: test_pabsd:
|
; SANDY-LABEL: test_pabsd:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpabsd (%rdi), %xmm1 # sched: [5:0.50]
|
; SANDY-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50]
|
||||||
; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
|
; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pabsd:
|
; HASWELL-LABEL: test_pabsd:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpabsd (%rdi), %xmm1 # sched: [5:0.50]
|
; HASWELL-NEXT: vpabsd (%rdi), %xmm1 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
|
; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pabsd:
|
; BTVER2-LABEL: test_pabsd:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -136,12 +136,12 @@ define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) {
|
||||||
; SANDY-LABEL: test_pabsw:
|
; SANDY-LABEL: test_pabsw:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pabsw:
|
; HASWELL-LABEL: test_pabsw:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pabsw:
|
; BTVER2-LABEL: test_pabsw:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -182,14 +182,14 @@ define <8 x i16> @test_palignr(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
|
||||||
; SANDY-LABEL: test_palignr:
|
; SANDY-LABEL: test_palignr:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
|
; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [5:0.50]
|
; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_palignr:
|
; HASWELL-LABEL: test_palignr:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
|
; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [5:1.00]
|
; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [1:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_palignr:
|
; BTVER2-LABEL: test_palignr:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -223,15 +223,15 @@ define <4 x i32> @test_phaddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_phaddd:
|
; SANDY-LABEL: test_phaddd:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
|
||||||
; SANDY-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; SANDY-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_phaddd:
|
; HASWELL-LABEL: test_phaddd:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
|
; HASWELL-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
|
||||||
; HASWELL-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
|
; HASWELL-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_phaddd:
|
; BTVER2-LABEL: test_phaddd:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -274,15 +274,15 @@ define <8 x i16> @test_phaddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_phaddsw:
|
; SANDY-LABEL: test_phaddsw:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
|
||||||
; SANDY-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; SANDY-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_phaddsw:
|
; HASWELL-LABEL: test_phaddsw:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
|
; HASWELL-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
|
||||||
; HASWELL-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
|
; HASWELL-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_phaddsw:
|
; BTVER2-LABEL: test_phaddsw:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -317,15 +317,15 @@ define <8 x i16> @test_phaddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_phaddw:
|
; SANDY-LABEL: test_phaddw:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
|
||||||
; SANDY-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; SANDY-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_phaddw:
|
; HASWELL-LABEL: test_phaddw:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
|
; HASWELL-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
|
||||||
; HASWELL-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
|
; HASWELL-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_phaddw:
|
; BTVER2-LABEL: test_phaddw:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -360,15 +360,15 @@ define <4 x i32> @test_phsubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_phsubd:
|
; SANDY-LABEL: test_phsubd:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
|
||||||
; SANDY-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; SANDY-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_phsubd:
|
; HASWELL-LABEL: test_phsubd:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
|
; HASWELL-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
|
||||||
; HASWELL-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
|
; HASWELL-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_phsubd:
|
; BTVER2-LABEL: test_phsubd:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -411,15 +411,15 @@ define <8 x i16> @test_phsubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_phsubsw:
|
; SANDY-LABEL: test_phsubsw:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
|
||||||
; SANDY-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; SANDY-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_phsubsw:
|
; HASWELL-LABEL: test_phsubsw:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
|
; HASWELL-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
|
||||||
; HASWELL-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
|
; HASWELL-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_phsubsw:
|
; BTVER2-LABEL: test_phsubsw:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -454,15 +454,15 @@ define <8 x i16> @test_phsubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_phsubw:
|
; SANDY-LABEL: test_phsubw:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
|
||||||
; SANDY-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; SANDY-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_phsubw:
|
; HASWELL-LABEL: test_phsubw:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
|
; HASWELL-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
|
||||||
; HASWELL-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
|
; HASWELL-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_phsubw:
|
; BTVER2-LABEL: test_phsubw:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -497,15 +497,15 @@ define <8 x i16> @test_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_pmaddubsw:
|
; SANDY-LABEL: test_pmaddubsw:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
|
; SANDY-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pmaddubsw:
|
; HASWELL-LABEL: test_pmaddubsw:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
; HASWELL-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
||||||
; HASWELL-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
|
; HASWELL-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pmaddubsw:
|
; BTVER2-LABEL: test_pmaddubsw:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -538,13 +538,13 @@ define <8 x i16> @test_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
|
||||||
;
|
;
|
||||||
; SANDY-LABEL: test_pmulhrsw:
|
; SANDY-LABEL: test_pmulhrsw:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
; SANDY-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pmulhrsw:
|
; HASWELL-LABEL: test_pmulhrsw:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
; HASWELL-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pmulhrsw:
|
; BTVER2-LABEL: test_pmulhrsw:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -579,14 +579,14 @@ define <16 x i8> @test_pshufb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
|
||||||
; SANDY-LABEL: test_pshufb:
|
; SANDY-LABEL: test_pshufb:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; SANDY-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_pshufb:
|
; HASWELL-LABEL: test_pshufb:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
|
; HASWELL-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
|
; HASWELL-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_pshufb:
|
; BTVER2-LABEL: test_pshufb:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -630,14 +630,14 @@ define <16 x i8> @test_psignb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
|
||||||
; SANDY-LABEL: test_psignb:
|
; SANDY-LABEL: test_psignb:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; SANDY-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_psignb:
|
; HASWELL-LABEL: test_psignb:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; HASWELL-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_psignb:
|
; BTVER2-LABEL: test_psignb:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -681,14 +681,14 @@ define <4 x i32> @test_psignd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
|
||||||
; SANDY-LABEL: test_psignd:
|
; SANDY-LABEL: test_psignd:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; SANDY-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_psignd:
|
; HASWELL-LABEL: test_psignd:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; HASWELL-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_psignd:
|
; BTVER2-LABEL: test_psignd:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
@ -732,14 +732,14 @@ define <8 x i16> @test_psignw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
|
||||||
; SANDY-LABEL: test_psignw:
|
; SANDY-LABEL: test_psignw:
|
||||||
; SANDY: # BB#0:
|
; SANDY: # BB#0:
|
||||||
; SANDY-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; SANDY-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; SANDY-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; SANDY-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
|
||||||
; SANDY-NEXT: retq # sched: [5:1.00]
|
; SANDY-NEXT: retq # sched: [1:1.00]
|
||||||
;
|
;
|
||||||
; HASWELL-LABEL: test_psignw:
|
; HASWELL-LABEL: test_psignw:
|
||||||
; HASWELL: # BB#0:
|
; HASWELL: # BB#0:
|
||||||
; HASWELL-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
; HASWELL-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
|
; HASWELL-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
|
||||||
; HASWELL-NEXT: retq # sched: [1:1.00]
|
; HASWELL-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; BTVER2-LABEL: test_psignw:
|
; BTVER2-LABEL: test_psignw:
|
||||||
; BTVER2: # BB#0:
|
; BTVER2: # BB#0:
|
||||||
|
|
|
@ -201,14 +201,14 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
|
||||||
; AVX512DQ-NEXT: vpsraw $2, %ymm0, %ymm5
|
; AVX512DQ-NEXT: vpsraw $2, %ymm0, %ymm5
|
||||||
; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm8
|
; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm8
|
||||||
; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm5, %ymm0, %ymm0
|
; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm5, %ymm0, %ymm0
|
||||||
|
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
|
||||||
|
; AVX512DQ-NEXT: vpsraw $4, %ymm5, %ymm9
|
||||||
|
; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm9, %ymm5, %ymm3
|
||||||
; AVX512DQ-NEXT: vpsraw $1, %ymm0, %ymm5
|
; AVX512DQ-NEXT: vpsraw $1, %ymm0, %ymm5
|
||||||
; AVX512DQ-NEXT: vpaddw %ymm8, %ymm8, %ymm9
|
; AVX512DQ-NEXT: vpaddw %ymm8, %ymm8, %ymm9
|
||||||
; AVX512DQ-NEXT: vpblendvb %ymm9, %ymm5, %ymm0, %ymm0
|
; AVX512DQ-NEXT: vpblendvb %ymm9, %ymm5, %ymm0, %ymm0
|
||||||
; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0
|
; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0
|
||||||
; AVX512DQ-NEXT: vpackuswb %ymm4, %ymm0, %ymm0
|
; AVX512DQ-NEXT: vpackuswb %ymm4, %ymm0, %ymm0
|
||||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
|
|
||||||
; AVX512DQ-NEXT: vpsraw $4, %ymm4, %ymm5
|
|
||||||
; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm3
|
|
||||||
; AVX512DQ-NEXT: vpsraw $2, %ymm3, %ymm4
|
; AVX512DQ-NEXT: vpsraw $2, %ymm3, %ymm4
|
||||||
; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
|
; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
|
||||||
; AVX512DQ-NEXT: vpsraw $1, %ymm3, %ymm4
|
; AVX512DQ-NEXT: vpsraw $1, %ymm3, %ymm4
|
||||||
|
@ -328,14 +328,14 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
|
||||||
; AVX512DQ-NEXT: vpsraw $2, %ymm0, %ymm5
|
; AVX512DQ-NEXT: vpsraw $2, %ymm0, %ymm5
|
||||||
; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm8
|
; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm8
|
||||||
; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm5, %ymm0, %ymm0
|
; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm5, %ymm0, %ymm0
|
||||||
|
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
|
||||||
|
; AVX512DQ-NEXT: vpsraw $4, %ymm5, %ymm9
|
||||||
|
; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm9, %ymm5, %ymm3
|
||||||
; AVX512DQ-NEXT: vpsraw $1, %ymm0, %ymm5
|
; AVX512DQ-NEXT: vpsraw $1, %ymm0, %ymm5
|
||||||
; AVX512DQ-NEXT: vpaddw %ymm8, %ymm8, %ymm9
|
; AVX512DQ-NEXT: vpaddw %ymm8, %ymm8, %ymm9
|
||||||
; AVX512DQ-NEXT: vpblendvb %ymm9, %ymm5, %ymm0, %ymm0
|
; AVX512DQ-NEXT: vpblendvb %ymm9, %ymm5, %ymm0, %ymm0
|
||||||
; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0
|
; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0
|
||||||
; AVX512DQ-NEXT: vpackuswb %ymm4, %ymm0, %ymm0
|
; AVX512DQ-NEXT: vpackuswb %ymm4, %ymm0, %ymm0
|
||||||
; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
|
|
||||||
; AVX512DQ-NEXT: vpsraw $4, %ymm4, %ymm5
|
|
||||||
; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm3
|
|
||||||
; AVX512DQ-NEXT: vpsraw $2, %ymm3, %ymm4
|
; AVX512DQ-NEXT: vpsraw $2, %ymm3, %ymm4
|
||||||
; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
|
; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
|
||||||
; AVX512DQ-NEXT: vpsraw $1, %ymm3, %ymm4
|
; AVX512DQ-NEXT: vpsraw $1, %ymm3, %ymm4
|
||||||
|
|
|
@ -68,13 +68,13 @@ define <32 x i16> @shuffle_v32i16_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_1
|
||||||
; KNL-NEXT: vpshufb {{.*#+}} xmm4 = xmm1[8,9,12,13,12,13,10,11,0,1,4,5,4,5,0,1]
|
; KNL-NEXT: vpshufb {{.*#+}} xmm4 = xmm1[8,9,12,13,12,13,10,11,0,1,4,5,4,5,0,1]
|
||||||
; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,1,0,3]
|
; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,1,0,3]
|
||||||
; KNL-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[0,3,2,2,4,5,6,7]
|
; KNL-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[0,3,2,2,4,5,6,7]
|
||||||
; KNL-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm1
|
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||||
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm5
|
; KNL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
|
||||||
; KNL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
|
|
||||||
; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,10,11,8,9,14,15,4,5,2,3,2,3,6,7]
|
; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,10,11,8,9,14,15,4,5,2,3,2,3,6,7]
|
||||||
; KNL-NEXT: vpshufb {{.*#+}} xmm5 = xmm5[6,7,2,3,4,5,6,7,2,3,2,3,0,1,14,15]
|
; KNL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[6,7,2,3,4,5,6,7,2,3,2,3,0,1,14,15]
|
||||||
; KNL-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0
|
; KNL-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm5
|
||||||
; KNL-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
|
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
|
||||||
|
; KNL-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm5[1],ymm0[2],ymm5[3],ymm0[4],ymm5[5],ymm0[6],ymm5[7],ymm0[8],ymm5[9],ymm0[10],ymm5[11],ymm0[12],ymm5[13],ymm0[14],ymm5[15]
|
||||||
; KNL-NEXT: vextracti128 $1, %ymm3, %xmm3
|
; KNL-NEXT: vextracti128 $1, %ymm3, %xmm3
|
||||||
; KNL-NEXT: vpbroadcastw %xmm3, %ymm3
|
; KNL-NEXT: vpbroadcastw %xmm3, %ymm3
|
||||||
; KNL-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0]
|
; KNL-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0]
|
||||||
|
|
Loading…
Reference in New Issue