forked from OSchip/llvm-project
[AVX512] Add avx512bw command lines to 128-bit idiv tests.
The multiply lowering in some of these tests can take advantage of vpmovwb to simplify the truncate.
llvm-svn: 314448
This commit is contained in:
parent
3819be6cf6
commit
56bfbfb117
|
@ -2,7 +2,8 @@
|
|||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX2NOBW
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX512BW
|
||||
|
||||
;
|
||||
; sdiv by 7
|
||||
|
@ -585,32 +586,55 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
|
|||
; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: test_rem7_16i8:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpmovsxbw %xmm0, %ymm1
|
||||
; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX2-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpsrlw $2, %xmm1, %xmm2
|
||||
; AVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX2-NEXT: vpxor %xmm3, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpsubb %xmm3, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpsrlw $7, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpaddb %xmm1, %xmm2, %xmm1
|
||||
; AVX2-NEXT: vpmovsxbw %xmm1, %ymm1
|
||||
; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
|
||||
; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
|
||||
; AVX2-NEXT: vpsubb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
; AVX2NOBW-LABEL: test_rem7_16i8:
|
||||
; AVX2NOBW: # BB#0:
|
||||
; AVX2NOBW-NEXT: vpmovsxbw %xmm0, %ymm1
|
||||
; AVX2NOBW-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX2NOBW-NEXT: vpsrlw $8, %ymm1, %ymm1
|
||||
; AVX2NOBW-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX2NOBW-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
|
||||
; AVX2NOBW-NEXT: vpaddb %xmm0, %xmm1, %xmm1
|
||||
; AVX2NOBW-NEXT: vpsrlw $2, %xmm1, %xmm2
|
||||
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX2NOBW-NEXT: vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX2NOBW-NEXT: vpxor %xmm3, %xmm2, %xmm2
|
||||
; AVX2NOBW-NEXT: vpsubb %xmm3, %xmm2, %xmm2
|
||||
; AVX2NOBW-NEXT: vpsrlw $7, %xmm1, %xmm1
|
||||
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX2NOBW-NEXT: vpaddb %xmm1, %xmm2, %xmm1
|
||||
; AVX2NOBW-NEXT: vpmovsxbw %xmm1, %ymm1
|
||||
; AVX2NOBW-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX2NOBW-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX2NOBW-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
|
||||
; AVX2NOBW-NEXT: vpshufb %xmm3, %xmm2, %xmm2
|
||||
; AVX2NOBW-NEXT: vpshufb %xmm3, %xmm1, %xmm1
|
||||
; AVX2NOBW-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
|
||||
; AVX2NOBW-NEXT: vpsubb %xmm1, %xmm0, %xmm0
|
||||
; AVX2NOBW-NEXT: vzeroupper
|
||||
; AVX2NOBW-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: test_rem7_16i8:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm1
|
||||
; AVX512BW-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vpsrlw $8, %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512BW-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpaddb %xmm0, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpsrlw $2, %xmm1, %xmm2
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
|
||||
; AVX512BW-NEXT: vpxor %xmm3, %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpsubb %xmm3, %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpsrlw $7, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpaddb %xmm1, %xmm2, %xmm1
|
||||
; AVX512BW-NEXT: vpmovsxbw %xmm1, %ymm1
|
||||
; AVX512BW-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
|
||||
; AVX512BW-NEXT: vpsubb %xmm1, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
%res = srem <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
|
|
@ -2,7 +2,8 @@
|
|||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX2NOBW
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=AVX512BW
|
||||
|
||||
;
|
||||
; udiv by 7
|
||||
|
@ -556,29 +557,49 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind {
|
|||
; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: test_rem7_16i8:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX2-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpsubb %xmm1, %xmm0, %xmm2
|
||||
; AVX2-NEXT: vpsrlw $1, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpaddb %xmm1, %xmm2, %xmm1
|
||||
; AVX2-NEXT: vpsrlw $2, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpmovsxbw %xmm1, %ymm1
|
||||
; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
|
||||
; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
|
||||
; AVX2-NEXT: vpsubb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
; AVX2NOBW-LABEL: test_rem7_16i8:
|
||||
; AVX2NOBW: # BB#0:
|
||||
; AVX2NOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX2NOBW-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX2NOBW-NEXT: vpsrlw $8, %ymm1, %ymm1
|
||||
; AVX2NOBW-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX2NOBW-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
|
||||
; AVX2NOBW-NEXT: vpsubb %xmm1, %xmm0, %xmm2
|
||||
; AVX2NOBW-NEXT: vpsrlw $1, %xmm2, %xmm2
|
||||
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX2NOBW-NEXT: vpaddb %xmm1, %xmm2, %xmm1
|
||||
; AVX2NOBW-NEXT: vpsrlw $2, %xmm1, %xmm1
|
||||
; AVX2NOBW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX2NOBW-NEXT: vpmovsxbw %xmm1, %ymm1
|
||||
; AVX2NOBW-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX2NOBW-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX2NOBW-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
|
||||
; AVX2NOBW-NEXT: vpshufb %xmm3, %xmm2, %xmm2
|
||||
; AVX2NOBW-NEXT: vpshufb %xmm3, %xmm1, %xmm1
|
||||
; AVX2NOBW-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
|
||||
; AVX2NOBW-NEXT: vpsubb %xmm1, %xmm0, %xmm0
|
||||
; AVX2NOBW-NEXT: vzeroupper
|
||||
; AVX2NOBW-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: test_rem7_16i8:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
|
||||
; AVX512BW-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vpsrlw $8, %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm2
|
||||
; AVX512BW-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpsubb %xmm1, %xmm0, %xmm2
|
||||
; AVX512BW-NEXT: vpsrlw $1, %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
|
||||
; AVX512BW-NEXT: vpaddb %xmm1, %xmm2, %xmm1
|
||||
; AVX512BW-NEXT: vpsrlw $2, %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX512BW-NEXT: vpmovsxbw %xmm1, %ymm1
|
||||
; AVX512BW-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
|
||||
; AVX512BW-NEXT: vpsubb %xmm1, %xmm0, %xmm0
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
%res = urem <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue