forked from OSchip/llvm-project
[AVX512] Use MOVZX32 instead of MOVZX16 for loading single v8/v4/v2/v1 masks when KMOVB is not available. This has better behavior with respect to partial register stalls since it won't need to preserve the upper 16 bits of the GPR.
llvm-svn: 272626
This commit is contained in:
parent
ddab395397
commit
99e30e6a66
|
@ -2050,18 +2050,18 @@ let Predicates = [HasAVX512, NoDQI] in {
|
|||
sub_8bit))>;
|
||||
|
||||
def : Pat<(v8i1 (load addr:$src)),
|
||||
(COPY_TO_REGCLASS (MOVZX16rm8 addr:$src), VK8)>;
|
||||
(COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
|
||||
def : Pat<(v2i1 (load addr:$src)),
|
||||
(COPY_TO_REGCLASS (MOVZX16rm8 addr:$src), VK2)>;
|
||||
(COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK2)>;
|
||||
def : Pat<(v4i1 (load addr:$src)),
|
||||
(COPY_TO_REGCLASS (MOVZX16rm8 addr:$src), VK4)>;
|
||||
(COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK4)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX512] in {
|
||||
def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
|
||||
(KMOVWmk addr:$dst, VK16:$src)>;
|
||||
def : Pat<(i1 (load addr:$src)),
|
||||
(COPY_TO_REGCLASS (AND16ri (MOVZX16rm8 addr:$src), (i16 1)), VK1)>;
|
||||
(COPY_TO_REGCLASS (AND32ri (MOVZX32rm8 addr:$src), (i32 1)), VK1)>;
|
||||
def : Pat<(v16i1 (bitconvert (i16 (load addr:$src)))),
|
||||
(KMOVWkm addr:$src)>;
|
||||
}
|
||||
|
|
|
@ -200,7 +200,7 @@ define i16 @test15(i1 *%addr) {
|
|||
}
|
||||
|
||||
;CHECK-LABEL: test16
|
||||
;CHECK: movzbw (%rdi), %ax
|
||||
;CHECK: movzbl (%rdi), %eax
|
||||
;CHECK: kmovw
|
||||
;CHECK: kshiftlw $10
|
||||
;CHECK: korw
|
||||
|
@ -214,8 +214,8 @@ define i16 @test16(i1 *%addr, i16 %a) {
|
|||
}
|
||||
|
||||
;CHECK-LABEL: test17
|
||||
;KNL: movzbw (%rdi), %ax
|
||||
;KNL: andw $1, %ax
|
||||
;KNL: movzbl (%rdi), %eax
|
||||
;KNL: andl $1, %eax
|
||||
;KNL: kshiftlw $4
|
||||
;KNL: korw
|
||||
;SKX: kshiftlb $4
|
||||
|
|
|
@ -88,7 +88,7 @@ define void @mask16_mem(i16* %ptr) {
|
|||
define void @mask8_mem(i8* %ptr) {
|
||||
; KNL-LABEL: mask8_mem:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: movzbw (%rdi), %ax
|
||||
; KNL-NEXT: movzbl (%rdi), %eax
|
||||
; KNL-NEXT: kmovw %eax, %k0
|
||||
; KNL-NEXT: knotw %k0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
|
@ -1341,7 +1341,7 @@ End:
|
|||
define <8 x i64> @load_8i1(<8 x i1>* %a) {
|
||||
; KNL-LABEL: load_8i1:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: movzbw (%rdi), %ax
|
||||
; KNL-NEXT: movzbl (%rdi), %eax
|
||||
; KNL-NEXT: kmovw %eax, %k1
|
||||
; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: retq
|
||||
|
@ -1376,7 +1376,7 @@ define <16 x i32> @load_16i1(<16 x i1>* %a) {
|
|||
define <2 x i16> @load_2i1(<2 x i1>* %a) {
|
||||
; KNL-LABEL: load_2i1:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: movzbw (%rdi), %ax
|
||||
; KNL-NEXT: movzbl (%rdi), %eax
|
||||
; KNL-NEXT: kmovw %eax, %k1
|
||||
; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: retq
|
||||
|
@ -1394,7 +1394,7 @@ define <2 x i16> @load_2i1(<2 x i1>* %a) {
|
|||
define <4 x i16> @load_4i1(<4 x i1>* %a) {
|
||||
; KNL-LABEL: load_4i1:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: movzbw (%rdi), %ax
|
||||
; KNL-NEXT: movzbl (%rdi), %eax
|
||||
; KNL-NEXT: kmovw %eax, %k1
|
||||
; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
|
||||
; KNL-NEXT: vpmovqd %zmm0, %ymm0
|
||||
|
|
|
@ -84,9 +84,9 @@ define i8 @select05(i8 %a.0, i8 %m) {
|
|||
define i8 @select05_mem(<8 x i1>* %a.0, <8 x i1>* %m) {
|
||||
; CHECK-LABEL: select05_mem:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movzbw (%rsi), %ax
|
||||
; CHECK-NEXT: movzbl (%rsi), %eax
|
||||
; CHECK-NEXT: kmovw %eax, %k0
|
||||
; CHECK-NEXT: movzbw (%rdi), %ax
|
||||
; CHECK-NEXT: movzbl (%rdi), %eax
|
||||
; CHECK-NEXT: kmovw %eax, %k1
|
||||
; CHECK-NEXT: korw %k1, %k0, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
|
@ -114,9 +114,9 @@ define i8 @select06(i8 %a.0, i8 %m) {
|
|||
define i8 @select06_mem(<8 x i1>* %a.0, <8 x i1>* %m) {
|
||||
; CHECK-LABEL: select06_mem:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: movzbw (%rsi), %ax
|
||||
; CHECK-NEXT: movzbl (%rsi), %eax
|
||||
; CHECK-NEXT: kmovw %eax, %k0
|
||||
; CHECK-NEXT: movzbw (%rdi), %ax
|
||||
; CHECK-NEXT: movzbl (%rdi), %eax
|
||||
; CHECK-NEXT: kmovw %eax, %k1
|
||||
; CHECK-NEXT: kandw %k1, %k0, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
|
|
|
@ -291,7 +291,7 @@ define <8 x i32> @test7(i32* %base, <8 x i32> %ind, i8 %mask) {
|
|||
; KNL_32-LABEL: test7:
|
||||
; KNL_32: # BB#0:
|
||||
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; KNL_32-NEXT: movzbw {{[0-9]+}}(%esp), %cx
|
||||
; KNL_32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
|
||||
; KNL_32-NEXT: kmovw %ecx, %k1
|
||||
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
|
||||
; KNL_32-NEXT: kmovw %k1, %k2
|
||||
|
|
Loading…
Reference in New Issue