forked from OSchip/llvm-project
AVX512: Add store mask patterns.
Differential Revision: http://reviews.llvm.org/D16596
llvm-svn: 258914
This commit is contained in:
parent
0747c5c808
commit
d6c187b038
|
@ -1412,9 +1412,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i1, Custom);
|
||||
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i1, Custom);
|
||||
if (Subtarget.hasDQI()) {
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
|
||||
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
|
||||
|
@ -1708,6 +1705,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
|
||||
addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
|
||||
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
|
||||
setOperationAction(ISD::SETCC, MVT::v4i1, Custom);
|
||||
setOperationAction(ISD::SETCC, MVT::v2i1, Custom);
|
||||
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
|
||||
|
|
|
@ -2059,9 +2059,6 @@ let Predicates = [HasBWI] in {
|
|||
VEX, PD, VEX_W;
|
||||
defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
|
||||
VEX, XD;
|
||||
}
|
||||
|
||||
let Predicates = [HasBWI] in {
|
||||
defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
|
||||
VEX, PS, VEX_W;
|
||||
defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
|
||||
|
@ -2101,8 +2098,27 @@ let Predicates = [HasDQI] in {
|
|||
(KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>;
|
||||
def : Pat<(store VK2:$src, addr:$dst),
|
||||
(KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK2:$src, VK8))>;
|
||||
def : Pat<(store VK1:$src, addr:$dst),
|
||||
(KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
|
||||
}
|
||||
let Predicates = [HasAVX512, NoDQI] in {
|
||||
def : Pat<(store VK1:$src, addr:$dst),
|
||||
(MOV8mr addr:$dst,
|
||||
(EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)),
|
||||
sub_8bit))>;
|
||||
def : Pat<(store VK2:$src, addr:$dst),
|
||||
(MOV8mr addr:$dst,
|
||||
(EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK2:$src, VK16)),
|
||||
sub_8bit))>;
|
||||
def : Pat<(store VK4:$src, addr:$dst),
|
||||
(MOV8mr addr:$dst,
|
||||
(EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK4:$src, VK16)),
|
||||
sub_8bit))>;
|
||||
def : Pat<(store VK8:$src, addr:$dst),
|
||||
(MOV8mr addr:$dst,
|
||||
(EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
|
||||
sub_8bit))>;
|
||||
|
||||
def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
|
||||
(KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK8:$src, VK16))>;
|
||||
def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
|
||||
|
@ -2182,6 +2198,17 @@ def : Pat<(v32i1 (scalar_to_vector VK1:$src)),
|
|||
def : Pat<(v64i1 (scalar_to_vector VK1:$src)),
|
||||
(COPY_TO_REGCLASS VK1:$src, VK64)>;
|
||||
|
||||
def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
|
||||
def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
|
||||
def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
|
||||
|
||||
def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstore node:$val, node:$ptr), [{
|
||||
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i1;
|
||||
}]>;
|
||||
|
||||
def : Pat<(truncstorei1 GR8:$src, addr:$dst),
|
||||
(MOV8mr addr:$dst, GR8:$src)>;
|
||||
|
||||
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
|
||||
let Predicates = [HasAVX512, NoDQI] in {
|
||||
|
@ -6562,28 +6589,6 @@ defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd
|
|||
def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
|
||||
def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
|
||||
|
||||
def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
|
||||
def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
|
||||
def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
|
||||
|
||||
def : Pat<(store VK1:$src, addr:$dst),
|
||||
(MOV8mr addr:$dst,
|
||||
(EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)),
|
||||
sub_8bit))>, Requires<[HasAVX512, NoDQI]>;
|
||||
|
||||
def : Pat<(store VK8:$src, addr:$dst),
|
||||
(MOV8mr addr:$dst,
|
||||
(EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
|
||||
sub_8bit))>, Requires<[HasAVX512, NoDQI]>;
|
||||
|
||||
def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
|
||||
(truncstore node:$val, node:$ptr), [{
|
||||
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i1;
|
||||
}]>;
|
||||
|
||||
def : Pat<(truncstorei1 GR8:$src, addr:$dst),
|
||||
(MOV8mr addr:$dst, GR8:$src)>;
|
||||
|
||||
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
|
||||
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
|
||||
!strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
|
||||
|
|
|
@ -1442,3 +1442,183 @@ define void @test23(<2 x i1> %a, <2 x i1>* %addr) {
|
|||
store <2 x i1> %a, <2 x i1>* %addr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) {
|
||||
; KNL-LABEL: store_v1i1:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: andl $1, %edi
|
||||
; KNL-NEXT: kmovw %edi, %k0
|
||||
; KNL-NEXT: kxnorw %k0, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: movb %al, (%rsi)
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: store_v1i1:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: andl $1, %edi
|
||||
; SKX-NEXT: kmovw %edi, %k0
|
||||
; SKX-NEXT: kxnorw %k0, %k0, %k1
|
||||
; SKX-NEXT: kshiftrw $15, %k1, %k1
|
||||
; SKX-NEXT: kxorw %k1, %k0, %k0
|
||||
; SKX-NEXT: kmovb %k0, (%rsi)
|
||||
; SKX-NEXT: retq
|
||||
%x = xor <1 x i1> %c, <i1 1>
|
||||
store <1 x i1> %x, <1 x i1>* %ptr, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
|
||||
; KNL-LABEL: store_v2i1:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
|
||||
; KNL-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; KNL-NEXT: andl $1, %eax
|
||||
; KNL-NEXT: movb %al, (%rdi)
|
||||
; KNL-NEXT: vmovq %xmm0, %rax
|
||||
; KNL-NEXT: andl $1, %eax
|
||||
; KNL-NEXT: movb %al, (%rdi)
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: store_v2i1:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
|
||||
; SKX-NEXT: vpmovq2m %xmm0, %k0
|
||||
; SKX-NEXT: knotw %k0, %k0
|
||||
; SKX-NEXT: kmovb %k0, (%rdi)
|
||||
; SKX-NEXT: retq
|
||||
%x = xor <2 x i1> %c, <i1 1, i1 1>
|
||||
store <2 x i1> %x, <2 x i1>* %ptr, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
|
||||
; KNL-LABEL: store_v4i1:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
|
||||
; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; KNL-NEXT: vpextrd $3, %xmm0, %eax
|
||||
; KNL-NEXT: andl $1, %eax
|
||||
; KNL-NEXT: movb %al, (%rdi)
|
||||
; KNL-NEXT: vpextrd $2, %xmm0, %eax
|
||||
; KNL-NEXT: andl $1, %eax
|
||||
; KNL-NEXT: movb %al, (%rdi)
|
||||
; KNL-NEXT: vpextrd $1, %xmm0, %eax
|
||||
; KNL-NEXT: andl $1, %eax
|
||||
; KNL-NEXT: movb %al, (%rdi)
|
||||
; KNL-NEXT: vmovd %xmm0, %eax
|
||||
; KNL-NEXT: andl $1, %eax
|
||||
; KNL-NEXT: movb %al, (%rdi)
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: store_v4i1:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; SKX-NEXT: vpmovd2m %xmm0, %k0
|
||||
; SKX-NEXT: knotw %k0, %k0
|
||||
; SKX-NEXT: kmovb %k0, (%rdi)
|
||||
; SKX-NEXT: retq
|
||||
%x = xor <4 x i1> %c, <i1 1, i1 1, i1 1, i1 1>
|
||||
store <4 x i1> %x, <4 x i1>* %ptr, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) {
|
||||
; KNL-LABEL: store_v8i1:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
|
||||
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
|
||||
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
|
||||
; KNL-NEXT: knotw %k0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: movb %al, (%rdi)
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: store_v8i1:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
|
||||
; SKX-NEXT: vpmovw2m %xmm0, %k0
|
||||
; SKX-NEXT: knotb %k0, %k0
|
||||
; SKX-NEXT: kmovb %k0, (%rdi)
|
||||
; SKX-NEXT: retq
|
||||
%x = xor <8 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
|
||||
store <8 x i1> %x, <8 x i1>* %ptr, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) {
|
||||
; KNL-LABEL: store_v16i1:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
|
||||
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||
; KNL-NEXT: knotw %k0, %k0
|
||||
; KNL-NEXT: kmovw %k0, (%rdi)
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: store_v16i1:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
|
||||
; SKX-NEXT: vpmovb2m %xmm0, %k0
|
||||
; SKX-NEXT: knotw %k0, %k0
|
||||
; SKX-NEXT: kmovw %k0, (%rdi)
|
||||
; SKX-NEXT: retq
|
||||
%x = xor <16 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
|
||||
store <16 x i1> %x, <16 x i1>* %ptr, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
;void f2(int);
|
||||
;void f1(int c)
|
||||
;{
|
||||
; static int v = 0;
|
||||
; if (v == 0)
|
||||
; v = 1;
|
||||
; else
|
||||
; v = 0;
|
||||
; f2(v);
|
||||
;}
|
||||
|
||||
@f1.v = internal unnamed_addr global i1 false, align 4
|
||||
|
||||
define void @f1(i32 %c) {
|
||||
; KNL-LABEL: f1:
|
||||
; KNL: ## BB#0: ## %entry
|
||||
; KNL-NEXT: movzbl {{.*}}(%rip), %edi
|
||||
; KNL-NEXT: andl $1, %edi
|
||||
; KNL-NEXT: movl %edi, %eax
|
||||
; KNL-NEXT: andl $1, %eax
|
||||
; KNL-NEXT: kmovw %eax, %k0
|
||||
; KNL-NEXT: kxnorw %k0, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: kxorw %k1, %k0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: movb %al, {{.*}}(%rip)
|
||||
; KNL-NEXT: xorl $1, %edi
|
||||
; KNL-NEXT: jmp _f2 ## TAILCALL
|
||||
;
|
||||
; SKX-LABEL: f1:
|
||||
; SKX: ## BB#0: ## %entry
|
||||
; SKX-NEXT: movzbl {{.*}}(%rip), %edi
|
||||
; SKX-NEXT: andl $1, %edi
|
||||
; SKX-NEXT: movl %edi, %eax
|
||||
; SKX-NEXT: andl $1, %eax
|
||||
; SKX-NEXT: kmovw %eax, %k0
|
||||
; SKX-NEXT: kxnorw %k0, %k0, %k1
|
||||
; SKX-NEXT: kshiftrw $15, %k1, %k1
|
||||
; SKX-NEXT: kxorw %k1, %k0, %k0
|
||||
; SKX-NEXT: kmovb %k0, {{.*}}(%rip)
|
||||
; SKX-NEXT: xorl $1, %edi
|
||||
; SKX-NEXT: jmp _f2 ## TAILCALL
|
||||
entry:
|
||||
%.b1 = load i1, i1* @f1.v, align 4
|
||||
%not..b1 = xor i1 %.b1, true
|
||||
store i1 %not..b1, i1* @f1.v, align 4
|
||||
%0 = zext i1 %not..b1 to i32
|
||||
tail call void @f2(i32 %0) #2
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @f2(i32) #1
|
||||
|
||||
|
|
Loading…
Reference in New Issue