[X86] Add 128 and 256-bit VPOPCNTDQ instructions. Adjust some tablegen classes used by LZCNT/POPCNT.

I think when this instruction was first published it was only for a Knights CPU, and thus the VLX versions were missing.
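
As a quick illustration (a minimal sketch; the function name is illustrative and the resulting assembly is taken from the updated tests in this commit), a plain 256-bit ctpop such as

  define <4 x i64> @popcnt_v4i64(<4 x i64> %in) {
    %out = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %in)
    ret <4 x i64> %out
  }
  declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)

now selects a single vpopcntq %ymm0, %ymm0 under -mattr=+avx512vpopcntdq,+avx512vl, instead of bouncing through a zmm register as the VLX-less lowering does.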

llvm-svn: 320910
Craig Topper 2017-12-16 02:40:28 +00:00
parent 12f9b8cf24
commit c08960597c
5 changed files with 324 additions and 64 deletions


@ -9563,82 +9563,50 @@ let Predicates = [HasAVX512, NoVLX] in {
sub_xmm)>;
}
multiclass avx512_ctlz<bits<8> opc, string OpcodeStr, OpndItins itins,
                       Predicate prd> {
  defm NAME : avx512_unary_rm_vl_dq<opc, opc, OpcodeStr, ctlz, itins, prd>;
// Use 512bit version to implement 128/256 bit.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX] in {
    def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
              (EXTRACT_SUBREG
               (!cast<Instruction>(InstrStr # "Zrr")
                (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                               _.info256.RC:$src1,
                               _.info256.SubRegIdx)),
               _.info256.SubRegIdx)>;
    def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
              (EXTRACT_SUBREG
               (!cast<Instruction>(InstrStr # "Zrr")
                (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                               _.info128.RC:$src1,
                               _.info128.SubRegIdx)),
               _.info128.SubRegIdx)>;
  }
}
// FIXME: Is there a better scheduler itinerary for VPLZCNT?
defm VPLZCNT : avx512_ctlz<0x44, "vplzcnt", SSE_INTALU_ITINS_P, HasCDI>;
defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                     SSE_INTALU_ITINS_P, HasCDI>;
// FIXME: Is there a better scheduler itinerary for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SSE_INTALU_ITINS_P, HasCDI>;
// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
let Predicates = [HasCDI, NoVLX] in {
  def : Pat<(v4i64 (ctlz VR256X:$src)),
            (EXTRACT_SUBREG
             (VPLZCNTQZrr
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v2i64 (ctlz VR128X:$src)),
            (EXTRACT_SUBREG
             (VPLZCNTQZrr
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
             sub_xmm)>;
  def : Pat<(v8i32 (ctlz VR256X:$src)),
            (EXTRACT_SUBREG
             (VPLZCNTDZrr
              (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v4i32 (ctlz VR128X:$src)),
            (EXTRACT_SUBREG
             (VPLZCNTDZrr
              (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
             sub_xmm)>;
}
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//
multiclass avx512_unary_rmb_popcnt<bits<8> opc, string OpcodeStr,
                                   OpndItins itins, X86VectorVTInfo VTInfo> {
  let Predicates = [HasVPOPCNTDQ] in
  defm Z : avx512_unary_rmb<opc, OpcodeStr, ctpop, itins, VTInfo>, EVEX_V512;
}
// Use 512bit version to implement 128/256 bit.
multiclass avx512_unary_lowering<SDNode OpNode, AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd] in {
    def Z256_Alt : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
                       (EXTRACT_SUBREG
                        (!cast<Instruction>(NAME # "Zrr")
                         (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                        _.info256.RC:$src1,
                                        _.info256.SubRegIdx)),
                        _.info256.SubRegIdx)>;
    def Z128_Alt : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
                       (EXTRACT_SUBREG
                        (!cast<Instruction>(NAME # "Zrr")
                         (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                        _.info128.RC:$src1,
                                        _.info128.SubRegIdx)),
                        _.info128.SubRegIdx)>;
  }
}
// FIXME: Is there a better scheduler itinerary for VPOPCNTD/VPOPCNTQ?
defm VPOPCNTD : avx512_unary_rmb_popcnt<0x55, "vpopcntd", SSE_INTALU_ITINS_P,
                                        v16i32_info>,
                avx512_unary_lowering<ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SSE_INTALU_ITINS_P, HasVPOPCNTDQ>;
defm VPOPCNTQ : avx512_unary_rmb_popcnt<0x55, "vpopcntq", SSE_INTALU_ITINS_P,
                                        v8i64_info>,
                avx512_unary_lowering<ctpop, avx512vl_i64_info, HasVPOPCNTDQ>, VEX_W;
defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
@ -10631,11 +10599,12 @@ defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SSE_PMADD>;
// FIXME: Is there a better scheduler itinerary for VPOPCNTB/VPOPCNTW?
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SSE_INTALU_ITINS_P,
                                   avx512vl_i8_info, HasBITALG>,
                avx512_unary_lowering<ctpop, avx512vl_i8_info, HasBITALG>;
                                   avx512vl_i8_info, HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SSE_INTALU_ITINS_P,
                                   avx512vl_i16_info, HasBITALG>,
                avx512_unary_lowering<ctpop, avx512vl_i16_info, HasBITALG>, VEX_W;
                                   avx512vl_i16_info, HasBITALG>, VEX_W;
defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
multiclass VPSHUFBITQMB_rm<OpndItins itins, X86VectorVTInfo VTI> {
  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
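
The avx512_unary_lowering patterns above handle targets that have only the 512-bit form of the instruction (no VLX): the xmm/ymm source is inserted into an undef zmm register, the Zrr form runs on the full register, and the low subregister is extracted back out. A minimal sketch of the difference, using an illustrative function name and the same -mattr strings as the RUN lines in the tests below:

  define <8 x i32> @popcnt_v8i32(<8 x i32> %in) {
    %out = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %in)
    ret <8 x i32> %out
  }
  declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)

With -mattr=+avx512vpopcntdq this goes through the widened zmm form (visible as the "# kill" register annotations in the checks), while with -mattr=+avx512vpopcntdq,+avx512vl it selects the new 256-bit vpopcntd %ymm0, %ymm0 directly.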


@ -6,6 +6,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VPOPCNTDQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VPOPCNTDQVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=ALL --check-prefix=BITALG_NOVLX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=BITALG
@ -120,6 +121,11 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv2i64:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv2i64:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
@ -290,6 +296,11 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv4i32:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv4i32:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
@ -448,6 +459,14 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv8i16:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512VPOPCNTDQVL-NEXT: vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQVL-NEXT: vpmovqw %zmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vzeroupper
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv8i16:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
@ -565,6 +584,14 @@ define <16 x i8> @testv16i8(<16 x i8> %in) nounwind {
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv16i8:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vzeroupper
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv16i8:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0


@ -2,6 +2,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VPOPCNTDQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VPOPCNTDQVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=BITALG_NOVLX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=BITALG
@ -50,6 +51,11 @@ define <4 x i64> @testv4i64(<4 x i64> %in) nounwind {
; AVX512VPOPCNTDQ-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv4i64:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv4i64:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
@ -138,6 +144,11 @@ define <8 x i32> @testv8i32(<8 x i32> %in) nounwind {
; AVX512VPOPCNTDQ-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv8i32:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv8i32:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
@ -226,6 +237,13 @@ define <16 x i16> @testv16i16(<16 x i16> %in) nounwind {
; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv16i16:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv16i16:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
@ -286,6 +304,18 @@ define <32 x i8> @testv32i8(<32 x i8> %in) nounwind {
; AVX512VPOPCNTDQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv32i8:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX512VPOPCNTDQVL-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv32i8:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0


@ -8,6 +8,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512CDVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,-avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512CD
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VPOPCNTDQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VPOPCNTDQVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=ALL --check-prefix=BITALG_NOVLX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=BITALG
;
@ -137,6 +138,16 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv2i64:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpsubq %xmm0, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv2i64:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
@ -358,6 +369,16 @@ define <2 x i64> @testv2i64u(<2 x i64> %in) nounwind {
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv2i64u:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpsubq %xmm0, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpopcntq %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv2i64u:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
@ -629,6 +650,16 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv4i32:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpsubd %xmm0, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv4i32:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
@ -890,6 +921,16 @@ define <4 x i32> @testv4i32u(<4 x i32> %in) nounwind {
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv4i32u:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpsubd %xmm0, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpopcntd %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv4i32u:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
@ -1096,6 +1137,19 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv8i16:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpsubw %xmm0, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512VPOPCNTDQVL-NEXT: vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQVL-NEXT: vpmovqw %zmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vzeroupper
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv8i16:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
@ -1278,6 +1332,19 @@ define <8 x i16> @testv8i16u(<8 x i16> %in) nounwind {
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv8i16u:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpsubw %xmm0, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512VPOPCNTDQVL-NEXT: vpopcntq %zmm0, %zmm0
; AVX512VPOPCNTDQVL-NEXT: vpmovqw %zmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vzeroupper
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv8i16u:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
@ -1441,6 +1508,19 @@ define <16 x i8> @testv16i8(<16 x i8> %in) nounwind {
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv16i8:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpsubb %xmm0, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vzeroupper
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv16i8:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
@ -1600,6 +1680,19 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) nounwind {
; AVX512VPOPCNTDQ-NEXT: vzeroupper
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv16i8u:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpsubb %xmm0, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQVL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VPOPCNTDQVL-NEXT: vzeroupper
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv16i8u:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
@ -1663,6 +1756,12 @@ define <2 x i64> @foldv2i64() nounwind {
; AVX512VPOPCNTDQ-NEXT: vmovq %rax, %xmm0
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: foldv2i64:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: movl $8, %eax
; AVX512VPOPCNTDQVL-NEXT: vmovq %rax, %xmm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: foldv2i64:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: movl $8, %eax
@ -1703,6 +1802,12 @@ define <2 x i64> @foldv2i64u() nounwind {
; AVX512VPOPCNTDQ-NEXT: vmovq %rax, %xmm0
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: foldv2i64u:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: movl $8, %eax
; AVX512VPOPCNTDQVL-NEXT: vmovq %rax, %xmm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: foldv2i64u:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: movl $8, %eax
@ -1740,6 +1845,11 @@ define <4 x i32> @foldv4i32() nounwind {
; AVX512VPOPCNTDQ-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: foldv4i32:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: foldv4i32:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
@ -1774,6 +1884,11 @@ define <4 x i32> @foldv4i32u() nounwind {
; AVX512VPOPCNTDQ-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: foldv4i32u:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: foldv4i32u:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
@ -1808,6 +1923,11 @@ define <8 x i16> @foldv8i16() nounwind {
; AVX512VPOPCNTDQ-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: foldv8i16:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: foldv8i16:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
@ -1842,6 +1962,11 @@ define <8 x i16> @foldv8i16u() nounwind {
; AVX512VPOPCNTDQ-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: foldv8i16u:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: foldv8i16u:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
@ -1876,6 +2001,11 @@ define <16 x i8> @foldv16i8() nounwind {
; AVX512VPOPCNTDQ-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: foldv16i8:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: foldv16i8:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
@ -1910,6 +2040,11 @@ define <16 x i8> @foldv16i8u() nounwind {
; AVX512VPOPCNTDQ-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: foldv16i8u:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: foldv16i8u:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]


@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512CDVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,-avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512CD
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VPOPCNTDQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VPOPCNTDQVL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=ALL --check-prefix=BITALG_NOVLX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=BITALG
;
@ -106,6 +107,16 @@ define <4 x i64> @testv4i64(<4 x i64> %in) nounwind {
; AVX512VPOPCNTDQ-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv4i64:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpsubq %ymm0, %ymm1, %ymm1
; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512VPOPCNTDQVL-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv4i64:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
@ -242,6 +253,16 @@ define <4 x i64> @testv4i64u(<4 x i64> %in) nounwind {
; AVX512VPOPCNTDQ-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv4i64u:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpsubq %ymm0, %ymm1, %ymm1
; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512VPOPCNTDQVL-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpopcntq %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv4i64u:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
@ -414,6 +435,16 @@ define <8 x i32> @testv8i32(<8 x i32> %in) nounwind {
; AVX512VPOPCNTDQ-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv8i32:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpsubd %ymm0, %ymm1, %ymm1
; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512VPOPCNTDQVL-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv8i32:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
@ -575,6 +606,16 @@ define <8 x i32> @testv8i32u(<8 x i32> %in) nounwind {
; AVX512VPOPCNTDQ-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv8i32u:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpsubd %ymm0, %ymm1, %ymm1
; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512VPOPCNTDQVL-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpopcntd %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv8i32u:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
@ -751,6 +792,18 @@ define <16 x i16> @testv16i16(<16 x i16> %in) nounwind {
; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv16i16:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpsubw %ymm0, %ymm1, %ymm1
; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512VPOPCNTDQVL-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv16i16:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
@ -902,6 +955,18 @@ define <16 x i16> @testv16i16u(<16 x i16> %in) nounwind {
; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv16i16u:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpsubw %ymm0, %ymm1, %ymm1
; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512VPOPCNTDQVL-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512VPOPCNTDQVL-NEXT: vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQVL-NEXT: vpmovdw %zmm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv16i16u:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
@ -1043,6 +1108,23 @@ define <32 x i8> @testv32i8(<32 x i8> %in) nounwind {
; AVX512VPOPCNTDQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv32i8:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpsubb %ymm0, %ymm1, %ymm1
; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512VPOPCNTDQVL-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX512VPOPCNTDQVL-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv32i8:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
@ -1181,6 +1263,23 @@ define <32 x i8> @testv32i8u(<32 x i8> %in) nounwind {
; AVX512VPOPCNTDQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: testv32i8u:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQVL-NEXT: vpsubb %ymm0, %ymm1, %ymm1
; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512VPOPCNTDQVL-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX512VPOPCNTDQVL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX512VPOPCNTDQVL-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX512VPOPCNTDQVL-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: testv32i8u:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1