forked from OSchip/llvm-project
[AVX512] Don't use 32-bit elements version of AND/OR/XOR/ANDN during isel unless we're matching a masked op or broadcast
Selecting the 32-bit element logical ops without a select or broadcast requires matching a bitconvert on the inputs to the AND/OR/XOR/ANDN node. But that's a weird thing to rely on: it's entirely possible that one of the inputs has a bitcast and the other doesn't. Since there's no functional difference between the 32-bit and 64-bit element forms when no masking or broadcast is involved, just remove the extra patterns and save some isel table size. Differential Revision: https://reviews.llvm.org/D36854 llvm-svn: 312138
This commit is contained in:
parent
89e8d5e955
commit
afce0baacd
|
@ -5094,41 +5094,51 @@ let Predicates = [HasDQI, NoVLX] in {
|
||||||
// AVX-512 Logical Instructions
|
// AVX-512 Logical Instructions
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
// OpNodeMsk is the OpNode to use when element size is important. OpNode will
|
||||||
X86VectorVTInfo _, bit IsCommutable = 0> {
|
// be set to null_frag for 32-bit elements.
|
||||||
|
multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr,
|
||||||
|
SDPatternOperator OpNode,
|
||||||
|
SDNode OpNodeMsk, X86VectorVTInfo _,
|
||||||
|
bit IsCommutable = 0> {
|
||||||
|
let hasSideEffects = 0 in
|
||||||
defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||||
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
|
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
|
||||||
"$src2, $src1", "$src1, $src2",
|
"$src2, $src1", "$src1, $src2",
|
||||||
(_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
|
(_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
|
||||||
(bitconvert (_.VT _.RC:$src2)))),
|
(bitconvert (_.VT _.RC:$src2)))),
|
||||||
(_.VT (bitconvert (_.i64VT (OpNode _.RC:$src1,
|
(_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
|
||||||
_.RC:$src2)))),
|
_.RC:$src2)))),
|
||||||
IIC_SSE_BIT_P_RR, IsCommutable>,
|
IIC_SSE_BIT_P_RR, IsCommutable>,
|
||||||
AVX512BIBase, EVEX_4V;
|
AVX512BIBase, EVEX_4V;
|
||||||
|
|
||||||
|
let hasSideEffects = 0, mayLoad = 1 in
|
||||||
defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||||
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
|
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
|
||||||
"$src2, $src1", "$src1, $src2",
|
"$src2, $src1", "$src1, $src2",
|
||||||
(_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
|
(_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
|
||||||
(bitconvert (_.LdFrag addr:$src2)))),
|
(bitconvert (_.LdFrag addr:$src2)))),
|
||||||
(_.VT (bitconvert (_.i64VT (OpNode _.RC:$src1,
|
(_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
|
||||||
(bitconvert (_.LdFrag addr:$src2)))))),
|
(bitconvert (_.LdFrag addr:$src2)))))),
|
||||||
IIC_SSE_BIT_P_RM>,
|
IIC_SSE_BIT_P_RM>,
|
||||||
AVX512BIBase, EVEX_4V;
|
AVX512BIBase, EVEX_4V;
|
||||||
}
|
}
|
||||||
|
|
||||||
multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
// OpNodeMsk is the OpNode to use where element size is important. So use
|
||||||
X86VectorVTInfo _, bit IsCommutable = 0> :
|
// for all of the broadcast patterns.
|
||||||
avx512_logic_rm<opc, OpcodeStr, OpNode, _, IsCommutable> {
|
multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr,
|
||||||
|
SDPatternOperator OpNode,
|
||||||
|
SDNode OpNodeMsk, X86VectorVTInfo _,
|
||||||
|
bit IsCommutable = 0> :
|
||||||
|
avx512_logic_rm<opc, OpcodeStr, OpNode, OpNodeMsk, _, IsCommutable> {
|
||||||
defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||||
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
|
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
|
||||||
"${src2}"##_.BroadcastStr##", $src1",
|
"${src2}"##_.BroadcastStr##", $src1",
|
||||||
"$src1, ${src2}"##_.BroadcastStr,
|
"$src1, ${src2}"##_.BroadcastStr,
|
||||||
(_.i64VT (OpNode _.RC:$src1,
|
(_.i64VT (OpNodeMsk _.RC:$src1,
|
||||||
(bitconvert
|
(bitconvert
|
||||||
(_.VT (X86VBroadcast
|
(_.VT (X86VBroadcast
|
||||||
(_.ScalarLdFrag addr:$src2)))))),
|
(_.ScalarLdFrag addr:$src2)))))),
|
||||||
(_.VT (bitconvert (_.i64VT (OpNode _.RC:$src1,
|
(_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
|
||||||
(bitconvert
|
(bitconvert
|
||||||
(_.VT (X86VBroadcast
|
(_.VT (X86VBroadcast
|
||||||
(_.ScalarLdFrag addr:$src2)))))))),
|
(_.ScalarLdFrag addr:$src2)))))))),
|
||||||
|
@ -5136,38 +5146,30 @@ multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||||
AVX512BIBase, EVEX_4V, EVEX_B;
|
AVX512BIBase, EVEX_4V, EVEX_B;
|
||||||
}
|
}
|
||||||
|
|
||||||
multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr,
|
||||||
AVX512VLVectorVTInfo VTInfo,
|
SDPatternOperator OpNode,
|
||||||
|
SDNode OpNodeMsk, AVX512VLVectorVTInfo VTInfo,
|
||||||
bit IsCommutable = 0> {
|
bit IsCommutable = 0> {
|
||||||
let Predicates = [HasAVX512] in
|
let Predicates = [HasAVX512] in
|
||||||
defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info512,
|
defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, VTInfo.info512,
|
||||||
IsCommutable>, EVEX_V512;
|
IsCommutable>, EVEX_V512;
|
||||||
|
|
||||||
let Predicates = [HasAVX512, HasVLX] in {
|
let Predicates = [HasAVX512, HasVLX] in {
|
||||||
defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
|
defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk,
|
||||||
IsCommutable>, EVEX_V256;
|
VTInfo.info256, IsCommutable>, EVEX_V256;
|
||||||
defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
|
defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk,
|
||||||
IsCommutable>, EVEX_V128;
|
VTInfo.info128, IsCommutable>, EVEX_V128;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
multiclass avx512_logic_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|
||||||
bit IsCommutable = 0> {
|
|
||||||
defm NAME : avx512_logic_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
|
|
||||||
IsCommutable>, EVEX_CD8<32, CD8VF>;
|
|
||||||
}
|
|
||||||
|
|
||||||
multiclass avx512_logic_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|
||||||
bit IsCommutable = 0> {
|
|
||||||
defm NAME : avx512_logic_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
|
|
||||||
IsCommutable>,
|
|
||||||
VEX_W, EVEX_CD8<64, CD8VF>;
|
|
||||||
}
|
|
||||||
|
|
||||||
multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
|
multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
|
||||||
SDNode OpNode, bit IsCommutable = 0> {
|
SDNode OpNode, bit IsCommutable = 0> {
|
||||||
defm Q : avx512_logic_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, IsCommutable>;
|
defm Q : avx512_logic_rmb_vl<opc_q, OpcodeStr#"q", OpNode, OpNode,
|
||||||
defm D : avx512_logic_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, IsCommutable>;
|
avx512vl_i64_info, IsCommutable>,
|
||||||
|
VEX_W, EVEX_CD8<64, CD8VF>;
|
||||||
|
defm D : avx512_logic_rmb_vl<opc_d, OpcodeStr#"d", null_frag, OpNode,
|
||||||
|
avx512vl_i32_info, IsCommutable>,
|
||||||
|
EVEX_CD8<32, CD8VF>;
|
||||||
}
|
}
|
||||||
|
|
||||||
defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and, 1>;
|
defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and, 1>;
|
||||||
|
|
|
@ -607,17 +607,17 @@ define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
|
||||||
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
|
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
|
||||||
; AVX512F-LABEL: andd512fold:
|
; AVX512F-LABEL: andd512fold:
|
||||||
; AVX512F: # BB#0: # %entry
|
; AVX512F: # BB#0: # %entry
|
||||||
; AVX512F-NEXT: vpandd (%rdi), %zmm0, %zmm0
|
; AVX512F-NEXT: vpandq (%rdi), %zmm0, %zmm0
|
||||||
; AVX512F-NEXT: retq
|
; AVX512F-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512VL-LABEL: andd512fold:
|
; AVX512VL-LABEL: andd512fold:
|
||||||
; AVX512VL: # BB#0: # %entry
|
; AVX512VL: # BB#0: # %entry
|
||||||
; AVX512VL-NEXT: vpandd (%rdi), %zmm0, %zmm0
|
; AVX512VL-NEXT: vpandq (%rdi), %zmm0, %zmm0
|
||||||
; AVX512VL-NEXT: retq
|
; AVX512VL-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512BW-LABEL: andd512fold:
|
; AVX512BW-LABEL: andd512fold:
|
||||||
; AVX512BW: # BB#0: # %entry
|
; AVX512BW: # BB#0: # %entry
|
||||||
; AVX512BW-NEXT: vpandd (%rdi), %zmm0, %zmm0
|
; AVX512BW-NEXT: vpandq (%rdi), %zmm0, %zmm0
|
||||||
; AVX512BW-NEXT: retq
|
; AVX512BW-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512DQ-LABEL: andd512fold:
|
; AVX512DQ-LABEL: andd512fold:
|
||||||
|
|
|
@ -959,7 +959,7 @@ define void @test_storent_ps_512(<16 x float> %data, i8* %ptr) {
|
||||||
define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) {
|
define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) {
|
||||||
; CHECK-LABEL: test_xor_epi32:
|
; CHECK-LABEL: test_xor_epi32:
|
||||||
; CHECK: ## BB#0:
|
; CHECK: ## BB#0:
|
||||||
; CHECK-NEXT: vpxord %zmm1, %zmm0, %zmm0
|
; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
|
%res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
|
||||||
ret < 16 x i32> %res
|
ret < 16 x i32> %res
|
||||||
|
@ -981,7 +981,7 @@ declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16
|
||||||
define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) {
|
define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) {
|
||||||
; CHECK-LABEL: test_or_epi32:
|
; CHECK-LABEL: test_or_epi32:
|
||||||
; CHECK: ## BB#0:
|
; CHECK: ## BB#0:
|
||||||
; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm0
|
; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
|
%res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
|
||||||
ret < 16 x i32> %res
|
ret < 16 x i32> %res
|
||||||
|
@ -1003,7 +1003,7 @@ declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x
|
||||||
define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) {
|
define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) {
|
||||||
; CHECK-LABEL: test_and_epi32:
|
; CHECK-LABEL: test_and_epi32:
|
||||||
; CHECK: ## BB#0:
|
; CHECK: ## BB#0:
|
||||||
; CHECK-NEXT: vpandd %zmm1, %zmm0, %zmm0
|
; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm0
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
|
%res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
|
||||||
ret < 16 x i32> %res
|
ret < 16 x i32> %res
|
||||||
|
|
|
@ -7,7 +7,7 @@ define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnon
|
||||||
; ALL-LABEL: vpandd:
|
; ALL-LABEL: vpandd:
|
||||||
; ALL: ## BB#0: ## %entry
|
; ALL: ## BB#0: ## %entry
|
||||||
; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||||
; ALL-NEXT: vpandd %zmm1, %zmm0, %zmm0
|
; ALL-NEXT: vpandq %zmm1, %zmm0, %zmm0
|
||||||
; ALL-NEXT: retq
|
; ALL-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
; Force the execution domain with an add.
|
; Force the execution domain with an add.
|
||||||
|
@ -21,7 +21,7 @@ define <16 x i32> @vpandnd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readno
|
||||||
; ALL-LABEL: vpandnd:
|
; ALL-LABEL: vpandnd:
|
||||||
; ALL: ## BB#0: ## %entry
|
; ALL: ## BB#0: ## %entry
|
||||||
; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||||
; ALL-NEXT: vpandnd %zmm0, %zmm1, %zmm0
|
; ALL-NEXT: vpandnq %zmm0, %zmm1, %zmm0
|
||||||
; ALL-NEXT: retq
|
; ALL-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
; Force the execution domain with an add.
|
; Force the execution domain with an add.
|
||||||
|
@ -37,7 +37,7 @@ define <16 x i32> @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone
|
||||||
; ALL-LABEL: vpord:
|
; ALL-LABEL: vpord:
|
||||||
; ALL: ## BB#0: ## %entry
|
; ALL: ## BB#0: ## %entry
|
||||||
; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||||
; ALL-NEXT: vpord %zmm1, %zmm0, %zmm0
|
; ALL-NEXT: vporq %zmm1, %zmm0, %zmm0
|
||||||
; ALL-NEXT: retq
|
; ALL-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
; Force the execution domain with an add.
|
; Force the execution domain with an add.
|
||||||
|
@ -51,7 +51,7 @@ define <16 x i32> @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnon
|
||||||
; ALL-LABEL: vpxord:
|
; ALL-LABEL: vpxord:
|
||||||
; ALL: ## BB#0: ## %entry
|
; ALL: ## BB#0: ## %entry
|
||||||
; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||||
; ALL-NEXT: vpxord %zmm1, %zmm0, %zmm0
|
; ALL-NEXT: vpxorq %zmm1, %zmm0, %zmm0
|
||||||
; ALL-NEXT: retq
|
; ALL-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
; Force the execution domain with an add.
|
; Force the execution domain with an add.
|
||||||
|
@ -132,7 +132,7 @@ define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
|
||||||
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
|
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
|
||||||
; KNL-LABEL: andd512fold:
|
; KNL-LABEL: andd512fold:
|
||||||
; KNL: ## BB#0: ## %entry
|
; KNL: ## BB#0: ## %entry
|
||||||
; KNL-NEXT: vpandd (%rdi), %zmm0, %zmm0
|
; KNL-NEXT: vpandq (%rdi), %zmm0, %zmm0
|
||||||
; KNL-NEXT: retq
|
; KNL-NEXT: retq
|
||||||
;
|
;
|
||||||
; SKX-LABEL: andd512fold:
|
; SKX-LABEL: andd512fold:
|
||||||
|
|
|
@ -1335,7 +1335,7 @@ define <16 x i32> @f16xi32_i128(<16 x i32> %a) {
|
||||||
; AVX512-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
|
; AVX512-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
|
||||||
; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
|
; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
|
||||||
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||||
; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
|
; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
|
||||||
; AVX512-NEXT: retl
|
; AVX512-NEXT: retl
|
||||||
;
|
;
|
||||||
; AVX-64-LABEL: f16xi32_i128:
|
; AVX-64-LABEL: f16xi32_i128:
|
||||||
|
@ -1369,7 +1369,7 @@ define <16 x i32> @f16xi32_i128(<16 x i32> %a) {
|
||||||
; AVX512F-64-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
|
; AVX512F-64-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
|
||||||
; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
|
; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
|
||||||
; AVX512F-64-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
; AVX512F-64-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||||
; AVX512F-64-NEXT: vpandd %zmm1, %zmm0, %zmm0
|
; AVX512F-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
|
||||||
; AVX512F-64-NEXT: retq
|
; AVX512F-64-NEXT: retq
|
||||||
%res1 = add <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>, %a
|
%res1 = add <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>, %a
|
||||||
%res2 = and <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>, %res1
|
%res2 = and <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>, %res1
|
||||||
|
|
|
@ -2051,17 +2051,17 @@ define <16 x i32> @test_bitreverse_v16i32(<16 x i32> %a) nounwind {
|
||||||
; AVX512F-NEXT: vpslld $4, %zmm1, %zmm1
|
; AVX512F-NEXT: vpslld $4, %zmm1, %zmm1
|
||||||
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||||
; AVX512F-NEXT: vpsrld $4, %zmm0, %zmm0
|
; AVX512F-NEXT: vpsrld $4, %zmm0, %zmm0
|
||||||
; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0
|
; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
|
||||||
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm1
|
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm1
|
||||||
; AVX512F-NEXT: vpslld $2, %zmm1, %zmm1
|
; AVX512F-NEXT: vpslld $2, %zmm1, %zmm1
|
||||||
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||||
; AVX512F-NEXT: vpsrld $2, %zmm0, %zmm0
|
; AVX512F-NEXT: vpsrld $2, %zmm0, %zmm0
|
||||||
; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0
|
; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
|
||||||
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm1
|
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm1
|
||||||
; AVX512F-NEXT: vpslld $1, %zmm1, %zmm1
|
; AVX512F-NEXT: vpslld $1, %zmm1, %zmm1
|
||||||
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
|
||||||
; AVX512F-NEXT: vpsrld $1, %zmm0, %zmm0
|
; AVX512F-NEXT: vpsrld $1, %zmm0, %zmm0
|
||||||
; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0
|
; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
|
||||||
; AVX512F-NEXT: retq
|
; AVX512F-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512BW-LABEL: test_bitreverse_v16i32:
|
; AVX512BW-LABEL: test_bitreverse_v16i32:
|
||||||
|
|
|
@ -176,7 +176,7 @@ define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
|
||||||
; AVX512BW-LABEL: testv16i32:
|
; AVX512BW-LABEL: testv16i32:
|
||||||
; AVX512BW: # BB#0:
|
; AVX512BW: # BB#0:
|
||||||
; AVX512BW-NEXT: vpsrld $1, %zmm0, %zmm1
|
; AVX512BW-NEXT: vpsrld $1, %zmm0, %zmm1
|
||||||
; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
|
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
|
||||||
; AVX512BW-NEXT: vpsrld $2, %zmm0, %zmm1
|
; AVX512BW-NEXT: vpsrld $2, %zmm0, %zmm1
|
||||||
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
|
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
|
||||||
; AVX512BW-NEXT: vpsrld $4, %zmm0, %zmm1
|
; AVX512BW-NEXT: vpsrld $4, %zmm0, %zmm1
|
||||||
|
@ -206,7 +206,7 @@ define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
|
||||||
; AVX512DQ-LABEL: testv16i32:
|
; AVX512DQ-LABEL: testv16i32:
|
||||||
; AVX512DQ: # BB#0:
|
; AVX512DQ: # BB#0:
|
||||||
; AVX512DQ-NEXT: vpsrld $1, %zmm0, %zmm1
|
; AVX512DQ-NEXT: vpsrld $1, %zmm0, %zmm1
|
||||||
; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0
|
; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
|
||||||
; AVX512DQ-NEXT: vpsrld $2, %zmm0, %zmm1
|
; AVX512DQ-NEXT: vpsrld $2, %zmm0, %zmm1
|
||||||
; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
|
; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
|
||||||
; AVX512DQ-NEXT: vpsrld $4, %zmm0, %zmm1
|
; AVX512DQ-NEXT: vpsrld $4, %zmm0, %zmm1
|
||||||
|
@ -263,7 +263,7 @@ define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind {
|
||||||
; AVX512BW-LABEL: testv16i32u:
|
; AVX512BW-LABEL: testv16i32u:
|
||||||
; AVX512BW: # BB#0:
|
; AVX512BW: # BB#0:
|
||||||
; AVX512BW-NEXT: vpsrld $1, %zmm0, %zmm1
|
; AVX512BW-NEXT: vpsrld $1, %zmm0, %zmm1
|
||||||
; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
|
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
|
||||||
; AVX512BW-NEXT: vpsrld $2, %zmm0, %zmm1
|
; AVX512BW-NEXT: vpsrld $2, %zmm0, %zmm1
|
||||||
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
|
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
|
||||||
; AVX512BW-NEXT: vpsrld $4, %zmm0, %zmm1
|
; AVX512BW-NEXT: vpsrld $4, %zmm0, %zmm1
|
||||||
|
@ -293,7 +293,7 @@ define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind {
|
||||||
; AVX512DQ-LABEL: testv16i32u:
|
; AVX512DQ-LABEL: testv16i32u:
|
||||||
; AVX512DQ: # BB#0:
|
; AVX512DQ: # BB#0:
|
||||||
; AVX512DQ-NEXT: vpsrld $1, %zmm0, %zmm1
|
; AVX512DQ-NEXT: vpsrld $1, %zmm0, %zmm1
|
||||||
; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0
|
; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
|
||||||
; AVX512DQ-NEXT: vpsrld $2, %zmm0, %zmm1
|
; AVX512DQ-NEXT: vpsrld $2, %zmm0, %zmm1
|
||||||
; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
|
; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
|
||||||
; AVX512DQ-NEXT: vpsrld $4, %zmm0, %zmm1
|
; AVX512DQ-NEXT: vpsrld $4, %zmm0, %zmm1
|
||||||
|
|
|
@ -696,7 +696,7 @@ define <16 x i32> @splatconstant_rotate_mask_v16i32(<16 x i32> %a) nounwind {
|
||||||
; AVX512-LABEL: splatconstant_rotate_mask_v16i32:
|
; AVX512-LABEL: splatconstant_rotate_mask_v16i32:
|
||||||
; AVX512: # BB#0:
|
; AVX512: # BB#0:
|
||||||
; AVX512-NEXT: vprold $4, %zmm0, %zmm0
|
; AVX512-NEXT: vprold $4, %zmm0, %zmm0
|
||||||
; AVX512-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
|
; AVX512-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
|
||||||
; AVX512-NEXT: retq
|
; AVX512-NEXT: retq
|
||||||
%shl = shl <16 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
|
%shl = shl <16 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
|
||||||
%lshr = lshr <16 x i32> %a, <i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28>
|
%lshr = lshr <16 x i32> %a, <i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28>
|
||||||
|
|
|
@ -3116,7 +3116,7 @@ define <16 x i8> @trunc_and_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwin
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: trunc_and_v16i32_v16i8:
|
; AVX512-LABEL: trunc_and_v16i32_v16i8:
|
||||||
; AVX512: # BB#0:
|
; AVX512: # BB#0:
|
||||||
; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
|
; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
|
||||||
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
|
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
|
||||||
; AVX512-NEXT: vzeroupper
|
; AVX512-NEXT: vzeroupper
|
||||||
; AVX512-NEXT: retq
|
; AVX512-NEXT: retq
|
||||||
|
@ -3830,7 +3830,7 @@ define <16 x i8> @trunc_xor_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwin
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: trunc_xor_v16i32_v16i8:
|
; AVX512-LABEL: trunc_xor_v16i32_v16i8:
|
||||||
; AVX512: # BB#0:
|
; AVX512: # BB#0:
|
||||||
; AVX512-NEXT: vpxord %zmm1, %zmm0, %zmm0
|
; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
|
||||||
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
|
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
|
||||||
; AVX512-NEXT: vzeroupper
|
; AVX512-NEXT: vzeroupper
|
||||||
; AVX512-NEXT: retq
|
; AVX512-NEXT: retq
|
||||||
|
@ -4544,7 +4544,7 @@ define <16 x i8> @trunc_or_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwind
|
||||||
;
|
;
|
||||||
; AVX512-LABEL: trunc_or_v16i32_v16i8:
|
; AVX512-LABEL: trunc_or_v16i32_v16i8:
|
||||||
; AVX512: # BB#0:
|
; AVX512: # BB#0:
|
||||||
; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
|
; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
|
||||||
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
|
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
|
||||||
; AVX512-NEXT: vzeroupper
|
; AVX512-NEXT: vzeroupper
|
||||||
; AVX512-NEXT: retq
|
; AVX512-NEXT: retq
|
||||||
|
|
|
@ -139,7 +139,7 @@ define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
|
||||||
; AVX512CD: # BB#0:
|
; AVX512CD: # BB#0:
|
||||||
; AVX512CD-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
; AVX512CD-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||||
; AVX512CD-NEXT: vpsubd %zmm0, %zmm1, %zmm1
|
; AVX512CD-NEXT: vpsubd %zmm0, %zmm1, %zmm1
|
||||||
; AVX512CD-NEXT: vpandd %zmm1, %zmm0, %zmm0
|
; AVX512CD-NEXT: vpandq %zmm1, %zmm0, %zmm0
|
||||||
; AVX512CD-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
|
; AVX512CD-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
|
||||||
; AVX512CD-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
; AVX512CD-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||||
; AVX512CD-NEXT: vextracti64x4 $1, %zmm0, %ymm1
|
; AVX512CD-NEXT: vextracti64x4 $1, %zmm0, %ymm1
|
||||||
|
@ -175,7 +175,7 @@ define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
|
||||||
; AVX512CDBW: # BB#0:
|
; AVX512CDBW: # BB#0:
|
||||||
; AVX512CDBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
; AVX512CDBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||||
; AVX512CDBW-NEXT: vpsubd %zmm0, %zmm1, %zmm2
|
; AVX512CDBW-NEXT: vpsubd %zmm0, %zmm1, %zmm2
|
||||||
; AVX512CDBW-NEXT: vpandd %zmm2, %zmm0, %zmm0
|
; AVX512CDBW-NEXT: vpandq %zmm2, %zmm0, %zmm0
|
||||||
; AVX512CDBW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
|
; AVX512CDBW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
|
||||||
; AVX512CDBW-NEXT: vpaddd %zmm2, %zmm0, %zmm0
|
; AVX512CDBW-NEXT: vpaddd %zmm2, %zmm0, %zmm0
|
||||||
; AVX512CDBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
; AVX512CDBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||||
|
@ -197,7 +197,7 @@ define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
|
||||||
; AVX512BW: # BB#0:
|
; AVX512BW: # BB#0:
|
||||||
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||||
; AVX512BW-NEXT: vpsubd %zmm0, %zmm1, %zmm2
|
; AVX512BW-NEXT: vpsubd %zmm0, %zmm1, %zmm2
|
||||||
; AVX512BW-NEXT: vpandd %zmm2, %zmm0, %zmm0
|
; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm0
|
||||||
; AVX512BW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
|
; AVX512BW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
|
||||||
; AVX512BW-NEXT: vpaddd %zmm2, %zmm0, %zmm0
|
; AVX512BW-NEXT: vpaddd %zmm2, %zmm0, %zmm0
|
||||||
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||||
|
@ -219,7 +219,7 @@ define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
|
||||||
; AVX512VPOPCNTDQ: # BB#0:
|
; AVX512VPOPCNTDQ: # BB#0:
|
||||||
; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||||
; AVX512VPOPCNTDQ-NEXT: vpsubd %zmm0, %zmm1, %zmm1
|
; AVX512VPOPCNTDQ-NEXT: vpsubd %zmm0, %zmm1, %zmm1
|
||||||
; AVX512VPOPCNTDQ-NEXT: vpandd %zmm1, %zmm0, %zmm0
|
; AVX512VPOPCNTDQ-NEXT: vpandq %zmm1, %zmm0, %zmm0
|
||||||
; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
|
; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
|
||||||
; AVX512VPOPCNTDQ-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
; AVX512VPOPCNTDQ-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||||
; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
|
; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
|
||||||
|
@ -233,7 +233,7 @@ define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind {
|
||||||
; AVX512CD: # BB#0:
|
; AVX512CD: # BB#0:
|
||||||
; AVX512CD-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
; AVX512CD-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||||
; AVX512CD-NEXT: vpsubd %zmm0, %zmm1, %zmm1
|
; AVX512CD-NEXT: vpsubd %zmm0, %zmm1, %zmm1
|
||||||
; AVX512CD-NEXT: vpandd %zmm1, %zmm0, %zmm0
|
; AVX512CD-NEXT: vpandq %zmm1, %zmm0, %zmm0
|
||||||
; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
|
; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
|
||||||
; AVX512CD-NEXT: vpbroadcastd {{.*#+}} zmm1 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
|
; AVX512CD-NEXT: vpbroadcastd {{.*#+}} zmm1 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
|
||||||
; AVX512CD-NEXT: vpsubd %zmm0, %zmm1, %zmm0
|
; AVX512CD-NEXT: vpsubd %zmm0, %zmm1, %zmm0
|
||||||
|
@ -243,7 +243,7 @@ define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind {
|
||||||
; AVX512CDBW: # BB#0:
|
; AVX512CDBW: # BB#0:
|
||||||
; AVX512CDBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
; AVX512CDBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||||
; AVX512CDBW-NEXT: vpsubd %zmm0, %zmm1, %zmm1
|
; AVX512CDBW-NEXT: vpsubd %zmm0, %zmm1, %zmm1
|
||||||
; AVX512CDBW-NEXT: vpandd %zmm1, %zmm0, %zmm0
|
; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm0
|
||||||
; AVX512CDBW-NEXT: vplzcntd %zmm0, %zmm0
|
; AVX512CDBW-NEXT: vplzcntd %zmm0, %zmm0
|
||||||
; AVX512CDBW-NEXT: vpbroadcastd {{.*#+}} zmm1 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
|
; AVX512CDBW-NEXT: vpbroadcastd {{.*#+}} zmm1 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
|
||||||
; AVX512CDBW-NEXT: vpsubd %zmm0, %zmm1, %zmm0
|
; AVX512CDBW-NEXT: vpsubd %zmm0, %zmm1, %zmm0
|
||||||
|
@ -253,7 +253,7 @@ define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind {
|
||||||
; AVX512BW: # BB#0:
|
; AVX512BW: # BB#0:
|
||||||
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||||
; AVX512BW-NEXT: vpsubd %zmm0, %zmm1, %zmm2
|
; AVX512BW-NEXT: vpsubd %zmm0, %zmm1, %zmm2
|
||||||
; AVX512BW-NEXT: vpandd %zmm2, %zmm0, %zmm0
|
; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm0
|
||||||
; AVX512BW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
|
; AVX512BW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
|
||||||
; AVX512BW-NEXT: vpaddd %zmm2, %zmm0, %zmm0
|
; AVX512BW-NEXT: vpaddd %zmm2, %zmm0, %zmm0
|
||||||
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
|
||||||
|
@ -275,7 +275,7 @@ define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind {
|
||||||
; AVX512VPOPCNTDQ: # BB#0:
|
; AVX512VPOPCNTDQ: # BB#0:
|
||||||
; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||||
; AVX512VPOPCNTDQ-NEXT: vpsubd %zmm0, %zmm1, %zmm1
|
; AVX512VPOPCNTDQ-NEXT: vpsubd %zmm0, %zmm1, %zmm1
|
||||||
; AVX512VPOPCNTDQ-NEXT: vpandd %zmm1, %zmm0, %zmm0
|
; AVX512VPOPCNTDQ-NEXT: vpandq %zmm1, %zmm0, %zmm0
|
||||||
; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
|
; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
|
||||||
; AVX512VPOPCNTDQ-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
; AVX512VPOPCNTDQ-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||||
; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
|
; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
|
||||||
|
|
Loading…
Reference in New Issue