forked from OSchip/llvm-project
[AVX512] In some cases the KORTEST instruction can be used instead of a ZEXT + TEST sequence.
Differential Revision: http://reviews.llvm.org/D23490 llvm-svn: 279960
This commit is contained in:
parent
407f275894
commit
1a388871b9
|
@ -14900,15 +14900,29 @@ static SDValue EmitKTEST(SDValue Op, SelectionDAG &DAG,
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
/// Emit nodes that will be selected as "test Op0,Op0", or something
|
||||
/// equivalent.
|
||||
SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
|
||||
SelectionDAG &DAG) const {
|
||||
if (Op.getValueType() == MVT::i1) {
|
||||
/// Emit a comparison of the i1 value \p Op against zero.
///
/// If \p Op is produced by a TRUNCATE, LOAD or CopyFromReg node, it is
/// zero-extended to i8 and compared with an i8 zero through X86ISD::CMP with
/// an i32 result. Otherwise an X86ISD::CMP node with an i1 result type is
/// built directly on \p Op.
///
/// \param Op  value to test; the caller visible in this diff (EmitTest) only
///            passes values whose type is MVT::i1.
/// \param DAG selection DAG used to create the new nodes.
/// \param dl  debug location attached to the new nodes.
/// \returns the X86ISD::CMP node that was created.
static SDValue EmitTEST_i1(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) {
|
||||
|
||||
// These producers most probably leave the value in a GPR, so use
// ZERO_EXTEND to i8 followed by a CMP against zero.
|
||||
if(Op.getOpcode() == ISD::TRUNCATE ||
|
||||
Op.getOpcode() == ISD::LOAD ||
|
||||
Op.getOpcode() == ISD::CopyFromReg) {
|
||||
SDValue ExtOp = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, Op);
|
||||
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, ExtOp,
|
||||
DAG.getConstant(0, dl, MVT::i8));
|
||||
}
|
||||
|
||||
// Otherwise create a CMP on the i1 value itself; it is intended to be
// selected as KORTEST.
// NOTE(review): the result type here is MVT::i1 while the GPR path above uses
// MVT::i32, and the zero constant is i8 although Op is i1 -- confirm this is
// what the KORTEST selection pattern expects.
|
||||
return DAG.getNode(X86ISD::CMP, dl, MVT::i1, Op,
|
||||
DAG.getConstant(0, dl, MVT::i8));
|
||||
}
|
||||
|
||||
/// Emit nodes that will be selected as "test Op0,Op0", or something
|
||||
/// equivalent.
|
||||
SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
|
||||
SelectionDAG &DAG) const {
|
||||
if (Op.getValueType() == MVT::i1)
|
||||
return EmitTEST_i1(Op, DAG, dl);
|
||||
|
||||
// CF and OF aren't always set the way we want. Determine which
|
||||
// of these we need.
|
||||
bool NeedCF = false;
|
||||
|
|
|
@ -2476,6 +2476,10 @@ multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;
|
||||
defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, HasDQI>;
|
||||
|
||||
// Select an X86cmp of a VK1 value against 0 as KORTESTWrr, passing the source
// (copied into the VK16 register class) as both operands. The pattern is only
// enabled when HasAVX512 holds.
// NOTE(review): KORTESTW ORs all 16 bits of its operands, so this presumably
// relies on bits 1-15 of the copied mask register being zero -- confirm.
def : Pat<(X86cmp VK1:$src, 0),
|
||||
(KORTESTWrr (COPY_TO_REGCLASS VK1:$src, VK16),
|
||||
(COPY_TO_REGCLASS VK1:$src, VK16))>, Requires<[HasAVX512]>;
|
||||
|
||||
// Mask shift
|
||||
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
|
||||
SDNode OpNode> {
|
||||
|
|
|
@ -167,9 +167,7 @@ define i32 @test10(i64 %b, i64 %c, i1 %d) {
|
|||
; ALL-NEXT: kmovw %eax, %k1
|
||||
; ALL-NEXT: korw %k1, %k0, %k1
|
||||
; ALL-NEXT: kxorw %k1, %k0, %k0
|
||||
; ALL-NEXT: kmovw %k0, %eax
|
||||
; ALL-NEXT: andl $1, %eax
|
||||
; ALL-NEXT: testb %al, %al
|
||||
; ALL-NEXT: kortestw %k0, %k0
|
||||
; ALL-NEXT: je LBB8_1
|
||||
; ALL-NEXT: ## BB#2: ## %if.end.i
|
||||
; ALL-NEXT: movl $6, %eax
|
||||
|
|
|
@ -8,23 +8,19 @@ target triple = "x86_64-unknown-linux-gnu"
|
|||
define void @func() {
|
||||
; CHECK-LABEL: func:
|
||||
; CHECK: ## BB#0: ## %L_10
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: kortestw %k0, %k0
|
||||
; CHECK-NEXT: je LBB0_1
|
||||
; CHECK-NEXT: ## BB#4: ## %L_30
|
||||
; CHECK-NEXT: ## BB#3: ## %L_30
|
||||
; CHECK-NEXT: retq
|
||||
; CHECK-NEXT: LBB0_1: ## %bb56
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: jmp LBB0_2
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: LBB0_3: ## %bb35
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_2 Depth=1
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: LBB0_2: ## %bb33
|
||||
; CHECK-NEXT: LBB0_2: ## %bb35
|
||||
; CHECK-NEXT: ## in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: kortestw %k0, %k0
|
||||
; CHECK-NEXT: LBB0_1: ## %bb33
|
||||
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: jne LBB0_2
|
||||
; CHECK-NEXT: jmp LBB0_3
|
||||
; CHECK-NEXT: kortestw %k0, %k0
|
||||
; CHECK-NEXT: jne LBB0_1
|
||||
; CHECK-NEXT: jmp LBB0_2
|
||||
bb1:
|
||||
br i1 undef, label %L_10, label %L_10
|
||||
|
||||
|
|
|
@ -200,9 +200,7 @@ define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) {
|
|||
; KNL-NEXT: vpcmpltud %zmm1, %zmm0, %k0
|
||||
; KNL-NEXT: kshiftlw $11, %k0, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: andl $1, %eax
|
||||
; KNL-NEXT: testb %al, %al
|
||||
; KNL-NEXT: kortestw %k0, %k0
|
||||
; KNL-NEXT: je LBB10_2
|
||||
; KNL-NEXT: ## BB#1: ## %A
|
||||
; KNL-NEXT: vmovdqa64 %zmm1, %zmm0
|
||||
|
@ -216,9 +214,7 @@ define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) {
|
|||
; SKX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
|
||||
; SKX-NEXT: kshiftlw $11, %k0, %k0
|
||||
; SKX-NEXT: kshiftrw $15, %k0, %k0
|
||||
; SKX-NEXT: kmovw %k0, %eax
|
||||
; SKX-NEXT: andl $1, %eax
|
||||
; SKX-NEXT: testb %al, %al
|
||||
; SKX-NEXT: kortestw %k0, %k0
|
||||
; SKX-NEXT: je LBB10_2
|
||||
; SKX-NEXT: ## BB#1: ## %A
|
||||
; SKX-NEXT: vmovdqa64 %zmm1, %zmm0
|
||||
|
@ -244,9 +240,7 @@ define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
|
|||
; KNL-NEXT: kunpckbw %k0, %k1, %k0
|
||||
; KNL-NEXT: kshiftlw $15, %k0, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: andl $1, %eax
|
||||
; KNL-NEXT: testb %al, %al
|
||||
; KNL-NEXT: kortestw %k0, %k0
|
||||
; KNL-NEXT: cmoveq %rsi, %rdi
|
||||
; KNL-NEXT: movq %rdi, %rax
|
||||
; KNL-NEXT: retq
|
||||
|
@ -258,9 +252,7 @@ define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
|
|||
; SKX-NEXT: kunpckbw %k0, %k1, %k0
|
||||
; SKX-NEXT: kshiftlw $15, %k0, %k0
|
||||
; SKX-NEXT: kshiftrw $15, %k0, %k0
|
||||
; SKX-NEXT: kmovw %k0, %eax
|
||||
; SKX-NEXT: andl $1, %eax
|
||||
; SKX-NEXT: testb %al, %al
|
||||
; SKX-NEXT: kortestw %k0, %k0
|
||||
; SKX-NEXT: cmoveq %rsi, %rdi
|
||||
; SKX-NEXT: movq %rdi, %rax
|
||||
; SKX-NEXT: retq
|
||||
|
@ -310,9 +302,7 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
|
|||
; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
|
||||
; KNL-NEXT: kshiftlw $11, %k0, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: andl $1, %eax
|
||||
; KNL-NEXT: testb %al, %al
|
||||
; KNL-NEXT: kortestw %k0, %k0
|
||||
; KNL-NEXT: cmoveq %rsi, %rdi
|
||||
; KNL-NEXT: movq %rdi, %rax
|
||||
; KNL-NEXT: retq
|
||||
|
@ -322,9 +312,7 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
|
|||
; SKX-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
|
||||
; SKX-NEXT: kshiftlb $3, %k0, %k0
|
||||
; SKX-NEXT: kshiftrb $7, %k0, %k0
|
||||
; SKX-NEXT: kmovw %k0, %eax
|
||||
; SKX-NEXT: andl $1, %eax
|
||||
; SKX-NEXT: testb %al, %al
|
||||
; SKX-NEXT: kortestw %k0, %k0
|
||||
; SKX-NEXT: cmoveq %rsi, %rdi
|
||||
; SKX-NEXT: movq %rdi, %rax
|
||||
; SKX-NEXT: retq
|
||||
|
@ -1356,9 +1344,7 @@ define zeroext i8 @test_extractelement_v2i1(<2 x i64> %a, <2 x i64> %b) {
|
|||
; SKX-NEXT: vpcmpnleuq %xmm1, %xmm0, %k0
|
||||
; SKX-NEXT: kshiftlw $15, %k0, %k0
|
||||
; SKX-NEXT: kshiftrw $15, %k0, %k0
|
||||
; SKX-NEXT: kmovw %k0, %eax
|
||||
; SKX-NEXT: andl $1, %eax
|
||||
; SKX-NEXT: testb %al, %al
|
||||
; SKX-NEXT: kortestw %k0, %k0
|
||||
; SKX-NEXT: sete %al
|
||||
; SKX-NEXT: addb $3, %al
|
||||
; SKX-NEXT: movzbl %al, %eax
|
||||
|
@ -1438,9 +1424,7 @@ define zeroext i8 @test_extractelement_v64i1(<64 x i8> %a, <64 x i8> %b) {
|
|||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
|
||||
; SKX-NEXT: kshiftrq $63, %k0, %k0
|
||||
; SKX-NEXT: kmovw %k0, %eax
|
||||
; SKX-NEXT: andl $1, %eax
|
||||
; SKX-NEXT: testb %al, %al
|
||||
; SKX-NEXT: kortestw %k0, %k0
|
||||
; SKX-NEXT: sete %al
|
||||
; SKX-NEXT: addb $3, %al
|
||||
; SKX-NEXT: movzbl %al, %eax
|
||||
|
|
|
@ -1648,38 +1648,32 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
|
|||
; SKX-NEXT: kshiftrw $15, %k0, %k0
|
||||
; SKX-NEXT: vpmovsxdq %xmm1, %ymm1
|
||||
; SKX-NEXT: vpsllq $2, %ymm1, %ymm1
|
||||
; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0
|
||||
; SKX-NEXT: kmovw %k0, %eax
|
||||
; SKX-NEXT: andl $1, %eax
|
||||
; SKX-NEXT: # implicit-def: %XMM1
|
||||
; SKX-NEXT: testb %al, %al
|
||||
; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm1
|
||||
; SKX-NEXT: kortestw %k0, %k0
|
||||
; SKX-NEXT: # implicit-def: %XMM0
|
||||
; SKX-NEXT: je .LBB29_2
|
||||
; SKX-NEXT: # BB#1: # %cond.load
|
||||
; SKX-NEXT: vmovq %xmm0, %rax
|
||||
; SKX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; SKX-NEXT: vmovq %xmm1, %rax
|
||||
; SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SKX-NEXT: .LBB29_2: # %else
|
||||
; SKX-NEXT: kshiftlw $14, %k1, %k0
|
||||
; SKX-NEXT: kshiftrw $15, %k0, %k0
|
||||
; SKX-NEXT: kmovw %k0, %eax
|
||||
; SKX-NEXT: andl $1, %eax
|
||||
; SKX-NEXT: testb %al, %al
|
||||
; SKX-NEXT: kortestw %k0, %k0
|
||||
; SKX-NEXT: je .LBB29_4
|
||||
; SKX-NEXT: # BB#3: # %cond.load1
|
||||
; SKX-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; SKX-NEXT: vpinsrd $1, (%rax), %xmm1, %xmm1
|
||||
; SKX-NEXT: vpextrq $1, %xmm1, %rax
|
||||
; SKX-NEXT: vpinsrd $1, (%rax), %xmm0, %xmm0
|
||||
; SKX-NEXT: .LBB29_4: # %else2
|
||||
; SKX-NEXT: kshiftlw $13, %k1, %k0
|
||||
; SKX-NEXT: kshiftrw $15, %k0, %k0
|
||||
; SKX-NEXT: kmovw %k0, %eax
|
||||
; SKX-NEXT: andl $1, %eax
|
||||
; SKX-NEXT: testb %al, %al
|
||||
; SKX-NEXT: kortestw %k0, %k0
|
||||
; SKX-NEXT: je .LBB29_6
|
||||
; SKX-NEXT: # BB#5: # %cond.load4
|
||||
; SKX-NEXT: vextracti64x2 $1, %ymm0, %xmm0
|
||||
; SKX-NEXT: vmovq %xmm0, %rax
|
||||
; SKX-NEXT: vpinsrd $2, (%rax), %xmm1, %xmm1
|
||||
; SKX-NEXT: vextracti64x2 $1, %ymm1, %xmm1
|
||||
; SKX-NEXT: vmovq %xmm1, %rax
|
||||
; SKX-NEXT: vpinsrd $2, (%rax), %xmm0, %xmm0
|
||||
; SKX-NEXT: .LBB29_6: # %else5
|
||||
; SKX-NEXT: vpblendmd %xmm1, %xmm3, %xmm0 {%k1}
|
||||
; SKX-NEXT: vpblendmd %xmm0, %xmm3, %xmm0 {%k1}
|
||||
; SKX-NEXT: retq
|
||||
;
|
||||
; SKX_32-LABEL: test30:
|
||||
|
@ -1692,38 +1686,32 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
|
|||
; SKX_32-NEXT: kshiftlw $15, %k1, %k0
|
||||
; SKX_32-NEXT: kshiftrw $15, %k0, %k0
|
||||
; SKX_32-NEXT: vpslld $2, %xmm1, %xmm1
|
||||
; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; SKX_32-NEXT: kmovw %k0, %eax
|
||||
; SKX_32-NEXT: andl $1, %eax
|
||||
; SKX_32-NEXT: # implicit-def: %XMM1
|
||||
; SKX_32-NEXT: testb %al, %al
|
||||
; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm1
|
||||
; SKX_32-NEXT: kortestw %k0, %k0
|
||||
; SKX_32-NEXT: # implicit-def: %XMM0
|
||||
; SKX_32-NEXT: je .LBB29_2
|
||||
; SKX_32-NEXT: # BB#1: # %cond.load
|
||||
; SKX_32-NEXT: vmovd %xmm0, %eax
|
||||
; SKX_32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; SKX_32-NEXT: vmovd %xmm1, %eax
|
||||
; SKX_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SKX_32-NEXT: .LBB29_2: # %else
|
||||
; SKX_32-NEXT: kshiftlw $14, %k1, %k0
|
||||
; SKX_32-NEXT: kshiftrw $15, %k0, %k0
|
||||
; SKX_32-NEXT: kmovw %k0, %eax
|
||||
; SKX_32-NEXT: andl $1, %eax
|
||||
; SKX_32-NEXT: testb %al, %al
|
||||
; SKX_32-NEXT: kortestw %k0, %k0
|
||||
; SKX_32-NEXT: je .LBB29_4
|
||||
; SKX_32-NEXT: # BB#3: # %cond.load1
|
||||
; SKX_32-NEXT: vpextrd $1, %xmm0, %eax
|
||||
; SKX_32-NEXT: vpinsrd $1, (%eax), %xmm1, %xmm1
|
||||
; SKX_32-NEXT: vpextrd $1, %xmm1, %eax
|
||||
; SKX_32-NEXT: vpinsrd $1, (%eax), %xmm0, %xmm0
|
||||
; SKX_32-NEXT: .LBB29_4: # %else2
|
||||
; SKX_32-NEXT: vmovdqa32 {{[0-9]+}}(%esp), %xmm2
|
||||
; SKX_32-NEXT: kshiftlw $13, %k1, %k0
|
||||
; SKX_32-NEXT: kshiftrw $15, %k0, %k0
|
||||
; SKX_32-NEXT: kmovw %k0, %eax
|
||||
; SKX_32-NEXT: andl $1, %eax
|
||||
; SKX_32-NEXT: testb %al, %al
|
||||
; SKX_32-NEXT: kortestw %k0, %k0
|
||||
; SKX_32-NEXT: je .LBB29_6
|
||||
; SKX_32-NEXT: # BB#5: # %cond.load4
|
||||
; SKX_32-NEXT: vpextrd $2, %xmm0, %eax
|
||||
; SKX_32-NEXT: vpinsrd $2, (%eax), %xmm1, %xmm1
|
||||
; SKX_32-NEXT: vpextrd $2, %xmm1, %eax
|
||||
; SKX_32-NEXT: vpinsrd $2, (%eax), %xmm0, %xmm0
|
||||
; SKX_32-NEXT: .LBB29_6: # %else5
|
||||
; SKX_32-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
|
||||
; SKX_32-NEXT: vpblendmd %xmm0, %xmm2, %xmm0 {%k1}
|
||||
; SKX_32-NEXT: addl $12, %esp
|
||||
; SKX_32-NEXT: retl
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue