forked from OSchip/llvm-project
[X86] Don't emit KTEST instructions unless only the Z flag is being used
Summary: KTEST has weird flag behavior. The Z flag is set for all bits in the AND of the k-registers being 0, and the C flag is set for all bits being 1. All other flags are cleared. We currently emit this instruction in EmitTEST and don't check the condition code. This can lead to strange things like using the S flag after a KTEST for a signed compare. The domain reassignment pass can also transform TEST instructions into KTEST and is not protected against the flag usage either. For now I've disabled this part of the domain reassignment pass. I tried to comment out the checks in the mir test so that we could recover them later, but I couldn't figure out how to get that to work. This patch moves the KTEST handling into LowerSETCC and now creates a ktest+x86setcc. I've chosen this approach because I'd like to add support for the C flag for all ones in a followup patch. To do that requires that I can rewrite the condition code going in the x86setcc to be different than the original SETCC condition code. This fixes PR36182. I'll file a PR to fix domain reassignment once this goes in. Should this be merged to 6.0? Reviewers: spatel, guyblank, RKSimon, zvi Reviewed By: guyblank Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D42770 llvm-svn: 324576
This commit is contained in:
parent
7e5ee26d1a
commit
f5465f98d2
|
@ -663,8 +663,10 @@ void X86DomainReassignment::initConverters() {
|
|||
createReplacer(X86::XOR32rr, X86::KXORDrr);
|
||||
createReplacer(X86::XOR64rr, X86::KXORQrr);
|
||||
|
||||
createReplacer(X86::TEST32rr, X86::KTESTDrr);
|
||||
createReplacer(X86::TEST64rr, X86::KTESTQrr);
|
||||
// TODO: KTEST is not a replacement for TEST due to flag differences. Need
|
||||
// to prove only Z flag is used.
|
||||
//createReplacer(X86::TEST32rr, X86::KTESTDrr);
|
||||
//createReplacer(X86::TEST64rr, X86::KTESTQrr);
|
||||
}
|
||||
|
||||
if (STI->hasDQI()) {
|
||||
|
@ -684,8 +686,10 @@ void X86DomainReassignment::initConverters() {
|
|||
createReplacer(X86::SHR8ri, X86::KSHIFTRBri);
|
||||
createReplacer(X86::SHL8ri, X86::KSHIFTLBri);
|
||||
|
||||
createReplacer(X86::TEST8rr, X86::KTESTBrr);
|
||||
createReplacer(X86::TEST16rr, X86::KTESTWrr);
|
||||
// TODO: KTEST is not a replacement for TEST due to flag differences. Need
|
||||
// to prove only Z flag is used.
|
||||
//createReplacer(X86::TEST8rr, X86::KTESTBrr);
|
||||
//createReplacer(X86::TEST16rr, X86::KTESTWrr);
|
||||
|
||||
createReplacer(X86::XOR8rr, X86::KXORBrr);
|
||||
}
|
||||
|
|
|
@ -17114,24 +17114,6 @@ static bool hasNonFlagsUse(SDValue Op) {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Emit KTEST instruction for bit vectors on AVX-512
|
||||
static SDValue EmitKTEST(SDValue Op, SelectionDAG &DAG,
|
||||
const X86Subtarget &Subtarget) {
|
||||
if (Op.getOpcode() == ISD::BITCAST) {
|
||||
auto hasKTEST = [&](MVT VT) {
|
||||
unsigned SizeInBits = VT.getSizeInBits();
|
||||
return (Subtarget.hasDQI() && (SizeInBits == 8 || SizeInBits == 16)) ||
|
||||
(Subtarget.hasBWI() && (SizeInBits == 32 || SizeInBits == 64));
|
||||
};
|
||||
SDValue Op0 = Op.getOperand(0);
|
||||
MVT Op0VT = Op0.getValueType().getSimpleVT();
|
||||
if (Op0VT.isVector() && Op0VT.getVectorElementType() == MVT::i1 &&
|
||||
hasKTEST(Op0VT))
|
||||
return DAG.getNode(X86ISD::KTEST, SDLoc(Op), Op0VT, Op0, Op0);
|
||||
}
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// Emit nodes that will be selected as "test Op0,Op0", or something
|
||||
/// equivalent.
|
||||
SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
|
||||
|
@ -17171,9 +17153,6 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
|
|||
// doing a separate TEST. TEST always sets OF and CF to 0, so unless
|
||||
// we prove that the arithmetic won't overflow, we can't use OF or CF.
|
||||
if (Op.getResNo() != 0 || NeedOF || NeedCF) {
|
||||
// Emit KTEST for bit vectors
|
||||
if (auto Node = EmitKTEST(Op, DAG, Subtarget))
|
||||
return Node;
|
||||
// Emit a CMP with 0, which is the TEST pattern.
|
||||
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
|
||||
DAG.getConstant(0, dl, Op.getValueType()));
|
||||
|
@ -17396,10 +17375,6 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
|
|||
}
|
||||
|
||||
if (Opcode == 0) {
|
||||
// Emit KTEST for bit vectors
|
||||
if (auto Node = EmitKTEST(Op, DAG, Subtarget))
|
||||
return Node;
|
||||
|
||||
// Emit a CMP with 0, which is the TEST pattern.
|
||||
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
|
||||
DAG.getConstant(0, dl, Op.getValueType()));
|
||||
|
@ -18146,6 +18121,34 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
|
|||
return Result;
|
||||
}
|
||||
|
||||
// Try to select this as a KTEST+SETCC if possible.
|
||||
static SDValue EmitKTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC,
|
||||
const SDLoc &dl, SelectionDAG &DAG,
|
||||
const X86Subtarget &Subtarget) {
|
||||
// Only support equality comparisons.
|
||||
if (CC != ISD::SETEQ && CC != ISD::SETNE)
|
||||
return SDValue();
|
||||
|
||||
// Must be a bitcast from vXi1.
|
||||
if (Op0.getOpcode() != ISD::BITCAST)
|
||||
return SDValue();
|
||||
|
||||
Op0 = Op0.getOperand(0);
|
||||
MVT VT = Op0.getSimpleValueType();
|
||||
if (!(Subtarget.hasDQI() && (VT == MVT::v8i1 || VT == MVT::v16i1)) &&
|
||||
!(Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1)))
|
||||
return SDValue();
|
||||
|
||||
X86::CondCode X86CC;
|
||||
if (isNullConstant(Op1)) {
|
||||
X86CC = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE;
|
||||
} else
|
||||
return SDValue();
|
||||
|
||||
SDValue KTEST = DAG.getNode(X86ISD::KTEST, dl, MVT::i32, Op0, Op0);
|
||||
return getSETCC(X86CC, KTEST, dl, DAG);
|
||||
}
|
||||
|
||||
SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
|
@ -18176,6 +18179,10 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
|
|||
return NewSetCC;
|
||||
}
|
||||
|
||||
// Try to lower using KTEST.
|
||||
if (SDValue NewSetCC = EmitKTEST(Op0, Op1, CC, dl, DAG, Subtarget))
|
||||
return NewSetCC;
|
||||
|
||||
// Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
|
||||
// these.
|
||||
if ((isOneConstant(Op1) || isNullConstant(Op1)) &&
|
||||
|
|
|
@ -2694,3 +2694,95 @@ define i8 @test_v8i1_mul(i8 %x, i8 %y) {
|
|||
%ret = bitcast <8 x i1> %m2 to i8
|
||||
ret i8 %ret
|
||||
}
|
||||
|
||||
; Make sure we don't emit a ktest for signed comparisons.
|
||||
define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
|
||||
; KNL-LABEL: ktest_signed:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: pushq %rax
|
||||
; KNL-NEXT: .cfi_def_cfa_offset 16
|
||||
; KNL-NEXT: vporq %zmm1, %zmm0, %zmm0
|
||||
; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: testw %ax, %ax
|
||||
; KNL-NEXT: jle LBB64_1
|
||||
; KNL-NEXT: ## %bb.2: ## %bb.2
|
||||
; KNL-NEXT: popq %rax
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
; KNL-NEXT: LBB64_1: ## %bb.1
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: callq _foo
|
||||
; KNL-NEXT: popq %rax
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: ktest_signed:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: pushq %rax
|
||||
; SKX-NEXT: .cfi_def_cfa_offset 16
|
||||
; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0
|
||||
; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
||||
; SKX-NEXT: kmovd %k0, %eax
|
||||
; SKX-NEXT: testw %ax, %ax
|
||||
; SKX-NEXT: jle LBB64_1
|
||||
; SKX-NEXT: ## %bb.2: ## %bb.2
|
||||
; SKX-NEXT: popq %rax
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
; SKX-NEXT: LBB64_1: ## %bb.1
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: callq _foo
|
||||
; SKX-NEXT: popq %rax
|
||||
; SKX-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: ktest_signed:
|
||||
; AVX512BW: ## %bb.0:
|
||||
; AVX512BW-NEXT: pushq %rax
|
||||
; AVX512BW-NEXT: .cfi_def_cfa_offset 16
|
||||
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
||||
; AVX512BW-NEXT: kmovd %k0, %eax
|
||||
; AVX512BW-NEXT: testw %ax, %ax
|
||||
; AVX512BW-NEXT: jle LBB64_1
|
||||
; AVX512BW-NEXT: ## %bb.2: ## %bb.2
|
||||
; AVX512BW-NEXT: popq %rax
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
; AVX512BW-NEXT: LBB64_1: ## %bb.1
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: callq _foo
|
||||
; AVX512BW-NEXT: popq %rax
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: ktest_signed:
|
||||
; AVX512DQ: ## %bb.0:
|
||||
; AVX512DQ-NEXT: pushq %rax
|
||||
; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
|
||||
; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
|
||||
; AVX512DQ-NEXT: kmovw %k0, %eax
|
||||
; AVX512DQ-NEXT: testw %ax, %ax
|
||||
; AVX512DQ-NEXT: jle LBB64_1
|
||||
; AVX512DQ-NEXT: ## %bb.2: ## %bb.2
|
||||
; AVX512DQ-NEXT: popq %rax
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: retq
|
||||
; AVX512DQ-NEXT: LBB64_1: ## %bb.1
|
||||
; AVX512DQ-NEXT: vzeroupper
|
||||
; AVX512DQ-NEXT: callq _foo
|
||||
; AVX512DQ-NEXT: popq %rax
|
||||
; AVX512DQ-NEXT: retq
|
||||
%a = icmp eq <16 x i32> %x, zeroinitializer
|
||||
%b = icmp eq <16 x i32> %y, zeroinitializer
|
||||
%c = and <16 x i1> %a, %b
|
||||
%d = bitcast <16 x i1> %c to i16
|
||||
%e = icmp sgt i16 %d, 0
|
||||
br i1 %e, label %bb.2, label %bb.1
|
||||
bb.1:
|
||||
call void @foo()
|
||||
br label %bb.2
|
||||
bb.2:
|
||||
ret void
|
||||
}
|
||||
declare void @foo()
|
||||
|
||||
|
|
|
@ -256,7 +256,7 @@ constants:
|
|||
body: |
|
||||
; CHECK-LABEL: name: test_8bitops
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: liveins: $rdi, $zmm0, $zmm1, $zmm2, $zmm3
|
||||
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY $zmm0
|
||||
|
@ -277,9 +277,6 @@ body: |
|
|||
; CHECK: [[COPY8:%[0-9]+]]:vk8wm = COPY [[COPY7]]
|
||||
; CHECK: [[VMOVAPDZrrk:%[0-9]+]]:vr512 = VMOVAPDZrrk [[COPY2]], killed [[COPY8]], [[COPY1]]
|
||||
; CHECK: VMOVAPDZmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVAPDZrrk]]
|
||||
; CHECK: KTESTBrr [[KADDBrr]], [[KADDBrr]], implicit-def $eflags
|
||||
; CHECK: JE_1 %bb.1, implicit $eflags
|
||||
; CHECK: JMP_1 %bb.2
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: bb.2:
|
||||
|
@ -311,9 +308,10 @@ body: |
|
|||
%11 = VMOVAPDZrrk %2, killed %10, %1
|
||||
VMOVAPDZmr %0, 1, $noreg, 0, $noreg, killed %11
|
||||
|
||||
TEST8rr %18, %18, implicit-def $eflags
|
||||
JE_1 %bb.1, implicit $eflags
|
||||
JMP_1 %bb.2
|
||||
; FIXME We can't replace TEST with KTEST due to flag differences
|
||||
; TEST8rr %18, %18, implicit-def $eflags
|
||||
; JE_1 %bb.1, implicit $eflags
|
||||
; JMP_1 %bb.2
|
||||
|
||||
bb.1:
|
||||
|
||||
|
@ -377,7 +375,7 @@ constants:
|
|||
body: |
|
||||
; CHECK-LABEL: name: test_16bitops
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: liveins: $rdi, $zmm0, $zmm1, $zmm2, $zmm3
|
||||
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY $zmm0
|
||||
|
@ -397,9 +395,6 @@ body: |
|
|||
; CHECK: [[COPY8:%[0-9]+]]:vk16wm = COPY [[COPY7]]
|
||||
; CHECK: [[VMOVAPSZrrk:%[0-9]+]]:vr512 = VMOVAPSZrrk [[COPY2]], killed [[COPY8]], [[COPY1]]
|
||||
; CHECK: VMOVAPSZmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVAPSZrrk]]
|
||||
; CHECK: KTESTWrr [[KXORWrr]], [[KXORWrr]], implicit-def $eflags
|
||||
; CHECK: JE_1 %bb.1, implicit $eflags
|
||||
; CHECK: JMP_1 %bb.2
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: bb.2:
|
||||
|
@ -430,9 +425,10 @@ body: |
|
|||
%11 = VMOVAPSZrrk %2, killed %10, %1
|
||||
VMOVAPSZmr %0, 1, $noreg, 0, $noreg, killed %11
|
||||
|
||||
TEST16rr %17, %17, implicit-def $eflags
|
||||
JE_1 %bb.1, implicit $eflags
|
||||
JMP_1 %bb.2
|
||||
; FIXME We can't replace TEST with KTEST due to flag differences
|
||||
; FIXME TEST16rr %17, %17, implicit-def $eflags
|
||||
; FIXME JE_1 %bb.1, implicit $eflags
|
||||
; FIXME JMP_1 %bb.2
|
||||
|
||||
bb.1:
|
||||
|
||||
|
@ -490,7 +486,7 @@ constants:
|
|||
body: |
|
||||
; CHECK-LABEL: name: test_32bitops
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: liveins: $rdi, $zmm0, $zmm1
|
||||
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY $zmm0
|
||||
|
@ -507,9 +503,6 @@ body: |
|
|||
; CHECK: [[COPY3:%[0-9]+]]:vk32wm = COPY [[KADDDrr]]
|
||||
; CHECK: [[VMOVDQU16Zrrk:%[0-9]+]]:vr512 = VMOVDQU16Zrrk [[COPY2]], killed [[COPY3]], [[COPY1]]
|
||||
; CHECK: VMOVDQA32Zmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVDQU16Zrrk]]
|
||||
; CHECK: KTESTDrr [[KADDDrr]], [[KADDDrr]], implicit-def $eflags
|
||||
; CHECK: JE_1 %bb.1, implicit $eflags
|
||||
; CHECK: JMP_1 %bb.2
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: bb.2:
|
||||
|
@ -535,9 +528,10 @@ body: |
|
|||
%4 = VMOVDQU16Zrrk %2, killed %3, %1
|
||||
VMOVDQA32Zmr %0, 1, $noreg, 0, $noreg, killed %4
|
||||
|
||||
TEST32rr %13, %13, implicit-def $eflags
|
||||
JE_1 %bb.1, implicit $eflags
|
||||
JMP_1 %bb.2
|
||||
; FIXME We can't replace TEST with KTEST due to flag differences
|
||||
; FIXME TEST32rr %13, %13, implicit-def $eflags
|
||||
; FIXME JE_1 %bb.1, implicit $eflags
|
||||
; FIXME JMP_1 %bb.2
|
||||
|
||||
bb.1:
|
||||
|
||||
|
@ -595,7 +589,7 @@ constants:
|
|||
body: |
|
||||
; CHECK-LABEL: name: test_64bitops
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: liveins: $rdi, $zmm0, $zmm1
|
||||
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vr512 = COPY $zmm0
|
||||
|
@ -612,9 +606,6 @@ body: |
|
|||
; CHECK: [[COPY3:%[0-9]+]]:vk64wm = COPY [[KADDQrr]]
|
||||
; CHECK: [[VMOVDQU8Zrrk:%[0-9]+]]:vr512 = VMOVDQU8Zrrk [[COPY2]], killed [[COPY3]], [[COPY1]]
|
||||
; CHECK: VMOVDQA32Zmr [[COPY]], 1, $noreg, 0, $noreg, killed [[VMOVDQU8Zrrk]]
|
||||
; CHECK: KTESTQrr [[KADDQrr]], [[KADDQrr]], implicit-def $eflags
|
||||
; CHECK: JE_1 %bb.1, implicit $eflags
|
||||
; CHECK: JMP_1 %bb.2
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: bb.2:
|
||||
|
@ -640,9 +631,10 @@ body: |
|
|||
%4 = VMOVDQU8Zrrk %2, killed %3, %1
|
||||
VMOVDQA32Zmr %0, 1, $noreg, 0, $noreg, killed %4
|
||||
|
||||
TEST64rr %13, %13, implicit-def $eflags
|
||||
JE_1 %bb.1, implicit $eflags
|
||||
JMP_1 %bb.2
|
||||
; FIXME We can't replace TEST with KTEST due to flag differences
|
||||
; FIXME TEST64rr %13, %13, implicit-def $eflags
|
||||
; FIXME JE_1 %bb.1, implicit $eflags
|
||||
; FIXME JMP_1 %bb.2
|
||||
|
||||
bb.1:
|
||||
|
||||
|
|
Loading…
Reference in New Issue