forked from OSchip/llvm-project
[X86] Add isel patterns to match BMI/TBMI instructions when lowering has turned the root nodes into one of the flag producing binops.
This fixes the patterns that have or/xor as a root. 'and' is handled differently since such nodes usually have a CMP wrapped around them. I had to look for uses of the CF flag because all these nodes have non-standard CF flag behavior. A real or/xor would always clear CF. In practice we shouldn't be using the CF flag from these nodes as far as I know. Differential Revision: https://reviews.llvm.org/D55813 llvm-svn: 349962
This commit is contained in:
parent
c6027e20d4
commit
e58cd9cbc6
|
@ -472,6 +472,9 @@ namespace {
|
|||
SDValue &InFlag);
|
||||
|
||||
bool tryOptimizeRem8Extend(SDNode *N);
|
||||
|
||||
bool hasNoSignFlagUses(SDValue Flags) const;
|
||||
bool hasNoCarryFlagUses(SDValue Flags) const;
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -2225,7 +2228,7 @@ static X86::CondCode getCondFromOpc(unsigned Opc) {
|
|||
|
||||
/// Test whether the given X86ISD::CMP node has any uses which require the SF
|
||||
/// flag to be accurate.
|
||||
static bool hasNoSignFlagUses(SDValue Flags) {
|
||||
bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
|
||||
// Examine each user of the node.
|
||||
for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
|
||||
UI != UE; ++UI) {
|
||||
|
@ -2265,7 +2268,7 @@ static bool hasNoSignFlagUses(SDValue Flags) {
|
|||
|
||||
/// Test whether the given node which sets flags has any uses which require the
|
||||
/// CF flag to be accurate.
|
||||
static bool hasNoCarryFlagUses(SDValue Flags) {
|
||||
bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {
|
||||
// Examine each user of the node.
|
||||
for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
|
||||
UI != UE; ++UI) {
|
||||
|
|
|
@ -2387,6 +2387,16 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in {
|
|||
// Pattern fragments to auto generate BMI instructions.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Pattern fragment that matches an X86or_flag node with operands $lhs and
// $rhs, but only when the predicate below accepts it: the match is rejected
// unless hasNoCarryFlagUses(SDValue(N, 1)) returns true.
// NOTE(review): result 1 of the node is presumably its flags output -- confirm
// against the X86or_flag node definition.
// The carry check is needed because the instructions selected through this
// fragment do not set CF the way a real 'or' does (a real 'or' always clears
// CF), so the replacement is only safe when nothing reads CF.
def or_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(X86or_flag node:$lhs, node:$rhs), [{
|
||||
return hasNoCarryFlagUses(SDValue(N, 1));
|
||||
}]>;
|
||||
|
||||
// Pattern fragment that matches an X86xor_flag node with operands $lhs and
// $rhs, but only when the predicate below accepts it: the match is rejected
// unless hasNoCarryFlagUses(SDValue(N, 1)) returns true.
// NOTE(review): result 1 of the node is presumably its flags output -- confirm
// against the X86xor_flag node definition.
// The carry check is needed because the instructions selected through this
// fragment do not set CF the way a real 'xor' does (a real 'xor' always clears
// CF), so the replacement is only safe when nothing reads CF.
def xor_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
(X86xor_flag node:$lhs, node:$rhs), [{
|
||||
return hasNoCarryFlagUses(SDValue(N, 1));
|
||||
}]>;
|
||||
|
||||
let Predicates = [HasBMI] in {
|
||||
// FIXME: patterns for the load versions are not implemented
|
||||
def : Pat<(and GR32:$src, (add GR32:$src, -1)),
|
||||
|
@ -2403,6 +2413,14 @@ let Predicates = [HasBMI] in {
|
|||
(BLSI32rr GR32:$src)>;
|
||||
def : Pat<(and GR64:$src, (ineg GR64:$src)),
|
||||
(BLSI64rr GR64:$src)>;
|
||||
|
||||
// Versions to match flag producing ops.
|
||||
// X86and_flag nodes are rarely created. Those should use CMP+AND. We do
|
||||
// TESTrr matching in PostProcessISelDAG to allow BLSR/BLSI to be formed.
|
||||
def : Pat<(xor_flag_nocf GR32:$src, (add GR32:$src, -1)),
|
||||
(BLSMSK32rr GR32:$src)>;
|
||||
def : Pat<(xor_flag_nocf GR64:$src, (add GR64:$src, -1)),
|
||||
(BLSMSK64rr GR64:$src)>;
|
||||
}
|
||||
|
||||
multiclass bmi_bextr<bits<8> opc, string mnemonic, RegisterClass RC,
|
||||
|
@ -2801,6 +2819,45 @@ let Predicates = [HasTBM] in {
|
|||
(TZMSK32rr GR32:$src)>;
|
||||
def : Pat<(and (not GR64:$src), (add GR64:$src, -1)),
|
||||
(TZMSK64rr GR64:$src)>;
|
||||
|
||||
// Patterns to match flag producing ops.
|
||||
// X86and_flag nodes are rarely created. Those should use CMP+AND. We do
|
||||
// TESTrr matching in PostProcessISelDAG to allow BLSR/BLSI to be formed.
|
||||
def : Pat<(or_flag_nocf GR32:$src, (not (add GR32:$src, 1))),
|
||||
(BLCI32rr GR32:$src)>;
|
||||
def : Pat<(or_flag_nocf GR64:$src, (not (add GR64:$src, 1))),
|
||||
(BLCI64rr GR64:$src)>;
|
||||
|
||||
// Extra patterns because opt can optimize the above patterns to this.
|
||||
def : Pat<(or_flag_nocf GR32:$src, (sub -2, GR32:$src)),
|
||||
(BLCI32rr GR32:$src)>;
|
||||
def : Pat<(or_flag_nocf GR64:$src, (sub -2, GR64:$src)),
|
||||
(BLCI64rr GR64:$src)>;
|
||||
|
||||
def : Pat<(xor_flag_nocf GR32:$src, (add GR32:$src, 1)),
|
||||
(BLCMSK32rr GR32:$src)>;
|
||||
def : Pat<(xor_flag_nocf GR64:$src, (add GR64:$src, 1)),
|
||||
(BLCMSK64rr GR64:$src)>;
|
||||
|
||||
def : Pat<(or_flag_nocf GR32:$src, (add GR32:$src, 1)),
|
||||
(BLCS32rr GR32:$src)>;
|
||||
def : Pat<(or_flag_nocf GR64:$src, (add GR64:$src, 1)),
|
||||
(BLCS64rr GR64:$src)>;
|
||||
|
||||
def : Pat<(or_flag_nocf GR32:$src, (add GR32:$src, -1)),
|
||||
(BLSFILL32rr GR32:$src)>;
|
||||
def : Pat<(or_flag_nocf GR64:$src, (add GR64:$src, -1)),
|
||||
(BLSFILL64rr GR64:$src)>;
|
||||
|
||||
def : Pat<(or_flag_nocf (not GR32:$src), (add GR32:$src, -1)),
|
||||
(BLSIC32rr GR32:$src)>;
|
||||
def : Pat<(or_flag_nocf (not GR64:$src), (add GR64:$src, -1)),
|
||||
(BLSIC64rr GR64:$src)>;
|
||||
|
||||
def : Pat<(or_flag_nocf (not GR32:$src), (add GR32:$src, 1)),
|
||||
(T1MSKC32rr GR32:$src)>;
|
||||
def : Pat<(or_flag_nocf (not GR64:$src), (add GR64:$src, 1)),
|
||||
(T1MSKC64rr GR64:$src)>;
|
||||
} // HasTBM
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -691,9 +691,7 @@ define i32 @blsmsk32_z(i32 %a, i32 %b) nounwind {
|
|||
define i32 @blsmsk32_z2(i32 %a, i32 %b, i32 %c) nounwind {
|
||||
; X86-LABEL: blsmsk32_z2:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: leal -1(%eax), %ecx
|
||||
; X86-NEXT: xorl %eax, %ecx
|
||||
; X86-NEXT: blsmskl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: cmovel %eax, %ecx
|
||||
|
@ -703,9 +701,7 @@ define i32 @blsmsk32_z2(i32 %a, i32 %b, i32 %c) nounwind {
|
|||
; X64-LABEL: blsmsk32_z2:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movl %esi, %eax
|
||||
; X64-NEXT: # kill: def $edi killed $edi def $rdi
|
||||
; X64-NEXT: leal -1(%rdi), %ecx
|
||||
; X64-NEXT: xorl %edi, %ecx
|
||||
; X64-NEXT: blsmskl %edi, %ecx
|
||||
; X64-NEXT: cmovnel %edx, %eax
|
||||
; X64-NEXT: retq
|
||||
%t0 = sub i32 %a, 1
|
||||
|
@ -800,8 +796,7 @@ define i64 @blsmsk64_z2(i64 %a, i64 %b, i64 %c) nounwind {
|
|||
; X64-LABEL: blsmsk64_z2:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: movq %rsi, %rax
|
||||
; X64-NEXT: leaq -1(%rdi), %rcx
|
||||
; X64-NEXT: xorq %rdi, %rcx
|
||||
; X64-NEXT: blsmskq %rdi, %rcx
|
||||
; X64-NEXT: cmovneq %rdx, %rax
|
||||
; X64-NEXT: retq
|
||||
%t0 = sub i64 %a, 1
|
||||
|
|
|
@ -226,10 +226,7 @@ define i32 @test_x86_tbm_blci_u32_z2(i32 %a, i32 %b, i32 %c) nounwind {
|
|||
; CHECK-LABEL: test_x86_tbm_blci_u32_z2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl %esi, %eax
|
||||
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
|
||||
; CHECK-NEXT: leal 1(%rdi), %ecx
|
||||
; CHECK-NEXT: notl %ecx
|
||||
; CHECK-NEXT: orl %edi, %ecx
|
||||
; CHECK-NEXT: blcil %edi, %ecx
|
||||
; CHECK-NEXT: cmovnel %edx, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%t0 = add i32 1, %a
|
||||
|
@ -269,9 +266,7 @@ define i64 @test_x86_tbm_blci_u64_z2(i64 %a, i64 %b, i64 %c) nounwind {
|
|||
; CHECK-LABEL: test_x86_tbm_blci_u64_z2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rsi, %rax
|
||||
; CHECK-NEXT: leaq 1(%rdi), %rcx
|
||||
; CHECK-NEXT: notq %rcx
|
||||
; CHECK-NEXT: orq %rdi, %rcx
|
||||
; CHECK-NEXT: blciq %rdi, %rcx
|
||||
; CHECK-NEXT: cmovneq %rdx, %rax
|
||||
; CHECK-NEXT: retq
|
||||
%t0 = add i64 1, %a
|
||||
|
@ -409,9 +404,7 @@ define i32 @test_x86_tbm_blcmsk_u32_z2(i32 %a, i32 %b, i32 %c) nounwind {
|
|||
; CHECK-LABEL: test_x86_tbm_blcmsk_u32_z2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl %esi, %eax
|
||||
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
|
||||
; CHECK-NEXT: leal 1(%rdi), %ecx
|
||||
; CHECK-NEXT: xorl %edi, %ecx
|
||||
; CHECK-NEXT: blcmskl %edi, %ecx
|
||||
; CHECK-NEXT: cmovnel %edx, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%t0 = add i32 %a, 1
|
||||
|
@ -448,8 +441,7 @@ define i64 @test_x86_tbm_blcmsk_u64_z2(i64 %a, i64 %b, i64 %c) nounwind {
|
|||
; CHECK-LABEL: test_x86_tbm_blcmsk_u64_z2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rsi, %rax
|
||||
; CHECK-NEXT: leaq 1(%rdi), %rcx
|
||||
; CHECK-NEXT: xorq %rdi, %rcx
|
||||
; CHECK-NEXT: blcmskq %rdi, %rcx
|
||||
; CHECK-NEXT: cmovneq %rdx, %rax
|
||||
; CHECK-NEXT: retq
|
||||
%t0 = add i64 %a, 1
|
||||
|
@ -486,9 +478,7 @@ define i32 @test_x86_tbm_blcs_u32_z2(i32 %a, i32 %b, i32 %c) nounwind {
|
|||
; CHECK-LABEL: test_x86_tbm_blcs_u32_z2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl %esi, %eax
|
||||
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
|
||||
; CHECK-NEXT: leal 1(%rdi), %ecx
|
||||
; CHECK-NEXT: orl %edi, %ecx
|
||||
; CHECK-NEXT: blcsl %edi, %ecx
|
||||
; CHECK-NEXT: cmovnel %edx, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%t0 = add i32 %a, 1
|
||||
|
@ -525,8 +515,7 @@ define i64 @test_x86_tbm_blcs_u64_z2(i64 %a, i64 %b, i64 %c) nounwind {
|
|||
; CHECK-LABEL: test_x86_tbm_blcs_u64_z2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rsi, %rax
|
||||
; CHECK-NEXT: leaq 1(%rdi), %rcx
|
||||
; CHECK-NEXT: orq %rdi, %rcx
|
||||
; CHECK-NEXT: blcsq %rdi, %rcx
|
||||
; CHECK-NEXT: cmovneq %rdx, %rax
|
||||
; CHECK-NEXT: retq
|
||||
%t0 = add i64 %a, 1
|
||||
|
@ -563,9 +552,7 @@ define i32 @test_x86_tbm_blsfill_u32_z2(i32 %a, i32 %b, i32 %c) nounwind {
|
|||
; CHECK-LABEL: test_x86_tbm_blsfill_u32_z2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl %esi, %eax
|
||||
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
|
||||
; CHECK-NEXT: leal -1(%rdi), %ecx
|
||||
; CHECK-NEXT: orl %edi, %ecx
|
||||
; CHECK-NEXT: blsfilll %edi, %ecx
|
||||
; CHECK-NEXT: cmovnel %edx, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%t0 = add i32 %a, -1
|
||||
|
@ -602,8 +589,7 @@ define i64 @test_x86_tbm_blsfill_u64_z2(i64 %a, i64 %b, i64 %c) nounwind {
|
|||
; CHECK-LABEL: test_x86_tbm_blsfill_u64_z2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rsi, %rax
|
||||
; CHECK-NEXT: leaq -1(%rdi), %rcx
|
||||
; CHECK-NEXT: orq %rdi, %rcx
|
||||
; CHECK-NEXT: blsfillq %rdi, %rcx
|
||||
; CHECK-NEXT: cmovneq %rdx, %rax
|
||||
; CHECK-NEXT: retq
|
||||
%t0 = add i64 %a, -1
|
||||
|
@ -642,10 +628,7 @@ define i32 @test_x86_tbm_blsic_u32_z2(i32 %a, i32 %b, i32 %c) nounwind {
|
|||
; CHECK-LABEL: test_x86_tbm_blsic_u32_z2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl %esi, %eax
|
||||
; CHECK-NEXT: movl %edi, %ecx
|
||||
; CHECK-NEXT: notl %ecx
|
||||
; CHECK-NEXT: decl %edi
|
||||
; CHECK-NEXT: orl %ecx, %edi
|
||||
; CHECK-NEXT: blsicl %edi, %ecx
|
||||
; CHECK-NEXT: cmovnel %edx, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%t0 = xor i32 %a, -1
|
||||
|
@ -685,10 +668,7 @@ define i64 @test_x86_tbm_blsic_u64_z2(i64 %a, i64 %b, i64 %c) nounwind {
|
|||
; CHECK-LABEL: test_x86_tbm_blsic_u64_z2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rsi, %rax
|
||||
; CHECK-NEXT: movq %rdi, %rcx
|
||||
; CHECK-NEXT: notq %rcx
|
||||
; CHECK-NEXT: decq %rdi
|
||||
; CHECK-NEXT: orq %rcx, %rdi
|
||||
; CHECK-NEXT: blsicq %rdi, %rcx
|
||||
; CHECK-NEXT: cmovneq %rdx, %rax
|
||||
; CHECK-NEXT: retq
|
||||
%t0 = xor i64 %a, -1
|
||||
|
@ -728,10 +708,7 @@ define i32 @test_x86_tbm_t1mskc_u32_z2(i32 %a, i32 %b, i32 %c) nounwind {
|
|||
; CHECK-LABEL: test_x86_tbm_t1mskc_u32_z2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl %esi, %eax
|
||||
; CHECK-NEXT: movl %edi, %ecx
|
||||
; CHECK-NEXT: notl %ecx
|
||||
; CHECK-NEXT: incl %edi
|
||||
; CHECK-NEXT: orl %ecx, %edi
|
||||
; CHECK-NEXT: t1mskcl %edi, %ecx
|
||||
; CHECK-NEXT: cmovnel %edx, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%t0 = xor i32 %a, -1
|
||||
|
@ -771,10 +748,7 @@ define i64 @test_x86_tbm_t1mskc_u64_z2(i64 %a, i64 %b, i64 %c) nounwind {
|
|||
; CHECK-LABEL: test_x86_tbm_t1mskc_u64_z2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rsi, %rax
|
||||
; CHECK-NEXT: movq %rdi, %rcx
|
||||
; CHECK-NEXT: notq %rcx
|
||||
; CHECK-NEXT: incq %rdi
|
||||
; CHECK-NEXT: orq %rcx, %rdi
|
||||
; CHECK-NEXT: t1mskcq %rdi, %rcx
|
||||
; CHECK-NEXT: cmovneq %rdx, %rax
|
||||
; CHECK-NEXT: retq
|
||||
%t0 = xor i64 %a, -1
|
||||
|
|
Loading…
Reference in New Issue