forked from OSchip/llvm-project
Select an OR with immediate as an ADD if the input bits are known zero. This allows the instruction to be converted to three-address form if needed.
llvm-svn: 93152
This commit is contained in:
parent
206351a1ff
commit
64d9f40557
|
@ -1106,13 +1106,13 @@ def OR64rm : RI<0x0B, MRMSrcMem , (outs GR64:$dst),
|
|||
def OR64ri8 : RIi8<0x83, MRM1r, (outs GR64:$dst),
|
||||
(ins GR64:$src1, i64i8imm:$src2),
|
||||
"or{q}\t{$src2, $dst|$dst, $src2}",
|
||||
[(set GR64:$dst, (or GR64:$src1, i64immSExt8:$src2)),
|
||||
(implicit EFLAGS)]>;
|
||||
[(set GR64:$dst, (or_not_add GR64:$src1, i64immSExt8:$src2)),
|
||||
(implicit EFLAGS)]>;
|
||||
def OR64ri32 : RIi32<0x81, MRM1r, (outs GR64:$dst),
|
||||
(ins GR64:$src1, i64i32imm:$src2),
|
||||
"or{q}\t{$src2, $dst|$dst, $src2}",
|
||||
[(set GR64:$dst, (or GR64:$src1, i64immSExt32:$src2)),
|
||||
(implicit EFLAGS)]>;
|
||||
[(set GR64:$dst, (or_not_add GR64:$src1, i64immSExt32:$src2)),
|
||||
(implicit EFLAGS)]>;
|
||||
} // isTwoAddress
|
||||
|
||||
def OR64mr : RI<0x09, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
|
||||
|
@ -2114,6 +2114,14 @@ def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1),
|
|||
GR64:$src2, (i8 imm:$amt2)), addr:$dst),
|
||||
(SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>;
|
||||
|
||||
// (or x, c) -> (add x, c) if masked bits are known zero.
|
||||
def : Pat<(parallel (or_is_add GR64:$src1, i64immSExt8:$src2),
|
||||
(implicit EFLAGS)),
|
||||
(ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
|
||||
def : Pat<(parallel (or_is_add GR64:$src1, i64immSExt32:$src2),
|
||||
(implicit EFLAGS)),
|
||||
(ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
|
||||
|
||||
// X86 specific add which produces a flag.
|
||||
def : Pat<(addc GR64:$src1, GR64:$src2),
|
||||
(ADD64rr GR64:$src1, GR64:$src2)>;
|
||||
|
|
|
@ -493,6 +493,18 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
|
|||
return N->hasOneUse();
|
||||
}]>;
|
||||
|
||||
// Treat an 'or' node as an 'add' if the or'ed bits are known to be zero.
|
||||
def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
|
||||
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
|
||||
return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
|
||||
return false;
|
||||
}]>;
|
||||
// Match an 'or' node only when it must stay an 'or': either the right-hand
// operand is not a constant, or the constant's bits are not all known to be
// zero in the left-hand operand. This is the logical complement of the
// or_is_add predicate, so a given 'or' node satisfies exactly one of the two.
def or_not_add : PatFrag<(ops node:$lhs, node:$rhs),(or node:$lhs, node:$rhs),[{
|
||||
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
|
||||
if (!CN) return true;
|
||||
return !CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
|
||||
}]>;
|
||||
|
||||
// 'shld' and 'shrd' instruction patterns. Note that even though these have
|
||||
// the srl and shl in their patterns, the C++ code must still check for them,
|
||||
// because predicates are tested before children nodes are explored.
|
||||
|
@ -1880,28 +1892,28 @@ def OR32rm : I<0x0B, MRMSrcMem , (outs GR32:$dst),
|
|||
def OR8ri : Ii8 <0x80, MRM1r, (outs GR8 :$dst),
|
||||
(ins GR8 :$src1, i8imm:$src2),
|
||||
"or{b}\t{$src2, $dst|$dst, $src2}",
|
||||
[(set GR8:$dst, (or GR8:$src1, imm:$src2)),
|
||||
[(set GR8:$dst, (or_not_add GR8:$src1, imm:$src2)),
|
||||
(implicit EFLAGS)]>;
|
||||
def OR16ri : Ii16<0x81, MRM1r, (outs GR16:$dst),
|
||||
(ins GR16:$src1, i16imm:$src2),
|
||||
"or{w}\t{$src2, $dst|$dst, $src2}",
|
||||
[(set GR16:$dst, (or GR16:$src1, imm:$src2)),
|
||||
[(set GR16:$dst, (or_not_add GR16:$src1, imm:$src2)),
|
||||
(implicit EFLAGS)]>, OpSize;
|
||||
def OR32ri : Ii32<0x81, MRM1r, (outs GR32:$dst),
|
||||
(ins GR32:$src1, i32imm:$src2),
|
||||
"or{l}\t{$src2, $dst|$dst, $src2}",
|
||||
[(set GR32:$dst, (or GR32:$src1, imm:$src2)),
|
||||
[(set GR32:$dst, (or_not_add GR32:$src1, imm:$src2)),
|
||||
(implicit EFLAGS)]>;
|
||||
|
||||
def OR16ri8 : Ii8<0x83, MRM1r, (outs GR16:$dst),
|
||||
(ins GR16:$src1, i16i8imm:$src2),
|
||||
"or{w}\t{$src2, $dst|$dst, $src2}",
|
||||
[(set GR16:$dst, (or GR16:$src1, i16immSExt8:$src2)),
|
||||
[(set GR16:$dst, (or_not_add GR16:$src1, i16immSExt8:$src2)),
|
||||
(implicit EFLAGS)]>, OpSize;
|
||||
def OR32ri8 : Ii8<0x83, MRM1r, (outs GR32:$dst),
|
||||
(ins GR32:$src1, i32i8imm:$src2),
|
||||
"or{l}\t{$src2, $dst|$dst, $src2}",
|
||||
[(set GR32:$dst, (or GR32:$src1, i32immSExt8:$src2)),
|
||||
[(set GR32:$dst, (or_not_add GR32:$src1, i32immSExt8:$src2)),
|
||||
(implicit EFLAGS)]>;
|
||||
let isTwoAddress = 0 in {
|
||||
def OR8mr : I<0x08, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
|
||||
|
@ -4647,6 +4659,23 @@ def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
|
|||
def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
|
||||
(SETB_C32r)>;
|
||||
|
||||
// (or x, c) -> (add x, c) if masked bits are known zero.
|
||||
def : Pat<(parallel (or_is_add GR8:$src1, imm:$src2),
|
||||
(implicit EFLAGS)),
|
||||
(ADD8ri GR8:$src1, imm:$src2)>;
|
||||
def : Pat<(parallel (or_is_add GR16:$src1, imm:$src2),
|
||||
(implicit EFLAGS)),
|
||||
(ADD16ri GR16:$src1, imm:$src2)>;
|
||||
def : Pat<(parallel (or_is_add GR32:$src1, imm:$src2),
|
||||
(implicit EFLAGS)),
|
||||
(ADD32ri GR32:$src1, imm:$src2)>;
|
||||
def : Pat<(parallel (or_is_add GR16:$src1, i16immSExt8:$src2),
|
||||
(implicit EFLAGS)),
|
||||
(ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
|
||||
def : Pat<(parallel (or_is_add GR32:$src1, i32immSExt8:$src2),
|
||||
(implicit EFLAGS)),
|
||||
(ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// EFLAGS-defining Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -1,12 +1,18 @@
|
|||
; RUN: llc < %s | grep -E {sar|shl|mov|or} | count 4
|
||||
; RUN: llc < %s | FileCheck %s
|
||||
|
||||
; Check that (shr (shl X, 56), 48) is not mistakenly turned into
|
||||
; a shr (X, -8) that gets subsequently "optimized away" as undef
|
||||
; PR4254
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define i64 @foo(i64 %b) nounwind readnone {
|
||||
entry:
|
||||
; CHECK: foo:
|
||||
; CHECK: shlq $56, %rdi
|
||||
; CHECK: sarq $48, %rdi
|
||||
; CHECK: leaq 1(%rdi), %rax
|
||||
%shl = shl i64 %b, 56 ; <i64> [#uses=1]
|
||||
%shr = ashr i64 %shl, 48 ; <i64> [#uses=1]
|
||||
%add5 = or i64 %shr, 1 ; <i64> [#uses=1]
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
|
||||
; rdar://7527734
|
||||
|
||||
define i32 @test(i32 %x) nounwind readnone ssp {
|
||||
entry:
|
||||
; CHECK: test:
|
||||
; CHECK: leal 3(%rdi), %eax
|
||||
%0 = shl i32 %x, 5 ; <i32> [#uses=1]
|
||||
%1 = or i32 %0, 3 ; <i32> [#uses=1]
|
||||
ret i32 %1
|
||||
}
|
Loading…
Reference in New Issue