forked from OSchip/llvm-project
Don't use INSERT_SUBREG to model anyext operations on x86-64, as it leads to partial-register definitions. To help avoid redundant zero-extensions, also teach the h-register matching patterns that use movzbl to match anyext as well as zext.
llvm-svn: 80099
This commit is contained in:
parent
d926b985df
commit
6c23fa2442
|
@ -1599,30 +1599,15 @@ def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
|
|||
// For other extloads, use subregs, since the high contents of the register are
|
||||
// defined after an extload.
|
||||
// extloadi64i32: the 32-bit load (MOV32rm) supplies the x86_subreg_32bit part
// of an i64 result.
// NOTE(review): two alternative result lines follow -- INSERT_SUBREG into an
// IMPLICIT_DEF, and SUBREG_TO_REG (i64 0). This reads as the before/after sides
// of a diff shown together, so only one belongs in the real pattern; confirm
// against the repository which one is current.
def : Pat<(extloadi64i32 addr:$src),
|
||||
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), (MOV32rm addr:$src),
|
||||
(SUBREG_TO_REG (i64 0), (MOV32rm addr:$src),
|
||||
x86_subreg_32bit)>;
|
||||
// i16 extloads from i1 and from i8, both restricted to In64BitMode. Each one
// performs a byte load (MOV8rm) and inserts it at x86_subreg_8bit of an
// IMPLICIT_DEF i16, so the pattern itself never writes the remaining bits.
// NOTE(review): presumably this is the removed side of the hunk (the commit
// message argues against INSERT_SUBREG-style partial definitions) -- confirm.
def : Pat<(extloadi16i1 addr:$src),
|
||||
(INSERT_SUBREG (i16 (IMPLICIT_DEF)), (MOV8rm addr:$src),
|
||||
x86_subreg_8bit)>,
|
||||
Requires<[In64BitMode]>;
|
||||
def : Pat<(extloadi16i8 addr:$src),
|
||||
(INSERT_SUBREG (i16 (IMPLICIT_DEF)), (MOV8rm addr:$src),
|
||||
x86_subreg_8bit)>,
|
||||
Requires<[In64BitMode]>;
|
||||
|
||||
// anyext
// Every pattern in this group inserts the narrower source register into an
// IMPLICIT_DEF of the wider type at the matching subregister index
// (8-, 16- or 32-bit). The three i64 results are unconditional; the i16 and
// i32 results from GR8 are limited to In64BitMode.
// NOTE(review): the commit message says INSERT_SUBREG should no longer model
// anyext on x86-64, so this group is presumably the removed side of the hunk
// -- confirm.
|
||||
def : Pat<(i64 (anyext GR8:$src)),
|
||||
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>;
|
||||
def : Pat<(i64 (anyext GR16:$src)),
|
||||
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>;
|
||||
def : Pat<(i64 (anyext GR32:$src)),
|
||||
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, x86_subreg_32bit)>;
|
||||
def : Pat<(i16 (anyext GR8:$src)),
|
||||
(INSERT_SUBREG (i16 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>,
|
||||
Requires<[In64BitMode]>;
|
||||
def : Pat<(i32 (anyext GR8:$src)),
|
||||
(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>,
|
||||
Requires<[In64BitMode]>;
|
||||
// anyext. Define these to do an explicit zero-extend to
|
||||
// avoid partial-register updates.
// GR8 and GR16 sources select MOVZX64rr8 / MOVZX64rr16. The GR32 source emits
// no extension instruction in the pattern: it is wrapped in
// SUBREG_TO_REG (i64 0) at x86_subreg_32bit instead.
// NOTE(review): presumably that is sound because a 32-bit register write
// already clears the upper 32 bits -- confirm that whatever defines the GR32
// value really is such a write.
|
||||
def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>;
|
||||
def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>;
|
||||
def : Pat<(i64 (anyext GR32:$src)),
|
||||
(SUBREG_TO_REG (i64 0), GR32:$src, x86_subreg_32bit)>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Some peepholes
|
||||
|
@ -1720,6 +1705,11 @@ def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
|
|||
(EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
|
||||
x86_subreg_8bit_hi))>,
|
||||
Requires<[In64BitMode]>;
|
||||
// i32 anyext of a GR16 value shifted right by 8 (In64BitMode only): copy the
// source into the GR16_ABCD class, take its x86_subreg_8bit_hi subregister,
// and extend that byte with MOVZX32_NOREXrr8.
// NOTE(review): the NOREX variant is presumably needed because the high-byte
// registers cannot be encoded together with a REX prefix -- confirm.
def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
|
||||
(MOVZX32_NOREXrr8
|
||||
(EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
|
||||
x86_subreg_8bit_hi))>,
|
||||
Requires<[In64BitMode]>;
|
||||
def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
|
||||
(SUBREG_TO_REG
|
||||
(i64 0),
|
||||
|
@ -1727,6 +1717,13 @@ def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
|
|||
(EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
|
||||
x86_subreg_8bit_hi)),
|
||||
x86_subreg_32bit)>;
|
||||
// i64 anyext of a GR16 value shifted right by 8: extract x86_subreg_8bit_hi
// from the source (constrained to GR16_ABCD), extend it to 32 bits with
// MOVZX32_NOREXrr8, then present that 32-bit result as an i64 through
// SUBREG_TO_REG (i64 0) at x86_subreg_32bit. No 64-bit extension instruction
// appears in the pattern.
def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
|
||||
(SUBREG_TO_REG
|
||||
(i64 0),
|
||||
(MOVZX32_NOREXrr8
|
||||
(EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
|
||||
x86_subreg_8bit_hi)),
|
||||
x86_subreg_32bit)>;
|
||||
|
||||
// h-register extract and store.
|
||||
def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
|
||||
|
|
|
@ -3630,21 +3630,17 @@ def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
|
|||
|
||||
// extload bool -> extload byte
// The i8 result uses a plain byte load (MOV8rm); the i16 and i32 results use
// the MOVZX*rm8 / MOVZX32rm16 loads.
// NOTE(review): extloadi16i1 and extloadi16i8 each appear twice below, once
// guarded by Requires<[In32BitMode]> and once unguarded. This looks like the
// before/after sides of a diff shown together; presumably only the unguarded
// form is current -- confirm against the repository.
|
||||
def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
|
||||
def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>,
|
||||
Requires<[In32BitMode]>;
|
||||
def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
|
||||
def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
|
||||
def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>,
|
||||
Requires<[In32BitMode]>;
|
||||
def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
|
||||
def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
|
||||
def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
|
||||
|
||||
// anyext
// GR8 sources select MOVZX16rr8 / MOVZX32rr8, but only under In32BitMode.
// The GR16 -> i32 case is unguarded and inserts the source at x86_subreg_16bit
// of an IMPLICIT_DEF i32, so the pattern never writes the upper 16 bits.
// NOTE(review): presumably the removed side of the hunk (a later group defines
// the same three anyext patterns without the predicate and without
// INSERT_SUBREG) -- confirm.
|
||||
def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>,
|
||||
Requires<[In32BitMode]>;
|
||||
def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>,
|
||||
Requires<[In32BitMode]>;
|
||||
def : Pat<(i32 (anyext GR16:$src)),
|
||||
(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>;
|
||||
// anyext. Define these to do an explicit zero-extend to
|
||||
// avoid partial-register updates.
// All three patterns carry no Requires<> predicate and each selects a MOVZX
// register-to-register instruction sized for its source and result:
// GR8 -> i16, GR8 -> i32 and GR16 -> i32.
|
||||
def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>;
|
||||
def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
|
||||
def : Pat<(i32 (anyext GR16:$src)), (MOVZX32rr16 GR16:$src)>;
|
||||
|
||||
// (and (i32 load), 255) -> (zextload i8)
|
||||
def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 255))),
|
||||
|
@ -3725,6 +3721,10 @@ def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
|
|||
(MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
|
||||
x86_subreg_8bit_hi))>,
|
||||
Requires<[In32BitMode]>;
|
||||
// i32 anyext of a GR16 value shifted right by 8 (In32BitMode only): copy the
// source into the GR16_ABCD class, take its x86_subreg_8bit_hi subregister,
// and extend that byte with MOVZX32rr8.
def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
|
||||
(MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
|
||||
x86_subreg_8bit_hi))>,
|
||||
Requires<[In32BitMode]>;
|
||||
def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
|
||||
(MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD),
|
||||
x86_subreg_8bit_hi))>,
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
; RUN: llvm-as < %s | llc -march=x86-64 | grep movzbl | count 2
|
||||
|
||||
; Use movzbl to avoid partial-register updates.
|
||||
|
||||
; @foo: 8-bit case. Truncates %p to i8, divides it (unsigned) by the zeroext
; i8 argument %x, zero-extends the quotient back to i32 and returns only its
; lowest bit.
define i32 @foo(i32 %p, i8 zeroext %x) nounwind {
|
||||
%q = trunc i32 %p to i8
|
||||
%r = udiv i8 %q, %x
|
||||
%s = zext i8 %r to i32
|
||||
%t = and i32 %s, 1
|
||||
ret i32 %t
|
||||
}
|
||||
; @bar: 16-bit case. Truncates %p to i16, divides it (unsigned) by the zeroext
; i16 argument %x, zero-extends the quotient back to i32 and returns only its
; lowest bit.
define i32 @bar(i32 %p, i16 zeroext %x) nounwind {
|
||||
%q = trunc i32 %p to i16
|
||||
%r = udiv i16 %q, %x
|
||||
%s = zext i16 %r to i32
|
||||
%t = and i32 %s, 1
|
||||
ret i32 %t
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 2
|
||||
; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 3
|
||||
|
||||
define fastcc i32 @sqlite3ExprResolveNames() nounwind {
|
||||
entry:
|
||||
|
|
Loading…
Reference in New Issue