[X86] Use (SUBREG_TO_REG (MOV32rm)) for extloadi64i8/extloadi64i16 when the load is 4 byte aligned or better and not volatile.

Summary:
Previously we would use MOVZXrm8/MOVZXrm16, but those are longer encodings.

This is similar to what we do in the loadi32 predicate.

Reviewers: RKSimon, spatel

Reviewed By: RKSimon

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D60341

llvm-svn: 357875
This commit is contained in:
Craig Topper 2019-04-07 19:19:44 +00:00
parent c664c2a5ec
commit 424417da79
6 changed files with 35 additions and 21 deletions

View File

@ -1279,14 +1279,16 @@ def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
// For other extloads, use subregs, since the high contents of the register are // For other extloads, use subregs, since the high contents of the register are
// defined after an extload. // defined after an extload.
// NOTE: The extloadi64i32 pattern needs to be first as it will try to form
// 32-bit loads for 4 byte aligned i8/i16 loads.
def : Pat<(extloadi64i32 addr:$src),
(SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;
def : Pat<(extloadi64i1 addr:$src), def : Pat<(extloadi64i1 addr:$src),
(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>; (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
def : Pat<(extloadi64i8 addr:$src), def : Pat<(extloadi64i8 addr:$src),
(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>; (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
def : Pat<(extloadi64i16 addr:$src), def : Pat<(extloadi64i16 addr:$src),
(SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>; (SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>;
def : Pat<(extloadi64i32 addr:$src),
(SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;
// anyext. Define these to do an explicit zero-extend to // anyext. Define these to do an explicit zero-extend to
// avoid partial-register updates. // avoid partial-register updates.

View File

@ -1121,7 +1121,19 @@ def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>;
def extloadi64i1 : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>; def extloadi64i1 : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>;
def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>; def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>;
def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>; def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
// We can treat an i8/i16 extending load to i64 as a 32 bit load if it's known
// to be 4 byte aligned or better.
def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (unindexedload node:$ptr)), [{
LoadSDNode *LD = cast<LoadSDNode>(N);
ISD::LoadExtType ExtType = LD->getExtensionType();
if (ExtType != ISD::EXTLOAD)
return false;
if (LD->getMemoryVT() == MVT::i32)
return true;
return LD->getAlignment() >= 4 && !LD->isVolatile();
}]>;
// An 'and' node with a single use. // An 'and' node with a single use.

View File

@ -413,7 +413,7 @@ define void @TestFPTruncF128_F80() nounwind {
; X64-NEXT: fstpt (%rsp) ; X64-NEXT: fstpt (%rsp)
; X64-NEXT: movq (%rsp), %rax ; X64-NEXT: movq (%rsp), %rax
; X64-NEXT: movq %rax, {{.*}}(%rip) ; X64-NEXT: movq %rax, {{.*}}(%rip)
; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %eax ; X64-NEXT: movl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movw %ax, vf80+{{.*}}(%rip) ; X64-NEXT: movw %ax, vf80+{{.*}}(%rip)
; X64-NEXT: addq $24, %rsp ; X64-NEXT: addq $24, %rsp
; X64-NEXT: retq ; X64-NEXT: retq

View File

@ -1494,7 +1494,7 @@ entry:
define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) { define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
; SSE2-LABEL: load_sext_4i1_to_4i32: ; SSE2-LABEL: load_sext_4i1_to_4i32:
; SSE2: # %bb.0: # %entry ; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movzbl (%rdi), %eax ; SSE2-NEXT: movl (%rdi), %eax
; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shlq $60, %rcx ; SSE2-NEXT: shlq $60, %rcx
; SSE2-NEXT: sarq $63, %rcx ; SSE2-NEXT: sarq $63, %rcx
@ -1517,7 +1517,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
; ;
; SSSE3-LABEL: load_sext_4i1_to_4i32: ; SSSE3-LABEL: load_sext_4i1_to_4i32:
; SSSE3: # %bb.0: # %entry ; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movzbl (%rdi), %eax ; SSSE3-NEXT: movl (%rdi), %eax
; SSSE3-NEXT: movq %rax, %rcx ; SSSE3-NEXT: movq %rax, %rcx
; SSSE3-NEXT: shlq $60, %rcx ; SSSE3-NEXT: shlq $60, %rcx
; SSSE3-NEXT: sarq $63, %rcx ; SSSE3-NEXT: sarq $63, %rcx
@ -1540,7 +1540,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
; ;
; SSE41-LABEL: load_sext_4i1_to_4i32: ; SSE41-LABEL: load_sext_4i1_to_4i32:
; SSE41: # %bb.0: # %entry ; SSE41: # %bb.0: # %entry
; SSE41-NEXT: movzbl (%rdi), %eax ; SSE41-NEXT: movl (%rdi), %eax
; SSE41-NEXT: movq %rax, %rcx ; SSE41-NEXT: movq %rax, %rcx
; SSE41-NEXT: shlq $62, %rcx ; SSE41-NEXT: shlq $62, %rcx
; SSE41-NEXT: sarq $63, %rcx ; SSE41-NEXT: sarq $63, %rcx
@ -1560,7 +1560,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
; ;
; AVX1-LABEL: load_sext_4i1_to_4i32: ; AVX1-LABEL: load_sext_4i1_to_4i32:
; AVX1: # %bb.0: # %entry ; AVX1: # %bb.0: # %entry
; AVX1-NEXT: movzbl (%rdi), %eax ; AVX1-NEXT: movl (%rdi), %eax
; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shlq $62, %rcx ; AVX1-NEXT: shlq $62, %rcx
; AVX1-NEXT: sarq $63, %rcx ; AVX1-NEXT: sarq $63, %rcx
@ -1580,7 +1580,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
; ;
; AVX2-LABEL: load_sext_4i1_to_4i32: ; AVX2-LABEL: load_sext_4i1_to_4i32:
; AVX2: # %bb.0: # %entry ; AVX2: # %bb.0: # %entry
; AVX2-NEXT: movzbl (%rdi), %eax ; AVX2-NEXT: movl (%rdi), %eax
; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shlq $62, %rcx ; AVX2-NEXT: shlq $62, %rcx
; AVX2-NEXT: sarq $63, %rcx ; AVX2-NEXT: sarq $63, %rcx
@ -1781,7 +1781,7 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) {
; ;
; AVX1-LABEL: load_sext_4i1_to_4i64: ; AVX1-LABEL: load_sext_4i1_to_4i64:
; AVX1: # %bb.0: # %entry ; AVX1: # %bb.0: # %entry
; AVX1-NEXT: movzbl (%rdi), %eax ; AVX1-NEXT: movl (%rdi), %eax
; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shlq $62, %rcx ; AVX1-NEXT: shlq $62, %rcx
; AVX1-NEXT: sarq $63, %rcx ; AVX1-NEXT: sarq $63, %rcx
@ -1805,7 +1805,7 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) {
; ;
; AVX2-LABEL: load_sext_4i1_to_4i64: ; AVX2-LABEL: load_sext_4i1_to_4i64:
; AVX2: # %bb.0: # %entry ; AVX2: # %bb.0: # %entry
; AVX2-NEXT: movzbl (%rdi), %eax ; AVX2-NEXT: movl (%rdi), %eax
; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shlq $60, %rcx ; AVX2-NEXT: shlq $60, %rcx
; AVX2-NEXT: sarq $63, %rcx ; AVX2-NEXT: sarq $63, %rcx

View File

@ -1494,7 +1494,7 @@ entry:
define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) { define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
; SSE2-LABEL: load_sext_4i1_to_4i32: ; SSE2-LABEL: load_sext_4i1_to_4i32:
; SSE2: # %bb.0: # %entry ; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movzbl (%rdi), %eax ; SSE2-NEXT: movl (%rdi), %eax
; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shlq $60, %rcx ; SSE2-NEXT: shlq $60, %rcx
; SSE2-NEXT: sarq $63, %rcx ; SSE2-NEXT: sarq $63, %rcx
@ -1517,7 +1517,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
; ;
; SSSE3-LABEL: load_sext_4i1_to_4i32: ; SSSE3-LABEL: load_sext_4i1_to_4i32:
; SSSE3: # %bb.0: # %entry ; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movzbl (%rdi), %eax ; SSSE3-NEXT: movl (%rdi), %eax
; SSSE3-NEXT: movq %rax, %rcx ; SSSE3-NEXT: movq %rax, %rcx
; SSSE3-NEXT: shlq $60, %rcx ; SSSE3-NEXT: shlq $60, %rcx
; SSSE3-NEXT: sarq $63, %rcx ; SSSE3-NEXT: sarq $63, %rcx
@ -1540,7 +1540,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
; ;
; SSE41-LABEL: load_sext_4i1_to_4i32: ; SSE41-LABEL: load_sext_4i1_to_4i32:
; SSE41: # %bb.0: # %entry ; SSE41: # %bb.0: # %entry
; SSE41-NEXT: movzbl (%rdi), %eax ; SSE41-NEXT: movl (%rdi), %eax
; SSE41-NEXT: movq %rax, %rcx ; SSE41-NEXT: movq %rax, %rcx
; SSE41-NEXT: shlq $62, %rcx ; SSE41-NEXT: shlq $62, %rcx
; SSE41-NEXT: sarq $63, %rcx ; SSE41-NEXT: sarq $63, %rcx
@ -1560,7 +1560,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
; ;
; AVX1-LABEL: load_sext_4i1_to_4i32: ; AVX1-LABEL: load_sext_4i1_to_4i32:
; AVX1: # %bb.0: # %entry ; AVX1: # %bb.0: # %entry
; AVX1-NEXT: movzbl (%rdi), %eax ; AVX1-NEXT: movl (%rdi), %eax
; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shlq $62, %rcx ; AVX1-NEXT: shlq $62, %rcx
; AVX1-NEXT: sarq $63, %rcx ; AVX1-NEXT: sarq $63, %rcx
@ -1580,7 +1580,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
; ;
; AVX2-LABEL: load_sext_4i1_to_4i32: ; AVX2-LABEL: load_sext_4i1_to_4i32:
; AVX2: # %bb.0: # %entry ; AVX2: # %bb.0: # %entry
; AVX2-NEXT: movzbl (%rdi), %eax ; AVX2-NEXT: movl (%rdi), %eax
; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shlq $62, %rcx ; AVX2-NEXT: shlq $62, %rcx
; AVX2-NEXT: sarq $63, %rcx ; AVX2-NEXT: sarq $63, %rcx
@ -1781,7 +1781,7 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) {
; ;
; AVX1-LABEL: load_sext_4i1_to_4i64: ; AVX1-LABEL: load_sext_4i1_to_4i64:
; AVX1: # %bb.0: # %entry ; AVX1: # %bb.0: # %entry
; AVX1-NEXT: movzbl (%rdi), %eax ; AVX1-NEXT: movl (%rdi), %eax
; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shlq $62, %rcx ; AVX1-NEXT: shlq $62, %rcx
; AVX1-NEXT: sarq $63, %rcx ; AVX1-NEXT: sarq $63, %rcx
@ -1805,7 +1805,7 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) {
; ;
; AVX2-LABEL: load_sext_4i1_to_4i64: ; AVX2-LABEL: load_sext_4i1_to_4i64:
; AVX2: # %bb.0: # %entry ; AVX2: # %bb.0: # %entry
; AVX2-NEXT: movzbl (%rdi), %eax ; AVX2-NEXT: movl (%rdi), %eax
; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shlq $60, %rcx ; AVX2-NEXT: shlq $60, %rcx
; AVX2-NEXT: sarq $63, %rcx ; AVX2-NEXT: sarq $63, %rcx

View File

@ -5,7 +5,7 @@
define i64 @test1(i8* %data) { define i64 @test1(i8* %data) {
; CHECK-LABEL: test1: ; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry ; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movzbl (%rdi), %eax ; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: shlq $2, %rax ; CHECK-NEXT: shlq $2, %rax
; CHECK-NEXT: andl $60, %eax ; CHECK-NEXT: andl $60, %eax
; CHECK-NEXT: retq ; CHECK-NEXT: retq
@ -20,7 +20,7 @@ entry:
define i8* @test2(i8* %data) { define i8* @test2(i8* %data) {
; CHECK-LABEL: test2: ; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry ; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movzbl (%rdi), %eax ; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: andl $15, %eax ; CHECK-NEXT: andl $15, %eax
; CHECK-NEXT: leaq (%rdi,%rax,4), %rax ; CHECK-NEXT: leaq (%rdi,%rax,4), %rax
; CHECK-NEXT: retq ; CHECK-NEXT: retq
@ -53,7 +53,7 @@ entry:
define i64 @test4(i8* %data) { define i64 @test4(i8* %data) {
; CHECK-LABEL: test4: ; CHECK-LABEL: test4:
; CHECK: # %bb.0: # %entry ; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movzbl (%rdi), %eax ; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: shrq $2, %rax ; CHECK-NEXT: shrq $2, %rax
; CHECK-NEXT: andl $60, %eax ; CHECK-NEXT: andl $60, %eax
; CHECK-NEXT: retq ; CHECK-NEXT: retq