forked from OSchip/llvm-project
X86: Add patterns for X86ISD::VSEXT in registers.
These register-form X86ISD::VSEXT nodes can occur when something between the sextload and the store is on the same chain and blocks isel from matching the load-folding patterns.
Fixes PR14887.
llvm-svn: 172353
This commit is contained in:
parent
7ab4fbf5c2
commit
bcd14a0f26
|
@ -5590,6 +5590,30 @@ defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
|
||||||
defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
|
defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
|
||||||
|
|
||||||
let Predicates = [HasAVX2] in {
|
let Predicates = [HasAVX2] in {
|
||||||
|
def : Pat<(v16i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>;
|
||||||
|
def : Pat<(v8i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDYrr VR128:$src)>;
|
||||||
|
def : Pat<(v4i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQYrr VR128:$src)>;
|
||||||
|
|
||||||
|
def : Pat<(v8i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>;
|
||||||
|
def : Pat<(v4i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQYrr VR128:$src)>;
|
||||||
|
|
||||||
|
def : Pat<(v4i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>;
|
||||||
|
|
||||||
|
def : Pat<(v16i16 (X86vsext (v32i8 VR256:$src))),
|
||||||
|
(VPMOVSXBWYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
|
||||||
|
def : Pat<(v8i32 (X86vsext (v32i8 VR256:$src))),
|
||||||
|
(VPMOVSXBDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
|
||||||
|
def : Pat<(v4i64 (X86vsext (v32i8 VR256:$src))),
|
||||||
|
(VPMOVSXBQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
|
||||||
|
|
||||||
|
def : Pat<(v8i32 (X86vsext (v16i16 VR256:$src))),
|
||||||
|
(VPMOVSXWDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
|
||||||
|
def : Pat<(v4i64 (X86vsext (v16i16 VR256:$src))),
|
||||||
|
(VPMOVSXWQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
|
||||||
|
|
||||||
|
def : Pat<(v4i64 (X86vsext (v8i32 VR256:$src))),
|
||||||
|
(VPMOVSXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
|
||||||
|
|
||||||
def : Pat<(v8i32 (X86vsmovl (v8i16 (bitconvert (v2i64 (load addr:$src)))))),
|
def : Pat<(v8i32 (X86vsmovl (v8i16 (bitconvert (v2i64 (load addr:$src)))))),
|
||||||
(VPMOVSXWDYrm addr:$src)>;
|
(VPMOVSXWDYrm addr:$src)>;
|
||||||
def : Pat<(v4i64 (X86vsmovl (v4i32 (bitconvert (v2i64 (load addr:$src)))))),
|
def : Pat<(v4i64 (X86vsmovl (v4i32 (bitconvert (v2i64 (load addr:$src)))))),
|
||||||
|
@ -5628,6 +5652,15 @@ let Predicates = [HasAVX] in {
|
||||||
}
|
}
|
||||||
|
|
||||||
let Predicates = [UseSSE41] in {
|
let Predicates = [UseSSE41] in {
|
||||||
|
def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (PMOVSXBWrr VR128:$src)>;
|
||||||
|
def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (PMOVSXBDrr VR128:$src)>;
|
||||||
|
def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (PMOVSXBQrr VR128:$src)>;
|
||||||
|
|
||||||
|
def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;
|
||||||
|
def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (PMOVSXWQrr VR128:$src)>;
|
||||||
|
|
||||||
|
def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;
|
||||||
|
|
||||||
// Common patterns involving scalar load
|
// Common patterns involving scalar load
|
||||||
def : Pat<(int_x86_sse41_pmovsxbq
|
def : Pat<(int_x86_sse41_pmovsxbq
|
||||||
(bitconvert (v4i32 (X86vzmovl
|
(bitconvert (v4i32 (X86vzmovl
|
||||||
|
@ -5727,6 +5760,15 @@ let Predicates = [HasAVX] in {
|
||||||
def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))),
|
def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))),
|
||||||
(VPMOVZXDQrm addr:$src)>;
|
(VPMOVZXDQrm addr:$src)>;
|
||||||
|
|
||||||
|
def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWrr VR128:$src)>;
|
||||||
|
def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDrr VR128:$src)>;
|
||||||
|
def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQrr VR128:$src)>;
|
||||||
|
|
||||||
|
def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;
|
||||||
|
def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQrr VR128:$src)>;
|
||||||
|
|
||||||
|
def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>;
|
||||||
|
|
||||||
def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64
|
def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64
|
||||||
(scalar_to_vector (loadi64 addr:$src))))))),
|
(scalar_to_vector (loadi64 addr:$src))))))),
|
||||||
(VPMOVSXWDrm addr:$src)>;
|
(VPMOVSXWDrm addr:$src)>;
|
||||||
|
|
|
@ -0,0 +1,176 @@
|
||||||
|
; RUN: llc < %s -march=x86-64 -mcpu=penryn | FileCheck -check-prefix=SSE41 %s
|
||||||
|
; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx | FileCheck -check-prefix=AVX1 %s
|
||||||
|
; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck -check-prefix=AVX2 %s
|
||||||
|
|
||||||
|
; PR14887
|
||||||
|
; Each test injects an extra store into the chain between the extending load and the store of its result, to exercise the in-register (reg-reg) versions of pmovsx.
|
||||||
|
|
||||||
|
define void @test1(<2 x i8>* %in, <2 x i64>* %out) nounwind {
|
||||||
|
%wide.load35 = load <2 x i8>* %in, align 1
|
||||||
|
%sext = sext <2 x i8> %wide.load35 to <2 x i64>
|
||||||
|
store <2 x i64> zeroinitializer, <2 x i64>* undef, align 8
|
||||||
|
store <2 x i64> %sext, <2 x i64>* %out, align 8
|
||||||
|
ret void
|
||||||
|
|
||||||
|
; SSE41: test1:
|
||||||
|
; SSE41: pmovsxbq
|
||||||
|
|
||||||
|
; AVX1: test1:
|
||||||
|
; AVX1: vpmovsxbq
|
||||||
|
|
||||||
|
; AVX2: test1:
|
||||||
|
; AVX2: vpmovsxbq
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @test2(<4 x i8>* %in, <4 x i64>* %out) nounwind {
|
||||||
|
%wide.load35 = load <4 x i8>* %in, align 1
|
||||||
|
%sext = sext <4 x i8> %wide.load35 to <4 x i64>
|
||||||
|
store <4 x i64> zeroinitializer, <4 x i64>* undef, align 8
|
||||||
|
store <4 x i64> %sext, <4 x i64>* %out, align 8
|
||||||
|
ret void
|
||||||
|
|
||||||
|
; AVX2: test2:
|
||||||
|
; AVX2: vpmovsxbq
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @test3(<4 x i8>* %in, <4 x i32>* %out) nounwind {
|
||||||
|
%wide.load35 = load <4 x i8>* %in, align 1
|
||||||
|
%sext = sext <4 x i8> %wide.load35 to <4 x i32>
|
||||||
|
store <4 x i32> zeroinitializer, <4 x i32>* undef, align 8
|
||||||
|
store <4 x i32> %sext, <4 x i32>* %out, align 8
|
||||||
|
ret void
|
||||||
|
|
||||||
|
; SSE41: test3:
|
||||||
|
; SSE41: pmovsxbd
|
||||||
|
|
||||||
|
; AVX1: test3:
|
||||||
|
; AVX1: vpmovsxbd
|
||||||
|
|
||||||
|
; AVX2: test3:
|
||||||
|
; AVX2: vpmovsxbd
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @test4(<8 x i8>* %in, <8 x i32>* %out) nounwind {
|
||||||
|
%wide.load35 = load <8 x i8>* %in, align 1
|
||||||
|
%sext = sext <8 x i8> %wide.load35 to <8 x i32>
|
||||||
|
store <8 x i32> zeroinitializer, <8 x i32>* undef, align 8
|
||||||
|
store <8 x i32> %sext, <8 x i32>* %out, align 8
|
||||||
|
ret void
|
||||||
|
|
||||||
|
; AVX2: test4:
|
||||||
|
; AVX2: vpmovsxbd
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @test5(<8 x i8>* %in, <8 x i16>* %out) nounwind {
|
||||||
|
%wide.load35 = load <8 x i8>* %in, align 1
|
||||||
|
%sext = sext <8 x i8> %wide.load35 to <8 x i16>
|
||||||
|
store <8 x i16> zeroinitializer, <8 x i16>* undef, align 8
|
||||||
|
store <8 x i16> %sext, <8 x i16>* %out, align 8
|
||||||
|
ret void
|
||||||
|
|
||||||
|
; SSE41: test5:
|
||||||
|
; SSE41: pmovsxbw
|
||||||
|
|
||||||
|
; AVX1: test5:
|
||||||
|
; AVX1: vpmovsxbw
|
||||||
|
|
||||||
|
; AVX2: test5:
|
||||||
|
; AVX2: vpmovsxbw
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @test6(<16 x i8>* %in, <16 x i16>* %out) nounwind {
|
||||||
|
%wide.load35 = load <16 x i8>* %in, align 1
|
||||||
|
%sext = sext <16 x i8> %wide.load35 to <16 x i16>
|
||||||
|
store <16 x i16> zeroinitializer, <16 x i16>* undef, align 8
|
||||||
|
store <16 x i16> %sext, <16 x i16>* %out, align 8
|
||||||
|
ret void
|
||||||
|
|
||||||
|
; AVX2: test6:
|
||||||
|
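; FIXME: The v16i8 -> v16i16 sign extension is currently scalarized, so no pmovsx is emitted; the negative check below only pins the current behavior.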
|
||||||
|
; AVX2-NOT: pmovsx
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @test7(<2 x i16>* %in, <2 x i64>* %out) nounwind {
|
||||||
|
%wide.load35 = load <2 x i16>* %in, align 1
|
||||||
|
%sext = sext <2 x i16> %wide.load35 to <2 x i64>
|
||||||
|
store <2 x i64> zeroinitializer, <2 x i64>* undef, align 8
|
||||||
|
store <2 x i64> %sext, <2 x i64>* %out, align 8
|
||||||
|
ret void
|
||||||
|
|
||||||
|
|
||||||
|
; SSE41: test7:
|
||||||
|
; SSE41: pmovsxwq
|
||||||
|
|
||||||
|
; AVX1: test7:
|
||||||
|
; AVX1: vpmovsxwq
|
||||||
|
|
||||||
|
; AVX2: test7:
|
||||||
|
; AVX2: vpmovsxwq
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @test8(<4 x i16>* %in, <4 x i64>* %out) nounwind {
|
||||||
|
%wide.load35 = load <4 x i16>* %in, align 1
|
||||||
|
%sext = sext <4 x i16> %wide.load35 to <4 x i64>
|
||||||
|
store <4 x i64> zeroinitializer, <4 x i64>* undef, align 8
|
||||||
|
store <4 x i64> %sext, <4 x i64>* %out, align 8
|
||||||
|
ret void
|
||||||
|
|
||||||
|
; AVX2: test8:
|
||||||
|
; AVX2: vpmovsxwq
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @test9(<4 x i16>* %in, <4 x i32>* %out) nounwind {
|
||||||
|
%wide.load35 = load <4 x i16>* %in, align 1
|
||||||
|
%sext = sext <4 x i16> %wide.load35 to <4 x i32>
|
||||||
|
store <4 x i32> zeroinitializer, <4 x i32>* undef, align 8
|
||||||
|
store <4 x i32> %sext, <4 x i32>* %out, align 8
|
||||||
|
ret void
|
||||||
|
|
||||||
|
; SSE41: test9:
|
||||||
|
; SSE41: pmovsxwd
|
||||||
|
|
||||||
|
; AVX1: test9:
|
||||||
|
; AVX1: vpmovsxwd
|
||||||
|
|
||||||
|
; AVX2: test9:
|
||||||
|
; AVX2: vpmovsxwd
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @test10(<8 x i16>* %in, <8 x i32>* %out) nounwind {
|
||||||
|
%wide.load35 = load <8 x i16>* %in, align 1
|
||||||
|
%sext = sext <8 x i16> %wide.load35 to <8 x i32>
|
||||||
|
store <8 x i32> zeroinitializer, <8 x i32>* undef, align 8
|
||||||
|
store <8 x i32> %sext, <8 x i32>* %out, align 8
|
||||||
|
ret void
|
||||||
|
|
||||||
|
; AVX2: test10:
|
||||||
|
; AVX2: vpmovsxwd
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @test11(<2 x i32>* %in, <2 x i64>* %out) nounwind {
|
||||||
|
%wide.load35 = load <2 x i32>* %in, align 1
|
||||||
|
%sext = sext <2 x i32> %wide.load35 to <2 x i64>
|
||||||
|
store <2 x i64> zeroinitializer, <2 x i64>* undef, align 8
|
||||||
|
store <2 x i64> %sext, <2 x i64>* %out, align 8
|
||||||
|
ret void
|
||||||
|
|
||||||
|
; SSE41: test11:
|
||||||
|
; SSE41: pmovsxdq
|
||||||
|
|
||||||
|
; AVX1: test11:
|
||||||
|
; AVX1: vpmovsxdq
|
||||||
|
|
||||||
|
; AVX2: test11:
|
||||||
|
; AVX2: vpmovsxdq
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @test12(<4 x i32>* %in, <4 x i64>* %out) nounwind {
|
||||||
|
%wide.load35 = load <4 x i32>* %in, align 1
|
||||||
|
%sext = sext <4 x i32> %wide.load35 to <4 x i64>
|
||||||
|
store <4 x i64> zeroinitializer, <4 x i64>* undef, align 8
|
||||||
|
store <4 x i64> %sext, <4 x i64>* %out, align 8
|
||||||
|
ret void
|
||||||
|
|
||||||
|
; AVX2: test12:
|
||||||
|
; AVX2: vpmovsxdq
|
||||||
|
}
|
Loading…
Reference in New Issue