forked from OSchip/llvm-project
Fix a couple of shuffle patterns to use movhlps instead of movhps as the constraint.
This changes the resulting optimizations, so update the test cases accordingly. llvm-svn: 86360
This commit is contained in:
parent
b6a3dd48f4
commit
bd05185ef1
|
@ -3051,15 +3051,15 @@ def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
|
|||
|
||||
let AddedComplexity = 20 in {
|
||||
// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
|
||||
// vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS
|
||||
// vector_shuffle v1, (load v2) <6, 7, 2, 3> using MOVHPS
|
||||
def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
|
||||
(MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
|
||||
def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
|
||||
(MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(v4f32 (movhp VR128:$src1, (load addr:$src2))),
|
||||
(MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
|
||||
def : Pat<(v2f64 (movhp VR128:$src1, (load addr:$src2))),
|
||||
(MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(v4f32 (movhlps (load addr:$src1), VR128:$src2)),
|
||||
(MOVHPSrm VR128:$src2, addr:$src1)>, Requires<[HasSSE1]>;
|
||||
def : Pat<(v2f64 (movhlps (load addr:$src1), VR128:$src2)),
|
||||
(MOVHPDrm VR128:$src2, addr:$src1)>, Requires<[HasSSE2]>;
|
||||
|
||||
def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
|
||||
(MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
|
||||
|
@ -3077,9 +3077,9 @@ def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
|
|||
(MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
|
||||
def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
|
||||
(MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(store (v4f32 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
|
||||
def : Pat<(store (v4f32 (movhlps (load addr:$src1), VR128:$src2)), addr:$src1),
|
||||
(MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
|
||||
def : Pat<(store (v2f64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
|
||||
def : Pat<(store (v2f64 (movhlps (load addr:$src1), VR128:$src2)), addr:$src1),
|
||||
(MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
|
||||
|
||||
def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
|
||||
|
|
|
@ -145,7 +145,9 @@ define void @t9(<4 x float>* %r, <2 x i32>* %A) nounwind {
|
|||
ret void
|
||||
; X64: t9:
|
||||
; X64: movsd (%rsi), %xmm0
|
||||
; X64: movhps %xmm0, (%rdi)
|
||||
; X64: movaps (%rdi), %xmm1
|
||||
; X64: movlhps %xmm0, %xmm1
|
||||
; X64: movaps %xmm1, (%rdi)
|
||||
; X64: ret
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
|
||||
; RUN: grep movlhps %t | count 1
|
||||
; RUN: grep movhlps %t | count 1
|
||||
; RUN: grep movhps %t | count 1
|
||||
|
||||
define <4 x float> @test1(<4 x float>* %x, <4 x float>* %y) {
|
||||
%tmp = load <4 x float>* %y ; <<4 x float>> [#uses=2]
|
||||
|
@ -18,4 +18,3 @@ entry:
|
|||
%tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %tmp4
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue