From 648e48d02eafd538719f5115ed3bf8832383112a Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Tue, 29 Nov 2011 22:48:34 +0000 Subject: [PATCH] Add another missing pattern. llvm-gcc likes f64 but clang likes i64 so it was generating poor code for some SSE builtins. llvm-svn: 145448 --- llvm/lib/Target/X86/X86InstrSSE.td | 6 ++++++ llvm/test/CodeGen/X86/vec_shuffle-38.ll | 21 ++++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 22a7fa478509..0dca0007d247 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -1150,6 +1150,9 @@ let Predicates = [HasAVX] in { def : Pat<(X86Movlhps VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), (VMOVHPSrm VR128:$src1, addr:$src2)>; + def : Pat<(X86Movlhps VR128:$src1, + (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))), + (VMOVHPSrm VR128:$src1, addr:$src2)>; def : Pat<(X86Movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), (VMOVHPSrm VR128:$src1, addr:$src2)>; @@ -1183,6 +1186,9 @@ let Predicates = [HasSSE1] in { def : Pat<(X86Movlhps VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), (MOVHPSrm VR128:$src1, addr:$src2)>; + def : Pat<(X86Movlhps VR128:$src1, + (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))), + (MOVHPSrm VR128:$src1, addr:$src2)>; def : Pat<(X86Movlhps VR128:$src1, (bc_v4f32 (v2i64 (X86vzload addr:$src2)))), (MOVHPSrm VR128:$src1, addr:$src2)>; diff --git a/llvm/test/CodeGen/X86/vec_shuffle-38.ll b/llvm/test/CodeGen/X86/vec_shuffle-38.ll index 66da013665f6..96ef883c4e1e 100644 --- a/llvm/test/CodeGen/X86/vec_shuffle-38.ll +++ b/llvm/test/CodeGen/X86/vec_shuffle-38.ll @@ -46,7 +46,7 @@ entry: ; rdar://10119696 ; CHECK: f -define <4 x float> @f(<4 x float> %x, double* nocapture %y) nounwind uwtable readonly ssp { +define <4 x float> @f(<4 x float> %x, double* nocapture %y) nounwind readonly ssp { 
entry: ; CHECK: movlps (%{{rdi|rdx}}), %xmm0 %u110.i = load double* %y, align 1 @@ -56,3 +56,22 @@ entry: ret <4 x float> %shuffle.i } +define <4 x float> @loadhpi2(%struct.Float2* nocapture %vHiCoefPtr_0, %struct.Float2* nocapture %vLoCoefPtr_0, i32 %s) nounwind readonly ssp { +entry: +; CHECK: loadhpi2 +; CHECK: movhps ( +; CHECK-NOT: movlhps + %0 = bitcast %struct.Float2* %vHiCoefPtr_0 to <1 x i64>* + %idx.ext = sext i32 %s to i64 + %add.ptr = getelementptr inbounds <1 x i64>* %0, i64 %idx.ext + %add.ptr.val = load <1 x i64>* %add.ptr, align 1 + %1 = bitcast <1 x i64> %add.ptr.val to <2 x float> + %shuffle.i = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> + %2 = bitcast %struct.Float2* %vLoCoefPtr_0 to <1 x i64>* + %add.ptr2 = getelementptr inbounds <1 x i64>* %2, i64 %idx.ext + %add.ptr2.val = load <1 x i64>* %add.ptr2, align 1 + %3 = bitcast <1 x i64> %add.ptr2.val to <2 x float> + %shuffle.i4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> + %shuffle1.i5 = shufflevector <4 x float> %shuffle.i, <4 x float> %shuffle.i4, <4 x i32> <i32 0, i32 1, i32 4, i32 5> + ret <4 x float> %shuffle1.i5 +}