forked from OSchip/llvm-project
Add another missing pattern. llvm-gcc likes f64 but clang likes i64 so it was generating poor code for some SSE builtins.
llvm-svn: 145448
This commit is contained in:
parent
507840dbff
commit
648e48d02e
|
@ -1150,6 +1150,9 @@ let Predicates = [HasAVX] in {
|
|||
// Select VMOVHPSrm for an X86Movlhps whose second operand is a 64-bit
// floating-point load (loadf64) placed into a v2f64 via scalar_to_vector and
// bitcast to v4f32; the load address becomes the instruction's memory operand.
def : Pat<(X86Movlhps VR128:$src1,
|
||||
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
|
||||
(VMOVHPSrm VR128:$src1, addr:$src2)>;
|
||||
// Integer-typed twin of the loadf64 pattern: the 64-bit value is loaded with
// loadi64 into a v2i64 and then bitcast to v4f32. Per the commit message,
// clang emits the i64 form where llvm-gcc emitted f64, so without this pattern
// some SSE builtins got poor code.
def : Pat<(X86Movlhps VR128:$src1,
|
||||
(bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
|
||||
(VMOVHPSrm VR128:$src1, addr:$src2)>;
|
||||
// Select VMOVHPSrm when the second operand of X86Movlhps is an X86vzload of
// type v2i64 from addr:$src2.
// NOTE(review): this pattern bitcasts with bc_v4i32, while the matching
// X86vzload pattern in the HasSSE1 block uses bc_v4f32 -- confirm the
// difference is intentional.
def : Pat<(X86Movlhps VR128:$src1,
|
||||
(bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
|
||||
(VMOVHPSrm VR128:$src1, addr:$src2)>;
|
||||
|
@ -1183,6 +1186,9 @@ let Predicates = [HasSSE1] in {
|
|||
// Select MOVHPSrm for an X86Movlhps whose second operand is a 64-bit
// floating-point load (loadf64) placed into a v2f64 via scalar_to_vector and
// bitcast to v4f32; the load address becomes the instruction's memory operand.
def : Pat<(X86Movlhps VR128:$src1,
|
||||
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
|
||||
(MOVHPSrm VR128:$src1, addr:$src2)>;
|
||||
// Integer-typed twin of the loadf64 pattern: the 64-bit value is loaded with
// loadi64 into a v2i64 and then bitcast to v4f32. Per the commit message,
// clang emits the i64 form where llvm-gcc emitted f64, so without this pattern
// some SSE builtins got poor code.
def : Pat<(X86Movlhps VR128:$src1,
|
||||
(bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
|
||||
(MOVHPSrm VR128:$src1, addr:$src2)>;
|
||||
// Select MOVHPSrm when the second operand of X86Movlhps is an X86vzload of
// type v2i64 from addr:$src2, bitcast to v4f32.
// NOTE(review): the matching X86vzload pattern in the HasAVX block uses
// bc_v4i32 instead of bc_v4f32 -- confirm the difference is intentional.
def : Pat<(X86Movlhps VR128:$src1,
|
||||
(bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
|
||||
(MOVHPSrm VR128:$src1, addr:$src2)>;
|
||||
|
|
|
@ -46,7 +46,7 @@ entry:
|
|||
|
||||
; rdar://10119696
|
||||
; CHECK: f
|
||||
define <4 x float> @f(<4 x float> %x, double* nocapture %y) nounwind uwtable readonly ssp {
|
||||
define <4 x float> @f(<4 x float> %x, double* nocapture %y) nounwind readonly ssp {
|
||||
entry:
|
||||
; CHECK: movlps (%{{rdi|rdx}}), %xmm0
|
||||
%u110.i = load double* %y, align 1
|
||||
|
@ -56,3 +56,22 @@ entry:
|
|||
ret <4 x float> %shuffle.i
|
||||
}
|
||||
|
||||
; Regression test for the i64-load form of the movhps pattern. Two 64-bit
; values are loaded as <1 x i64> (indexed by the sign-extended %s), bitcast to
; <2 x float>, widened to <4 x float>, and then combined so that the result
; holds elements 0,1 of the first value followed by elements 0,1 of the second.
; The FileCheck directives below require a movhps with a memory operand and
; forbid a register-register movlhps.
define <4 x float> @loadhpi2(%struct.Float2* nocapture %vHiCoefPtr_0, %struct.Float2* nocapture %vLoCoefPtr_0, i32 %s) nounwind readonly ssp {
|
||||
entry:
|
||||
; CHECK: loadhpi2
|
||||
; CHECK: movhps (
|
||||
; CHECK-NOT: movlhps
|
||||
%0 = bitcast %struct.Float2* %vHiCoefPtr_0 to <1 x i64>*
|
||||
%idx.ext = sext i32 %s to i64
|
||||
%add.ptr = getelementptr inbounds <1 x i64>* %0, i64 %idx.ext
|
||||
%add.ptr.val = load <1 x i64>* %add.ptr, align 1
|
||||
%1 = bitcast <1 x i64> %add.ptr.val to <2 x float>
|
||||
%shuffle.i = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
%2 = bitcast %struct.Float2* %vLoCoefPtr_0 to <1 x i64>*
|
||||
%add.ptr2 = getelementptr inbounds <1 x i64>* %2, i64 %idx.ext
|
||||
%add.ptr2.val = load <1 x i64>* %add.ptr2, align 1
|
||||
%3 = bitcast <1 x i64> %add.ptr2.val to <2 x float>
|
||||
%shuffle.i4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
; Mask <0,1,4,5>: low two lanes come from %shuffle.i, high two lanes from the
; low two lanes of %shuffle.i4.
%shuffle1.i5 = shufflevector <4 x float> %shuffle.i, <4 x float> %shuffle.i4, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
|
||||
ret <4 x float> %shuffle1.i5
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue