[X86] Prefer unpckhpd over movhlps in isel for fake unary cases

In r337348, I changed lowering to prefer X86ISD::UNPCKL/UNPCKH opcodes over MOVLHPS/MOVHLPS for v2f64 {0,0} and {1,1} shuffles when we have SSE2. This enabled the removal of a bunch of weirdly bitcasted isel patterns in r337349. To avoid changing the tests I placed a gross hack in isel to still emit movhlps instructions for fake unary unpckh nodes. A similar hack was not needed for unpckl and movlhps because we do execution domain switching for those. But unpckh and movhlps have swapped operand order.

This patch removes the hack.

This is a code size increase since unpckhpd requires a 0x66 prefix and movhlps does not. But if that's a big concern we should be using movhlps for all unpckhpd opcodes and let commuteInstruction turn it into unpckhpd when it's an advantage.

Differential Revision: https://reviews.llvm.org/D49499

llvm-svn: 341973
This commit is contained in:
Craig Topper 2018-09-11 17:57:27 +00:00
parent cc9efaffad
commit 8238580aae
22 changed files with 250 additions and 263 deletions

View File

@ -820,19 +820,6 @@ let Constraints = "$src1 = $dst" in {
Sched<[SchedWriteFShuffle.XMM]>, NotMemoryFoldable;
}
// TODO: This is largely to trick fastisel into ignoring the pattern.
def UnpckhUnary : PatFrag<(ops node:$src1, node:$src2),
(X86Unpckh node:$src1, node:$src2), [{
return N->getOperand(0) == N->getOperand(1);
}]>;
let Predicates = [UseSSE2] in {
// TODO: This is a hack pattern to allow lowering to emit unpckh instead of
// movhlps for sse2 without changing a bunch of tests.
def : Pat<(v2f64 (UnpckhUnary VR128:$src, VR128:$src)),
(MOVHLPSrr VR128:$src, VR128:$src)>;
}
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Conversion Instructions
//===----------------------------------------------------------------------===//

View File

@ -38,7 +38,7 @@ define <4 x float> @test_negative_zero_1(<4 x float> %A) {
; SSE2-LABEL: test_negative_zero_1:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: xorps %xmm2, %xmm2
; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero

View File

@ -197,7 +197,7 @@ define <4 x double> @combine_vec_fcopysign_fpext_sgn(<4 x double> %x, <4 x float
; SSE-NEXT: cvtss2sd %xmm2, %xmm4
; SSE-NEXT: movshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
; SSE-NEXT: movaps %xmm2, %xmm6
; SSE-NEXT: movhlps {{.*#+}} xmm6 = xmm2[1],xmm6[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm6 = xmm6[1],xmm2[1]
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm2[2,3]
; SSE-NEXT: movaps {{.*#+}} xmm7
; SSE-NEXT: movaps %xmm0, %xmm2
@ -213,7 +213,7 @@ define <4 x double> @combine_vec_fcopysign_fpext_sgn(<4 x double> %x, <4 x float
; SSE-NEXT: orps %xmm0, %xmm4
; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm4[0]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: andps %xmm7, %xmm0
; SSE-NEXT: cvtss2sd %xmm3, %xmm3
; SSE-NEXT: andps %xmm8, %xmm3
@ -260,7 +260,7 @@ define <4 x float> @combine_vec_fcopysign_fptrunc_sgn(<4 x float> %x, <4 x doubl
; SSE-NEXT: orps %xmm6, %xmm1
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: movaps %xmm3, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE-NEXT: andps %xmm5, %xmm1
; SSE-NEXT: xorps %xmm6, %xmm6
; SSE-NEXT: cvtsd2ss %xmm2, %xmm6

View File

@ -57,9 +57,9 @@ define <2 x float> @complex_square_f32(<2 x float>) #0 {
define <2 x double> @complex_square_f64(<2 x double>) #0 {
; SSE-LABEL: complex_square_f64:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: movapd %xmm0, %xmm2
; SSE-NEXT: addsd %xmm0, %xmm2
; SSE-NEXT: mulsd %xmm1, %xmm2
; SSE-NEXT: mulsd %xmm0, %xmm0
@ -160,11 +160,11 @@ define <2 x float> @complex_mul_f32(<2 x float>, <2 x float>) #0 {
define <2 x double> @complex_mul_f64(<2 x double>, <2 x double>) #0 {
; SSE-LABEL: complex_mul_f64:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
; SSE-NEXT: movaps %xmm1, %xmm3
; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
; SSE-NEXT: movaps %xmm3, %xmm4
; SSE-NEXT: movapd %xmm0, %xmm2
; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE-NEXT: movapd %xmm1, %xmm3
; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
; SSE-NEXT: movapd %xmm3, %xmm4
; SSE-NEXT: mulsd %xmm0, %xmm4
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: mulsd %xmm2, %xmm1

View File

@ -67,7 +67,7 @@ define <4 x float> @trunc_unsigned_v4f32(<4 x float> %x) #0 {
; SSE2-NEXT: cvttss2si %xmm1, %rax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: movaps %xmm0, %xmm2
; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE2-NEXT: cvttss2si %xmm2, %rax
; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
@ -105,10 +105,10 @@ define <4 x float> @trunc_unsigned_v4f32(<4 x float> %x) #0 {
define <2 x double> @trunc_unsigned_v2f64(<2 x double> %x) #0 {
; SSE2-LABEL: trunc_unsigned_v2f64:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: movapd %xmm0, %xmm1
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
; SSE2-NEXT: movaps %xmm1, %xmm3
; SSE2-NEXT: movapd %xmm1, %xmm3
; SSE2-NEXT: subsd %xmm2, %xmm3
; SSE2-NEXT: cvttsd2si %xmm3, %rax
; SSE2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
@ -116,7 +116,7 @@ define <2 x double> @trunc_unsigned_v2f64(<2 x double> %x) #0 {
; SSE2-NEXT: cvttsd2si %xmm1, %rdx
; SSE2-NEXT: ucomisd %xmm2, %xmm1
; SSE2-NEXT: cmovaeq %rax, %rdx
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movapd %xmm0, %xmm1
; SSE2-NEXT: subsd %xmm2, %xmm1
; SSE2-NEXT: cvttsd2si %xmm1, %rax
; SSE2-NEXT: xorq %rcx, %rax
@ -155,10 +155,10 @@ define <2 x double> @trunc_unsigned_v2f64(<2 x double> %x) #0 {
define <4 x double> @trunc_unsigned_v4f64(<4 x double> %x) #0 {
; SSE2-LABEL: trunc_unsigned_v4f64:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm1, %xmm3
; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
; SSE2-NEXT: movapd %xmm1, %xmm3
; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
; SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
; SSE2-NEXT: movaps %xmm3, %xmm4
; SSE2-NEXT: movapd %xmm3, %xmm4
; SSE2-NEXT: subsd %xmm2, %xmm4
; SSE2-NEXT: cvttsd2si %xmm4, %rcx
; SSE2-NEXT: movabsq $-9223372036854775808, %rdx # imm = 0x8000000000000000
@ -166,23 +166,23 @@ define <4 x double> @trunc_unsigned_v4f64(<4 x double> %x) #0 {
; SSE2-NEXT: cvttsd2si %xmm3, %rax
; SSE2-NEXT: ucomisd %xmm2, %xmm3
; SSE2-NEXT: cmovaeq %rcx, %rax
; SSE2-NEXT: movaps %xmm1, %xmm3
; SSE2-NEXT: movapd %xmm1, %xmm3
; SSE2-NEXT: subsd %xmm2, %xmm3
; SSE2-NEXT: cvttsd2si %xmm3, %rsi
; SSE2-NEXT: xorq %rdx, %rsi
; SSE2-NEXT: cvttsd2si %xmm1, %rcx
; SSE2-NEXT: ucomisd %xmm2, %xmm1
; SSE2-NEXT: cmovaeq %rsi, %rcx
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: movaps %xmm1, %xmm3
; SSE2-NEXT: movapd %xmm0, %xmm1
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: movapd %xmm1, %xmm3
; SSE2-NEXT: subsd %xmm2, %xmm3
; SSE2-NEXT: cvttsd2si %xmm3, %rsi
; SSE2-NEXT: xorq %rdx, %rsi
; SSE2-NEXT: cvttsd2si %xmm1, %rdi
; SSE2-NEXT: ucomisd %xmm2, %xmm1
; SSE2-NEXT: cmovaeq %rsi, %rdi
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movapd %xmm0, %xmm1
; SSE2-NEXT: subsd %xmm2, %xmm1
; SSE2-NEXT: cvttsd2si %xmm1, %rsi
; SSE2-NEXT: xorq %rdx, %rsi

View File

@ -902,7 +902,7 @@ define <4 x float> @not_a_hsub_2(<4 x float> %A, <4 x float> %B) {
; SSE-LABEL: not_a_hsub_2:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm0[2,3]
; SSE-NEXT: subss %xmm3, %xmm2
@ -912,7 +912,7 @@ define <4 x float> @not_a_hsub_2(<4 x float> %A, <4 x float> %B) {
; SSE-NEXT: movaps %xmm1, %xmm2
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1],xmm1[2,3]
; SSE-NEXT: movaps %xmm1, %xmm3
; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
; SSE-NEXT: subss %xmm3, %xmm2
; SSE-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
; SSE-NEXT: subss %xmm3, %xmm1
@ -958,11 +958,11 @@ define <4 x float> @not_a_hsub_2(<4 x float> %A, <4 x float> %B) {
define <2 x double> @not_a_hsub_3(<2 x double> %A, <2 x double> %B) {
; SSE-LABEL: not_a_hsub_3:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm1, %xmm2
; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
; SSE-NEXT: movapd %xmm1, %xmm2
; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE-NEXT: subsd %xmm2, %xmm1
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
; SSE-NEXT: movapd %xmm0, %xmm2
; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE-NEXT: subsd %xmm0, %xmm2
; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT: movapd %xmm2, %xmm0

View File

@ -10,7 +10,7 @@ define float @pr26491(<4 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3]
; SSE2-NEXT: addps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: retq
;
@ -19,7 +19,7 @@ define float @pr26491(<4 x float> %a0) {
; SSSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT: addps %xmm0, %xmm1
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSSE3-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSSE3-NEXT: addss %xmm1, %xmm0
; SSSE3-NEXT: retq
;

View File

@ -102,8 +102,8 @@ define <4 x float> @test4_undef(<4 x float> %a, <4 x float> %b) {
define <2 x double> @test5_undef(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test5_undef:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: addsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
@ -168,7 +168,7 @@ define <4 x float> @test8_undef(<4 x float> %a, <4 x float> %b) {
; SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE-NEXT: addss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]

View File

@ -25,7 +25,7 @@ define <3 x double> @v3f2d_ext_vec(<3 x float> %v1) nounwind {
; SSE-NEXT: cvtps2pd %xmm0, %xmm0
; SSE-NEXT: movlps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm2, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE-NEXT: fldl -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq

View File

@ -432,9 +432,9 @@ define <4 x float> @test16(<4 x float> %A, <4 x float> %B) {
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: subss %xmm3, %xmm2
; SSE-NEXT: movaps %xmm0, %xmm4
; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm0[1],xmm4[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1]
; SSE-NEXT: movaps %xmm1, %xmm5
; SSE-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1]
; SSE-NEXT: subss %xmm5, %xmm4
; SSE-NEXT: movshdup {{.*#+}} xmm5 = xmm0[1,1,3,3]
; SSE-NEXT: addss %xmm3, %xmm5

View File

@ -1575,7 +1575,7 @@ define <4 x i32> @fptoui_4f32_to_4i32(<4 x float> %a) {
; SSE-NEXT: cvttss2si %xmm1, %rax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE-NEXT: cvttss2si %xmm2, %rax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
@ -1858,7 +1858,7 @@ define <8 x i32> @fptoui_8f32_to_8i32(<8 x float> %a) {
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movaps %xmm2, %xmm3
; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm2[1],xmm3[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm2[1]
; SSE-NEXT: cvttss2si %xmm3, %rax
; SSE-NEXT: movd %eax, %xmm3
; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
@ -1874,7 +1874,7 @@ define <8 x i32> @fptoui_8f32_to_8i32(<8 x float> %a) {
; SSE-NEXT: cvttss2si %xmm2, %rax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: movaps %xmm1, %xmm3
; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
; SSE-NEXT: cvttss2si %xmm3, %rax
; SSE-NEXT: movd %eax, %xmm3
; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]

View File

@ -88,7 +88,7 @@ define <3 x double> @constrained_vector_fdiv_v3f64() {
; NO-FMA-NEXT: divsd {{.*}}(%rip), %xmm1
; NO-FMA-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp)
; NO-FMA-NEXT: movapd %xmm0, %xmm1
; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; NO-FMA-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; NO-FMA-NEXT: fldl -{{[0-9]+}}(%rsp)
; NO-FMA-NEXT: retq
;
@ -473,7 +473,7 @@ define <3 x double> @constrained_vector_fmul_v3f64() {
; NO-FMA-NEXT: mulsd {{.*}}(%rip), %xmm1
; NO-FMA-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp)
; NO-FMA-NEXT: movapd %xmm0, %xmm1
; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; NO-FMA-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; NO-FMA-NEXT: fldl -{{[0-9]+}}(%rsp)
; NO-FMA-NEXT: retq
;
@ -604,7 +604,7 @@ define <3 x double> @constrained_vector_fadd_v3f64() {
; NO-FMA-NEXT: addsd {{.*}}(%rip), %xmm1
; NO-FMA-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp)
; NO-FMA-NEXT: movapd %xmm0, %xmm1
; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; NO-FMA-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; NO-FMA-NEXT: fldl -{{[0-9]+}}(%rsp)
; NO-FMA-NEXT: retq
;
@ -737,7 +737,7 @@ define <3 x double> @constrained_vector_fsub_v3f64() {
; NO-FMA-NEXT: subpd {{.*}}(%rip), %xmm0
; NO-FMA-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp)
; NO-FMA-NEXT: movapd %xmm0, %xmm1
; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; NO-FMA-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; NO-FMA-NEXT: fldl -{{[0-9]+}}(%rsp)
; NO-FMA-NEXT: retq
;
@ -1210,7 +1210,7 @@ define <3 x double> @constrained_vector_sqrt_v3f64() {
; NO-FMA-NEXT: sqrtpd {{.*}}(%rip), %xmm0
; NO-FMA-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp)
; NO-FMA-NEXT: movapd %xmm0, %xmm1
; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; NO-FMA-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; NO-FMA-NEXT: fldl -{{[0-9]+}}(%rsp)
; NO-FMA-NEXT: retq
;

View File

@ -41,7 +41,7 @@ define float @test_v4f32(float %a0, <4 x float> %a1) {
; SSE2-LABEL: test_v4f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm1, %xmm2
; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: addps %xmm1, %xmm2
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3]
@ -51,7 +51,7 @@ define float @test_v4f32(float %a0, <4 x float> %a1) {
; SSE41-LABEL: test_v4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: movaps %xmm1, %xmm0
; SSE41-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE41-NEXT: addps %xmm1, %xmm0
; SSE41-NEXT: haddps %xmm0, %xmm0
; SSE41-NEXT: retq
@ -78,7 +78,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) {
; SSE2: # %bb.0:
; SSE2-NEXT: addps %xmm2, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm2
; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: addps %xmm1, %xmm2
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3]
@ -89,7 +89,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) {
; SSE41: # %bb.0:
; SSE41-NEXT: addps %xmm2, %xmm1
; SSE41-NEXT: movaps %xmm1, %xmm0
; SSE41-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE41-NEXT: addps %xmm1, %xmm0
; SSE41-NEXT: haddps %xmm0, %xmm0
; SSE41-NEXT: retq
@ -126,7 +126,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE2-NEXT: addps %xmm3, %xmm1
; SSE2-NEXT: addps %xmm2, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm2
; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: addps %xmm1, %xmm2
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3]
@ -139,7 +139,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE41-NEXT: addps %xmm3, %xmm1
; SSE41-NEXT: addps %xmm2, %xmm1
; SSE41-NEXT: movaps %xmm1, %xmm0
; SSE41-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE41-NEXT: addps %xmm1, %xmm0
; SSE41-NEXT: haddps %xmm0, %xmm0
; SSE41-NEXT: retq
@ -208,7 +208,7 @@ define float @test_v4f32_zero(<4 x float> %a0) {
; SSE2-LABEL: test_v4f32_zero:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: addps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
@ -218,7 +218,7 @@ define float @test_v4f32_zero(<4 x float> %a0) {
; SSE41-LABEL: test_v4f32_zero:
; SSE41: # %bb.0:
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: addps %xmm0, %xmm1
; SSE41-NEXT: haddps %xmm1, %xmm1
; SSE41-NEXT: movaps %xmm1, %xmm0
@ -246,7 +246,7 @@ define float @test_v8f32_zero(<8 x float> %a0) {
; SSE2: # %bb.0:
; SSE2-NEXT: addps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: addps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
@ -257,7 +257,7 @@ define float @test_v8f32_zero(<8 x float> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: addps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: addps %xmm0, %xmm1
; SSE41-NEXT: haddps %xmm1, %xmm1
; SSE41-NEXT: movaps %xmm1, %xmm0
@ -295,7 +295,7 @@ define float @test_v16f32_zero(<16 x float> %a0) {
; SSE2-NEXT: addps %xmm2, %xmm0
; SSE2-NEXT: addps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: addps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
@ -308,7 +308,7 @@ define float @test_v16f32_zero(<16 x float> %a0) {
; SSE41-NEXT: addps %xmm2, %xmm0
; SSE41-NEXT: addps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: addps %xmm0, %xmm1
; SSE41-NEXT: haddps %xmm1, %xmm1
; SSE41-NEXT: movaps %xmm1, %xmm0
@ -378,7 +378,7 @@ define float @test_v4f32_undef(<4 x float> %a0) {
; SSE2-LABEL: test_v4f32_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: addps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
@ -388,7 +388,7 @@ define float @test_v4f32_undef(<4 x float> %a0) {
; SSE41-LABEL: test_v4f32_undef:
; SSE41: # %bb.0:
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: addps %xmm0, %xmm1
; SSE41-NEXT: haddps %xmm1, %xmm1
; SSE41-NEXT: movaps %xmm1, %xmm0
@ -416,7 +416,7 @@ define float @test_v8f32_undef(<8 x float> %a0) {
; SSE2: # %bb.0:
; SSE2-NEXT: addps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: addps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
@ -427,7 +427,7 @@ define float @test_v8f32_undef(<8 x float> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: addps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: addps %xmm0, %xmm1
; SSE41-NEXT: haddps %xmm1, %xmm1
; SSE41-NEXT: movaps %xmm1, %xmm0
@ -465,7 +465,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE2-NEXT: addps %xmm2, %xmm0
; SSE2-NEXT: addps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: addps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
@ -478,7 +478,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE41-NEXT: addps %xmm2, %xmm0
; SSE41-NEXT: addps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: addps %xmm0, %xmm1
; SSE41-NEXT: haddps %xmm1, %xmm1
; SSE41-NEXT: movaps %xmm1, %xmm0
@ -520,8 +520,8 @@ define float @test_v16f32_undef(<16 x float> %a0) {
define double @test_v2f64(double %a0, <2 x double> %a1) {
; SSE2-LABEL: test_v2f64:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE2-NEXT: addpd %xmm1, %xmm0
; SSE2-NEXT: retq
;
@ -549,7 +549,7 @@ define double @test_v4f64(double %a0, <4 x double> %a1) {
; SSE2: # %bb.0:
; SSE2-NEXT: addpd %xmm2, %xmm1
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE2-NEXT: addpd %xmm1, %xmm0
; SSE2-NEXT: retq
;
@ -588,7 +588,7 @@ define double @test_v8f64(double %a0, <8 x double> %a1) {
; SSE2-NEXT: addpd %xmm3, %xmm1
; SSE2-NEXT: addpd %xmm2, %xmm1
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE2-NEXT: addpd %xmm1, %xmm0
; SSE2-NEXT: retq
;
@ -637,7 +637,7 @@ define double @test_v16f64(double %a0, <16 x double> %a1) {
; SSE2-NEXT: addpd %xmm2, %xmm4
; SSE2-NEXT: addpd %xmm1, %xmm4
; SSE2-NEXT: movapd %xmm4, %xmm0
; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm4[1],xmm0[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm4[1]
; SSE2-NEXT: addpd %xmm4, %xmm0
; SSE2-NEXT: retq
;
@ -689,8 +689,8 @@ define double @test_v16f64(double %a0, <16 x double> %a1) {
define double @test_v2f64_zero(<2 x double> %a0) {
; SSE2-LABEL: test_v2f64_zero:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: movapd %xmm0, %xmm1
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: addpd %xmm0, %xmm1
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
@ -718,7 +718,7 @@ define double @test_v4f64_zero(<4 x double> %a0) {
; SSE2: # %bb.0:
; SSE2-NEXT: addpd %xmm1, %xmm0
; SSE2-NEXT: movapd %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: addpd %xmm0, %xmm1
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
@ -757,7 +757,7 @@ define double @test_v8f64_zero(<8 x double> %a0) {
; SSE2-NEXT: addpd %xmm2, %xmm0
; SSE2-NEXT: addpd %xmm1, %xmm0
; SSE2-NEXT: movapd %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: addpd %xmm0, %xmm1
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
@ -806,7 +806,7 @@ define double @test_v16f64_zero(<16 x double> %a0) {
; SSE2-NEXT: addpd %xmm3, %xmm1
; SSE2-NEXT: addpd %xmm0, %xmm1
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE2-NEXT: addpd %xmm1, %xmm0
; SSE2-NEXT: retq
;
@ -858,8 +858,8 @@ define double @test_v16f64_zero(<16 x double> %a0) {
define double @test_v2f64_undef(<2 x double> %a0) {
; SSE2-LABEL: test_v2f64_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: movapd %xmm0, %xmm1
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: addpd %xmm0, %xmm1
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
@ -887,7 +887,7 @@ define double @test_v4f64_undef(<4 x double> %a0) {
; SSE2: # %bb.0:
; SSE2-NEXT: addpd %xmm1, %xmm0
; SSE2-NEXT: movapd %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: addpd %xmm0, %xmm1
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
@ -926,7 +926,7 @@ define double @test_v8f64_undef(<8 x double> %a0) {
; SSE2-NEXT: addpd %xmm2, %xmm0
; SSE2-NEXT: addpd %xmm1, %xmm0
; SSE2-NEXT: movapd %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: addpd %xmm0, %xmm1
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
@ -975,7 +975,7 @@ define double @test_v16f64_undef(<16 x double> %a0) {
; SSE2-NEXT: addpd %xmm3, %xmm1
; SSE2-NEXT: addpd %xmm0, %xmm1
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE2-NEXT: addpd %xmm1, %xmm0
; SSE2-NEXT: retq
;

View File

@ -50,7 +50,7 @@ define float @test_v4f32(float %a0, <4 x float> %a1) {
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
; SSE2-NEXT: addss %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm2
; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: addss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
@ -62,7 +62,7 @@ define float @test_v4f32(float %a0, <4 x float> %a1) {
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: addss %xmm2, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm2
; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: addss %xmm2, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: addss %xmm1, %xmm0
@ -101,7 +101,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) {
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[2,3]
; SSE2-NEXT: addss %xmm3, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm3
; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
; SSE2-NEXT: addss %xmm3, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
@ -110,7 +110,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE2-NEXT: addss %xmm2, %xmm0
@ -122,7 +122,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) {
; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
; SSE41-NEXT: addss %xmm3, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm3
; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
; SSE41-NEXT: addss %xmm3, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: addss %xmm1, %xmm0
@ -130,7 +130,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm2, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE41-NEXT: addss %xmm2, %xmm0
@ -187,7 +187,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1],xmm1[2,3]
; SSE2-NEXT: addss %xmm5, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm5
; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1]
; SSE2-NEXT: addss %xmm5, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
@ -196,7 +196,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE2-NEXT: addss %xmm2, %xmm0
@ -205,7 +205,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE2-NEXT: addss %xmm3, %xmm0
@ -214,7 +214,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm4[2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm4, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm4[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm4[1]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,1,2,3]
; SSE2-NEXT: addss %xmm4, %xmm0
@ -226,7 +226,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE41-NEXT: movshdup {{.*#+}} xmm5 = xmm1[1,1,3,3]
; SSE41-NEXT: addss %xmm5, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm5
; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1]
; SSE41-NEXT: addss %xmm5, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: addss %xmm1, %xmm0
@ -234,7 +234,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm2, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE41-NEXT: addss %xmm2, %xmm0
@ -242,7 +242,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm3, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE41-NEXT: addss %xmm3, %xmm0
@ -250,7 +250,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm4[1,1,3,3]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm4, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm4[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm4[1]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,1,2,3]
; SSE41-NEXT: addss %xmm4, %xmm0
@ -379,7 +379,7 @@ define float @test_v4f32_zero(<4 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3]
; SSE2-NEXT: addss %xmm1, %xmm2
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: addss %xmm2, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
@ -392,7 +392,7 @@ define float @test_v4f32_zero(<4 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: addss %xmm1, %xmm2
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: addss %xmm2, %xmm1
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: addss %xmm1, %xmm0
@ -434,7 +434,7 @@ define float @test_v8f32_zero(<8 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3]
; SSE2-NEXT: addss %xmm2, %xmm3
; SSE2-NEXT: movaps %xmm0, %xmm2
; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE2-NEXT: addss %xmm3, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: addss %xmm2, %xmm0
@ -443,7 +443,7 @@ define float @test_v8f32_zero(<8 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
; SSE2-NEXT: addss %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm2
; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: addss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
@ -456,7 +456,7 @@ define float @test_v8f32_zero(<8 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT: addss %xmm2, %xmm3
; SSE41-NEXT: movaps %xmm0, %xmm2
; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE41-NEXT: addss %xmm3, %xmm2
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: addss %xmm2, %xmm0
@ -464,7 +464,7 @@ define float @test_v8f32_zero(<8 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: addss %xmm2, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm2
; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: addss %xmm2, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: addss %xmm1, %xmm0
@ -524,7 +524,7 @@ define float @test_v16f32_zero(<16 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1],xmm0[2,3]
; SSE2-NEXT: addss %xmm4, %xmm5
; SSE2-NEXT: movaps %xmm0, %xmm4
; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm0[1],xmm4[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1]
; SSE2-NEXT: addss %xmm5, %xmm4
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: addss %xmm4, %xmm0
@ -533,7 +533,7 @@ define float @test_v16f32_zero(<16 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3]
; SSE2-NEXT: addss %xmm4, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm4
; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
; SSE2-NEXT: addss %xmm4, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
@ -542,7 +542,7 @@ define float @test_v16f32_zero(<16 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE2-NEXT: addss %xmm2, %xmm0
@ -551,7 +551,7 @@ define float @test_v16f32_zero(<16 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE2-NEXT: addss %xmm3, %xmm0
@ -564,7 +564,7 @@ define float @test_v16f32_zero(<16 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm5 = xmm0[1,1,3,3]
; SSE41-NEXT: addss %xmm4, %xmm5
; SSE41-NEXT: movaps %xmm0, %xmm4
; SSE41-NEXT: movhlps {{.*#+}} xmm4 = xmm0[1],xmm4[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1]
; SSE41-NEXT: addss %xmm5, %xmm4
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: addss %xmm4, %xmm0
@ -572,7 +572,7 @@ define float @test_v16f32_zero(<16 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSE41-NEXT: addss %xmm4, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm4
; SSE41-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
; SSE41-NEXT: addss %xmm4, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: addss %xmm1, %xmm0
@ -580,7 +580,7 @@ define float @test_v16f32_zero(<16 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm2, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE41-NEXT: addss %xmm2, %xmm0
@ -588,7 +588,7 @@ define float @test_v16f32_zero(<16 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm3, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE41-NEXT: addss %xmm3, %xmm0
@ -709,7 +709,7 @@ define float @test_v4f32_undef(<4 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE2-NEXT: addss {{.*}}(%rip), %xmm1
; SSE2-NEXT: movaps %xmm0, %xmm2
; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE2-NEXT: addss %xmm1, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: addss %xmm2, %xmm0
@ -720,7 +720,7 @@ define float @test_v4f32_undef(<4 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: addss {{.*}}(%rip), %xmm1
; SSE41-NEXT: movaps %xmm0, %xmm2
; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE41-NEXT: addss %xmm1, %xmm2
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: addss %xmm2, %xmm0
@ -756,7 +756,7 @@ define float @test_v8f32_undef(<8 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3]
; SSE2-NEXT: addss {{.*}}(%rip), %xmm2
; SSE2-NEXT: movaps %xmm0, %xmm3
; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
; SSE2-NEXT: addss %xmm2, %xmm3
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: addss %xmm3, %xmm0
@ -765,7 +765,7 @@ define float @test_v8f32_undef(<8 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
; SSE2-NEXT: addss %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm2
; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: addss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
@ -776,7 +776,7 @@ define float @test_v8f32_undef(<8 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: addss {{.*}}(%rip), %xmm2
; SSE41-NEXT: movaps %xmm0, %xmm3
; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
; SSE41-NEXT: addss %xmm2, %xmm3
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: addss %xmm3, %xmm0
@ -784,7 +784,7 @@ define float @test_v8f32_undef(<8 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: addss %xmm2, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm2
; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: addss %xmm2, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: addss %xmm1, %xmm0
@ -838,7 +838,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[2,3]
; SSE2-NEXT: addss {{.*}}(%rip), %xmm4
; SSE2-NEXT: movaps %xmm0, %xmm5
; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
; SSE2-NEXT: addss %xmm4, %xmm5
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: addss %xmm5, %xmm0
@ -847,7 +847,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3]
; SSE2-NEXT: addss %xmm4, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm4
; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
; SSE2-NEXT: addss %xmm4, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
@ -856,7 +856,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE2-NEXT: addss %xmm2, %xmm0
@ -865,7 +865,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE2-NEXT: addss %xmm3, %xmm0
@ -876,7 +876,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE41-NEXT: addss {{.*}}(%rip), %xmm4
; SSE41-NEXT: movaps %xmm0, %xmm5
; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
; SSE41-NEXT: addss %xmm4, %xmm5
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: addss %xmm5, %xmm0
@ -884,7 +884,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSE41-NEXT: addss %xmm4, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm4
; SSE41-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
; SSE41-NEXT: addss %xmm4, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: addss %xmm1, %xmm0
@ -892,7 +892,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm2, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE41-NEXT: addss %xmm2, %xmm0
@ -900,7 +900,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm3, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE41-NEXT: addss %xmm3, %xmm0

View File

@ -43,7 +43,7 @@ define float @test_v4f32(<4 x float> %a0) {
; SSE2-LABEL: test_v4f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
@ -53,7 +53,7 @@ define float @test_v4f32(<4 x float> %a0) {
; SSE41-LABEL: test_v4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: maxps %xmm1, %xmm0
@ -83,7 +83,7 @@ define float @test_v8f32(<8 x float> %a0) {
; SSE2: # %bb.0:
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
@ -94,7 +94,7 @@ define float @test_v8f32(<8 x float> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: maxps %xmm1, %xmm0
@ -134,7 +134,7 @@ define float @test_v16f32(<16 x float> %a0) {
; SSE2-NEXT: maxps %xmm2, %xmm0
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
@ -147,7 +147,7 @@ define float @test_v16f32(<16 x float> %a0) {
; SSE41-NEXT: maxps %xmm2, %xmm0
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: maxps %xmm1, %xmm0
@ -190,8 +190,8 @@ define float @test_v16f32(<16 x float> %a0) {
define double @test_v2f64(<2 x double> %a0) {
; SSE-LABEL: test_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@ -215,7 +215,7 @@ define double @test_v4f64(<4 x double> %a0) {
; SSE: # %bb.0:
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@ -249,7 +249,7 @@ define double @test_v8f64(<8 x double> %a0) {
; SSE-NEXT: maxpd %xmm2, %xmm0
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@ -290,7 +290,7 @@ define double @test_v16f64(<16 x double> %a0) {
; SSE-NEXT: maxpd %xmm3, %xmm1
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: retq
;

View File

@ -43,7 +43,7 @@ define float @test_v4f32(<4 x float> %a0) {
; SSE2-LABEL: test_v4f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
@ -53,7 +53,7 @@ define float @test_v4f32(<4 x float> %a0) {
; SSE41-LABEL: test_v4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: maxps %xmm1, %xmm0
@ -83,7 +83,7 @@ define float @test_v8f32(<8 x float> %a0) {
; SSE2: # %bb.0:
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
@ -94,7 +94,7 @@ define float @test_v8f32(<8 x float> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: maxps %xmm1, %xmm0
@ -134,7 +134,7 @@ define float @test_v16f32(<16 x float> %a0) {
; SSE2-NEXT: maxps %xmm2, %xmm0
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
@ -147,7 +147,7 @@ define float @test_v16f32(<16 x float> %a0) {
; SSE41-NEXT: maxps %xmm2, %xmm0
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: maxps %xmm1, %xmm0
@ -190,8 +190,8 @@ define float @test_v16f32(<16 x float> %a0) {
define double @test_v2f64(<2 x double> %a0) {
; SSE-LABEL: test_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@ -215,7 +215,7 @@ define double @test_v4f64(<4 x double> %a0) {
; SSE: # %bb.0:
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@ -249,7 +249,7 @@ define double @test_v8f64(<8 x double> %a0) {
; SSE-NEXT: maxpd %xmm2, %xmm0
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@ -290,7 +290,7 @@ define double @test_v16f64(<16 x double> %a0) {
; SSE-NEXT: maxpd %xmm3, %xmm1
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: retq
;

View File

@ -43,7 +43,7 @@ define float @test_v4f32(<4 x float> %a0) {
; SSE2-LABEL: test_v4f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
@ -53,7 +53,7 @@ define float @test_v4f32(<4 x float> %a0) {
; SSE41-LABEL: test_v4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: minps %xmm1, %xmm0
@ -83,7 +83,7 @@ define float @test_v8f32(<8 x float> %a0) {
; SSE2: # %bb.0:
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
@ -94,7 +94,7 @@ define float @test_v8f32(<8 x float> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: minps %xmm1, %xmm0
@ -134,7 +134,7 @@ define float @test_v16f32(<16 x float> %a0) {
; SSE2-NEXT: minps %xmm2, %xmm0
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
@ -147,7 +147,7 @@ define float @test_v16f32(<16 x float> %a0) {
; SSE41-NEXT: minps %xmm2, %xmm0
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: minps %xmm1, %xmm0
@ -190,8 +190,8 @@ define float @test_v16f32(<16 x float> %a0) {
define double @test_v2f64(<2 x double> %a0) {
; SSE-LABEL: test_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@ -215,7 +215,7 @@ define double @test_v4f64(<4 x double> %a0) {
; SSE: # %bb.0:
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@ -249,7 +249,7 @@ define double @test_v8f64(<8 x double> %a0) {
; SSE-NEXT: minpd %xmm2, %xmm0
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@ -290,7 +290,7 @@ define double @test_v16f64(<16 x double> %a0) {
; SSE-NEXT: minpd %xmm3, %xmm1
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: retq
;

View File

@ -43,7 +43,7 @@ define float @test_v4f32(<4 x float> %a0) {
; SSE2-LABEL: test_v4f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
@ -53,7 +53,7 @@ define float @test_v4f32(<4 x float> %a0) {
; SSE41-LABEL: test_v4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: minps %xmm1, %xmm0
@ -83,7 +83,7 @@ define float @test_v8f32(<8 x float> %a0) {
; SSE2: # %bb.0:
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
@ -94,7 +94,7 @@ define float @test_v8f32(<8 x float> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: minps %xmm1, %xmm0
@ -134,7 +134,7 @@ define float @test_v16f32(<16 x float> %a0) {
; SSE2-NEXT: minps %xmm2, %xmm0
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
@ -147,7 +147,7 @@ define float @test_v16f32(<16 x float> %a0) {
; SSE41-NEXT: minps %xmm2, %xmm0
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: minps %xmm1, %xmm0
@ -190,8 +190,8 @@ define float @test_v16f32(<16 x float> %a0) {
define double @test_v2f64(<2 x double> %a0) {
; SSE-LABEL: test_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@ -215,7 +215,7 @@ define double @test_v4f64(<4 x double> %a0) {
; SSE: # %bb.0:
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@ -249,7 +249,7 @@ define double @test_v8f64(<8 x double> %a0) {
; SSE-NEXT: minpd %xmm2, %xmm0
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@ -290,7 +290,7 @@ define double @test_v16f64(<16 x double> %a0) {
; SSE-NEXT: minpd %xmm3, %xmm1
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: retq
;

View File

@ -43,7 +43,7 @@ define float @test_v4f32(float %a0, <4 x float> %a1) {
; SSE2-LABEL: test_v4f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm1, %xmm2
; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: mulps %xmm1, %xmm2
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3]
@ -53,7 +53,7 @@ define float @test_v4f32(float %a0, <4 x float> %a1) {
; SSE41-LABEL: test_v4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: movaps %xmm1, %xmm2
; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: mulps %xmm1, %xmm2
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT: mulps %xmm2, %xmm0
@ -83,7 +83,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) {
; SSE2: # %bb.0:
; SSE2-NEXT: mulps %xmm2, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm2
; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: mulps %xmm1, %xmm2
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3]
@ -94,7 +94,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) {
; SSE41: # %bb.0:
; SSE41-NEXT: mulps %xmm2, %xmm1
; SSE41-NEXT: movaps %xmm1, %xmm2
; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: mulps %xmm1, %xmm2
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT: mulps %xmm2, %xmm0
@ -134,7 +134,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE2-NEXT: mulps %xmm3, %xmm1
; SSE2-NEXT: mulps %xmm2, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm2
; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: mulps %xmm1, %xmm2
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3]
@ -147,7 +147,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE41-NEXT: mulps %xmm3, %xmm1
; SSE41-NEXT: mulps %xmm2, %xmm1
; SSE41-NEXT: movaps %xmm1, %xmm2
; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: mulps %xmm1, %xmm2
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT: mulps %xmm2, %xmm0
@ -221,7 +221,7 @@ define float @test_v4f32_zero(<4 x float> %a0) {
; SSE2-LABEL: test_v4f32_zero:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: mulps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
@ -231,7 +231,7 @@ define float @test_v4f32_zero(<4 x float> %a0) {
; SSE41-LABEL: test_v4f32_zero:
; SSE41: # %bb.0:
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: mulps %xmm0, %xmm1
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT: mulps %xmm0, %xmm1
@ -262,7 +262,7 @@ define float @test_v8f32_zero(<8 x float> %a0) {
; SSE2: # %bb.0:
; SSE2-NEXT: mulps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: mulps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
@ -273,7 +273,7 @@ define float @test_v8f32_zero(<8 x float> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: mulps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: mulps %xmm0, %xmm1
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT: mulps %xmm0, %xmm1
@ -314,7 +314,7 @@ define float @test_v16f32_zero(<16 x float> %a0) {
; SSE2-NEXT: mulps %xmm2, %xmm0
; SSE2-NEXT: mulps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: mulps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
@ -327,7 +327,7 @@ define float @test_v16f32_zero(<16 x float> %a0) {
; SSE41-NEXT: mulps %xmm2, %xmm0
; SSE41-NEXT: mulps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: mulps %xmm0, %xmm1
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT: mulps %xmm0, %xmm1
@ -402,7 +402,7 @@ define float @test_v4f32_undef(<4 x float> %a0) {
; SSE2-LABEL: test_v4f32_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: mulps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
@ -412,7 +412,7 @@ define float @test_v4f32_undef(<4 x float> %a0) {
; SSE41-LABEL: test_v4f32_undef:
; SSE41: # %bb.0:
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: mulps %xmm0, %xmm1
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT: mulps %xmm0, %xmm1
@ -443,7 +443,7 @@ define float @test_v8f32_undef(<8 x float> %a0) {
; SSE2: # %bb.0:
; SSE2-NEXT: mulps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: mulps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
@ -454,7 +454,7 @@ define float @test_v8f32_undef(<8 x float> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: mulps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: mulps %xmm0, %xmm1
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT: mulps %xmm0, %xmm1
@ -495,7 +495,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE2-NEXT: mulps %xmm2, %xmm0
; SSE2-NEXT: mulps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: mulps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
@ -508,7 +508,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE41-NEXT: mulps %xmm2, %xmm0
; SSE41-NEXT: mulps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: mulps %xmm0, %xmm1
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT: mulps %xmm0, %xmm1
@ -552,8 +552,8 @@ define float @test_v16f32_undef(<16 x float> %a0) {
define double @test_v2f64(double %a0, <2 x double> %a1) {
; SSE-LABEL: test_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@ -577,7 +577,7 @@ define double @test_v4f64(double %a0, <4 x double> %a1) {
; SSE: # %bb.0:
; SSE-NEXT: mulpd %xmm2, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@ -611,7 +611,7 @@ define double @test_v8f64(double %a0, <8 x double> %a1) {
; SSE-NEXT: mulpd %xmm3, %xmm1
; SSE-NEXT: mulpd %xmm2, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@ -652,7 +652,7 @@ define double @test_v16f64(double %a0, <16 x double> %a1) {
; SSE-NEXT: mulpd %xmm2, %xmm4
; SSE-NEXT: mulpd %xmm1, %xmm4
; SSE-NEXT: movapd %xmm4, %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm4[1],xmm0[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm4[1]
; SSE-NEXT: mulpd %xmm4, %xmm0
; SSE-NEXT: retq
;
@ -692,8 +692,8 @@ define double @test_v16f64(double %a0, <16 x double> %a1) {
define double @test_v2f64_zero(<2 x double> %a0) {
; SSE-LABEL: test_v2f64_zero:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: mulpd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
@ -718,7 +718,7 @@ define double @test_v4f64_zero(<4 x double> %a0) {
; SSE: # %bb.0:
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: mulpd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
@ -753,7 +753,7 @@ define double @test_v8f64_zero(<8 x double> %a0) {
; SSE-NEXT: mulpd %xmm2, %xmm0
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: mulpd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
@ -795,7 +795,7 @@ define double @test_v16f64_zero(<16 x double> %a0) {
; SSE-NEXT: mulpd %xmm3, %xmm1
; SSE-NEXT: mulpd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@ -835,8 +835,8 @@ define double @test_v16f64_zero(<16 x double> %a0) {
define double @test_v2f64_undef(<2 x double> %a0) {
; SSE-LABEL: test_v2f64_undef:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: mulpd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
@ -861,7 +861,7 @@ define double @test_v4f64_undef(<4 x double> %a0) {
; SSE: # %bb.0:
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: mulpd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
@ -896,7 +896,7 @@ define double @test_v8f64_undef(<8 x double> %a0) {
; SSE-NEXT: mulpd %xmm2, %xmm0
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: mulpd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
@ -938,7 +938,7 @@ define double @test_v16f64_undef(<16 x double> %a0) {
; SSE-NEXT: mulpd %xmm3, %xmm1
; SSE-NEXT: mulpd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: retq
;

View File

@ -50,7 +50,7 @@ define float @test_v4f32(float %a0, <4 x float> %a1) {
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm2
; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
@ -62,7 +62,7 @@ define float @test_v4f32(float %a0, <4 x float> %a1) {
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm2
; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
@ -101,7 +101,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) {
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm3
; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
; SSE2-NEXT: mulss %xmm3, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
@ -110,7 +110,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
@ -122,7 +122,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) {
; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm3
; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
; SSE41-NEXT: mulss %xmm3, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
@ -130,7 +130,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm2, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
@ -187,7 +187,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm5, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm5
; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1]
; SSE2-NEXT: mulss %xmm5, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
@ -196,7 +196,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
@ -205,7 +205,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
@ -214,7 +214,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm4[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm4, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm4[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm4[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,1,2,3]
; SSE2-NEXT: mulss %xmm4, %xmm0
@ -226,7 +226,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE41-NEXT: movshdup {{.*#+}} xmm5 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm5, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm5
; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1]
; SSE41-NEXT: mulss %xmm5, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
@ -234,7 +234,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm2, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
@ -242,7 +242,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm3, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
@ -250,7 +250,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm4[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm4, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm4[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm4[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,1,2,3]
; SSE41-NEXT: mulss %xmm4, %xmm0
@ -371,7 +371,7 @@ define float @test_v4f32_one(<4 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE2-NEXT: mulss %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm0, %xmm2
; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE2-NEXT: mulss %xmm1, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
@ -382,7 +382,7 @@ define float @test_v4f32_one(<4 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss %xmm0, %xmm1
; SSE41-NEXT: movaps %xmm0, %xmm2
; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE41-NEXT: mulss %xmm1, %xmm2
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
@ -418,7 +418,7 @@ define float @test_v8f32_one(<8 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3]
; SSE2-NEXT: mulss %xmm0, %xmm2
; SSE2-NEXT: movaps %xmm0, %xmm3
; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
; SSE2-NEXT: mulss %xmm2, %xmm3
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
@ -427,7 +427,7 @@ define float @test_v8f32_one(<8 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm2
; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
@ -438,7 +438,7 @@ define float @test_v8f32_one(<8 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss %xmm0, %xmm2
; SSE41-NEXT: movaps %xmm0, %xmm3
; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
; SSE41-NEXT: mulss %xmm2, %xmm3
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
@ -446,7 +446,7 @@ define float @test_v8f32_one(<8 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm2
; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
@ -500,7 +500,7 @@ define float @test_v16f32_one(<16 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[2,3]
; SSE2-NEXT: mulss %xmm0, %xmm4
; SSE2-NEXT: movaps %xmm0, %xmm5
; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
; SSE2-NEXT: mulss %xmm4, %xmm5
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm5, %xmm0
@ -509,7 +509,7 @@ define float @test_v16f32_one(<16 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm4, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm4
; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
; SSE2-NEXT: mulss %xmm4, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
@ -518,7 +518,7 @@ define float @test_v16f32_one(<16 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
@ -527,7 +527,7 @@ define float @test_v16f32_one(<16 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
@ -538,7 +538,7 @@ define float @test_v16f32_one(<16 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss %xmm0, %xmm4
; SSE41-NEXT: movaps %xmm0, %xmm5
; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
; SSE41-NEXT: mulss %xmm4, %xmm5
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm5, %xmm0
@ -546,7 +546,7 @@ define float @test_v16f32_one(<16 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm4, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm4
; SSE41-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
; SSE41-NEXT: mulss %xmm4, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
@ -554,7 +554,7 @@ define float @test_v16f32_one(<16 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm2, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
@ -562,7 +562,7 @@ define float @test_v16f32_one(<16 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm3, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
@ -679,7 +679,7 @@ define float @test_v4f32_undef(<4 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE2-NEXT: mulss {{.*}}(%rip), %xmm1
; SSE2-NEXT: movaps %xmm0, %xmm2
; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE2-NEXT: mulss %xmm1, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
@ -690,7 +690,7 @@ define float @test_v4f32_undef(<4 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss {{.*}}(%rip), %xmm1
; SSE41-NEXT: movaps %xmm0, %xmm2
; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE41-NEXT: mulss %xmm1, %xmm2
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
@ -726,7 +726,7 @@ define float @test_v8f32_undef(<8 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3]
; SSE2-NEXT: mulss {{.*}}(%rip), %xmm2
; SSE2-NEXT: movaps %xmm0, %xmm3
; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
; SSE2-NEXT: mulss %xmm2, %xmm3
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
@ -735,7 +735,7 @@ define float @test_v8f32_undef(<8 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm2
; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
@ -746,7 +746,7 @@ define float @test_v8f32_undef(<8 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss {{.*}}(%rip), %xmm2
; SSE41-NEXT: movaps %xmm0, %xmm3
; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
; SSE41-NEXT: mulss %xmm2, %xmm3
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
@ -754,7 +754,7 @@ define float @test_v8f32_undef(<8 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm2
; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
@ -808,7 +808,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[2,3]
; SSE2-NEXT: mulss {{.*}}(%rip), %xmm4
; SSE2-NEXT: movaps %xmm0, %xmm5
; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
; SSE2-NEXT: mulss %xmm4, %xmm5
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm5, %xmm0
@ -817,7 +817,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm4, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm4
; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
; SSE2-NEXT: mulss %xmm4, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
@ -826,7 +826,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
@ -835,7 +835,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
@ -846,7 +846,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss {{.*}}(%rip), %xmm4
; SSE41-NEXT: movaps %xmm0, %xmm5
; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
; SSE41-NEXT: mulss %xmm4, %xmm5
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm5, %xmm0
@ -854,7 +854,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm4, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm4
; SSE41-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
; SSE41-NEXT: mulss %xmm4, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
@ -862,7 +862,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm2, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
@ -870,7 +870,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm3, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
@ -1182,8 +1182,8 @@ define double @test_v16f64(double %a0, <16 x double> %a1) {
define double @test_v2f64_one(<2 x double> %a0) {
; SSE-LABEL: test_v2f64_one:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: mulsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
@ -1206,8 +1206,8 @@ define double @test_v2f64_one(<2 x double> %a0) {
define double @test_v4f64_one(<4 x double> %a0) {
; SSE-LABEL: test_v4f64_one:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
; SSE-NEXT: movapd %xmm0, %xmm2
; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE-NEXT: mulsd %xmm0, %xmm2
; SSE-NEXT: mulsd %xmm1, %xmm2
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
@ -1243,8 +1243,8 @@ define double @test_v4f64_one(<4 x double> %a0) {
define double @test_v8f64_one(<8 x double> %a0) {
; SSE-LABEL: test_v8f64_one:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm4
; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm0[1],xmm4[1]
; SSE-NEXT: movapd %xmm0, %xmm4
; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1]
; SSE-NEXT: mulsd %xmm0, %xmm4
; SSE-NEXT: mulsd %xmm1, %xmm4
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]

View File

@ -74,7 +74,7 @@ define void @convert_v3i8_to_v3f32(<3 x float>* %dst.addr, <3 x i8>* %src.addr)
; X86-SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
; X86-SSE2-NEXT: movss %xmm0, (%eax)
; X86-SSE2-NEXT: movaps %xmm0, %xmm1
; X86-SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; X86-SSE2-NEXT: movss %xmm1, 8(%eax)
; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X86-SSE2-NEXT: movss %xmm0, 4(%eax)

View File

@ -19,7 +19,7 @@ define void @convert_v7i16_v7f32(<7 x float>* %dst.addr, <7 x i16> %src) nounwin
; X86-SSE2-NEXT: movups %xmm0, (%eax)
; X86-SSE2-NEXT: movss %xmm2, 16(%eax)
; X86-SSE2-NEXT: movaps %xmm2, %xmm0
; X86-SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm2[1],xmm0[1]
; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
; X86-SSE2-NEXT: movss %xmm0, 24(%eax)
; X86-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
; X86-SSE2-NEXT: movss %xmm2, 20(%eax)
@ -100,7 +100,7 @@ define void @convert_v3i8_to_v3f32(<3 x float>* %dst.addr, <3 x i8>* %src.addr)
; X86-SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
; X86-SSE2-NEXT: movss %xmm0, (%eax)
; X86-SSE2-NEXT: movaps %xmm0, %xmm1
; X86-SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; X86-SSE2-NEXT: movss %xmm1, 8(%eax)
; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X86-SSE2-NEXT: movss %xmm0, 4(%eax)