Using target-specific nodes for shuffle nodes makes the mask
check stricter, which breaks some cases not covered by the
testsuite but also exposes some foldings that were not done before,
as in this example:
movaps (%rdi), %xmm0
movaps (%rax), %xmm1
movaps %xmm0, %xmm2
movss %xmm1, %xmm2
shufps $36, %xmm2, %xmm0
is now generated as:
movaps (%rdi), %xmm0
movaps %xmm0, %xmm1
movlps (%rax), %xmm1
shufps $36, %xmm1, %xmm0
llvm-svn: 112753
2010-09-02 06:33:20 +08:00
|
|
|
; RUN: llc < %s -march=x86-64 | FileCheck %s
|
2010-09-10 02:48:34 +08:00
|
|
|
; RUN: llc -O0 < %s -march=x86 -mcpu=core2 | FileCheck %s --check-prefix=CHECK_O0
|
Using target-specific nodes for shuffle nodes makes the mask
check stricter, which breaks some cases not covered by the
testsuite but also exposes some foldings that were not done before,
as in this example:
movaps (%rdi), %xmm0
movaps (%rax), %xmm1
movaps %xmm0, %xmm2
movss %xmm1, %xmm2
shufps $36, %xmm2, %xmm0
is now generated as:
movaps (%rdi), %xmm0
movaps %xmm0, %xmm1
movlps (%rax), %xmm1
shufps $36, %xmm1, %xmm0
llvm-svn: 112753
2010-09-02 06:33:20 +08:00
|
|
|
|
|
|
|
; t00: shuffle of two loaded <4 x i32> vectors.
; Returns a vector whose lanes 0-2 come from the vector loaded from %a0 and
; whose lane 3 is lane 0 of a vector loaded from an undef address.
; The default-prefix check lines in the body pin the expected x86-64 output
; to movaps / movaps / movlps / shufps $36, in that order.
; NOTE(review): presumably the movlps from (%rax) is the load of the
; undef-address operand folded into the shuffle lowering -- confirm.
define <4 x i32> @t00(<4 x i32>* %a0) nounwind ssp {
|
|
|
|
entry:
|
|
|
|
; CHECK: movaps (%rdi), %xmm0
|
|
|
|
; CHECK-NEXT: movaps %xmm0, %xmm1
|
|
|
|
; CHECK-NEXT: movlps (%rax), %xmm1
|
|
|
|
; CHECK-NEXT: shufps $36, %xmm1, %xmm0
|
|
|
|
; Second shuffle operand: loaded through an undef pointer, so the address
; itself carries no meaning for the test.
%0 = load <4 x i32>* undef, align 16
|
|
|
|
; First shuffle operand: the vector pointed to by the function argument.
%1 = load <4 x i32>* %a0, align 16
|
|
|
|
; Mask <0, 1, 2, 4>: indices 0-2 select from %1; index 4 selects lane 0 of %0.
%2 = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
|
2010-09-10 02:48:34 +08:00
|
|
|
; t01: insert a loaded double into the high lane of an otherwise-undef vector.
; Loads a double from %a0, places it in element 1 of an undef <2 x double>,
; and stores the result through an undef pointer.
; The CHECK_O0-prefixed lines in the body (used by the llc -O0 -march=x86
; -mcpu=core2 invocation of this file) expect a movsd load followed by
; unpcklpd %xmm0, %xmm0.
define void @t01(double* %a0) nounwind ssp {
|
|
|
|
entry:
|
|
|
|
; CHECK_O0: movsd (%eax), %xmm0
|
|
|
|
; CHECK_O0: unpcklpd %xmm0, %xmm0
|
|
|
|
; Scalar source value for the insert.
%tmp93 = load double* %a0, align 8
|
|
|
|
; Element index 1 is the upper lane; lane 0 stays undef.
%vecinit94 = insertelement <2 x double> undef, double %tmp93, i32 1
|
|
|
|
; Store through an undef pointer: the destination address carries no meaning
; for the test.
store <2 x double> %vecinit94, <2 x double>* undef
|
|
|
|
ret void
|
|
|
|
}
|