We compile multiply-by-constant into horrible code. Doesn't SSE4 have some instruction for doing this?

llvm-svn: 51473
2008-05-23 04:29:53 +00:00 · 2008-05-23 04:29:53 +00:00 · 3546c2b4e4
parent 452fb32927
commit 3546c2b4e4
1 changed files with 38 additions and 0 deletions
--- a/llvm/lib/Target/X86/README-SSE.txt
+++ b/llvm/lib/Target/X86/README-SSE.txt
@@ -807,3 +807,41 @@ foo1:

 //===---------------------------------------------------------------------===//

+We compile vector multiply-by-constant into poor code (each element is extracted, multiplied with a scalar imull, and repacked):
+
+define <4 x i32> @f(<4 x i32> %i) nounwind  {
+	%A = mul <4 x i32> %i, < i32 10, i32 10, i32 10, i32 10 >
+	ret <4 x i32> %A
+}
+
+Compiles into:
+
+LCPI1_0:					##  <4 x i32>
+	.long	10
+	.long	10
+	.long	10
+	.long	10
+	.text
+	.align	4,0x90
+	.globl	_f
+_f:
+	pshufd	$3, %xmm0, %xmm1
+	movd	%xmm1, %eax
+	imull	LCPI1_0+12, %eax
+	movd	%eax, %xmm1
+	pshufd	$1, %xmm0, %xmm2
+	movd	%xmm2, %eax
+	imull	LCPI1_0+4, %eax
+	movd	%eax, %xmm2
+	punpckldq	%xmm1, %xmm2
+	movd	%xmm0, %eax
+	imull	LCPI1_0, %eax
+	movd	%eax, %xmm1
+	movhlps	%xmm0, %xmm0
+	movd	%xmm0, %eax
+	imull	LCPI1_0+8, %eax
+	movd	%eax, %xmm0
+	punpckldq	%xmm0, %xmm1
+	movaps	%xmm1, %xmm0
+	punpckldq	%xmm2, %xmm0
+	ret