llvm-project/llvm/test/CodeGen/X86/v2f32.ll

; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn -o - | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mcpu=yonah -march=x86 -mtriple=i386-linux-gnu -o - | FileCheck %s --check-prefix=X32

; PR7518
; test1: regression test for PR7518 (poor code generation for <2 x float>).
; Adds element 0 and element 1 of the <2 x float> argument %Q and stores the
; scalar sum through %P2. On both targets the checks expect one movshdup to
; copy the odd lane down, one addss, and one movss store; the i386 run also
; expects %P2 to be loaded from the stack into %eax first.
define void @test1(<2 x float> %Q, float *%P2) nounwind {
; X64-LABEL: test1:
; X64:       # BB#0:
; X64-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; X64-NEXT:    addss %xmm0, %xmm1
; X64-NEXT:    movss %xmm1, (%rdi)
; X64-NEXT:    retq
;
; X32-LABEL: test1:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; X32-NEXT:    addss %xmm0, %xmm1
; X32-NEXT:    movss %xmm1, (%eax)
; X32-NEXT:    retl
  ; Scalarize both lanes, sum them, and store the single float result.
  %a = extractelement <2 x float> %Q, i32 0
  %b = extractelement <2 x float> %Q, i32 1
  %c = fadd float %a, %b
  store float %c, float* %P2
  ret void
}

; test2: element-wise fadd of two <2 x float> arguments, returned by value.
; Both targets are expected to emit exactly one addps before the return.
; The pointer argument %P is not referenced anywhere in the body.
define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, <2 x float> *%P) nounwind {
; X64-LABEL: test2:
; X64:       # BB#0:
; X64-NEXT:    addps %xmm1, %xmm0
; X64-NEXT:    retq
;
; X32-LABEL: test2:
; X32:       # BB#0:
; X32-NEXT:    addps %xmm1, %xmm0
; X32-NEXT:    retl
  %Z = fadd <2 x float> %Q, %R
  ret <2 x float> %Z
}

; test3: narrows a <4 x float> argument to its low two elements with a
; shufflevector, then adds that <2 x float> to itself. Both targets are
; expected to emit a single addps on the incoming register, with no separate
; shuffle instruction before the return.
define <2 x float> @test3(<4 x float> %A) nounwind {
; X64-LABEL: test3:
; X64:       # BB#0:
; X64-NEXT:    addps %xmm0, %xmm0
; X64-NEXT:    retq
;
; X32-LABEL: test3:
; X32:       # BB#0:
; X32-NEXT:    addps %xmm0, %xmm0
; X32-NEXT:    retl
	; Mask <0, 1> keeps only the low half of %A.
	%B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
	%C = fadd <2 x float> %B, %B
	ret <2 x float> %C
}

; test4: adds a <2 x float> argument to itself and returns the result.
; Both targets are expected to emit a single addps of %xmm0 with itself.
define <2 x float> @test4(<2 x float> %A) nounwind {
; X64-LABEL: test4:
; X64:       # BB#0:
; X64-NEXT:    addps %xmm0, %xmm0
; X64-NEXT:    retq
;
; X32-LABEL: test4:
; X32:       # BB#0:
; X32-NEXT:    addps %xmm0, %xmm0
; X32-NEXT:    retl
	%C = fadd <2 x float> %A, %A
	ret <2 x float> %C
}

; test5: a <2 x float> value that is live across a basic-block boundary.
; The entry block narrows %A to its low two elements and doubles it (%C);
; block %BB doubles %C again (%D) and widens the result back to <4 x float>
; with the upper two lanes left undef. Both targets are expected to emit two
; back-to-back addps instructions and nothing else before the return.
define <4 x float> @test5(<4 x float> %A) nounwind {
; X64-LABEL: test5:
; X64:       # BB#0:
; X64-NEXT:    addps %xmm0, %xmm0
; X64-NEXT:    addps %xmm0, %xmm0
; X64-NEXT:    retq
;
; X32-LABEL: test5:
; X32:       # BB#0:
; X32-NEXT:    addps %xmm0, %xmm0
; X32-NEXT:    addps %xmm0, %xmm0
; X32-NEXT:    retl
	%B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
	%C = fadd <2 x float> %B, %B
  ; The unconditional branch splits the computation across two blocks, so %C
  ; is defined in one block and used in the next.
  br label %BB

BB:
  %D = fadd <2 x float> %C, %C
	; Widen back to four lanes; lanes 2 and 3 are undef.
	%E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
	ret <4 x float> %E
}
[x86] Regenerate and clean up more tests in preparation for vector shuffle switch. I nuked a win64 config from one test as it doesn't really make sense to cover that ABI specially for generic v2f32 tests... llvm-svn: 218948 2014-10-03 09:44:04 +08:00			`; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn -o - \| FileCheck %s --check-prefix=X64`
			`; RUN: llc < %s -mcpu=yonah -march=x86 -mtriple=i386-linux-gnu -o - \| FileCheck %s --check-prefix=X32`
fix PR7518 - terrible codegen of <2 x float>, by only marking v2f32 as legal in 32-bit mode. It is just as terrible there, but I just care about x86-64 and no one claims it is valuable in 64-bit mode. llvm-svn: 107600 2010-07-05 06:57:10 +08:00
			`; PR7518`
Line endings fix. NFC. llvm-svn: 227138 2015-01-27 05:28:32 +08:00			`define void @test1(<2 x float> %Q, float *%P2) nounwind {`
			`; X64-LABEL: test1:`
			`; X64: # BB#0:`
			`; X64-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]`
			`; X64-NEXT: addss %xmm0, %xmm1`
			`; X64-NEXT: movss %xmm1, (%rdi)`
			`; X64-NEXT: retq`
			`;`
			`; X32-LABEL: test1:`
			`; X32: # BB#0:`
			`; X32-NEXT: movl {{[0-9]+}}(%esp), %eax`
			`; X32-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]`
			`; X32-NEXT: addss %xmm0, %xmm1`
			`; X32-NEXT: movss %xmm1, (%eax)`
			`; X32-NEXT: retl`
fix PR7518 - terrible codegen of <2 x float>, by only marking v2f32 as legal in 32-bit mode. It is just as terrible there, but I just care about x86-64 and no one claims it is valuable in 64-bit mode. llvm-svn: 107600 2010-07-05 06:57:10 +08:00			`%a = extractelement <2 x float> %Q, i32 0`
			`%b = extractelement <2 x float> %Q, i32 1`
			`%c = fadd float %a, %b`
			`store float %c, float* %P2`
			`ret void`
			`}`

another v2f32 case, in this case showing poor codegen. llvm-svn: 107614 2010-07-05 13:52:56 +08:00			`define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, <2 x float> *%P) nounwind {`
Convert CodeGen/*/*.ll tests to use the new CHECK-LABEL for easier debugging. No functionality change and all tests pass after conversion. This was done with the following sed invocation to catch label lines demarking function boundaries: sed -i '' "s/^;\( *\)\([A-Z0-9_]*\):\( *\)test\([A-Za-z0-9_-]*\):\( *\)$/;\1\2-LABEL:\3test\4:\5/g" test/CodeGen/*/*.ll which was written conservatively to avoid false positives rather than false negatives. I scanned through all the changes and everything looks correct. llvm-svn: 186258 2013-07-14 04:38:47 +08:00			`; X64-LABEL: test2:`
[x86] Regenerate and clean up more tests in preparation for vector shuffle switch. I nuked a win64 config from one test as it doesn't really make sense to cover that ABI specially for generic v2f32 tests... llvm-svn: 218948 2014-10-03 09:44:04 +08:00			`; X64: # BB#0:`
			`; X64-NEXT: addps %xmm1, %xmm0`
			`; X64-NEXT: retq`
			`;`
Convert CodeGen/*/*.ll tests to use the new CHECK-LABEL for easier debugging. No functionality change and all tests pass after conversion. This was done with the following sed invocation to catch label lines demarking function boundaries: sed -i '' "s/^;\( *\)\([A-Z0-9_]*\):\( *\)test\([A-Za-z0-9_-]*\):\( *\)$/;\1\2-LABEL:\3test\4:\5/g" test/CodeGen/*/*.ll which was written conservatively to avoid false positives rather than false negatives. I scanned through all the changes and everything looks correct. llvm-svn: 186258 2013-07-14 04:38:47 +08:00			`; X32-LABEL: test2:`
[x86] Regenerate and clean up more tests in preparation for vector shuffle switch. I nuked a win64 config from one test as it doesn't really make sense to cover that ABI specially for generic v2f32 tests... llvm-svn: 218948 2014-10-03 09:44:04 +08:00			`; X32: # BB#0:`
			`; X32-NEXT: addps %xmm1, %xmm0`
			`; X32-NEXT: retl`
			`%Z = fadd <2 x float> %Q, %R`
			`ret <2 x float> %Z`
another v2f32 case, in this case showing poor codegen. llvm-svn: 107614 2010-07-05 13:52:56 +08:00			`}`
Change handling of illegal vector types to widen when possible instead of expanding: e.g. <2 x float> -> <4 x float> instead of -> 2 floats. This affects two places in the code: handling cross block values and handling function return and arguments. Since vectors are already widened by legalizetypes, this gives us much better code and unblocks x86-64 abi and SPU abi work. For example, this (which is a silly example of a cross-block value): define <4 x float> @test2(<4 x float> %A) nounwind { %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1> %C = fadd <2 x float> %B, %B br label %BB BB: %D = fadd <2 x float> %C, %C %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ret <4 x float> %E } Now compiles into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 addps %xmm0, %xmm0 ret previously it compiled into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 pshufd $1, %xmm0, %xmm1 ## kill: XMM0<def> XMM0<kill> XMM0<def> insertps $0, %xmm0, %xmm0 insertps $16, %xmm1, %xmm0 addps %xmm0, %xmm0 ret This implements rdar://8230384 llvm-svn: 112101 2010-08-26 06:49:25 +08:00
			`define <2 x float> @test3(<4 x float> %A) nounwind {`
[x86] Regenerate and clean up more tests in preparation for vector shuffle switch. I nuked a win64 config from one test as it doesn't really make sense to cover that ABI specially for generic v2f32 tests... llvm-svn: 218948 2014-10-03 09:44:04 +08:00			`; X64-LABEL: test3:`
			`; X64: # BB#0:`
			`; X64-NEXT: addps %xmm0, %xmm0`
			`; X64-NEXT: retq`
			`;`
			`; X32-LABEL: test3:`
			`; X32: # BB#0:`
			`; X32-NEXT: addps %xmm0, %xmm0`
			`; X32-NEXT: retl`
Change handling of illegal vector types to widen when possible instead of expanding: e.g. <2 x float> -> <4 x float> instead of -> 2 floats. This affects two places in the code: handling cross block values and handling function return and arguments. Since vectors are already widened by legalizetypes, this gives us much better code and unblocks x86-64 abi and SPU abi work. For example, this (which is a silly example of a cross-block value): define <4 x float> @test2(<4 x float> %A) nounwind { %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1> %C = fadd <2 x float> %B, %B br label %BB BB: %D = fadd <2 x float> %C, %C %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ret <4 x float> %E } Now compiles into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 addps %xmm0, %xmm0 ret previously it compiled into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 pshufd $1, %xmm0, %xmm1 ## kill: XMM0<def> XMM0<kill> XMM0<def> insertps $0, %xmm0, %xmm0 insertps $16, %xmm1, %xmm0 addps %xmm0, %xmm0 ret This implements rdar://8230384 llvm-svn: 112101 2010-08-26 06:49:25 +08:00			`%B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>`
			`%C = fadd <2 x float> %B, %B`
			`ret <2 x float> %C`
			`}`

			`define <2 x float> @test4(<2 x float> %A) nounwind {`
Convert CodeGen/*/*.ll tests to use the new CHECK-LABEL for easier debugging. No functionality change and all tests pass after conversion. This was done with the following sed invocation to catch label lines demarking function boundaries: sed -i '' "s/^;\( *\)\([A-Z0-9_]*\):\( *\)test\([A-Za-z0-9_-]*\):\( *\)$/;\1\2-LABEL:\3test\4:\5/g" test/CodeGen/*/*.ll which was written conservatively to avoid false positives rather than false negatives. I scanned through all the changes and everything looks correct. llvm-svn: 186258 2013-07-14 04:38:47 +08:00			`; X64-LABEL: test4:`
[x86] Regenerate and clean up more tests in preparation for vector shuffle switch. I nuked a win64 config from one test as it doesn't really make sense to cover that ABI specially for generic v2f32 tests... llvm-svn: 218948 2014-10-03 09:44:04 +08:00			`; X64: # BB#0:`
			`; X64-NEXT: addps %xmm0, %xmm0`
			`; X64-NEXT: retq`
			`;`
Convert CodeGen/*/*.ll tests to use the new CHECK-LABEL for easier debugging. No functionality change and all tests pass after conversion. This was done with the following sed invocation to catch label lines demarking function boundaries: sed -i '' "s/^;\( *\)\([A-Z0-9_]*\):\( *\)test\([A-Za-z0-9_-]*\):\( *\)$/;\1\2-LABEL:\3test\4:\5/g" test/CodeGen/*/*.ll which was written conservatively to avoid false positives rather than false negatives. I scanned through all the changes and everything looks correct. llvm-svn: 186258 2013-07-14 04:38:47 +08:00			`; X32-LABEL: test4:`
[x86] Regenerate and clean up more tests in preparation for vector shuffle switch. I nuked a win64 config from one test as it doesn't really make sense to cover that ABI specially for generic v2f32 tests... llvm-svn: 218948 2014-10-03 09:44:04 +08:00			`; X32: # BB#0:`
			`; X32-NEXT: addps %xmm0, %xmm0`
			`; X32-NEXT: retl`
			`%C = fadd <2 x float> %A, %A`
			`ret <2 x float> %C`
Change handling of illegal vector types to widen when possible instead of expanding: e.g. <2 x float> -> <4 x float> instead of -> 2 floats. This affects two places in the code: handling cross block values and handling function return and arguments. Since vectors are already widened by legalizetypes, this gives us much better code and unblocks x86-64 abi and SPU abi work. For example, this (which is a silly example of a cross-block value): define <4 x float> @test2(<4 x float> %A) nounwind { %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1> %C = fadd <2 x float> %B, %B br label %BB BB: %D = fadd <2 x float> %C, %C %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ret <4 x float> %E } Now compiles into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 addps %xmm0, %xmm0 ret previously it compiled into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 pshufd $1, %xmm0, %xmm1 ## kill: XMM0<def> XMM0<kill> XMM0<def> insertps $0, %xmm0, %xmm0 insertps $16, %xmm1, %xmm0 addps %xmm0, %xmm0 ret This implements rdar://8230384 llvm-svn: 112101 2010-08-26 06:49:25 +08:00			`}`

			`define <4 x float> @test5(<4 x float> %A) nounwind {`
[x86] Regenerate and clean up more tests in preparation for vector shuffle switch. I nuked a win64 config from one test as it doesn't really make sense to cover that ABI specially for generic v2f32 tests... llvm-svn: 218948 2014-10-03 09:44:04 +08:00			`; X64-LABEL: test5:`
			`; X64: # BB#0:`
			`; X64-NEXT: addps %xmm0, %xmm0`
			`; X64-NEXT: addps %xmm0, %xmm0`
			`; X64-NEXT: retq`
			`;`
			`; X32-LABEL: test5:`
			`; X32: # BB#0:`
			`; X32-NEXT: addps %xmm0, %xmm0`
			`; X32-NEXT: addps %xmm0, %xmm0`
			`; X32-NEXT: retl`
Change handling of illegal vector types to widen when possible instead of expanding: e.g. <2 x float> -> <4 x float> instead of -> 2 floats. This affects two places in the code: handling cross block values and handling function return and arguments. Since vectors are already widened by legalizetypes, this gives us much better code and unblocks x86-64 abi and SPU abi work. For example, this (which is a silly example of a cross-block value): define <4 x float> @test2(<4 x float> %A) nounwind { %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1> %C = fadd <2 x float> %B, %B br label %BB BB: %D = fadd <2 x float> %C, %C %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ret <4 x float> %E } Now compiles into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 addps %xmm0, %xmm0 ret previously it compiled into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 pshufd $1, %xmm0, %xmm1 ## kill: XMM0<def> XMM0<kill> XMM0<def> insertps $0, %xmm0, %xmm0 insertps $16, %xmm1, %xmm0 addps %xmm0, %xmm0 ret This implements rdar://8230384 llvm-svn: 112101 2010-08-26 06:49:25 +08:00			`%B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>`
			`%C = fadd <2 x float> %B, %B`
[x86] Regenerate and clean up more tests in preparation for vector shuffle switch. I nuked a win64 config from one test as it doesn't really make sense to cover that ABI specially for generic v2f32 tests... llvm-svn: 218948 2014-10-03 09:44:04 +08:00			`br label %BB`

Change handling of illegal vector types to widen when possible instead of expanding: e.g. <2 x float> -> <4 x float> instead of -> 2 floats. This affects two places in the code: handling cross block values and handling function return and arguments. Since vectors are already widened by legalizetypes, this gives us much better code and unblocks x86-64 abi and SPU abi work. For example, this (which is a silly example of a cross-block value): define <4 x float> @test2(<4 x float> %A) nounwind { %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1> %C = fadd <2 x float> %B, %B br label %BB BB: %D = fadd <2 x float> %C, %C %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ret <4 x float> %E } Now compiles into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 addps %xmm0, %xmm0 ret previously it compiled into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 pshufd $1, %xmm0, %xmm1 ## kill: XMM0<def> XMM0<kill> XMM0<def> insertps $0, %xmm0, %xmm0 insertps $16, %xmm1, %xmm0 addps %xmm0, %xmm0 ret This implements rdar://8230384 llvm-svn: 112101 2010-08-26 06:49:25 +08:00			`BB:`
[x86] Regenerate and clean up more tests in preparation for vector shuffle switch. I nuked a win64 config from one test as it doesn't really make sense to cover that ABI specially for generic v2f32 tests... llvm-svn: 218948 2014-10-03 09:44:04 +08:00			`%D = fadd <2 x float> %C, %C`
Change handling of illegal vector types to widen when possible instead of expanding: e.g. <2 x float> -> <4 x float> instead of -> 2 floats. This affects two places in the code: handling cross block values and handling function return and arguments. Since vectors are already widened by legalizetypes, this gives us much better code and unblocks x86-64 abi and SPU abi work. For example, this (which is a silly example of a cross-block value): define <4 x float> @test2(<4 x float> %A) nounwind { %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1> %C = fadd <2 x float> %B, %B br label %BB BB: %D = fadd <2 x float> %C, %C %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ret <4 x float> %E } Now compiles into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 addps %xmm0, %xmm0 ret previously it compiled into: _test2: ## @test2 ## BB#0: addps %xmm0, %xmm0 pshufd $1, %xmm0, %xmm1 ## kill: XMM0<def> XMM0<kill> XMM0<def> insertps $0, %xmm0, %xmm0 insertps $16, %xmm1, %xmm0 addps %xmm0, %xmm0 ret This implements rdar://8230384 llvm-svn: 112101 2010-08-26 06:49:25 +08:00			`%E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>`
			`ret <4 x float> %E`
			`}`