; llvm-project/llvm/test/CodeGen/X86/vec_ss_load_fold.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=+sse,+sse2,+sse4.1 | FileCheck %s

; Data layout for the module: "e" selects little-endian and "p:32:32" declares
; 32-bit pointers with 32-bit ABI alignment, in line with the i686 triple passed
; to llc above.
target datalayout = "e-p:32:32"

; test1: scalar SSE intrinsic chain whose second operands are all constants.
;
; The input float is placed in lane 0 of a vector whose other three lanes are
; explicitly zeroed, then run through the sub.ss, mul.ss, min.ss and max.ss
; intrinsics and finally converted with cvttss2si and truncated to i16.
;
; The assertions below expect the sub/mul/min constants to be used directly as
; memory operands (LCPI0_0..LCPI0_2) of subss/mulss/minss, while the all-zero
; operand of max.ss is materialized in a register with xorps.
define i16 @test1(float %f) nounwind {
; CHECK-LABEL: test1:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    subss LCPI0_0, %xmm0
; CHECK-NEXT:    mulss LCPI0_1, %xmm0
; CHECK-NEXT:    minss LCPI0_2, %xmm0
; CHECK-NEXT:    maxss %xmm1, %xmm0
; CHECK-NEXT:    cvttss2si %xmm0, %eax
; CHECK-NEXT:    retl
	; Build <%f, 0.0, 0.0, 0.0> one lane at a time.
	%tmp = insertelement <4 x float> undef, float %f, i32 0		; <<4 x float>> [#uses=1]
	%tmp10 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 1		; <<4 x float>> [#uses=1]
	%tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2		; <<4 x float>> [#uses=1]
	%tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, i32 3		; <<4 x float>> [#uses=1]
	; Lane-0 arithmetic: subtract 1.0, multiply by 0.5, then apply min with
	; 65535.0 and max with 0.0.
	%tmp28 = tail call <4 x float> @llvm.x86.sse.sub.ss( <4 x float> %tmp12, <4 x float> < float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )		; <<4 x float>> [#uses=1]
	%tmp37 = tail call <4 x float> @llvm.x86.sse.mul.ss( <4 x float> %tmp28, <4 x float> < float 5.000000e-01, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )		; <<4 x float>> [#uses=1]
	%tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp37, <4 x float> < float 6.553500e+04, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )		; <<4 x float>> [#uses=1]
	%tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> zeroinitializer )		; <<4 x float>> [#uses=1]
	; Convert lane 0 to i32 with the truncating conversion intrinsic and
	; return the low 16 bits.
	%tmp.upgrd.1 = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 )		; <i32> [#uses=1]
	%tmp69 = trunc i32 %tmp.upgrd.1 to i16		; <i16> [#uses=1]
	ret i16 %tmp69
}

; test2: same computation as test1, but the subtract and multiply are plain
; scalar fsub/fmul instructions and only lane 0 of each vector operand is
; defined (the remaining lanes are undef).
;
; The assertions below expect the scalar fsub to be emitted as addss with a
; constant-pool operand (LCPI1_0; presumably the negated constant -1.0 --
; NOTE(review): confirm against the generated constant pool), the multiply and
; min constants to be used as memory operands, and the 0.0 operand of max.ss to
; be materialized with xorps.
define i16 @test2(float %f) nounwind {
; CHECK-LABEL: test2:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    addss LCPI1_0, %xmm0
; CHECK-NEXT:    mulss LCPI1_1, %xmm0
; CHECK-NEXT:    minss LCPI1_2, %xmm0
; CHECK-NEXT:    xorps %xmm1, %xmm1
; CHECK-NEXT:    maxss %xmm1, %xmm0
; CHECK-NEXT:    cvttss2si %xmm0, %eax
; CHECK-NEXT:    retl
	; Scalar part: (%f - 1.0) * 0.5.
	%tmp28 = fsub float %f, 1.000000e+00		; <float> [#uses=1]
	%tmp37 = fmul float %tmp28, 5.000000e-01		; <float> [#uses=1]
	; Move the scalar into lane 0 of an otherwise-undef vector for the
	; min.ss/max.ss intrinsics.
	%tmp375 = insertelement <4 x float> undef, float %tmp37, i32 0		; <<4 x float>> [#uses=1]
	%tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp375, <4 x float> < float 6.553500e+04, float undef, float undef, float undef > )		; <<4 x float>> [#uses=1]
	%tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> < float 0.000000e+00, float undef, float undef, float undef > )		; <<4 x float>> [#uses=1]
	; Convert lane 0 to i32 and return the low 16 bits.
	%tmp = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 )		; <i32> [#uses=1]
	%tmp69 = trunc i32 %tmp to i16		; <i16> [#uses=1]
	ret i16 %tmp69
}

; SSE scalar-single intrinsics used by test1 and test2. Each takes two
; <4 x float> operands and returns a <4 x float>.
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)

declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)

declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)

declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)

; Float-to-i32 conversion intrinsic used at the end of test1 and test2.
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>)

; SSE4.1 scalar rounding intrinsic used by test3 and test4; the trailing i32 is
; the rounding-control immediate.
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32)

; External function with no visible body; test4 calls it between a load and
; the use of the loaded value.
declare <4 x float> @f()

; test3: a float load feeding lane 0 of the second round.ss operand, with
; nothing between the load and its use.
;
; The assertions below expect the load to be folded into the instruction as a
; memory operand: roundss $4, (%eax), %xmm0. The %C argument is not used by
; the body.
define <4 x float> @test3(<4 x float> %A, float *%b, i32 %C) nounwind {
; CHECK-LABEL: test3:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    roundss $4, (%eax), %xmm0
; CHECK-NEXT:    retl
  ; Load the scalar and place it in lane 0; the other lanes stay undef.
  %a = load float , float *%b
  %B = insertelement <4 x float> undef, float %a, i32 0
  ; Round with immediate 4.
  ; NOTE(review): imm 4 presumably selects the MXCSR rounding mode -- confirm
  ; against the ISA reference.
  %X = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %A, <4 x float> %B, i32 4)
  ret <4 x float> %X
}

; test4: like test3, but a call to @f() sits between the load and the
; round.ss that consumes it, and the call's result is the first operand.
;
; The assertions below expect the load NOT to be folded: the value is loaded
; with movss before the call, spilled to the stack across the call, reloaded
; into %xmm1, and roundss uses the register-register form. The %A and %C
; arguments are not used by the body.
; NOTE(review): presumably the regression case for rdar://7653908 (folding the
; load here produced a cyclic DAG) -- confirm against the commit history.
define <4 x float> @test4(<4 x float> %A, float *%b, i32 %C) nounwind {
; CHECK-LABEL: test4:
; CHECK:       ## BB#0:
; CHECK-NEXT:    subl $28, %esp
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movaps %xmm0, (%esp) ## 16-byte Spill
; CHECK-NEXT:    calll _f
; CHECK-NEXT:    movaps (%esp), %xmm1 ## 16-byte Reload
; CHECK-NEXT:    roundss $4, %xmm1, %xmm0
; CHECK-NEXT:    addl $28, %esp
; CHECK-NEXT:    retl
  ; The load happens before the call ...
  %a = load float , float *%b
  %B = insertelement <4 x float> undef, float %a, i32 0
  ; ... and its result is only consumed after the call returns.
  %q = call <4 x float> @f()
  %X = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %q, <4 x float> %B, i32 4)
  ret <4 x float> %X
}

; PR13576
; test5: cvtsi2sd intrinsic applied to a constant <2 x double> vector and the
; constant integer 128.
;
; The assertions below expect the constant vector to be loaded into %xmm0 with
; movaps, the integer to be materialized in %eax with movl, and the conversion
; to use the register form: cvtsi2sdl %eax, %xmm0.
define  <2 x double> @test5() nounwind uwtable readnone noinline {
; CHECK-LABEL: test5:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [4.569870e+02,1.233210e+02]
; CHECK-NEXT:    movl $128, %eax
; CHECK-NEXT:    cvtsi2sdl %eax, %xmm0
; CHECK-NEXT:    retl
entry:
  ; The call is split across two source lines; the vector constant is
  ; <456.987, 123.321>.
  %0 = tail call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> <double
4.569870e+02, double 1.233210e+02>, i32 128) nounwind readnone
  ret <2 x double> %0
}

; SSE2 integer-to-double conversion intrinsic used by test5: takes a
; <2 x double> vector and an i32 and returns a <2 x double>.
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
[X86][SSE] Regenerated scalar load folding tests llvm-svn: 270431 2016-05-23 20:53:09 +08:00			`; NOTE: Assertions have been autogenerated by utils/update_test_checks.py`
Update tests to use at least darwin9. llvm-svn: 274129 2016-06-29 22:51:10 +08:00			`; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=+sse,+sse2,+sse4.1 \| FileCheck %s`
new testcase llvm-svn: 30893 2006-10-12 06:07:38 +08:00
Remove llvm-upgrade and update tests. llvm-svn: 47432 2008-02-21 15:42:26 +08:00			`target datalayout = "e-p:32:32"`
new testcase llvm-svn: 30893 2006-10-12 06:07:38 +08:00
Add nounwind. llvm-svn: 50837 2008-05-08 06:59:08 +08:00			`define i16 @test1(float %f) nounwind {`
[X86][SSE] Regenerated scalar load folding tests llvm-svn: 270431 2016-05-23 20:53:09 +08:00			`; CHECK-LABEL: test1:`
			`; CHECK: ## BB#0:`
			`; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero`
			`; CHECK-NEXT: xorps %xmm1, %xmm1`
			`; CHECK-NEXT: subss LCPI0_0, %xmm0`
			`; CHECK-NEXT: mulss LCPI0_1, %xmm0`
			`; CHECK-NEXT: minss LCPI0_2, %xmm0`
			`; CHECK-NEXT: maxss %xmm1, %xmm0`
			`; CHECK-NEXT: cvttss2si %xmm0, %eax`
			`; CHECK-NEXT: retl`
Remove llvm-upgrade and update tests. llvm-svn: 47432 2008-02-21 15:42:26 +08:00			`%tmp = insertelement <4 x float> undef, float %f, i32 0 ; <<4 x float>> [#uses=1]`
			`%tmp10 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1]`
			`%tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1]`
			`%tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1]`
			`%tmp28 = tail call <4 x float> @llvm.x86.sse.sub.ss( <4 x float> %tmp12, <4 x float> < float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > ) ; <<4 x float>> [#uses=1]`
			`%tmp37 = tail call <4 x float> @llvm.x86.sse.mul.ss( <4 x float> %tmp28, <4 x float> < float 5.000000e-01, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > ) ; <<4 x float>> [#uses=1]`
			`%tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp37, <4 x float> < float 6.553500e+04, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > ) ; <<4 x float>> [#uses=1]`
			`%tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> zeroinitializer ) ; <<4 x float>> [#uses=1]`
			`%tmp.upgrd.1 = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 ) ; <i32> [#uses=1]`
			`%tmp69 = trunc i32 %tmp.upgrd.1 to i16 ; <i16> [#uses=1]`
			`ret i16 %tmp69`
new testcase llvm-svn: 30893 2006-10-12 06:07:38 +08:00			`}`

Add nounwind. llvm-svn: 50837 2008-05-08 06:59:08 +08:00			`define i16 @test2(float %f) nounwind {`
[X86][SSE] Regenerated scalar load folding tests llvm-svn: 270431 2016-05-23 20:53:09 +08:00			`; CHECK-LABEL: test2:`
			`; CHECK: ## BB#0:`
			`; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero`
			`; CHECK-NEXT: addss LCPI1_0, %xmm0`
			`; CHECK-NEXT: mulss LCPI1_1, %xmm0`
			`; CHECK-NEXT: minss LCPI1_2, %xmm0`
			`; CHECK-NEXT: xorps %xmm1, %xmm1`
			`; CHECK-NEXT: maxss %xmm1, %xmm0`
			`; CHECK-NEXT: cvttss2si %xmm0, %eax`
			`; CHECK-NEXT: retl`
Split the Add, Sub, and Mul instruction opcodes into separate integer and floating-point opcodes, introducing FAdd, FSub, and FMul. For now, the AsmParser, BitcodeReader, and IRBuilder all preserve backwards compatability, and the Core LLVM APIs preserve backwards compatibility for IR producers. Most front-ends won't need to change immediately. This implements the first step of the plan outlined here: http://nondot.org/sabre/LLVMNotes/IntegerOverflow.txt llvm-svn: 72897 2009-06-05 06:49:04 +08:00			`%tmp28 = fsub float %f, 1.000000e+00 ; <float> [#uses=1]`
			`%tmp37 = fmul float %tmp28, 5.000000e-01 ; <float> [#uses=1]`
Remove llvm-upgrade and update tests. llvm-svn: 47432 2008-02-21 15:42:26 +08:00			`%tmp375 = insertelement <4 x float> undef, float %tmp37, i32 0 ; <<4 x float>> [#uses=1]`
			`%tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp375, <4 x float> < float 6.553500e+04, float undef, float undef, float undef > ) ; <<4 x float>> [#uses=1]`
			`%tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> < float 0.000000e+00, float undef, float undef, float undef > ) ; <<4 x float>> [#uses=1]`
			`%tmp = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 ) ; <i32> [#uses=1]`
			`%tmp69 = trunc i32 %tmp to i16 ; <i16> [#uses=1]`
			`ret i16 %tmp69`
new testcase llvm-svn: 30893 2006-10-12 06:07:38 +08:00			`}`

Remove llvm-upgrade and update tests. llvm-svn: 47432 2008-02-21 15:42:26 +08:00			`declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)`
new testcase llvm-svn: 30893 2006-10-12 06:07:38 +08:00
Remove llvm-upgrade and update tests. llvm-svn: 47432 2008-02-21 15:42:26 +08:00			`declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)`
new testcase llvm-svn: 30893 2006-10-12 06:07:38 +08:00
Remove llvm-upgrade and update tests. llvm-svn: 47432 2008-02-21 15:42:26 +08:00			`declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)`
new testcase llvm-svn: 30893 2006-10-12 06:07:38 +08:00
Remove llvm-upgrade and update tests. llvm-svn: 47432 2008-02-21 15:42:26 +08:00			`declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)`
new testcase llvm-svn: 30893 2006-10-12 06:07:38 +08:00
Remove llvm-upgrade and update tests. llvm-svn: 47432 2008-02-21 15:42:26 +08:00			`declare i32 @llvm.x86.sse.cvttss2si(<4 x float>)`
fix rdar://7653908, a crash on a case where we would fold a load into a roundss intrinsic, producing a cyclic dag. The root cause of this is badness handling ComplexPattern nodes in the old dagisel that I noticed through inspection. Eliminate a copy of the of the code that handled ComplexPatterns by making EmitChildMatchCode call into EmitMatchCode. llvm-svn: 96408 2010-02-17 06:35:06 +08:00
			`declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32)`
[X86][SSE] Regenerated scalar load folding tests llvm-svn: 270431 2016-05-23 20:53:09 +08:00
fix rdar://7653908, a crash on a case where we would fold a load into a roundss intrinsic, producing a cyclic dag. The root cause of this is badness handling ComplexPattern nodes in the old dagisel that I noticed through inspection. Eliminate a copy of the of the code that handled ComplexPatterns by making EmitChildMatchCode call into EmitMatchCode. llvm-svn: 96408 2010-02-17 06:35:06 +08:00			`declare <4 x float> @f()`

			`define <4 x float> @test3(<4 x float> %A, float *%b, i32 %C) nounwind {`
[X86][SSE] Regenerated scalar load folding tests llvm-svn: 270431 2016-05-23 20:53:09 +08:00			`; CHECK-LABEL: test3:`
			`; CHECK: ## BB#0:`
			`; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax`
			`; CHECK-NEXT: roundss $4, (%eax), %xmm0`
			`; CHECK-NEXT: retl`
[opaque pointer type] Add textual IR support for explicit type parameter to load instruction Essentially the same as the GEP change in r230786. A similar migration script can be used to update test cases, though a few more test case improvements/changes were required this time around: (r229269-r229278) import fileinput import sys import re pat = re.compile(r"((?:=\|:\|^)\sload (?:atomic )?(?:volatile )?(.?))(\| addrspace\(\d+\) )\($\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$)") for line in sys.stdin: sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line)) Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7649 llvm-svn: 230794 2015-02-28 05:17:42 +08:00			`%a = load float , float *%b`
fix rdar://7653908, a crash on a case where we would fold a load into a roundss intrinsic, producing a cyclic dag. The root cause of this is badness handling ComplexPattern nodes in the old dagisel that I noticed through inspection. Eliminate a copy of the of the code that handled ComplexPatterns by making EmitChildMatchCode call into EmitMatchCode. llvm-svn: 96408 2010-02-17 06:35:06 +08:00			`%B = insertelement <4 x float> undef, float %a, i32 0`
			`%X = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %A, <4 x float> %B, i32 4)`
			`ret <4 x float> %X`
			`}`

			`define <4 x float> @test4(<4 x float> %A, float *%b, i32 %C) nounwind {`
[X86][SSE] Regenerated scalar load folding tests llvm-svn: 270431 2016-05-23 20:53:09 +08:00			`; CHECK-LABEL: test4:`
			`; CHECK: ## BB#0:`
			`; CHECK-NEXT: subl $28, %esp`
			`; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax`
			`; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero`
			`; CHECK-NEXT: movaps %xmm0, (%esp) ## 16-byte Spill`
Update tests to use at least darwin9. llvm-svn: 274129 2016-06-29 22:51:10 +08:00			`; CHECK-NEXT: calll _f`
[X86][SSE] Regenerated scalar load folding tests llvm-svn: 270431 2016-05-23 20:53:09 +08:00			`; CHECK-NEXT: movaps (%esp), %xmm1 ## 16-byte Reload`
			`; CHECK-NEXT: roundss $4, %xmm1, %xmm0`
			`; CHECK-NEXT: addl $28, %esp`
			`; CHECK-NEXT: retl`
[opaque pointer type] Add textual IR support for explicit type parameter to load instruction Essentially the same as the GEP change in r230786. A similar migration script can be used to update test cases, though a few more test case improvements/changes were required this time around: (r229269-r229278) import fileinput import sys import re pat = re.compile(r"((?:=\|:\|^)\sload (?:atomic )?(?:volatile )?(.?))(\| addrspace\(\d+\) )\($\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$)") for line in sys.stdin: sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line)) Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7649 llvm-svn: 230794 2015-02-28 05:17:42 +08:00			`%a = load float , float *%b`
fix rdar://7653908, a crash on a case where we would fold a load into a roundss intrinsic, producing a cyclic dag. The root cause of this is badness handling ComplexPattern nodes in the old dagisel that I noticed through inspection. Eliminate a copy of the of the code that handled ComplexPatterns by making EmitChildMatchCode call into EmitMatchCode. llvm-svn: 96408 2010-02-17 06:35:06 +08:00			`%B = insertelement <4 x float> undef, float %a, i32 0`
			`%q = call <4 x float> @f()`
			`%X = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %q, <4 x float> %B, i32 4)`
			`ret <4 x float> %X`
temporarily disable this. llvm-svn: 96717 2010-02-21 11:24:41 +08:00			`}`
X86: move Int_CVTSD2SSrr, Int_CVTSI2SSrr, Int_CVTSI2SDrr, Int_CVTSS2SDrr from OpTbl1 to OpTbl2 since they have 3 operands and the last operand can be changed to a memory operand. PR13576 llvm-svn: 161769 2012-08-14 02:29:41 +08:00
[X86][SSE] Regenerated scalar load folding tests llvm-svn: 270431 2016-05-23 20:53:09 +08:00			`; PR13576`
X86: move Int_CVTSD2SSrr, Int_CVTSI2SSrr, Int_CVTSI2SDrr, Int_CVTSS2SDrr from OpTbl1 to OpTbl2 since they have 3 operands and the last operand can be changed to a memory operand. PR13576 llvm-svn: 161769 2012-08-14 02:29:41 +08:00			`define <2 x double> @test5() nounwind uwtable readnone noinline {`
[X86][SSE] Regenerated scalar load folding tests llvm-svn: 270431 2016-05-23 20:53:09 +08:00			`; CHECK-LABEL: test5:`
			`; CHECK: ## BB#0: ## %entry`
			`; CHECK-NEXT: movaps {{.*#+}} xmm0 = [4.569870e+02,1.233210e+02]`
			`; CHECK-NEXT: movl $128, %eax`
			`; CHECK-NEXT: cvtsi2sdl %eax, %xmm0`
			`; CHECK-NEXT: retl`
X86: move Int_CVTSD2SSrr, Int_CVTSI2SSrr, Int_CVTSI2SDrr, Int_CVTSS2SDrr from OpTbl1 to OpTbl2 since they have 3 operands and the last operand can be changed to a memory operand. PR13576 llvm-svn: 161769 2012-08-14 02:29:41 +08:00			`entry:`
			`%0 = tail call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> <double`
			`4.569870e+02, double 1.233210e+02>, i32 128) nounwind readnone`
			`ret <2 x double> %0`
			`}`

			`declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone`