llvm-project/llvm/test/CodeGen/X86/widen_cast-1.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i686-unknown-unknown -mcpu=generic -mattr=+sse4.2 < %s | FileCheck %s
; RUN: llc -mtriple=i686-unknown-unknown -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s

; The Atom scheduler produces a different instruction order, so the Atom run uses its own check prefix.

; bitcast a v4i16 to v2i32

; @convert(dst, src): for each i in [0, 4), load src[i] as <4 x i16>, add 1 to
; every lane, bitcast the sum to <2 x i32> and store it to dst[i]. The IR is in
; unoptimized form: both pointer arguments and the counter %i live in allocas
; and are reloaded on every use.
;
; In the expected assembly below the lane-wise +1 appears as psubw of an
; all-ones register (built by pcmpeqd %xmm0, %xmm0), i.e. x - (-1). The two
; expectation sets differ only in instruction order and register choice.
define void @convert(<2 x i32>* %dst, <4 x i16>* %src) nounwind {
; CHECK-LABEL: convert:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    movl $0, (%esp)
; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
; CHECK-NEXT:    cmpl $3, (%esp)
; CHECK-NEXT:    jg .LBB0_3
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  .LBB0_2: # %forbody
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movl (%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    psubw %xmm0, %xmm1
; CHECK-NEXT:    movq %xmm1, (%ecx,%eax,8)
; CHECK-NEXT:    incl (%esp)
; CHECK-NEXT:    cmpl $3, (%esp)
; CHECK-NEXT:    jle .LBB0_2
; CHECK-NEXT:  .LBB0_3: # %afterfor
; CHECK-NEXT:    popl %eax
; CHECK-NEXT:    retl
;
; ATOM-LABEL: convert:
; ATOM:       # %bb.0: # %entry
; ATOM-NEXT:    pushl %eax
; ATOM-NEXT:    pcmpeqd %xmm0, %xmm0
; ATOM-NEXT:    movl $0, (%esp)
; ATOM-NEXT:    cmpl $3, (%esp)
; ATOM-NEXT:    jg .LBB0_3
; ATOM-NEXT:    .p2align 4, 0x90
; ATOM-NEXT:  .LBB0_2: # %forbody
; ATOM-NEXT:    # =>This Inner Loop Header: Depth=1
; ATOM-NEXT:    movl (%esp), %eax
; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; ATOM-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; ATOM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; ATOM-NEXT:    psubw %xmm0, %xmm1
; ATOM-NEXT:    movq %xmm1, (%ecx,%eax,8)
; ATOM-NEXT:    incl (%esp)
; ATOM-NEXT:    cmpl $3, (%esp)
; ATOM-NEXT:    jle .LBB0_2
; ATOM-NEXT:  .LBB0_3: # %afterfor
; ATOM-NEXT:    popl %eax
; ATOM-NEXT:    retl
entry:
	; Stack slots for the two pointer arguments and the loop counter.
	%dst.addr = alloca <2 x i32>*
	%src.addr = alloca <4 x i16>*
	%i = alloca i32, align 4
	; Spill the arguments and start the counter at 0.
	store <2 x i32>* %dst, <2 x i32>** %dst.addr
	store <4 x i16>* %src, <4 x i16>** %src.addr
	store i32 0, i32* %i
	br label %forcond

forcond:
	; Loop test: keep iterating while i < 4 (signed compare).
	%tmp = load i32, i32* %i
	%cmp = icmp slt i32 %tmp, 4
	br i1 %cmp, label %forbody, label %afterfor

forbody:
	; %arrayidx = &dst[i]
	%tmp1 = load i32, i32* %i
	%tmp2 = load <2 x i32>*, <2 x i32>** %dst.addr
	%arrayidx = getelementptr <2 x i32>, <2 x i32>* %tmp2, i32 %tmp1
	; %arrayidx5 = &src[i]
	%tmp3 = load i32, i32* %i
	%tmp4 = load <4 x i16>*, <4 x i16>** %src.addr
	%arrayidx5 = getelementptr <4 x i16>, <4 x i16>* %tmp4, i32 %tmp3
	; Load the 64-bit source vector and add 1 to each of its four i16 lanes.
	%tmp6 = load <4 x i16>, <4 x i16>* %arrayidx5
	%add = add <4 x i16> %tmp6, < i16 1, i16 1, i16 1, i16 1 >
	; Reinterpret the same 64 bits as two i32 lanes (the cast under test) and
	; store them to dst[i].
	%conv = bitcast <4 x i16> %add to <2 x i32>
	store <2 x i32> %conv, <2 x i32>* %arrayidx
	br label %forinc

forinc:
	; i = i + 1, written back to the stack slot.
	%tmp7 = load i32, i32* %i
	%inc = add i32 %tmp7, 1
	store i32 %inc, i32* %i
	br label %forcond

afterfor:
	ret void
}
[x86] specify triples and auto-generate complete checks; NFC llvm-svn: 305655 2017-06-19 05:42:19 +08:00			`; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py`
			`; RUN: llc -mtriple=i686-unknown-unknown -mcpu=generic -mattr=+sse4.2 < %s | FileCheck %s`
			`; RUN: llc -mtriple=i686-unknown-unknown -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s`
Added some basic test cases for r61209 llvm-svn: 61210 2008-12-19 04:05:58 +08:00
This patch fixes 8 out of 20 unexpected failures in "make check" when run on an Intel Atom processor. The failures have arisen due to changes elsewhere in the trunk over the past 8 weeks or so. These failures were not detected by the Atom buildbot because the CPU on the Atom buildbot was not being detected as an Atom CPU. The fix for this problem is in Host.cpp and X86Subtarget.cpp, but shall remain commented out until the current set of Atom test failures are fixed. Patch by Andy Zhang and Tyler Nowicki! llvm-svn: 160451 2012-07-19 04:49:17 +08:00			`; Scheduler causes produce a different instruction order`

Added some basic test cases for r61209 llvm-svn: 61210 2008-12-19 04:05:58 +08:00			`; bitcast a v4i16 to v2i32`

			`define void @convert(<2 x i32>* %dst, <4 x i16>* %src) nounwind {`
[x86] specify triples and auto-generate complete checks; NFC llvm-svn: 305655 2017-06-19 05:42:19 +08:00			`; CHECK-LABEL: convert:`
[CodeGen] Unify MBB reference format in both MIR and debug output As part of the unification of the debug format and the MIR format, print MBB references as '%bb.5'. The MIR printer prints the IR name of a MBB only for block definitions. * find . \( -name ".mir" -o -name ".cpp" -o -name ".h" -o -name ".ll" \) -type f -print0 \| xargs -0 sed -i '' -E 's/BB#" << ([a-zA-Z0-9_]+)->getNumber\(\)/" << printMBBReference(\1)/g' find . \( -name ".mir" -o -name ".cpp" -o -name ".h" -o -name ".ll" \) -type f -print0 \| xargs -0 sed -i '' -E 's/BB#" << ([a-zA-Z0-9_]+)\.getNumber\(\)/" << printMBBReference(\1)/g' * find . \( -name ".txt" -o -name ".s" -o -name ".mir" -o -name ".cpp" -o -name ".h" -o -name ".ll" \) -type f -print0 \| xargs -0 sed -i '' -E 's/BB#([0-9]+)/%bb.\1/g' * grep -nr 'BB#' and fix Differential Revision: https://reviews.llvm.org/D40422 llvm-svn: 319665 2017-12-05 01:18:51 +08:00			`; CHECK: # %bb.0: # %entry`
[x86] specify triples and auto-generate complete checks; NFC llvm-svn: 305655 2017-06-19 05:42:19 +08:00			`; CHECK-NEXT: pushl %eax`
			`; CHECK-NEXT: movl $0, (%esp)`
[DAGCombiner][X86][SystemZ][AArch64] Combine some cases of (bitcast (build_vector constants)) between legalize types and legalize dag. This patch enables combining integer bitcasts of integer build vectors when the new scalar type is legal. I've avoided floating point because the implementation bitcasts float to int along the way and we would need to check the intermediate types for legality Differential Revision: https://reviews.llvm.org/D58884 llvm-svn: 355324 2019-03-05 03:12:16 +08:00			`; CHECK-NEXT: pcmpeqd %xmm0, %xmm0`
[x86] specify triples and auto-generate complete checks; NFC llvm-svn: 305655 2017-06-19 05:42:19 +08:00			`; CHECK-NEXT: cmpl $3, (%esp)`
			`; CHECK-NEXT: jg .LBB0_3`
			`; CHECK-NEXT: .p2align 4, 0x90`
			`; CHECK-NEXT: .LBB0_2: # %forbody`
			`; CHECK-NEXT: # =>This Inner Loop Header: Depth=1`
			`; CHECK-NEXT: movl (%esp), %eax`
			`; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx`
			`; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx`
Recommit r367901 "[X86] Enable -x86-experimental-vector-widening-legalization by default." The assert that caused this to be reverted should be fixed now. Original commit message: This patch changes our defualt legalization behavior for 16, 32, and 64 bit vectors with i8/i16/i32/i64 scalar types from promotion to widening. For example, v8i8 will now be widened to v16i8 instead of promoted to v8i16. This keeps the elements widths the same and pads with undef elements. We believe this is a better legalization strategy. But it carries some issues due to the fragmented vector ISA. For example, i8 shifts and multiplies get widened and then later have to be promoted/split into vXi16 vectors. This has the potential to cause regressions so we wanted to get it in early in the 10.0 cycle so we have plenty of time to address them. Next steps will be to merge tests that explicitly test the command line option. And then we can remove the option and its associated code. llvm-svn: 368183 2019-08-08 00:24:26 +08:00			`; CHECK-NEXT: movq {{.*#+}} xmm1 = mem[0],zero`
			`; CHECK-NEXT: psubw %xmm0, %xmm1`
			`; CHECK-NEXT: movq %xmm1, (%ecx,%eax,8)`
[x86] specify triples and auto-generate complete checks; NFC llvm-svn: 305655 2017-06-19 05:42:19 +08:00			`; CHECK-NEXT: incl (%esp)`
			`; CHECK-NEXT: cmpl $3, (%esp)`
			`; CHECK-NEXT: jle .LBB0_2`
			`; CHECK-NEXT: .LBB0_3: # %afterfor`
			`; CHECK-NEXT: popl %eax`
			`; CHECK-NEXT: retl`
			`;`
			`; ATOM-LABEL: convert:`
[CodeGen] Unify MBB reference format in both MIR and debug output As part of the unification of the debug format and the MIR format, print MBB references as '%bb.5'. The MIR printer prints the IR name of a MBB only for block definitions. * find . \( -name ".mir" -o -name ".cpp" -o -name ".h" -o -name ".ll" \) -type f -print0 \| xargs -0 sed -i '' -E 's/BB#" << ([a-zA-Z0-9_]+)->getNumber\(\)/" << printMBBReference(\1)/g' find . \( -name ".mir" -o -name ".cpp" -o -name ".h" -o -name ".ll" \) -type f -print0 \| xargs -0 sed -i '' -E 's/BB#" << ([a-zA-Z0-9_]+)\.getNumber\(\)/" << printMBBReference(\1)/g' * find . \( -name ".txt" -o -name ".s" -o -name ".mir" -o -name ".cpp" -o -name ".h" -o -name ".ll" \) -type f -print0 \| xargs -0 sed -i '' -E 's/BB#([0-9]+)/%bb.\1/g' * grep -nr 'BB#' and fix Differential Revision: https://reviews.llvm.org/D40422 llvm-svn: 319665 2017-12-05 01:18:51 +08:00			`; ATOM: # %bb.0: # %entry`
[x86] specify triples and auto-generate complete checks; NFC llvm-svn: 305655 2017-06-19 05:42:19 +08:00			`; ATOM-NEXT: pushl %eax`
[DAGCombiner][X86][SystemZ][AArch64] Combine some cases of (bitcast (build_vector constants)) between legalize types and legalize dag. This patch enables combining integer bitcasts of integer build vectors when the new scalar type is legal. I've avoided floating point because the implementation bitcasts float to int along the way and we would need to check the intermediate types for legality Differential Revision: https://reviews.llvm.org/D58884 llvm-svn: 355324 2019-03-05 03:12:16 +08:00			`; ATOM-NEXT: pcmpeqd %xmm0, %xmm0`
[x86] specify triples and auto-generate complete checks; NFC llvm-svn: 305655 2017-06-19 05:42:19 +08:00			`; ATOM-NEXT: movl $0, (%esp)`
			`; ATOM-NEXT: cmpl $3, (%esp)`
			`; ATOM-NEXT: jg .LBB0_3`
			`; ATOM-NEXT: .p2align 4, 0x90`
			`; ATOM-NEXT: .LBB0_2: # %forbody`
			`; ATOM-NEXT: # =>This Inner Loop Header: Depth=1`
			`; ATOM-NEXT: movl (%esp), %eax`
			`; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx`
Recommit r367901 "[X86] Enable -x86-experimental-vector-widening-legalization by default." The assert that caused this to be reverted should be fixed now. Original commit message: This patch changes our defualt legalization behavior for 16, 32, and 64 bit vectors with i8/i16/i32/i64 scalar types from promotion to widening. For example, v8i8 will now be widened to v16i8 instead of promoted to v8i16. This keeps the elements widths the same and pads with undef elements. We believe this is a better legalization strategy. But it carries some issues due to the fragmented vector ISA. For example, i8 shifts and multiplies get widened and then later have to be promoted/split into vXi16 vectors. This has the potential to cause regressions so we wanted to get it in early in the 10.0 cycle so we have plenty of time to address them. Next steps will be to merge tests that explicitly test the command line option. And then we can remove the option and its associated code. llvm-svn: 368183 2019-08-08 00:24:26 +08:00			`; ATOM-NEXT: movq {{.*#+}} xmm1 = mem[0],zero`
[x86] specify triples and auto-generate complete checks; NFC llvm-svn: 305655 2017-06-19 05:42:19 +08:00			`; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx`
Recommit r367901 "[X86] Enable -x86-experimental-vector-widening-legalization by default." The assert that caused this to be reverted should be fixed now. Original commit message: This patch changes our defualt legalization behavior for 16, 32, and 64 bit vectors with i8/i16/i32/i64 scalar types from promotion to widening. For example, v8i8 will now be widened to v16i8 instead of promoted to v8i16. This keeps the elements widths the same and pads with undef elements. We believe this is a better legalization strategy. But it carries some issues due to the fragmented vector ISA. For example, i8 shifts and multiplies get widened and then later have to be promoted/split into vXi16 vectors. This has the potential to cause regressions so we wanted to get it in early in the 10.0 cycle so we have plenty of time to address them. Next steps will be to merge tests that explicitly test the command line option. And then we can remove the option and its associated code. llvm-svn: 368183 2019-08-08 00:24:26 +08:00			`; ATOM-NEXT: psubw %xmm0, %xmm1`
			`; ATOM-NEXT: movq %xmm1, (%ecx,%eax,8)`
[x86] specify triples and auto-generate complete checks; NFC llvm-svn: 305655 2017-06-19 05:42:19 +08:00			`; ATOM-NEXT: incl (%esp)`
			`; ATOM-NEXT: cmpl $3, (%esp)`
			`; ATOM-NEXT: jle .LBB0_2`
			`; ATOM-NEXT: .LBB0_3: # %afterfor`
			`; ATOM-NEXT: popl %eax`
			`; ATOM-NEXT: retl`
Added some basic test cases for r61209 llvm-svn: 61210 2008-12-19 04:05:58 +08:00			`entry:`
[x86] specify triples and auto-generate complete checks; NFC llvm-svn: 305655 2017-06-19 05:42:19 +08:00			`%dst.addr = alloca <2 x i32>*`
			`%src.addr = alloca <4 x i16>*`
			`%i = alloca i32, align 4`
Added some basic test cases for r61209 llvm-svn: 61210 2008-12-19 04:05:58 +08:00			`store <2 x i32>* %dst, <2 x i32>** %dst.addr`
			`store <4 x i16>* %src, <4 x i16>** %src.addr`
			`store i32 0, i32* %i`
			`br label %forcond`

[x86] specify triples and auto-generate complete checks; NFC llvm-svn: 305655 2017-06-19 05:42:19 +08:00			`forcond:`
			`%tmp = load i32, i32* %i`
			`%cmp = icmp slt i32 %tmp, 4`
Added some basic test cases for r61209 llvm-svn: 61210 2008-12-19 04:05:58 +08:00			`br i1 %cmp, label %forbody, label %afterfor`

[x86] specify triples and auto-generate complete checks; NFC llvm-svn: 305655 2017-06-19 05:42:19 +08:00			`forbody:`
			`%tmp1 = load i32, i32* %i`
			`%tmp2 = load <2 x i32>*, <2 x i32>** %dst.addr`
			`%arrayidx = getelementptr <2 x i32>, <2 x i32>* %tmp2, i32 %tmp1`
			`%tmp3 = load i32, i32* %i`
			`%tmp4 = load <4 x i16>*, <4 x i16>** %src.addr`
			`%arrayidx5 = getelementptr <4 x i16>, <4 x i16>* %tmp4, i32 %tmp3`
			`%tmp6 = load <4 x i16>, <4 x i16>* %arrayidx5`
			`%add = add <4 x i16> %tmp6, < i16 1, i16 1, i16 1, i16 1 >`
			`%conv = bitcast <4 x i16> %add to <2 x i32>`
Added some basic test cases for r61209 llvm-svn: 61210 2008-12-19 04:05:58 +08:00			`store <2 x i32> %conv, <2 x i32>* %arrayidx`
			`br label %forinc`

[x86] specify triples and auto-generate complete checks; NFC llvm-svn: 305655 2017-06-19 05:42:19 +08:00			`forinc:`
			`%tmp7 = load i32, i32* %i`
			`%inc = add i32 %tmp7, 1`
Added some basic test cases for r61209 llvm-svn: 61210 2008-12-19 04:05:58 +08:00			`store i32 %inc, i32* %i`
			`br label %forcond`

[x86] specify triples and auto-generate complete checks; NFC llvm-svn: 305655 2017-06-19 05:42:19 +08:00			`afterfor:`
Added some basic test cases for r61209 llvm-svn: 61210 2008-12-19 04:05:58 +08:00			`ret void`
			`}`