llvm-project/llvm/test/CodeGen/X86/byval4.ll

; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2
; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl	 | count 2

; %struct.s is an aggregate of 65 i16 fields (eight rows of eight, plus one).
; At 2 bytes per i16 that is 130 bytes, which is not a multiple of 4 or 8.
; NOTE(review): the odd size presumably forces the byval copy to handle a
; tail beyond the word-sized repeat move -- confirm against the codegen.
%struct.s = type { i16, i16, i16, i16, i16, i16, i16, i16,
                   i16, i16, i16, i16, i16, i16, i16, i16,
                   i16, i16, i16, i16, i16, i16, i16, i16,
                   i16, i16, i16, i16, i16, i16, i16, i16,
                   i16, i16, i16, i16, i16, i16, i16, i16,
                   i16, i16, i16, i16, i16, i16, i16, i16,
                   i16, i16, i16, i16, i16, i16, i16, i16,
                   i16, i16, i16, i16, i16, i16, i16, i16,
                   i16 }


; @g builds a %struct.s in a 16-byte-aligned stack slot, writes its six
; sign-extended i16 arguments into fields 0 through 5, and then passes the
; aggregate by value to @f twice. Each call requires its own copy of the
; aggregate; the RUN lines at the top of the file count those copies by
; grepping the llc output for rep.movsq (x86-64) and rep.movsl (x86).
;
; Field N lives at byte offset 2*N from the alloca base, so only field 0
; inherits the full 16-byte alignment. Each store below states the alignment
; that its offset actually guarantees; claiming more than that is undefined
; behaviour.
define void @g(i16 signext  %a1, i16 signext  %a2, i16 signext  %a3,
	 i16 signext  %a4, i16 signext  %a5, i16 signext  %a6) nounwind {
entry:
        %a = alloca %struct.s, align 16

        ; field 0, offset 0: shares the alignment of the alloca itself
        %tmp = getelementptr %struct.s* %a, i32 0, i32 0
        store i16 %a1, i16* %tmp, align 16

        ; field 1, offset 2
        %tmp2 = getelementptr %struct.s* %a, i32 0, i32 1
        store i16 %a2, i16* %tmp2, align 2

        ; field 2, offset 4
        %tmp4 = getelementptr %struct.s* %a, i32 0, i32 2
        store i16 %a3, i16* %tmp4, align 4

        ; field 3, offset 6
        %tmp6 = getelementptr %struct.s* %a, i32 0, i32 3
        store i16 %a4, i16* %tmp6, align 2

        ; field 4, offset 8
        %tmp8 = getelementptr %struct.s* %a, i32 0, i32 4
        store i16 %a5, i16* %tmp8, align 8

        ; field 5, offset 10
        %tmp10 = getelementptr %struct.s* %a, i32 0, i32 5
        store i16 %a6, i16* %tmp10, align 2

        ; Two separate by-value calls: each one needs its own aggregate copy.
        call void @f( %struct.s* %a byval )
        call void @f( %struct.s* %a byval )
        ret void
}

; External callee with no body in this file. Its only parameter is a pointer
; to %struct.s carrying the byval attribute, i.e. the aggregate is passed by
; value and the caller must supply a copy.
declare void @f(%struct.s* byval)
Fix a number of byval / memcpy / memset related codegen issues. 1. x86-64 byval alignment should be max of 8 and alignment of type. Previously the code was not doing what the commit message was saying. 2. Do not use byte repeat move and store operations. These are slow. llvm-svn: 55139 2008-08-22 05:00:15 +08:00			`; RUN: llvm-as < %s \| llc -march=x86-64 \| grep rep.movsq \| count 2`
Let each target decide byval alignment. For X86, it's 4-byte unless the aggregate contains SSE vector(s). For x86-64, it's max of 8 or alignment of the type. llvm-svn: 46286 2008-01-24 07:17:41 +08:00			`; RUN: llvm-as < %s \| llc -march=x86 \| grep rep.movsl \| count 2`
Add support for byval function whose argument is not 32 bit aligned. To do this it is necessary to add an "always inline" argument to the memcpy node. For completeness I have also added this node to memmove and memset. I have also added getMem* functions, because the extra argument makes it cumbersome to use getNode and because I get confused by it :-) llvm-svn: 43172 2007-10-19 18:41:11 +08:00
Drop ISD::MEMSET, ISD::MEMMOVE, and ISD::MEMCPY, which are not Legal on any current target and aren't optimized in DAGCombiner. Instead of using intermediate nodes, expand the operations, choosing between simple loads/stores, target-specific code, and library calls, immediately. Previously, the code to emit optimized code for these operations was only used at initial SelectionDAG construction time; now it is used at all times. This fixes some cases where rep;movs was being used for small copies where simple loads/stores would be better. This also cleans up code that checks for alignments less than 4; let the targets make that decision instead of doing it in target-independent code. This allows x86 to use rep;movs in low-alignment cases. Also, this fixes a bug that resulted in the use of rep;stos for memsets of 0 with non-constant memory size when the alignment was at least 4. It's better to use the library in this case, which can be significantly faster when the size is large. This also preserves more SourceValue information when memory intrinsics are lowered into simple loads/stores. llvm-svn: 49572 2008-04-12 12:36:06 +08:00			`%struct.s = type { i16, i16, i16, i16, i16, i16, i16, i16,`
			`i16, i16, i16, i16, i16, i16, i16, i16,`
			`i16, i16, i16, i16, i16, i16, i16, i16,`
			`i16, i16, i16, i16, i16, i16, i16, i16,`
			`i16, i16, i16, i16, i16, i16, i16, i16,`
			`i16, i16, i16, i16, i16, i16, i16, i16,`
			`i16, i16, i16, i16, i16, i16, i16, i16,`
			`i16, i16, i16, i16, i16, i16, i16, i16,`
			`i16 }`
Add support for byval function whose argument is not 32 bit aligned. To do this it is necessary to add an "always inline" argument to the memcpy node. For completeness I have also added this node to memmove and memset. I have also added getMem* functions, because the extra argument makes it cumbersome to use getNode and because I get confused by it :-) llvm-svn: 43172 2007-10-19 18:41:11 +08:00

			`define void @g(i16 signext %a1, i16 signext %a2, i16 signext %a3,`
Fix a number of byval / memcpy / memset related codegen issues. 1. x86-64 byval alignment should be max of 8 and alignment of type. Previously the code was not doing what the commit message was saying. 2. Do not use byte repeat move and store operations. These are slow. llvm-svn: 55139 2008-08-22 05:00:15 +08:00			`i16 signext %a4, i16 signext %a5, i16 signext %a6) nounwind {`
Add support for byval function whose argument is not 32 bit aligned. To do this it is necessary to add an "always inline" argument to the memcpy node. For completeness I have also added this node to memmove and memset. I have also added getMem* functions, because the extra argument makes it cumbersome to use getNode and because I get confused by it :-) llvm-svn: 43172 2007-10-19 18:41:11 +08:00			`entry:`
			`%a = alloca %struct.s, align 16`
			`%tmp = getelementptr %struct.s* %a, i32 0, i32 0`
			`store i16 %a1, i16* %tmp, align 16`
			`%tmp2 = getelementptr %struct.s* %a, i32 0, i32 1`
			`store i16 %a2, i16* %tmp2, align 16`
			`%tmp4 = getelementptr %struct.s* %a, i32 0, i32 2`
			`store i16 %a3, i16* %tmp4, align 16`
			`%tmp6 = getelementptr %struct.s* %a, i32 0, i32 3`
			`store i16 %a4, i16* %tmp6, align 16`
			`%tmp8 = getelementptr %struct.s* %a, i32 0, i32 4`
			`store i16 %a5, i16* %tmp8, align 16`
			`%tmp10 = getelementptr %struct.s* %a, i32 0, i32 5`
			`store i16 %a6, i16* %tmp10, align 16`
			`call void @f( %struct.s* %a byval )`
			`call void @f( %struct.s* %a byval )`
			`ret void`
			`}`

			`declare void @f(%struct.s* byval)`