llvm-project/compiler-rt/lib/builtins/i386/floatundisf.S

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include "../assembly.h"

// float __floatundisf(du_int a);

// Note that there is a hardware instruction, fildll, that does most of what
// this function needs to do.  However, because of our ia32 ABI, it will take
// a write-small read-large stall, so the software implementation here is
// actually several cycles faster.

// This is a branch-free implementation.  A branchy implementation might be
// faster for the common case if you know something a priori about the input
// distribution.

/* branch-free x87 implementation - one cycle slower than without x87.

#ifdef __i386__

CONST_SECTION
.balign 8

		.quad	0x43f0000000000000
twop64:	.quad	0x0000000000000000

#define			TWOp64			twop64-0b(%ecx,%eax,8)

.text
.balign 4
DEFINE_COMPILERRT_FUNCTION(__floatundisf)
	movl		8(%esp),		%eax
	movd		8(%esp),		%xmm1
	movd		4(%esp),		%xmm0
	punpckldq	%xmm1,			%xmm0
	calll		0f
0:	popl		%ecx
	sarl		$31,			%eax
	movq		%xmm0,			4(%esp)
	fildll		4(%esp)
	faddl		TWOp64
	fstps		4(%esp)
	flds		4(%esp)
	ret
END_COMPILERRT_FUNCTION(__floatundisf)

#endif // __i386__

*/

// branch-free, x87-free implementation - faster at the expense of code size

#ifdef __i386__

// Read-only constants used by __floatundisf.  CONST_SECTION is a macro that
// is not defined in this file (presumably it comes from ../assembly.h and
// selects the constant-data section -- TODO confirm).
CONST_SECTION

	.balign 16
// twop52[0]: bit pattern of the double 0x1.0p52 (loaded through TWOp52).
// twop52[1]: mask of the low 12 bits.  It is not reached through twop52 but
// through STICKY with %eax == -1, which addresses sticky - 8.  That only
// works while this quad sits immediately before `sticky`: the two quads fill
// exactly 16 bytes, so the following .balign 16 inserts no padding.
twop52:
	.quad 0x4330000000000000
	.quad 0x0000000000000fff

	.balign 16
// sticky[0]: all-zero mask, loaded through STICKY when %eax == 0.
// NOTE(review): the trailing .long 0x00000012 is not referenced by any code
// visible in this file -- looks like a leftover; confirm before removing.
sticky:
	.quad 0x0000000000000000
	.long 0x00000012

	.balign 16
// NOTE(review): `twelve` is not referenced by any code visible in this file
// (the function materialises 12 with an immediate instead) -- confirm
// whether it can be dropped.
twelve:
	.long 0x00000000

// Both macros form addresses relative to the local label `0:`, whose run-time
// address the function keeps in %ecx (obtained with calll 0f / popl %ecx).
// STICKY additionally indexes by %eax * 8, where %eax is 0 or -1, to select
// between the zero mask at `sticky` and the 0xfff mask 8 bytes before it.
#define			TWOp52			twop52-0b(%ecx)
#define			STICKY			sticky-0b(%ecx,%eax,8)

.text
.balign 4
// float __floatundisf(du_int a)
//
// Converts the unsigned 64-bit integer argument to single precision without
// branches, using SSE2 integer/double operations, and hands the result back
// through the x87 stack.
//
// In:       4(%esp) = low 32 bits of a, 8(%esp) = high 32 bits of a
// Out:      the float result, loaded onto the x87 stack by the final flds
// Clobbers: %eax, %ecx, %edx, %xmm0-%xmm3, flags; the argument slot at
//           4(%esp) is overwritten with the float result
//
// Method: a value below 2^52 fits exactly in the mantissa of a double, so it
// is OR-ed into the bit pattern of 0x1.0p52 and 0x1.0p52 is subtracted to
// obtain (double)a exactly.  A "big input" (a >= 2^52) is first shifted
// right by 12 so that it fits; its low 12 bits are OR-ed back into the
// bottom of the shifted value so that the bits shifted out still influence
// the rounding done by cvtsd2ss.  After the conversion, 12 is added to the
// exponent field of the float to undo the shift.
DEFINE_COMPILERRT_FUNCTION(__floatundisf)
	movl		8(%esp),		%eax
	movd		8(%esp),		%xmm1
	movd		4(%esp),		%xmm0
	punpckldq	%xmm1,			%xmm0	// xmm0 = 64-bit input (high:low), upper lanes zero

	// calll pushes the address of label 0 and popl retrieves it, so
	// %ecx = address of label 0; TWOp52 and STICKY address the constants
	// relative to it.
	calll		0f
0:	popl		%ecx
	shrl		%eax					// high 31 bits of input as sint32
	// The sum has its sign bit set exactly when (high >> 1) >= 0x00080000,
	// i.e. high >= 0x00100000, i.e. input >= 2^52.  The largest possible sum
	// is 0x7fffffff + 0x7ff80000 = 0xfff7ffff, so it never wraps.
	addl		$0x7ff80000,	%eax
	sarl		$31,			%eax	// (big input) ? -1 : 0
	movsd		STICKY,			%xmm1	// (big input) ? 0xfff : 0
	movl		$12,			%edx
	andl		%eax,			%edx	// (big input) ? 12 : 0
	movd		%edx,			%xmm3
	andpd		%xmm0,			%xmm1	// (big input) ? input & 0xfff : 0
	movsd		TWOp52,			%xmm2	// 0x1.0p52
	psrlq		%xmm3,			%xmm0	// (big input) ? input >> 12 : input
	orpd		%xmm2,			%xmm1	// 0x1.0p52 + ((big input) ? input & 0xfff : 0)
	orpd		%xmm1,			%xmm0	// 0x1.0p52 + ((big input) ? (input >> 12 | input & 0xfff) : input)
	subsd		%xmm2,			%xmm0	// (double)((big input) ? (input >> 12 | input & 0xfff) : input)
	cvtsd2ss	%xmm0,			%xmm0	// (float)((big input) ? (input >> 12 | input & 0xfff) : input)
	pslld		$23,			%xmm3	// move the 12 (or 0) up to bit 23, the float exponent field
	paddd		%xmm3,			%xmm0	// (float)input
	// Spill the result over the low argument word and reload it with flds
	// so that it ends up on the x87 stack.
	movd		%xmm0,			4(%esp)
	flds		4(%esp)
	ret
END_COMPILERRT_FUNCTION(__floatundisf)

#endif // __i386__

// NO_EXEC_STACK_DIRECTIVE is a macro defined outside this file (presumably
// in ../assembly.h).  Going by its name it tags the object as not requiring
// an executable stack -- TODO confirm against the macro definition.
NO_EXEC_STACK_DIRECTIVE
Update more file headers across all of the LLVM projects in the monorepo to reflect the new license. These used slightly different spellings that defeated my regular expressions. We understand that people may be surprised that we're moving the header entirely to discuss the new license. We checked this carefully with the Foundation's lawyer and we believe this is the correct approach. Essentially, all code in the project is now made available by the LLVM project under our new license, so you will see that the license headers include that license only. Some of our contributors have contributed code under our old license, and accordingly, we have retained a copy of our old license notice in the top-level files in each project and repository. llvm-svn: 351648 2019-01-19 18:56:40 +08:00			`// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.`
			`// See https://llvm.org/LICENSE.txt for license information.`
			`// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception`
Initial import of compiler-rt. - llvm-svn: 74292 2009-06-27 00:47:03 +08:00
Add assembly.h for use in .S files. llvm-svn: 85263 2009-10-28 01:49:50 +08:00			`#include "../assembly.h"`

Initial import of compiler-rt. - llvm-svn: 74292 2009-06-27 00:47:03 +08:00			`// float __floatundisf(du_int a);`

			`// Note that there is a hardware instruction, fildll, that does most of what`
			`// this function needs to do. However, because of our ia32 ABI, it will take`
			`// a write-small read-large stall, so the software implementation here is`
			`// actually several cycles faster.`

			`// This is a branch-free implementation. A branchy implementation might be`
			`// faster for the common case if you know something a priori about the input`
			`// distribution.`

			`/* branch-free x87 implementation - one cycle slower than without x87.`

			`#ifdef __i386__`

builtins: cleanup constant data section selection Each of the object formats use a different directive for selecting the constant section. Use a macro to avoid the duplication across a number of files. Also correct a small macro mismatch on the Windows case (HIDDEN_DIRECTIVE -> HIDDEN). Patch by Vadim Chugunov! llvm-svn: 223910 2014-12-10 10:36:22 +08:00			`CONST_SECTION`
[CompilerRT] use .p2align, .balign instead of .align The .align statements in ARM assembly routines is actually meant to be a power of 2 alignment (e.g. .align 2 == 4 byte alignment, not 2). Switch to using .p2align. .p2align is guaranteed to be a power-of-two alignment always and much more explicit. The .align in the case of x86_64 is byte alignment, use .balign instead of .align. llvm-svn: 208578 2014-05-12 23:23:37 +08:00			`.balign 3`
Initial import of compiler-rt. - llvm-svn: 74292 2009-06-27 00:47:03 +08:00
			`.quad 0x43f0000000000000`
			`twop64: .quad 0x0000000000000000`

			`#define TWOp64 twop64-0b(%ecx,%eax,8)`

			`.text`
[CompilerRT] use .p2align, .balign instead of .align The .align statements in ARM assembly routines is actually meant to be a power of 2 alignment (e.g. .align 2 == 4 byte alignment, not 2). Switch to using .p2align. .p2align is guaranteed to be a power-of-two alignment always and much more explicit. The .align in the case of x86_64 is byte alignment, use .balign instead of .align. llvm-svn: 208578 2014-05-12 23:23:37 +08:00			`.balign 4`
Switch to using DEFINE_COMPILERRT_[PRIVATE_]FUNCTION to define function symbols inside .S files. llvm-svn: 85264 2009-10-28 01:50:21 +08:00			`DEFINE_COMPILERRT_FUNCTION(__floatundisf)`
Initial import of compiler-rt. - llvm-svn: 74292 2009-06-27 00:47:03 +08:00			`movl 8(%esp), %eax`
			`movd 8(%esp), %xmm1`
			`movd 4(%esp), %xmm0`
			`punpckldq %xmm1, %xmm0`
			`calll 0f`
			`0: popl %ecx`
			`sarl $31, %eax`
			`movq %xmm0, 4(%esp)`
			`fildll 4(%esp)`
			`faddl TWOp64`
			`fstps 4(%esp)`
			`flds 4(%esp)`
			`ret`
Add end-of-function markers. llvm-svn: 200006 2014-01-24 22:40:53 +08:00			`END_COMPILERRT_FUNCTION(__floatundisf)`

Initial import of compiler-rt. - llvm-svn: 74292 2009-06-27 00:47:03 +08:00			`#endif // __i386__`

			`*/`

[builtins] Use single line C++/C99 comment style Use the uniform single line C++/99 style for code comments. This is part of the cleanup proposed in "[RFC] compiler-rt builtins cleanup and refactoring". Differential Revision: https://reviews.llvm.org/D60352 llvm-svn: 359411 2019-04-29 06:47:49 +08:00			`// branch-free, x87-free implementation - faster at the expense of code size`
Initial import of compiler-rt. - llvm-svn: 74292 2009-06-27 00:47:03 +08:00
			`#ifdef __i386__`

builtins: cleanup constant data section selection Each of the object formats use a different directive for selecting the constant section. Use a macro to avoid the duplication across a number of files. Also correct a small macro mismatch on the Windows case (HIDDEN_DIRECTIVE -> HIDDEN). Patch by Vadim Chugunov! llvm-svn: 223910 2014-12-10 10:36:22 +08:00			`CONST_SECTION`
builtins: move the readonly constants into rodata Place the floating point constants into the read-only data section. This was already being done for x86_64, this simply mirrors the behaviour for i686. llvm-svn: 214034 2014-07-27 05:08:41 +08:00
builtins: correct constant alignments MMX/SSE instructions expect 128-bit alignment (16-byte) for constants that they reference. Correct the alignment on the constant values. Although it is quite possible for the data to end up aligned, there is no guarantee that this will occur unless it is explicitly aligned to the desired location. If the data ends up being unaligned, the resultant binary would fault at runtime due to the unaligned access. As an example, the follow would fault previously: cc -c lib/builtins/x86_64/floatundidf.S -o floatundidf.o cc -c test/builtins/Unit/floatundidf_test.c -o floatundidf_test.c ld -m elf_x86_64 floatundidf.o floatundidf_test.o -lc -o floatundidf However, if the object files were reversed, the data would end up aligned and the problem would go unnoticed. llvm-svn: 214033 2014-07-27 05:08:34 +08:00			`.balign 16`
			`twop52:`
			`.quad 0x4330000000000000`
			`.quad 0x0000000000000fff`

			`.balign 16`
			`sticky:`
			`.quad 0x0000000000000000`
			`.long 0x00000012`

			`.balign 16`
			`twelve:`
			`.long 0x00000000`
Initial import of compiler-rt. - llvm-svn: 74292 2009-06-27 00:47:03 +08:00
			`#define TWOp52 twop52-0b(%ecx)`
			`#define STICKY sticky-0b(%ecx,%eax,8)`

			`.text`
[CompilerRT] use .p2align, .balign instead of .align The .align statements in ARM assembly routines is actually meant to be a power of 2 alignment (e.g. .align 2 == 4 byte alignment, not 2). Switch to using .p2align. .p2align is guaranteed to be a power-of-two alignment always and much more explicit. The .align in the case of x86_64 is byte alignment, use .balign instead of .align. llvm-svn: 208578 2014-05-12 23:23:37 +08:00			`.balign 4`
Switch to using DEFINE_COMPILERRT_[PRIVATE_]FUNCTION to define function symbols inside .S files. llvm-svn: 85264 2009-10-28 01:50:21 +08:00			`DEFINE_COMPILERRT_FUNCTION(__floatundisf)`
Initial import of compiler-rt. - llvm-svn: 74292 2009-06-27 00:47:03 +08:00			`movl 8(%esp), %eax`
			`movd 8(%esp), %xmm1`
			`movd 4(%esp), %xmm0`
			`punpckldq %xmm1, %xmm0`
[builtins] Use single line C++/C99 comment style Use the uniform single line C++/99 style for code comments. This is part of the cleanup proposed in "[RFC] compiler-rt builtins cleanup and refactoring". Differential Revision: https://reviews.llvm.org/D60352 llvm-svn: 359411 2019-04-29 06:47:49 +08:00
Initial import of compiler-rt. - llvm-svn: 74292 2009-06-27 00:47:03 +08:00			`calll 0f`
			`0: popl %ecx`
			`shrl %eax // high 31 bits of input as sint32`
			`addl $0x7ff80000, %eax`
			`sarl $31, %eax // (big input) ? -1 : 0`
			`movsd STICKY, %xmm1 // (big input) ? 0xfff : 0`
			`movl $12, %edx`
			`andl %eax, %edx // (big input) ? 12 : 0`
			`movd %edx, %xmm3`
			`andpd %xmm0, %xmm1 // (big input) ? input & 0xfff : 0`
			`movsd TWOp52, %xmm2 // 0x1.0p52`
			`psrlq %xmm3, %xmm0 // (big input) ? input >> 12 : input`
			`orpd %xmm2, %xmm1 // 0x1.0p52 + ((big input) ? input & 0xfff : input)`
			`orpd %xmm1, %xmm0 // 0x1.0p52 + ((big input) ? (input >> 12 \| input & 0xfff) : input)`
			`subsd %xmm2, %xmm0 // (double)((big input) ? (input >> 12 \| input & 0xfff) : input)`
			`cvtsd2ss %xmm0, %xmm0 // (float)((big input) ? (input >> 12 \| input & 0xfff) : input)`
			`pslld $23, %xmm3`
			`paddd %xmm3, %xmm0 // (float)input`
			`movd %xmm0, 4(%esp)`
			`flds 4(%esp)`
			`ret`
Add end-of-function markers. llvm-svn: 200006 2014-01-24 22:40:53 +08:00			`END_COMPILERRT_FUNCTION(__floatundisf)`

Initial import of compiler-rt. - llvm-svn: 74292 2009-06-27 00:47:03 +08:00			`#endif // __i386__`
builtins: tag with noexecstack These routines do not require executable stacks. However, by default ELFish linkers may assume an executable stack on GNUish environments (and some non-GNU ones too!). The GNU extension to add a note to indicate a non-executable stack is honoured by these environments to mark the stack as non-executable (the compiler normally emits this directive on appropriate targets whenever possible). This allows normal builds from getting executable stacks due to linking to the compiler rt builtins. llvm-svn: 273500 2016-06-23 06:09:42 +08:00
			`NO_EXEC_STACK_DIRECTIVE`