llvm-project/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

4523 lines
159 KiB
LLVM
Raw Normal View History

Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptosi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-linux | FileCheck %s --check-prefix=X86-X87
; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptosi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
;
; 32-bit float to signed integer
;
; Declarations of the overloaded llvm.fptosi.sat intrinsic for a `float`
; operand, one per integer result width called by the tests in this section.
declare i1 @llvm.fptosi.sat.i1.f32(float)
declare i8 @llvm.fptosi.sat.i8.f32(float)
declare i13 @llvm.fptosi.sat.i13.f32(float)
declare i16 @llvm.fptosi.sat.i16.f32(float)
declare i19 @llvm.fptosi.sat.i19.f32(float)
declare i32 @llvm.fptosi.sat.i32.f32(float)
declare i50 @llvm.fptosi.sat.i50.f32(float)
declare i64 @llvm.fptosi.sat.i64.f32(float)
declare i100 @llvm.fptosi.sat.i100.f32(float)
declare i128 @llvm.fptosi.sat.i128.f32(float)
; Saturating float -> i1 conversion through @llvm.fptosi.sat.i1.f32.
; The CHECK lines below are autogenerated (utils/update_llc_test_checks.py),
; one group per RUN-line prefix: X86-X87, X86-SSE and X64. Regenerate them with
; the script rather than editing them by hand.
define i1 @test_signed_i1_f32(float %f) nounwind {
; X86-X87-LABEL: test_signed_i1_f32:
; X86-X87: # %bb.0:
; X86-X87-NEXT: pushl %ebx
; X86-X87-NEXT: subl $8, %esp
; X86-X87-NEXT: flds {{[0-9]+}}(%esp)
; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fists {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fld1
; X86-X87-NEXT: fchs
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movb $-1, %dl
; X86-X87-NEXT: jb .LBB0_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %dl
; X86-X87-NEXT: .LBB0_2:
; X86-X87-NEXT: fldz
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %ebx
; X86-X87-NEXT: ja .LBB0_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl %edx, %ebx
; X86-X87-NEXT: .LBB0_4:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jp .LBB0_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl %ebx, %ecx
; X86-X87-NEXT: .LBB0_6:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $8, %esp
; X86-X87-NEXT: popl %ebx
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i1_f32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: maxss {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: xorps %xmm1, %xmm1
; X86-SSE-NEXT: minss %xmm0, %xmm1
; X86-SSE-NEXT: cvttss2si %xmm1, %eax
; X86-SSE-NEXT: # kill: def $al killed $al killed $eax
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i1_f32:
; X64: # %bb.0:
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: maxss %xmm0, %xmm1
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: minss %xmm1, %xmm0
; X64-NEXT: cvttss2si %xmm0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
  %x = call i1 @llvm.fptosi.sat.i1.f32(float %f)
  ret i1 %x
}
; Saturating float -> i8 conversion through @llvm.fptosi.sat.i8.f32.
; The CHECK lines below are autogenerated (utils/update_llc_test_checks.py),
; one group per RUN-line prefix: X86-X87, X86-SSE and X64. Regenerate them with
; the script rather than editing them by hand.
define i8 @test_signed_i8_f32(float %f) nounwind {
; X86-X87-LABEL: test_signed_i8_f32:
; X86-X87: # %bb.0:
; X86-X87-NEXT: subl $8, %esp
; X86-X87-NEXT: flds {{[0-9]+}}(%esp)
; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fists {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movb $-128, %dl
; X86-X87-NEXT: jb .LBB1_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %dl
; X86-X87-NEXT: .LBB1_2:
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movb $127, %cl
; X86-X87-NEXT: ja .LBB1_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl %edx, %ecx
; X86-X87-NEXT: .LBB1_4:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jnp .LBB1_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: .LBB1_6:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $8, %esp
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i8_f32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: maxss {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: minss %xmm0, %xmm1
; X86-SSE-NEXT: cvttss2si %xmm1, %eax
; X86-SSE-NEXT: # kill: def $al killed $al killed $eax
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i8_f32:
; X64: # %bb.0:
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: maxss %xmm0, %xmm1
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: minss %xmm1, %xmm0
; X64-NEXT: cvttss2si %xmm0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
  %x = call i8 @llvm.fptosi.sat.i8.f32(float %f)
  ret i8 %x
}
define i13 @test_signed_i13_f32(float %f) nounwind {
; X86-X87-LABEL: test_signed_i13_f32:
; X86-X87: # %bb.0:
; X86-X87-NEXT: subl $8, %esp
; X86-X87-NEXT: flds {{[0-9]+}}(%esp)
; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fists {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movw $-4096, %cx # imm = 0xF000
; X86-X87-NEXT: jb .LBB2_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: .LBB2_2:
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $4095, %edx # imm = 0xFFF
; X86-X87-NEXT: ja .LBB2_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl %ecx, %edx
; X86-X87-NEXT: .LBB2_4:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jp .LBB2_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl %edx, %ecx
; X86-X87-NEXT: .LBB2_6:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $8, %esp
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i13_f32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: maxss {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: minss %xmm0, %xmm1
; X86-SSE-NEXT: cvttss2si %xmm1, %eax
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i13_f32:
; X64: # %bb.0:
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: maxss %xmm0, %xmm1
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: minss %xmm1, %xmm0
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X64-NEXT: cvttss2si %xmm0, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%x = call i13 @llvm.fptosi.sat.i13.f32(float %f)
ret i13 %x
}
; Saturating signed conversion of a float to i16 through
; @llvm.fptosi.sat.i16.f32. The assertions below are grouped by check prefix:
;  - the X86-X87 group stores the integer with fists and then selects the
;    result with branches: -32768 on jb, 32767 on ja, and 0 when the value
;    compares unordered with itself (jp after fucomp %st(0)).
;  - the X86-SSE and X64 groups clamp with maxss/minss before cvttss2si.
define i16 @test_signed_i16_f32(float %f) nounwind {
; X86-X87-LABEL: test_signed_i16_f32:
; X86-X87: # %bb.0:
; X86-X87-NEXT: subl $8, %esp
; X86-X87-NEXT: flds {{[0-9]+}}(%esp)
; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fists {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movw $-32768, %cx # imm = 0x8000
; X86-X87-NEXT: jb .LBB3_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: .LBB3_2:
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $32767, %edx # imm = 0x7FFF
; X86-X87-NEXT: ja .LBB3_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl %ecx, %edx
; X86-X87-NEXT: .LBB3_4:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jp .LBB3_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl %edx, %ecx
; X86-X87-NEXT: .LBB3_6:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $8, %esp
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i16_f32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: maxss {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: minss %xmm0, %xmm1
; X86-SSE-NEXT: cvttss2si %xmm1, %eax
; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i16_f32:
; X64: # %bb.0:
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: maxss %xmm0, %xmm1
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: minss %xmm1, %xmm0
; X64-NEXT: cvttss2si %xmm0, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
  %x = call i16 @llvm.fptosi.sat.i16.f32(float %f)
  ret i16 %x
}
; Saturating signed conversion of a float to the non-power-of-two type i19
; through @llvm.fptosi.sat.i19.f32. The assertions below are grouped by check
; prefix:
;  - the X86-X87 group stores the integer with fistl and then selects the
;    result with branches: -262144 on jb, 262143 on ja, and 0 when the value
;    compares unordered with itself (jp after fucomp %st(0)).
;  - the X86-SSE and X64 groups zero %eax, self-compare the input with
;    ucomiss, clamp with maxss/minss against constant-pool operands, convert
;    with cvttss2si, and use cmovnpl so that %eax keeps 0 when the compare
;    was unordered.
define i19 @test_signed_i19_f32(float %f) nounwind {
; X86-X87-LABEL: test_signed_i19_f32:
; X86-X87: # %bb.0:
; X86-X87-NEXT: subl $8, %esp
; X86-X87-NEXT: flds {{[0-9]+}}(%esp)
; X86-X87-NEXT: fnstcw (%esp)
; X86-X87-NEXT: movzwl (%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fistl {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw (%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $-262144, %ecx # imm = 0xFFFC0000
; X86-X87-NEXT: jb .LBB4_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: .LBB4_2:
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $262143, %edx # imm = 0x3FFFF
; X86-X87-NEXT: ja .LBB4_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl %ecx, %edx
; X86-X87-NEXT: .LBB4_4:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jp .LBB4_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl %edx, %ecx
; X86-X87-NEXT: .LBB4_6:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $8, %esp
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i19_f32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: xorl %eax, %eax
; X86-SSE-NEXT: ucomiss %xmm0, %xmm0
; X86-SSE-NEXT: maxss {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: minss {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: cvttss2si %xmm0, %ecx
; X86-SSE-NEXT: cmovnpl %ecx, %eax
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i19_f32:
; X64: # %bb.0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ucomiss %xmm0, %xmm0
; X64-NEXT: maxss {{.*}}(%rip), %xmm0
; X64-NEXT: minss {{.*}}(%rip), %xmm0
; X64-NEXT: cvttss2si %xmm0, %ecx
; X64-NEXT: cmovnpl %ecx, %eax
; X64-NEXT: retq
  %x = call i19 @llvm.fptosi.sat.i19.f32(float %f)
  ret i19 %x
}
; Saturating signed conversion of a float to i32 through
; @llvm.fptosi.sat.i32.f32. The intrinsic clamps out-of-range inputs to the
; integer bounds and yields 0 for NaN instead of producing poison.
;
; What the assertions below pin down for each check prefix:
;  * X86-X87 sequence: the value is stored with fistl while 0xC00 is OR'ed into
;    the x87 control word (presumably to force truncation - confirm against the
;    x87 control-word layout), then three compare/branch steps select
;    0x80000000 on "below", 0x7FFFFFFF on "above", and 0 when the
;    self-compare reports unordered (jp).
;  * X86-SSE and X64 sequences: cvttss2si performs the conversion, a cmovbe
;    picks 0x7FFFFFFF for inputs above the upper bound, and a cmovnp after
;    "ucomiss %xmm0, %xmm0" leaves 0 in %eax for NaN. No separate lower-bound
;    select appears (presumably because cvttss2si already returns 0x80000000
;    for out-of-range inputs - TODO confirm).
;
; The assertion lines look machine-generated (update_llc_test_checks.py
; style); regenerate them with the script rather than editing by hand.
define i32 @test_signed_i32_f32(float %f) nounwind {
; X86-X87-LABEL: test_signed_i32_f32:
; X86-X87: # %bb.0:
; X86-X87-NEXT: subl $8, %esp
; X86-X87-NEXT: flds {{[0-9]+}}(%esp)
; X86-X87-NEXT: fnstcw (%esp)
; X86-X87-NEXT: movzwl (%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fistl {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw (%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
; X86-X87-NEXT: jb .LBB5_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: .LBB5_2:
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF
; X86-X87-NEXT: ja .LBB5_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl %ecx, %edx
; X86-X87-NEXT: .LBB5_4:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jp .LBB5_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl %edx, %ecx
; X86-X87-NEXT: .LBB5_6:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $8, %esp
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i32_f32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: cvttss2si %xmm0, %eax
; X86-SSE-NEXT: ucomiss {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF
; X86-SSE-NEXT: cmovbel %eax, %ecx
; X86-SSE-NEXT: xorl %eax, %eax
; X86-SSE-NEXT: ucomiss %xmm0, %xmm0
; X86-SSE-NEXT: cmovnpl %ecx, %eax
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i32_f32:
; X64: # %bb.0:
; X64-NEXT: cvttss2si %xmm0, %eax
; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0
; X64-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF
; X64-NEXT: cmovbel %eax, %ecx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ucomiss %xmm0, %xmm0
; X64-NEXT: cmovnpl %ecx, %eax
; X64-NEXT: retq
  %x = call i32 @llvm.fptosi.sat.i32.f32(float %f)
  ret i32 %x
}
; Saturating signed conversion of a float to the non-power-of-two type i50
; through @llvm.fptosi.sat.i50.f32. The intrinsic clamps out-of-range inputs
; to the i50 bounds and yields 0 for NaN instead of producing poison.
;
; What the assertions below pin down for each check prefix:
;  * Both 32-bit sequences (X86-X87 and X86-SSE) convert with the x87
;    fistpll store and build the result as two 32-bit halves, low in %eax and
;    high in %edx. The lower clamp is 0 / 0xFFFE0000 (low / high), the upper
;    clamp is -1 / 0x1FFFF, and both halves are zeroed when the self-compare
;    reports unordered (jp / cmovp).
;  * The X64 sequence converts with cvttss2si into a 64-bit register, then
;    selects 0xFFFE000000000000 (-2^49) below the lower bound,
;    0x1FFFFFFFFFFFF (2^49 - 1) above the upper bound, and 0 for NaN via
;    cmovnp after "ucomiss %xmm0, %xmm0".
;
; The assertion lines look machine-generated (update_llc_test_checks.py
; style); regenerate them with the script rather than editing by hand.
define i50 @test_signed_i50_f32(float %f) nounwind {
; X86-X87-LABEL: test_signed_i50_f32:
; X86-X87: # %bb.0:
; X86-X87-NEXT: pushl %edi
; X86-X87-NEXT: pushl %esi
; X86-X87-NEXT: subl $20, %esp
; X86-X87-NEXT: flds {{[0-9]+}}(%esp)
; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fld %st(0)
; X86-X87-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: jb .LBB6_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-X87-NEXT: .LBB6_2:
; X86-X87-NEXT: movl $-131072, %edi # imm = 0xFFFE0000
; X86-X87-NEXT: jb .LBB6_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-X87-NEXT: .LBB6_4:
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $131071, %esi # imm = 0x1FFFF
; X86-X87-NEXT: ja .LBB6_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl %edi, %esi
; X86-X87-NEXT: .LBB6_6:
; X86-X87-NEXT: movl $-1, %edi
; X86-X87-NEXT: ja .LBB6_8
; X86-X87-NEXT: # %bb.7:
; X86-X87-NEXT: movl %edx, %edi
; X86-X87-NEXT: .LBB6_8:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: jp .LBB6_10
; X86-X87-NEXT: # %bb.9:
; X86-X87-NEXT: movl %edi, %ecx
; X86-X87-NEXT: movl %esi, %edx
; X86-X87-NEXT: .LBB6_10:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $20, %esp
; X86-X87-NEXT: popl %esi
; X86-X87-NEXT: popl %edi
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i50_f32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %esi
; X86-SSE-NEXT: subl $16, %esp
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: flds {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: orl $3072, %eax # imm = 0xC00
; X86-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: xorl %ecx, %ecx
; X86-SSE-NEXT: ucomiss {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE-NEXT: cmovbl %ecx, %esi
; X86-SSE-NEXT: movl $-131072, %eax # imm = 0xFFFE0000
; X86-SSE-NEXT: cmovael {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: ucomiss {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: movl $131071, %edx # imm = 0x1FFFF
; X86-SSE-NEXT: cmovbel %eax, %edx
; X86-SSE-NEXT: movl $-1, %eax
; X86-SSE-NEXT: cmovbel %esi, %eax
; X86-SSE-NEXT: ucomiss %xmm0, %xmm0
; X86-SSE-NEXT: cmovpl %ecx, %eax
; X86-SSE-NEXT: cmovpl %ecx, %edx
; X86-SSE-NEXT: addl $16, %esp
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i50_f32:
; X64: # %bb.0:
; X64-NEXT: cvttss2si %xmm0, %rax
; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0
; X64-NEXT: movabsq $-562949953421312, %rcx # imm = 0xFFFE000000000000
; X64-NEXT: cmovaeq %rax, %rcx
; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0
; X64-NEXT: movabsq $562949953421311, %rdx # imm = 0x1FFFFFFFFFFFF
; X64-NEXT: cmovbeq %rcx, %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ucomiss %xmm0, %xmm0
; X64-NEXT: cmovnpq %rdx, %rax
; X64-NEXT: retq
  %x = call i50 @llvm.fptosi.sat.i50.f32(float %f)
  ret i50 %x
}
define i64 @test_signed_i64_f32(float %f) nounwind {
; X86-X87-LABEL: test_signed_i64_f32:
; X86-X87: # %bb.0:
; X86-X87-NEXT: pushl %edi
; X86-X87-NEXT: pushl %esi
; X86-X87-NEXT: subl $20, %esp
; X86-X87-NEXT: flds {{[0-9]+}}(%esp)
; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fld %st(0)
; X86-X87-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: jb .LBB7_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-X87-NEXT: .LBB7_2:
; X86-X87-NEXT: movl $-2147483648, %edi # imm = 0x80000000
; X86-X87-NEXT: jb .LBB7_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-X87-NEXT: .LBB7_4:
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $2147483647, %esi # imm = 0x7FFFFFFF
; X86-X87-NEXT: ja .LBB7_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl %edi, %esi
; X86-X87-NEXT: .LBB7_6:
; X86-X87-NEXT: movl $-1, %edi
; X86-X87-NEXT: ja .LBB7_8
; X86-X87-NEXT: # %bb.7:
; X86-X87-NEXT: movl %edx, %edi
; X86-X87-NEXT: .LBB7_8:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: jp .LBB7_10
; X86-X87-NEXT: # %bb.9:
; X86-X87-NEXT: movl %edi, %ecx
; X86-X87-NEXT: movl %esi, %edx
; X86-X87-NEXT: .LBB7_10:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $20, %esp
; X86-X87-NEXT: popl %esi
; X86-X87-NEXT: popl %edi
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i64_f32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %esi
; X86-SSE-NEXT: subl $16, %esp
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: flds {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: orl $3072, %eax # imm = 0xC00
; X86-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: xorl %ecx, %ecx
; X86-SSE-NEXT: ucomiss {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE-NEXT: cmovbl %ecx, %esi
; X86-SSE-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; X86-SSE-NEXT: cmovael {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: ucomiss {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-SSE-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF
; X86-SSE-NEXT: cmovbel %eax, %edx
; X86-SSE-NEXT: movl $-1, %eax
; X86-SSE-NEXT: cmovbel %esi, %eax
; X86-SSE-NEXT: ucomiss %xmm0, %xmm0
; X86-SSE-NEXT: cmovpl %ecx, %eax
; X86-SSE-NEXT: cmovpl %ecx, %edx
; X86-SSE-NEXT: addl $16, %esp
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i64_f32:
; X64: # %bb.0:
; X64-NEXT: cvttss2si %xmm0, %rax
; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0
; X64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
; X64-NEXT: cmovbeq %rax, %rcx
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ucomiss %xmm0, %xmm0
; X64-NEXT: cmovnpq %rcx, %rax
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X64-NEXT: retq
%x = call i64 @llvm.fptosi.sat.i64.f32(float %f)
ret i64 %x
}
; Signed saturating conversion of a float to i100 via @llvm.fptosi.sat.i100.f32.
; In every configuration checked below the raw conversion is done by the
; __fixsfti libcall; the result is then clamped to the i100 range (in the
; 64-bit output the high-half bounds are 0xFFFFFFF800000000 and 0x7FFFFFFFF)
; and finally forced to zero when the input compares unordered with itself
; (NaN).
; NOTE(review): the check lines look like update_llc_test_checks.py output;
; presumably they should be regenerated rather than edited by hand - confirm.
define i100 @test_signed_i100_f32(float %f) nounwind {
; X86-X87-LABEL: test_signed_i100_f32:
; X86-X87: # %bb.0:
; X86-X87-NEXT: pushl %ebp
; X86-X87-NEXT: pushl %ebx
; X86-X87-NEXT: pushl %edi
; X86-X87-NEXT: pushl %esi
; X86-X87-NEXT: subl $44, %esp
; X86-X87-NEXT: flds {{[0-9]+}}(%esp)
; X86-X87-NEXT: fsts {{[0-9]+}}(%esp)
; X86-X87-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: movl %eax, (%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fsts {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT: fucompp
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: movl %eax, %ebx
; X86-X87-NEXT: calll __fixsfti
; X86-X87-NEXT: subl $4, %esp
; X86-X87-NEXT: xorl %edx, %edx
; X86-X87-NEXT: movb %bh, %ah
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $-8, %ebx
; X86-X87-NEXT: jb .LBB8_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-X87-NEXT: .LBB8_2:
; X86-X87-NEXT: movl $0, %ecx
; X86-X87-NEXT: movl $0, %ebp
; X86-X87-NEXT: jb .LBB8_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-X87-NEXT: .LBB8_4:
; X86-X87-NEXT: movl $0, %edi
; X86-X87-NEXT: jb .LBB8_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-X87-NEXT: .LBB8_6:
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $-1, %eax
; X86-X87-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT: movl $-1, %esi
; X86-X87-NEXT: ja .LBB8_8
; X86-X87-NEXT: # %bb.7:
; X86-X87-NEXT: movl %edi, %eax
; X86-X87-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl %ecx, %esi
; X86-X87-NEXT: .LBB8_8:
; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: movl $7, %edi
; X86-X87-NEXT: ja .LBB8_10
; X86-X87-NEXT: # %bb.9:
; X86-X87-NEXT: movl %ebx, %edi
; X86-X87-NEXT: .LBB8_10:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %eax
; X86-X87-NEXT: movl $0, %ebp
; X86-X87-NEXT: movl $0, %ebx
; X86-X87-NEXT: jp .LBB8_12
; X86-X87-NEXT: # %bb.11:
; X86-X87-NEXT: movl %edi, %edx
; X86-X87-NEXT: movl %esi, %eax
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-X87-NEXT: .LBB8_12:
; X86-X87-NEXT: movl %ebx, 8(%ecx)
; X86-X87-NEXT: movl %ebp, 4(%ecx)
; X86-X87-NEXT: movl %eax, (%ecx)
; X86-X87-NEXT: andl $15, %edx
; X86-X87-NEXT: movb %dl, 12(%ecx)
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $44, %esp
; X86-X87-NEXT: popl %esi
; X86-X87-NEXT: popl %edi
; X86-X87-NEXT: popl %ebx
; X86-X87-NEXT: popl %ebp
; X86-X87-NEXT: retl $4
;
; X86-SSE-LABEL: test_signed_i100_f32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %ebp
; X86-SSE-NEXT: pushl %ebx
; X86-SSE-NEXT: pushl %edi
; X86-SSE-NEXT: pushl %esi
; X86-SSE-NEXT: subl $28, %esp
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl %eax, (%esp)
; X86-SSE-NEXT: calll __fixsfti
; X86-SSE-NEXT: subl $4, %esp
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: xorl %ebp, %ebp
; X86-SSE-NEXT: ucomiss {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: movl $-8, %ebx
; X86-SSE-NEXT: movl $0, %ecx
; X86-SSE-NEXT: movl $0, %edx
; X86-SSE-NEXT: movl $0, %edi
; X86-SSE-NEXT: jb .LBB8_2
; X86-SSE-NEXT: # %bb.1:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-SSE-NEXT: .LBB8_2:
; X86-SSE-NEXT: ucomiss {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: movl $-1, %eax
; X86-SSE-NEXT: cmoval %eax, %edi
; X86-SSE-NEXT: cmoval %eax, %edx
; X86-SSE-NEXT: cmoval %eax, %ecx
; X86-SSE-NEXT: movl $7, %eax
; X86-SSE-NEXT: cmovbel %ebx, %eax
; X86-SSE-NEXT: ucomiss %xmm0, %xmm0
; X86-SSE-NEXT: cmovpl %ebp, %eax
; X86-SSE-NEXT: cmovpl %ebp, %ecx
; X86-SSE-NEXT: cmovpl %ebp, %edx
; X86-SSE-NEXT: cmovpl %ebp, %edi
; X86-SSE-NEXT: movl %edi, 8(%esi)
; X86-SSE-NEXT: movl %edx, 4(%esi)
; X86-SSE-NEXT: movl %ecx, (%esi)
; X86-SSE-NEXT: andl $15, %eax
; X86-SSE-NEXT: movb %al, 12(%esi)
; X86-SSE-NEXT: movl %esi, %eax
; X86-SSE-NEXT: addl $28, %esp
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: popl %edi
; X86-SSE-NEXT: popl %ebx
; X86-SSE-NEXT: popl %ebp
; X86-SSE-NEXT: retl $4
;
; X64-LABEL: test_signed_i100_f32:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: callq __fixsfti@PLT
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0
; X64-NEXT: cmovbq %rcx, %rax
; X64-NEXT: movabsq $-34359738368, %rsi # imm = 0xFFFFFFF800000000
; X64-NEXT: cmovbq %rsi, %rdx
; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0
; X64-NEXT: movabsq $34359738367, %rsi # imm = 0x7FFFFFFFF
; X64-NEXT: cmovaq %rsi, %rdx
; X64-NEXT: movq $-1, %rsi
; X64-NEXT: cmovaq %rsi, %rax
; X64-NEXT: ucomiss %xmm0, %xmm0
; X64-NEXT: cmovpq %rcx, %rax
; X64-NEXT: cmovpq %rcx, %rdx
; X64-NEXT: popq %rcx
; X64-NEXT: retq
%x = call i100 @llvm.fptosi.sat.i100.f32(float %f)
ret i100 %x
}
define i128 @test_signed_i128_f32(float %f) nounwind {
; X86-X87-LABEL: test_signed_i128_f32:
; X86-X87: # %bb.0:
; X86-X87-NEXT: pushl %ebp
; X86-X87-NEXT: pushl %ebx
; X86-X87-NEXT: pushl %edi
; X86-X87-NEXT: pushl %esi
; X86-X87-NEXT: subl $44, %esp
; X86-X87-NEXT: flds {{[0-9]+}}(%esp)
; X86-X87-NEXT: fsts {{[0-9]+}}(%esp)
; X86-X87-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: movl %eax, (%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fsts {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT: fucompp
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: movl %eax, %ebx
; X86-X87-NEXT: calll __fixsfti
; X86-X87-NEXT: subl $4, %esp
; X86-X87-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT: movb %bh, %ah
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %eax
; X86-X87-NEXT: movl $0, %ebx
; X86-X87-NEXT: jae .LBB9_1
; X86-X87-NEXT: # %bb.2:
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: jae .LBB9_3
; X86-X87-NEXT: .LBB9_4:
; X86-X87-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
; X86-X87-NEXT: jb .LBB9_6
; X86-X87-NEXT: .LBB9_5:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: .LBB9_6:
; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-X87-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
; X86-X87-NEXT: ja .LBB9_8
; X86-X87-NEXT: # %bb.7:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: .LBB9_8:
; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: movl $-1, %ebp
; X86-X87-NEXT: movl $-1, %edi
; X86-X87-NEXT: movl $-1, %esi
; X86-X87-NEXT: ja .LBB9_10
; X86-X87-NEXT: # %bb.9:
; X86-X87-NEXT: movl %edx, %ebp
; X86-X87-NEXT: movl %ebx, %edi
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-X87-NEXT: .LBB9_10:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %eax
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: movl $0, %ebx
; X86-X87-NEXT: jp .LBB9_12
; X86-X87-NEXT: # %bb.11:
; X86-X87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl %edi, %eax
; X86-X87-NEXT: movl %ebp, %edx
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-X87-NEXT: .LBB9_12:
; X86-X87-NEXT: movl %ebx, 12(%ecx)
; X86-X87-NEXT: movl %edx, 8(%ecx)
; X86-X87-NEXT: movl %eax, 4(%ecx)
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-X87-NEXT: movl %eax, (%ecx)
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $44, %esp
; X86-X87-NEXT: popl %esi
; X86-X87-NEXT: popl %edi
; X86-X87-NEXT: popl %ebx
; X86-X87-NEXT: popl %ebp
; X86-X87-NEXT: retl $4
; X86-X87-NEXT: .LBB9_1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: jb .LBB9_4
; X86-X87-NEXT: .LBB9_3:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-X87-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
; X86-X87-NEXT: jae .LBB9_5
; X86-X87-NEXT: jmp .LBB9_6
;
; X86-SSE-LABEL: test_signed_i128_f32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %ebp
; X86-SSE-NEXT: pushl %ebx
; X86-SSE-NEXT: pushl %edi
; X86-SSE-NEXT: pushl %esi
; X86-SSE-NEXT: subl $28, %esp
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl %eax, (%esp)
; X86-SSE-NEXT: calll __fixsfti
; X86-SSE-NEXT: subl $4, %esp
; X86-SSE-NEXT: xorl %ecx, %ecx
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: ucomiss {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: cmovbl %ecx, %eax
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE-NEXT: cmovbl %ecx, %edx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-SSE-NEXT: cmovbl %ecx, %edi
; X86-SSE-NEXT: movl $-2147483648, %ebx # imm = 0x80000000
; X86-SSE-NEXT: cmovael {{[0-9]+}}(%esp), %ebx
; X86-SSE-NEXT: ucomiss {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-SSE-NEXT: movl $2147483647, %ebp # imm = 0x7FFFFFFF
; X86-SSE-NEXT: cmovbel %ebx, %ebp
; X86-SSE-NEXT: movl $-1, %ebx
; X86-SSE-NEXT: cmoval %ebx, %edi
; X86-SSE-NEXT: cmoval %ebx, %edx
; X86-SSE-NEXT: cmoval %ebx, %eax
; X86-SSE-NEXT: ucomiss %xmm0, %xmm0
; X86-SSE-NEXT: cmovpl %ecx, %eax
; X86-SSE-NEXT: cmovpl %ecx, %edx
; X86-SSE-NEXT: cmovpl %ecx, %edi
; X86-SSE-NEXT: cmovpl %ecx, %ebp
; X86-SSE-NEXT: movl %ebp, 12(%esi)
; X86-SSE-NEXT: movl %edi, 8(%esi)
; X86-SSE-NEXT: movl %edx, 4(%esi)
; X86-SSE-NEXT: movl %eax, (%esi)
; X86-SSE-NEXT: movl %esi, %eax
; X86-SSE-NEXT: addl $28, %esp
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: popl %edi
; X86-SSE-NEXT: popl %ebx
; X86-SSE-NEXT: popl %ebp
; X86-SSE-NEXT: retl $4
;
; X64-LABEL: test_signed_i128_f32:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: callq __fixsfti@PLT
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0
; X64-NEXT: cmovbq %rcx, %rax
; X64-NEXT: movabsq $-9223372036854775808, %rsi # imm = 0x8000000000000000
; X64-NEXT: cmovbq %rsi, %rdx
; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0
; X64-NEXT: movabsq $9223372036854775807, %rsi # imm = 0x7FFFFFFFFFFFFFFF
; X64-NEXT: cmovaq %rsi, %rdx
; X64-NEXT: movq $-1, %rsi
; X64-NEXT: cmovaq %rsi, %rax
; X64-NEXT: ucomiss %xmm0, %xmm0
; X64-NEXT: cmovpq %rcx, %rax
; X64-NEXT: cmovpq %rcx, %rdx
; X64-NEXT: popq %rcx
; X64-NEXT: retq
%x = call i128 @llvm.fptosi.sat.i128.f32(float %f)
ret i128 %x
}
;
; 64-bit float to signed integer
;
; Declarations of the llvm.fptosi.sat intrinsic overloads that take a double
; operand. One overload is declared per integer result type listed below
; (i1, i8, i13, i16, i19, i32, i50, i64, i100, i128); the f64 test functions
; in this section (e.g. test_signed_i1_f64, test_signed_i8_f64) call them.
;
declare i1 @llvm.fptosi.sat.i1.f64 (double)
declare i8 @llvm.fptosi.sat.i8.f64 (double)
declare i13 @llvm.fptosi.sat.i13.f64 (double)
declare i16 @llvm.fptosi.sat.i16.f64 (double)
declare i19 @llvm.fptosi.sat.i19.f64 (double)
declare i32 @llvm.fptosi.sat.i32.f64 (double)
declare i50 @llvm.fptosi.sat.i50.f64 (double)
declare i64 @llvm.fptosi.sat.i64.f64 (double)
declare i100 @llvm.fptosi.sat.i100.f64(double)
declare i128 @llvm.fptosi.sat.i128.f64(double)
; Saturating signed conversion of a double to i1 through
; llvm.fptosi.sat.i1.f64; the converted value is returned directly.
;
; Expected code, per configuration:
;  - x87 only: the value is stored as an integer with fists, then the result
;    is picked by compare-and-branch: -1 when the input is below -1.0
;    (fld1/fchs), 0 when it is above 0.0 (fldz), and 0 when the input
;    compares unordered with itself (fucomp %st(0) followed by jp).
;  - SSE (32-bit and 64-bit): the input is clamped with maxsd/minsd (the
;    upper bound is the zero produced by xorpd) and converted with cvttsd2si.
;
; NOTE(review): the assertion lines look script-generated
; (update_llc_test_checks.py style); regenerate them rather than editing by
; hand -- confirm against the file header.
define i1 @test_signed_i1_f64(double %f) nounwind {
; X86-X87-LABEL: test_signed_i1_f64:
; X86-X87: # %bb.0:
; X86-X87-NEXT: pushl %ebx
; X86-X87-NEXT: subl $8, %esp
; X86-X87-NEXT: fldl {{[0-9]+}}(%esp)
; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fists {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fld1
; X86-X87-NEXT: fchs
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movb $-1, %dl
; X86-X87-NEXT: jb .LBB10_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %dl
; X86-X87-NEXT: .LBB10_2:
; X86-X87-NEXT: fldz
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %ebx
; X86-X87-NEXT: ja .LBB10_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl %edx, %ebx
; X86-X87-NEXT: .LBB10_4:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jp .LBB10_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl %ebx, %ecx
; X86-X87-NEXT: .LBB10_6:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $8, %esp
; X86-X87-NEXT: popl %ebx
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i1_f64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: maxsd {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: xorpd %xmm1, %xmm1
; X86-SSE-NEXT: minsd %xmm0, %xmm1
; X86-SSE-NEXT: cvttsd2si %xmm1, %eax
; X86-SSE-NEXT: # kill: def $al killed $al killed $eax
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i1_f64:
; X64: # %bb.0:
; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT: maxsd %xmm0, %xmm1
; X64-NEXT: xorpd %xmm0, %xmm0
; X64-NEXT: minsd %xmm1, %xmm0
; X64-NEXT: cvttsd2si %xmm0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
  %x = call i1 @llvm.fptosi.sat.i1.f64(double %f)
  ret i1 %x
}
; Saturating signed conversion of a double to i8 through
; llvm.fptosi.sat.i8.f64; the converted value is returned directly.
;
; Expected code, per configuration:
;  - x87 only: the value is stored as an integer with fists, then compared
;    against two constant-pool bounds. The result is -128 when the input is
;    below the first bound, 127 when it is above the second, and 0 when the
;    input compares unordered with itself (fucomp %st(0); the xorl is
;    skipped only on jnp).
;  - SSE (32-bit and 64-bit): the input is clamped with maxsd/minsd against
;    two loaded constants and converted with cvttsd2si.
;
; NOTE(review): the assertion lines look script-generated
; (update_llc_test_checks.py style); regenerate them rather than editing by
; hand -- confirm against the file header.
define i8 @test_signed_i8_f64(double %f) nounwind {
; X86-X87-LABEL: test_signed_i8_f64:
; X86-X87: # %bb.0:
; X86-X87-NEXT: subl $8, %esp
; X86-X87-NEXT: fldl {{[0-9]+}}(%esp)
; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fists {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movb $-128, %dl
; X86-X87-NEXT: jb .LBB11_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %dl
; X86-X87-NEXT: .LBB11_2:
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movb $127, %cl
; X86-X87-NEXT: ja .LBB11_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl %edx, %ecx
; X86-X87-NEXT: .LBB11_4:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jnp .LBB11_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: .LBB11_6:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $8, %esp
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i8_f64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: maxsd {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; X86-SSE-NEXT: minsd %xmm0, %xmm1
; X86-SSE-NEXT: cvttsd2si %xmm1, %eax
; X86-SSE-NEXT: # kill: def $al killed $al killed $eax
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i8_f64:
; X64: # %bb.0:
; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT: maxsd %xmm0, %xmm1
; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: minsd %xmm1, %xmm0
; X64-NEXT: cvttsd2si %xmm0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
  %x = call i8 @llvm.fptosi.sat.i8.f64(double %f)
  ret i8 %x
}
; Saturating signed conversion from double to i13 through
; @llvm.fptosi.sat.i13.f64. The assertions below pin the generated code for
; three check prefixes (X86-X87, X86-SSE and X64).
;  * x87 path: the fists result is replaced by -4096 (0xF000) when the first
;    compare takes the jb branch, by 4095 (0xFFF) when the second compare takes
;    the ja branch, and by zero when the self-compare is unordered (jp).
;  * SSE paths: the input is clamped with maxsd/minsd before cvttsd2si.
; NOTE(review): the RUN lines that bind llc invocations to these prefixes are
; outside this chunk - confirm them before regenerating the assertions.
define i13 @test_signed_i13_f64(double %f) nounwind {
; X86-X87-LABEL: test_signed_i13_f64:
; X86-X87: # %bb.0:
; X86-X87-NEXT: subl $8, %esp
; X86-X87-NEXT: fldl {{[0-9]+}}(%esp)
; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fists {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movw $-4096, %cx # imm = 0xF000
; X86-X87-NEXT: jb .LBB12_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: .LBB12_2:
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $4095, %edx # imm = 0xFFF
; X86-X87-NEXT: ja .LBB12_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl %ecx, %edx
; X86-X87-NEXT: .LBB12_4:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jp .LBB12_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl %edx, %ecx
; X86-X87-NEXT: .LBB12_6:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $8, %esp
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i13_f64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: maxsd {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; X86-SSE-NEXT: minsd %xmm0, %xmm1
; X86-SSE-NEXT: cvttsd2si %xmm1, %eax
; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i13_f64:
; X64: # %bb.0:
; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT: maxsd %xmm0, %xmm1
; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: minsd %xmm1, %xmm0
; X64-NEXT: cvttsd2si %xmm0, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%x = call i13 @llvm.fptosi.sat.i13.f64(double %f)
ret i13 %x
}
; Saturating signed conversion from double to i16 through
; @llvm.fptosi.sat.i16.f64. The assertions below pin the generated code for
; three check prefixes (X86-X87, X86-SSE and X64).
;  * x87 path: the fists result is replaced by -32768 (0x8000) when the first
;    compare takes the jb branch, by 32767 (0x7FFF) when the second compare
;    takes the ja branch, and by zero when the self-compare is unordered (jp).
;  * SSE paths: the input is clamped with maxsd/minsd before cvttsd2si.
; NOTE(review): the RUN lines that bind llc invocations to these prefixes are
; outside this chunk - confirm them before regenerating the assertions.
define i16 @test_signed_i16_f64(double %f) nounwind {
; X86-X87-LABEL: test_signed_i16_f64:
; X86-X87: # %bb.0:
; X86-X87-NEXT: subl $8, %esp
; X86-X87-NEXT: fldl {{[0-9]+}}(%esp)
; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fists {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movw $-32768, %cx # imm = 0x8000
; X86-X87-NEXT: jb .LBB13_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: .LBB13_2:
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $32767, %edx # imm = 0x7FFF
; X86-X87-NEXT: ja .LBB13_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl %ecx, %edx
; X86-X87-NEXT: .LBB13_4:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jp .LBB13_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl %edx, %ecx
; X86-X87-NEXT: .LBB13_6:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $8, %esp
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i16_f64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: maxsd {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; X86-SSE-NEXT: minsd %xmm0, %xmm1
; X86-SSE-NEXT: cvttsd2si %xmm1, %eax
; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i16_f64:
; X64: # %bb.0:
; X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT: maxsd %xmm0, %xmm1
; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: minsd %xmm1, %xmm0
; X64-NEXT: cvttsd2si %xmm0, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%x = call i16 @llvm.fptosi.sat.i16.f64(double %f)
ret i16 %x
}
define i19 @test_signed_i19_f64(double %f) nounwind {
; X86-X87-LABEL: test_signed_i19_f64:
; X86-X87: # %bb.0:
; X86-X87-NEXT: subl $8, %esp
; X86-X87-NEXT: fldl {{[0-9]+}}(%esp)
; X86-X87-NEXT: fnstcw (%esp)
; X86-X87-NEXT: movzwl (%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fistl {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw (%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $-262144, %ecx # imm = 0xFFFC0000
; X86-X87-NEXT: jb .LBB14_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: .LBB14_2:
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $262143, %edx # imm = 0x3FFFF
; X86-X87-NEXT: ja .LBB14_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl %ecx, %edx
; X86-X87-NEXT: .LBB14_4:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jp .LBB14_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl %edx, %ecx
; X86-X87-NEXT: .LBB14_6:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $8, %esp
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i19_f64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: xorl %eax, %eax
; X86-SSE-NEXT: ucomisd %xmm0, %xmm0
; X86-SSE-NEXT: maxsd {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: minsd {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: cvttsd2si %xmm0, %ecx
; X86-SSE-NEXT: cmovnpl %ecx, %eax
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i19_f64:
; X64: # %bb.0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ucomisd %xmm0, %xmm0
; X64-NEXT: maxsd {{.*}}(%rip), %xmm0
; X64-NEXT: minsd {{.*}}(%rip), %xmm0
; X64-NEXT: cvttsd2si %xmm0, %ecx
; X64-NEXT: cmovnpl %ecx, %eax
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X64-NEXT: retq
%x = call i19 @llvm.fptosi.sat.i19.f64(double %f)
ret i19 %x
}
; Saturating signed conversion double -> i32 through @llvm.fptosi.sat.i32.f64.
; The x87 expectations select between the raw fistl result, the immediates
; 0x80000000 / 0x7FFFFFFF, and zero on an unordered self-compare (jp).
; The SSE and 64-bit expectations clamp with maxsd/minsd, convert with
; cvttsd2si, and keep the zeroed %eax unless the self-compare is ordered (cmovnp).
define i32 @test_signed_i32_f64(double %f) nounwind {
; X86-X87-LABEL: test_signed_i32_f64:
; X86-X87: # %bb.0:
; X86-X87-NEXT: subl $8, %esp
; X86-X87-NEXT: fldl {{[0-9]+}}(%esp)
; X86-X87-NEXT: fnstcw (%esp)
; X86-X87-NEXT: movzwl (%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fistl {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw (%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
; X86-X87-NEXT: jb .LBB15_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: .LBB15_2:
; X86-X87-NEXT: fldl {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF
; X86-X87-NEXT: ja .LBB15_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl %ecx, %edx
; X86-X87-NEXT: .LBB15_4:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jp .LBB15_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl %edx, %ecx
; X86-X87-NEXT: .LBB15_6:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $8, %esp
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i32_f64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: xorl %eax, %eax
; X86-SSE-NEXT: ucomisd %xmm0, %xmm0
; X86-SSE-NEXT: maxsd {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: minsd {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: cvttsd2si %xmm0, %ecx
; X86-SSE-NEXT: cmovnpl %ecx, %eax
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i32_f64:
; X64: # %bb.0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ucomisd %xmm0, %xmm0
; X64-NEXT: maxsd {{.*}}(%rip), %xmm0
; X64-NEXT: minsd {{.*}}(%rip), %xmm0
; X64-NEXT: cvttsd2si %xmm0, %ecx
; X64-NEXT: cmovnpl %ecx, %eax
; X64-NEXT: retq
  %x = call i32 @llvm.fptosi.sat.i32.f64(double %f)
  ret i32 %x
}
; Saturating signed conversion double -> i50 through @llvm.fptosi.sat.i50.f64.
; Both 32-bit expectations convert with fistpll and then select each 32-bit
; half separately: the immediates are 0 / 0xFFFE0000 for the lower bound and
; -1 / 0x1FFFF for the upper bound (the halves of -2^49 and 2^49-1), and both
; halves are zeroed on an unordered self-compare.
; The 64-bit expectation clamps with maxsd/minsd and converts with a single
; 64-bit cvttsd2si.
define i50 @test_signed_i50_f64(double %f) nounwind {
; X86-X87-LABEL: test_signed_i50_f64:
; X86-X87: # %bb.0:
; X86-X87-NEXT: pushl %edi
; X86-X87-NEXT: pushl %esi
; X86-X87-NEXT: subl $20, %esp
; X86-X87-NEXT: fldl {{[0-9]+}}(%esp)
; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fld %st(0)
; X86-X87-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: jb .LBB16_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-X87-NEXT: .LBB16_2:
; X86-X87-NEXT: movl $-131072, %edi # imm = 0xFFFE0000
; X86-X87-NEXT: jb .LBB16_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-X87-NEXT: .LBB16_4:
; X86-X87-NEXT: fldl {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $131071, %esi # imm = 0x1FFFF
; X86-X87-NEXT: ja .LBB16_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl %edi, %esi
; X86-X87-NEXT: .LBB16_6:
; X86-X87-NEXT: movl $-1, %edi
; X86-X87-NEXT: ja .LBB16_8
; X86-X87-NEXT: # %bb.7:
; X86-X87-NEXT: movl %edx, %edi
; X86-X87-NEXT: .LBB16_8:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: jp .LBB16_10
; X86-X87-NEXT: # %bb.9:
; X86-X87-NEXT: movl %edi, %ecx
; X86-X87-NEXT: movl %esi, %edx
; X86-X87-NEXT: .LBB16_10:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $20, %esp
; X86-X87-NEXT: popl %esi
; X86-X87-NEXT: popl %edi
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i50_f64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %esi
; X86-SSE-NEXT: subl $16, %esp
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldl {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: orl $3072, %eax # imm = 0xC00
; X86-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: xorl %ecx, %ecx
; X86-SSE-NEXT: ucomisd {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE-NEXT: cmovbl %ecx, %esi
; X86-SSE-NEXT: movl $-131072, %eax # imm = 0xFFFE0000
; X86-SSE-NEXT: cmovael {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: ucomisd {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: movl $131071, %edx # imm = 0x1FFFF
; X86-SSE-NEXT: cmovbel %eax, %edx
; X86-SSE-NEXT: movl $-1, %eax
; X86-SSE-NEXT: cmovbel %esi, %eax
; X86-SSE-NEXT: ucomisd %xmm0, %xmm0
; X86-SSE-NEXT: cmovpl %ecx, %eax
; X86-SSE-NEXT: cmovpl %ecx, %edx
; X86-SSE-NEXT: addl $16, %esp
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i50_f64:
; X64: # %bb.0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ucomisd %xmm0, %xmm0
; X64-NEXT: maxsd {{.*}}(%rip), %xmm0
; X64-NEXT: minsd {{.*}}(%rip), %xmm0
; X64-NEXT: cvttsd2si %xmm0, %rcx
; X64-NEXT: cmovnpq %rcx, %rax
; X64-NEXT: retq
  %x = call i50 @llvm.fptosi.sat.i50.f64(double %f)
  ret i50 %x
}
define i64 @test_signed_i64_f64(double %f) nounwind {
; X86-X87-LABEL: test_signed_i64_f64:
; X86-X87: # %bb.0:
; X86-X87-NEXT: pushl %edi
; X86-X87-NEXT: pushl %esi
; X86-X87-NEXT: subl $20, %esp
; X86-X87-NEXT: fldl {{[0-9]+}}(%esp)
; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fld %st(0)
; X86-X87-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: jb .LBB17_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-X87-NEXT: .LBB17_2:
; X86-X87-NEXT: movl $-2147483648, %edi # imm = 0x80000000
; X86-X87-NEXT: jb .LBB17_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-X87-NEXT: .LBB17_4:
; X86-X87-NEXT: fldl {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $2147483647, %esi # imm = 0x7FFFFFFF
; X86-X87-NEXT: ja .LBB17_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl %edi, %esi
; X86-X87-NEXT: .LBB17_6:
; X86-X87-NEXT: movl $-1, %edi
; X86-X87-NEXT: ja .LBB17_8
; X86-X87-NEXT: # %bb.7:
; X86-X87-NEXT: movl %edx, %edi
; X86-X87-NEXT: .LBB17_8:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: jp .LBB17_10
; X86-X87-NEXT: # %bb.9:
; X86-X87-NEXT: movl %edi, %ecx
; X86-X87-NEXT: movl %esi, %edx
; X86-X87-NEXT: .LBB17_10:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $20, %esp
; X86-X87-NEXT: popl %esi
; X86-X87-NEXT: popl %edi
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i64_f64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %esi
; X86-SSE-NEXT: subl $16, %esp
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldl {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: orl $3072, %eax # imm = 0xC00
; X86-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: xorl %ecx, %ecx
; X86-SSE-NEXT: ucomisd {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE-NEXT: cmovbl %ecx, %esi
; X86-SSE-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; X86-SSE-NEXT: cmovael {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: ucomisd {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-SSE-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF
; X86-SSE-NEXT: cmovbel %eax, %edx
; X86-SSE-NEXT: movl $-1, %eax
; X86-SSE-NEXT: cmovbel %esi, %eax
; X86-SSE-NEXT: ucomisd %xmm0, %xmm0
; X86-SSE-NEXT: cmovpl %ecx, %eax
; X86-SSE-NEXT: cmovpl %ecx, %edx
; X86-SSE-NEXT: addl $16, %esp
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i64_f64:
; X64: # %bb.0:
; X64-NEXT: cvttsd2si %xmm0, %rax
; X64-NEXT: ucomisd {{.*}}(%rip), %xmm0
; X64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
; X64-NEXT: cmovbeq %rax, %rcx
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ucomisd %xmm0, %xmm0
; X64-NEXT: cmovnpq %rcx, %rax
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X64-NEXT: retq
%x = call i64 @llvm.fptosi.sat.i64.f64(double %f)
ret i64 %x
}
; Saturating signed conversion of a double to i100 through @llvm.fptosi.sat.
; Every checked configuration converts via the __fixdfti libcall and then
; clamps the result with compare/select (or branch) sequences:
;   * below the lower bound the low words become 0 and the top part becomes
;     the minimum (-8 in bits 96-99 on the 32-bit runs, 0xFFFFFFF800000000 in
;     the upper 64 bits on the 64-bit run);
;   * above the upper bound the low words become -1 and the top part becomes
;     the maximum (7, respectively 0x7FFFFFFFF);
;   * an unordered self-compare (NaN input) forces the whole result to zero.
; NOTE(review): the assertions below look autogenerated
; (utils/update_llc_test_checks.py style) - prefer regenerating them over
; editing by hand.
define i100 @test_signed_i100_f64(double %f) nounwind {
; X86-X87-LABEL: test_signed_i100_f64:
; X86-X87: # %bb.0:
; X86-X87-NEXT: pushl %ebp
; X86-X87-NEXT: pushl %ebx
; X86-X87-NEXT: pushl %edi
; X86-X87-NEXT: pushl %esi
; X86-X87-NEXT: subl $60, %esp
; X86-X87-NEXT: fldl {{[0-9]+}}(%esp)
; X86-X87-NEXT: fstl {{[0-9]+}}(%esp)
; X86-X87-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: movl %eax, (%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fstl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
; X86-X87-NEXT: fucompp
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: movl %eax, %ebx
; X86-X87-NEXT: calll __fixdfti
; X86-X87-NEXT: subl $4, %esp
; X86-X87-NEXT: xorl %edx, %edx
; X86-X87-NEXT: movb %bh, %ah
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $-8, %ebx
; X86-X87-NEXT: jb .LBB18_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-X87-NEXT: .LBB18_2:
; X86-X87-NEXT: movl $0, %ecx
; X86-X87-NEXT: movl $0, %ebp
; X86-X87-NEXT: jb .LBB18_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-X87-NEXT: .LBB18_4:
; X86-X87-NEXT: movl $0, %edi
; X86-X87-NEXT: jb .LBB18_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-X87-NEXT: .LBB18_6:
; X86-X87-NEXT: fldl {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $-1, %eax
; X86-X87-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT: movl $-1, %esi
; X86-X87-NEXT: ja .LBB18_8
; X86-X87-NEXT: # %bb.7:
; X86-X87-NEXT: movl %edi, %eax
; X86-X87-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl %ecx, %esi
; X86-X87-NEXT: .LBB18_8:
; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: movl $7, %edi
; X86-X87-NEXT: ja .LBB18_10
; X86-X87-NEXT: # %bb.9:
; X86-X87-NEXT: movl %ebx, %edi
; X86-X87-NEXT: .LBB18_10:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %eax
; X86-X87-NEXT: movl $0, %ebp
; X86-X87-NEXT: movl $0, %ebx
; X86-X87-NEXT: jp .LBB18_12
; X86-X87-NEXT: # %bb.11:
; X86-X87-NEXT: movl %edi, %edx
; X86-X87-NEXT: movl %esi, %eax
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-X87-NEXT: .LBB18_12:
; X86-X87-NEXT: movl %ebx, 8(%ecx)
; X86-X87-NEXT: movl %ebp, 4(%ecx)
; X86-X87-NEXT: movl %eax, (%ecx)
; X86-X87-NEXT: andl $15, %edx
; X86-X87-NEXT: movb %dl, 12(%ecx)
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $60, %esp
; X86-X87-NEXT: popl %esi
; X86-X87-NEXT: popl %edi
; X86-X87-NEXT: popl %ebx
; X86-X87-NEXT: popl %ebp
; X86-X87-NEXT: retl $4
;
; X86-SSE-LABEL: test_signed_i100_f64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %ebp
; X86-SSE-NEXT: pushl %ebx
; X86-SSE-NEXT: pushl %edi
; X86-SSE-NEXT: pushl %esi
; X86-SSE-NEXT: subl $44, %esp
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl %eax, (%esp)
; X86-SSE-NEXT: calll __fixdfti
; X86-SSE-NEXT: subl $4, %esp
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: xorl %ebp, %ebp
; X86-SSE-NEXT: ucomisd {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: movl $-8, %ebx
; X86-SSE-NEXT: movl $0, %ecx
; X86-SSE-NEXT: movl $0, %edx
; X86-SSE-NEXT: movl $0, %edi
; X86-SSE-NEXT: jb .LBB18_2
; X86-SSE-NEXT: # %bb.1:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-SSE-NEXT: .LBB18_2:
; X86-SSE-NEXT: ucomisd {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: movl $-1, %eax
; X86-SSE-NEXT: cmoval %eax, %edi
; X86-SSE-NEXT: cmoval %eax, %edx
; X86-SSE-NEXT: cmoval %eax, %ecx
; X86-SSE-NEXT: movl $7, %eax
; X86-SSE-NEXT: cmovbel %ebx, %eax
; X86-SSE-NEXT: ucomisd %xmm0, %xmm0
; X86-SSE-NEXT: cmovpl %ebp, %eax
; X86-SSE-NEXT: cmovpl %ebp, %ecx
; X86-SSE-NEXT: cmovpl %ebp, %edx
; X86-SSE-NEXT: cmovpl %ebp, %edi
; X86-SSE-NEXT: movl %edi, 8(%esi)
; X86-SSE-NEXT: movl %edx, 4(%esi)
; X86-SSE-NEXT: movl %ecx, (%esi)
; X86-SSE-NEXT: andl $15, %eax
; X86-SSE-NEXT: movb %al, 12(%esi)
; X86-SSE-NEXT: movl %esi, %eax
; X86-SSE-NEXT: addl $44, %esp
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: popl %edi
; X86-SSE-NEXT: popl %ebx
; X86-SSE-NEXT: popl %ebp
; X86-SSE-NEXT: retl $4
;
; X64-LABEL: test_signed_i100_f64:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
; X64-NEXT: callq __fixdfti@PLT
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; X64-NEXT: # xmm0 = mem[0],zero
; X64-NEXT: ucomisd {{.*}}(%rip), %xmm0
; X64-NEXT: cmovbq %rcx, %rax
; X64-NEXT: movabsq $-34359738368, %rsi # imm = 0xFFFFFFF800000000
; X64-NEXT: cmovbq %rsi, %rdx
; X64-NEXT: ucomisd {{.*}}(%rip), %xmm0
; X64-NEXT: movabsq $34359738367, %rsi # imm = 0x7FFFFFFFF
; X64-NEXT: cmovaq %rsi, %rdx
; X64-NEXT: movq $-1, %rsi
; X64-NEXT: cmovaq %rsi, %rax
; X64-NEXT: ucomisd %xmm0, %xmm0
; X64-NEXT: cmovpq %rcx, %rax
; X64-NEXT: cmovpq %rcx, %rdx
; X64-NEXT: popq %rcx
; X64-NEXT: retq
  %x = call i100 @llvm.fptosi.sat.i100.f64(double %f)
  ret i100 %x
}
; fptosi.sat double -> i128.
; In all three checked configurations the raw conversion is a call to
; __fixdfti; the result is then combined with the saturation bounds and with
; zero (parity/unordered case) based on floating-point compares of the input.
; NOTE(review): the CHECK lines look autogenerated (update_llc_test_checks.py
; style) - presumably they should be regenerated rather than edited by hand.
define i128 @test_signed_i128_f64(double %f) nounwind {
; X86-X87-LABEL: test_signed_i128_f64:
; X86-X87: # %bb.0:
; X86-X87-NEXT: pushl %ebp
; X86-X87-NEXT: pushl %ebx
; X86-X87-NEXT: pushl %edi
; X86-X87-NEXT: pushl %esi
; X86-X87-NEXT: subl $60, %esp
; X86-X87-NEXT: fldl {{[0-9]+}}(%esp)
; X86-X87-NEXT: fstl {{[0-9]+}}(%esp)
; X86-X87-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: movl %eax, (%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fstl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
; X86-X87-NEXT: fucompp
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: movl %eax, %ebx
; X86-X87-NEXT: calll __fixdfti
; X86-X87-NEXT: subl $4, %esp
; X86-X87-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT: movb %bh, %ah
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %eax
; X86-X87-NEXT: movl $0, %ebx
; X86-X87-NEXT: jae .LBB19_1
; X86-X87-NEXT: # %bb.2:
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: jae .LBB19_3
; X86-X87-NEXT: .LBB19_4:
; X86-X87-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
; X86-X87-NEXT: jb .LBB19_6
; X86-X87-NEXT: .LBB19_5:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: .LBB19_6:
; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: fldl {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
; X86-X87-NEXT: ja .LBB19_8
; X86-X87-NEXT: # %bb.7:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: .LBB19_8:
; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: movl $-1, %ebp
; X86-X87-NEXT: movl $-1, %edi
; X86-X87-NEXT: movl $-1, %esi
; X86-X87-NEXT: ja .LBB19_10
; X86-X87-NEXT: # %bb.9:
; X86-X87-NEXT: movl %edx, %ebp
; X86-X87-NEXT: movl %ebx, %edi
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-X87-NEXT: .LBB19_10:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %eax
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: movl $0, %ebx
; X86-X87-NEXT: jp .LBB19_12
; X86-X87-NEXT: # %bb.11:
; X86-X87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl %edi, %eax
; X86-X87-NEXT: movl %ebp, %edx
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-X87-NEXT: .LBB19_12:
; X86-X87-NEXT: movl %ebx, 12(%ecx)
; X86-X87-NEXT: movl %edx, 8(%ecx)
; X86-X87-NEXT: movl %eax, 4(%ecx)
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-X87-NEXT: movl %eax, (%ecx)
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $60, %esp
; X86-X87-NEXT: popl %esi
; X86-X87-NEXT: popl %edi
; X86-X87-NEXT: popl %ebx
; X86-X87-NEXT: popl %ebp
; X86-X87-NEXT: retl $4
; X86-X87-NEXT: .LBB19_1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: jb .LBB19_4
; X86-X87-NEXT: .LBB19_3:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-X87-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
; X86-X87-NEXT: jae .LBB19_5
; X86-X87-NEXT: jmp .LBB19_6
;
; X86-SSE-LABEL: test_signed_i128_f64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %ebp
; X86-SSE-NEXT: pushl %ebx
; X86-SSE-NEXT: pushl %edi
; X86-SSE-NEXT: pushl %esi
; X86-SSE-NEXT: subl $44, %esp
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl %eax, (%esp)
; X86-SSE-NEXT: calll __fixdfti
; X86-SSE-NEXT: subl $4, %esp
; X86-SSE-NEXT: xorl %ecx, %ecx
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: ucomisd {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: cmovbl %ecx, %eax
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE-NEXT: cmovbl %ecx, %edx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-SSE-NEXT: cmovbl %ecx, %edi
; X86-SSE-NEXT: movl $-2147483648, %ebx # imm = 0x80000000
; X86-SSE-NEXT: cmovael {{[0-9]+}}(%esp), %ebx
; X86-SSE-NEXT: ucomisd {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: movl $2147483647, %ebp # imm = 0x7FFFFFFF
; X86-SSE-NEXT: cmovbel %ebx, %ebp
; X86-SSE-NEXT: movl $-1, %ebx
; X86-SSE-NEXT: cmoval %ebx, %edi
; X86-SSE-NEXT: cmoval %ebx, %edx
; X86-SSE-NEXT: cmoval %ebx, %eax
; X86-SSE-NEXT: ucomisd %xmm0, %xmm0
; X86-SSE-NEXT: cmovpl %ecx, %eax
; X86-SSE-NEXT: cmovpl %ecx, %edx
; X86-SSE-NEXT: cmovpl %ecx, %edi
; X86-SSE-NEXT: cmovpl %ecx, %ebp
; X86-SSE-NEXT: movl %ebp, 12(%esi)
; X86-SSE-NEXT: movl %edi, 8(%esi)
; X86-SSE-NEXT: movl %edx, 4(%esi)
; X86-SSE-NEXT: movl %eax, (%esi)
; X86-SSE-NEXT: movl %esi, %eax
; X86-SSE-NEXT: addl $44, %esp
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: popl %edi
; X86-SSE-NEXT: popl %ebx
; X86-SSE-NEXT: popl %ebp
; X86-SSE-NEXT: retl $4
;
; X64-LABEL: test_signed_i128_f64:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
; X64-NEXT: callq __fixdfti@PLT
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; X64-NEXT: # xmm0 = mem[0],zero
; X64-NEXT: ucomisd {{.*}}(%rip), %xmm0
; X64-NEXT: cmovbq %rcx, %rax
; X64-NEXT: movabsq $-9223372036854775808, %rsi # imm = 0x8000000000000000
; X64-NEXT: cmovbq %rsi, %rdx
; X64-NEXT: ucomisd {{.*}}(%rip), %xmm0
; X64-NEXT: movabsq $9223372036854775807, %rsi # imm = 0x7FFFFFFFFFFFFFFF
; X64-NEXT: cmovaq %rsi, %rdx
; X64-NEXT: movq $-1, %rsi
; X64-NEXT: cmovaq %rsi, %rax
; X64-NEXT: ucomisd %xmm0, %xmm0
; X64-NEXT: cmovpq %rcx, %rax
; X64-NEXT: cmovpq %rcx, %rdx
; X64-NEXT: popq %rcx
; X64-NEXT: retq
 %x = call i128 @llvm.fptosi.sat.i128.f64(double %f)
 ret i128 %x
}
;
; 16-bit float to signed integer
;
; Overloads of llvm.fptosi.sat taking a half operand, one per integer result
; width declared in this section (i1 through i128).
declare i1 @llvm.fptosi.sat.i1.f16(half)
declare i8 @llvm.fptosi.sat.i8.f16(half)
declare i13 @llvm.fptosi.sat.i13.f16(half)
declare i16 @llvm.fptosi.sat.i16.f16(half)
declare i19 @llvm.fptosi.sat.i19.f16(half)
declare i32 @llvm.fptosi.sat.i32.f16(half)
declare i50 @llvm.fptosi.sat.i50.f16(half)
declare i64 @llvm.fptosi.sat.i64.f16(half)
declare i100 @llvm.fptosi.sat.i100.f16(half)
declare i128 @llvm.fptosi.sat.i128.f16(half)
; fptosi.sat half -> i1.
; In every checked configuration the half argument is first passed through a
; call to __gnu_h2f_ieee. The X86-X87 checks then compare the value against
; -1.0 (fld1/fchs) and 0.0 (fldz) and branch to pick the result; the X86-SSE
; and X64 checks clamp with maxss/minss before converting with cvttss2si.
; NOTE(review): the CHECK lines look autogenerated (update_llc_test_checks.py
; style) - presumably they should be regenerated rather than edited by hand.
define i1 @test_signed_i1_f16(half %f) nounwind {
; X86-X87-LABEL: test_signed_i1_f16:
; X86-X87: # %bb.0:
; X86-X87-NEXT: pushl %ebx
; X86-X87-NEXT: subl $24, %esp
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: movl %eax, (%esp)
; X86-X87-NEXT: calll __gnu_h2f_ieee
; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fists {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fld1
; X86-X87-NEXT: fchs
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movb $-1, %dl
; X86-X87-NEXT: jb .LBB20_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %dl
; X86-X87-NEXT: .LBB20_2:
; X86-X87-NEXT: fldz
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %ebx
; X86-X87-NEXT: ja .LBB20_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl %edx, %ebx
; X86-X87-NEXT: .LBB20_4:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jp .LBB20_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl %ebx, %ecx
; X86-X87-NEXT: .LBB20_6:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $24, %esp
; X86-X87-NEXT: popl %ebx
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i1_f16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl %eax, (%esp)
; X86-SSE-NEXT: calll __gnu_h2f_ieee
; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: maxss {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: xorps %xmm1, %xmm1
; X86-SSE-NEXT: minss %xmm0, %xmm1
; X86-SSE-NEXT: cvttss2si %xmm1, %eax
; X86-SSE-NEXT: # kill: def $al killed $al killed $eax
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i1_f16:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: movzwl %di, %edi
; X64-NEXT: callq __gnu_h2f_ieee@PLT
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: maxss %xmm0, %xmm1
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: minss %xmm1, %xmm0
; X64-NEXT: cvttss2si %xmm0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
 %x = call i1 @llvm.fptosi.sat.i1.f16(half %f)
 ret i1 %x
}
define i8 @test_signed_i8_f16(half %f) nounwind {
; X86-X87-LABEL: test_signed_i8_f16:
; X86-X87: # %bb.0:
; X86-X87-NEXT: subl $12, %esp
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: movl %eax, (%esp)
; X86-X87-NEXT: calll __gnu_h2f_ieee
; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fists {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movb $-128, %dl
; X86-X87-NEXT: jb .LBB21_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %dl
; X86-X87-NEXT: .LBB21_2:
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movb $127, %cl
; X86-X87-NEXT: ja .LBB21_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl %edx, %ecx
; X86-X87-NEXT: .LBB21_4:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jnp .LBB21_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: .LBB21_6:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $12, %esp
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i8_f16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl %eax, (%esp)
; X86-SSE-NEXT: calll __gnu_h2f_ieee
; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: maxss {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: minss %xmm0, %xmm1
; X86-SSE-NEXT: cvttss2si %xmm1, %eax
; X86-SSE-NEXT: # kill: def $al killed $al killed $eax
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i8_f16:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: movzwl %di, %edi
; X64-NEXT: callq __gnu_h2f_ieee@PLT
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: maxss %xmm0, %xmm1
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: minss %xmm1, %xmm0
; X64-NEXT: cvttss2si %xmm0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
%x = call i8 @llvm.fptosi.sat.i8.f16(half %f)
ret i8 %x
}
; Saturating signed conversion of a half value to i13 through
; @llvm.fptosi.sat.i13.f16. In every checked configuration the half argument
; is first widened by a call to __gnu_h2f_ieee. The x87 sequence selects
; -4096 / 4095 as the saturation bounds and produces 0 when the input compares
; unordered with itself; the SSE and 64-bit sequences clamp with maxss/minss
; before cvttss2si.
define i13 @test_signed_i13_f16(half %f) nounwind {
; X86-X87-LABEL: test_signed_i13_f16:
; X86-X87: # %bb.0:
; X86-X87-NEXT: subl $12, %esp
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: movl %eax, (%esp)
; X86-X87-NEXT: calll __gnu_h2f_ieee
; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fists {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movw $-4096, %cx # imm = 0xF000
; X86-X87-NEXT: jb .LBB22_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: .LBB22_2:
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $4095, %edx # imm = 0xFFF
; X86-X87-NEXT: ja .LBB22_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl %ecx, %edx
; X86-X87-NEXT: .LBB22_4:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jp .LBB22_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl %edx, %ecx
; X86-X87-NEXT: .LBB22_6:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $12, %esp
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i13_f16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl %eax, (%esp)
; X86-SSE-NEXT: calll __gnu_h2f_ieee
; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: maxss {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: minss %xmm0, %xmm1
; X86-SSE-NEXT: cvttss2si %xmm1, %eax
; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i13_f16:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: movzwl %di, %edi
; X64-NEXT: callq __gnu_h2f_ieee@PLT
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: maxss %xmm0, %xmm1
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: minss %xmm1, %xmm0
; X64-NEXT: cvttss2si %xmm0, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
  %x = call i13 @llvm.fptosi.sat.i13.f16(half %f)
  ret i13 %x
}
; Saturating signed conversion of a half value to i16 through
; @llvm.fptosi.sat.i16.f16. In every checked configuration the half argument
; is first widened by a call to __gnu_h2f_ieee. The x87 sequence selects
; -32768 / 32767 as the saturation bounds and produces 0 when the input
; compares unordered with itself; the SSE and 64-bit sequences clamp with
; maxss/minss before cvttss2si.
define i16 @test_signed_i16_f16(half %f) nounwind {
; X86-X87-LABEL: test_signed_i16_f16:
; X86-X87: # %bb.0:
; X86-X87-NEXT: subl $12, %esp
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: movl %eax, (%esp)
; X86-X87-NEXT: calll __gnu_h2f_ieee
; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fists {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movw $-32768, %cx # imm = 0x8000
; X86-X87-NEXT: jb .LBB23_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: .LBB23_2:
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $32767, %edx # imm = 0x7FFF
; X86-X87-NEXT: ja .LBB23_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl %ecx, %edx
; X86-X87-NEXT: .LBB23_4:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jp .LBB23_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl %edx, %ecx
; X86-X87-NEXT: .LBB23_6:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $12, %esp
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i16_f16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl %eax, (%esp)
; X86-SSE-NEXT: calll __gnu_h2f_ieee
; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: maxss {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: minss %xmm0, %xmm1
; X86-SSE-NEXT: cvttss2si %xmm1, %eax
; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i16_f16:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: movzwl %di, %edi
; X64-NEXT: callq __gnu_h2f_ieee@PLT
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: maxss %xmm0, %xmm1
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: minss %xmm1, %xmm0
; X64-NEXT: cvttss2si %xmm0, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
  %x = call i16 @llvm.fptosi.sat.i16.f16(half %f)
  ret i16 %x
}
define i19 @test_signed_i19_f16(half %f) nounwind {
; X86-X87-LABEL: test_signed_i19_f16:
; X86-X87: # %bb.0:
; X86-X87-NEXT: subl $12, %esp
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: movl %eax, (%esp)
; X86-X87-NEXT: calll __gnu_h2f_ieee
; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fistl {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $-262144, %ecx # imm = 0xFFFC0000
; X86-X87-NEXT: jb .LBB24_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: .LBB24_2:
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $262143, %edx # imm = 0x3FFFF
; X86-X87-NEXT: ja .LBB24_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl %ecx, %edx
; X86-X87-NEXT: .LBB24_4:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jp .LBB24_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl %edx, %ecx
; X86-X87-NEXT: .LBB24_6:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $12, %esp
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i19_f16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl %eax, (%esp)
; X86-SSE-NEXT: calll __gnu_h2f_ieee
; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: xorl %eax, %eax
; X86-SSE-NEXT: ucomiss %xmm0, %xmm0
; X86-SSE-NEXT: maxss {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: minss {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: cvttss2si %xmm0, %ecx
; X86-SSE-NEXT: cmovnpl %ecx, %eax
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i19_f16:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: movzwl %di, %edi
; X64-NEXT: callq __gnu_h2f_ieee@PLT
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ucomiss %xmm0, %xmm0
; X64-NEXT: maxss {{.*}}(%rip), %xmm0
; X64-NEXT: minss {{.*}}(%rip), %xmm0
; X64-NEXT: cvttss2si %xmm0, %ecx
; X64-NEXT: cmovnpl %ecx, %eax
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X64-NEXT: popq %rcx
; X64-NEXT: retq
%x = call i19 @llvm.fptosi.sat.i19.f16(half %f)
ret i19 %x
}
; Saturating signed conversion of a half to i32 through the
; llvm.fptosi.sat.i32.f16 intrinsic. The expected code is checked under three
; prefixes (X86-X87, X86-SSE and X64). In each variant the argument is first
; handed to __gnu_h2f_ieee and the conversion is then done on the returned
; value. The x87 sequence selects 0x80000000 / 0x7FFFFFFF around the converted
; value; the SSE and 64-bit sequences show an explicit 0x7FFFFFFF select only.
; Every variant finishes with an unordered self-compare (jp / cmovnp) that
; yields 0, i.e. the NaN case of the intrinsic.
; NOTE(review): the assertions look tool-generated - presumably they should be
; regenerated with the update script instead of edited by hand; confirm against
; the note at the top of the file.
define i32 @test_signed_i32_f16(half %f) nounwind {
; X86-X87-LABEL: test_signed_i32_f16:
; X86-X87: # %bb.0:
; X86-X87-NEXT: subl $12, %esp
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: movl %eax, (%esp)
; X86-X87-NEXT: calll __gnu_h2f_ieee
; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fistl {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
; X86-X87-NEXT: jb .LBB25_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: .LBB25_2:
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF
; X86-X87-NEXT: ja .LBB25_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl %ecx, %edx
; X86-X87-NEXT: .LBB25_4:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jp .LBB25_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl %edx, %ecx
; X86-X87-NEXT: .LBB25_6:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $12, %esp
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i32_f16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl %eax, (%esp)
; X86-SSE-NEXT: calll __gnu_h2f_ieee
; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: cvttss2si %xmm0, %eax
; X86-SSE-NEXT: ucomiss {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF
; X86-SSE-NEXT: cmovbel %eax, %ecx
; X86-SSE-NEXT: xorl %eax, %eax
; X86-SSE-NEXT: ucomiss %xmm0, %xmm0
; X86-SSE-NEXT: cmovnpl %ecx, %eax
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i32_f16:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: movzwl %di, %edi
; X64-NEXT: callq __gnu_h2f_ieee@PLT
; X64-NEXT: cvttss2si %xmm0, %eax
; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0
; X64-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF
; X64-NEXT: cmovbel %eax, %ecx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ucomiss %xmm0, %xmm0
; X64-NEXT: cmovnpl %ecx, %eax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
%x = call i32 @llvm.fptosi.sat.i32.f16(half %f)
ret i32 %x
}
define i50 @test_signed_i50_f16(half %f) nounwind {
; X86-X87-LABEL: test_signed_i50_f16:
; X86-X87: # %bb.0:
; X86-X87-NEXT: pushl %edi
; X86-X87-NEXT: pushl %esi
; X86-X87-NEXT: subl $20, %esp
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: movl %eax, (%esp)
; X86-X87-NEXT: calll __gnu_h2f_ieee
; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fld %st(0)
; X86-X87-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: jb .LBB26_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-X87-NEXT: .LBB26_2:
; X86-X87-NEXT: movl $-131072, %edi # imm = 0xFFFE0000
; X86-X87-NEXT: jb .LBB26_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-X87-NEXT: .LBB26_4:
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $131071, %esi # imm = 0x1FFFF
; X86-X87-NEXT: ja .LBB26_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl %edi, %esi
; X86-X87-NEXT: .LBB26_6:
; X86-X87-NEXT: movl $-1, %edi
; X86-X87-NEXT: ja .LBB26_8
; X86-X87-NEXT: # %bb.7:
; X86-X87-NEXT: movl %edx, %edi
; X86-X87-NEXT: .LBB26_8:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: jp .LBB26_10
; X86-X87-NEXT: # %bb.9:
; X86-X87-NEXT: movl %edi, %ecx
; X86-X87-NEXT: movl %esi, %edx
; X86-X87-NEXT: .LBB26_10:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $20, %esp
; X86-X87-NEXT: popl %esi
; X86-X87-NEXT: popl %edi
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i50_f16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %esi
; X86-SSE-NEXT: subl $24, %esp
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl %eax, (%esp)
; X86-SSE-NEXT: calll __gnu_h2f_ieee
; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: flds {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: orl $3072, %eax # imm = 0xC00
; X86-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: xorl %ecx, %ecx
; X86-SSE-NEXT: ucomiss {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptosi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE-NEXT: cmovbl %ecx, %esi
; X86-SSE-NEXT: movl $-131072, %eax # imm = 0xFFFE0000
; X86-SSE-NEXT: cmovael {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: ucomiss {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptosi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-SSE-NEXT: movl $131071, %edx # imm = 0x1FFFF
; X86-SSE-NEXT: cmovbel %eax, %edx
; X86-SSE-NEXT: movl $-1, %eax
; X86-SSE-NEXT: cmovbel %esi, %eax
; X86-SSE-NEXT: ucomiss %xmm0, %xmm0
; X86-SSE-NEXT: cmovpl %ecx, %eax
; X86-SSE-NEXT: cmovpl %ecx, %edx
; X86-SSE-NEXT: addl $24, %esp
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i50_f16:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: movzwl %di, %edi
; X64-NEXT: callq __gnu_h2f_ieee@PLT
; X64-NEXT: cvttss2si %xmm0, %rax
; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0
; X64-NEXT: movabsq $-562949953421312, %rcx # imm = 0xFFFE000000000000
; X64-NEXT: cmovaeq %rax, %rcx
; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0
; X64-NEXT: movabsq $562949953421311, %rdx # imm = 0x1FFFFFFFFFFFF
; X64-NEXT: cmovbeq %rcx, %rdx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ucomiss %xmm0, %xmm0
; X64-NEXT: cmovnpq %rdx, %rax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
%x = call i50 @llvm.fptosi.sat.i50.f16(half %f)
ret i50 %x
}
; Saturating signed conversion half -> i64.
;
; The IR body is a single call to @llvm.fptosi.sat.i64.f16 whose result is
; returned directly. The CHECK lines pin the code llc emits for that call
; under three prefixes:
;   X86-X87: the half is widened through __gnu_h2f_ieee and converted with
;            fistpll while the x87 control word has bits 0xC00 set. The two
;            32-bit result halves are then picked with branches:
;            0x80000000:0x00000000 when the first compare is "below",
;            0x7FFFFFFF:0xFFFFFFFF when the second compare is "above", and
;            zero when the value compares unordered with itself.
;   X86-SSE: same conversion path; the selects are cmovs on ucomiss flags.
;   X64:     cvttss2si, a cmovbe clamp to 0x7FFFFFFFFFFFFFFF, and a cmovnp
;            that leaves zero when the value is unordered with itself.
; The constants compared against are constant-pool loads matched by regex,
; so their values are not pinned here.
; NOTE(review): the assertions look autogenerated (update_llc_test_checks.py
; style) -- regenerate them rather than hand-editing; confirm against the
; file header.
define i64 @test_signed_i64_f16(half %f) nounwind {
; X86-X87-LABEL: test_signed_i64_f16:
; X86-X87: # %bb.0:
; X86-X87-NEXT: pushl %edi
; X86-X87-NEXT: pushl %esi
; X86-X87-NEXT: subl $20, %esp
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: movl %eax, (%esp)
; X86-X87-NEXT: calll __gnu_h2f_ieee
; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fld %st(0)
; X86-X87-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: jb .LBB27_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-X87-NEXT: .LBB27_2:
; X86-X87-NEXT: movl $-2147483648, %edi # imm = 0x80000000
; X86-X87-NEXT: jb .LBB27_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-X87-NEXT: .LBB27_4:
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $2147483647, %esi # imm = 0x7FFFFFFF
; X86-X87-NEXT: ja .LBB27_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl %edi, %esi
; X86-X87-NEXT: .LBB27_6:
; X86-X87-NEXT: movl $-1, %edi
; X86-X87-NEXT: ja .LBB27_8
; X86-X87-NEXT: # %bb.7:
; X86-X87-NEXT: movl %edx, %edi
; X86-X87-NEXT: .LBB27_8:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: jp .LBB27_10
; X86-X87-NEXT: # %bb.9:
; X86-X87-NEXT: movl %edi, %ecx
; X86-X87-NEXT: movl %esi, %edx
; X86-X87-NEXT: .LBB27_10:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $20, %esp
; X86-X87-NEXT: popl %esi
; X86-X87-NEXT: popl %edi
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i64_f16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %esi
; X86-SSE-NEXT: subl $24, %esp
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl %eax, (%esp)
; X86-SSE-NEXT: calll __gnu_h2f_ieee
; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: flds {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: orl $3072, %eax # imm = 0xC00
; X86-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: xorl %ecx, %ecx
; X86-SSE-NEXT: ucomiss {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE-NEXT: cmovbl %ecx, %esi
; X86-SSE-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; X86-SSE-NEXT: cmovael {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: ucomiss {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF
; X86-SSE-NEXT: cmovbel %eax, %edx
; X86-SSE-NEXT: movl $-1, %eax
; X86-SSE-NEXT: cmovbel %esi, %eax
; X86-SSE-NEXT: ucomiss %xmm0, %xmm0
; X86-SSE-NEXT: cmovpl %ecx, %eax
; X86-SSE-NEXT: cmovpl %ecx, %edx
; X86-SSE-NEXT: addl $24, %esp
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i64_f16:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: movzwl %di, %edi
; X64-NEXT: callq __gnu_h2f_ieee@PLT
; X64-NEXT: cvttss2si %xmm0, %rax
; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0
; X64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
; X64-NEXT: cmovbeq %rax, %rcx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ucomiss %xmm0, %xmm0
; X64-NEXT: cmovnpq %rcx, %rax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
 %x = call i64 @llvm.fptosi.sat.i64.f16(half %f)
 ret i64 %x
}
define i100 @test_signed_i100_f16(half %f) nounwind {
; X86-X87-LABEL: test_signed_i100_f16:
; X86-X87: # %bb.0:
; X86-X87-NEXT: pushl %ebp
; X86-X87-NEXT: pushl %ebx
; X86-X87-NEXT: pushl %edi
; X86-X87-NEXT: pushl %esi
; X86-X87-NEXT: subl $44, %esp
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: movl %eax, (%esp)
; X86-X87-NEXT: calll __gnu_h2f_ieee
; X86-X87-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: movl %eax, (%esp)
; X86-X87-NEXT: fsts {{[0-9]+}}(%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptosi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fsts {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT: fucompp
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: movl %eax, %ebx
; X86-X87-NEXT: calll __fixsfti
; X86-X87-NEXT: subl $4, %esp
; X86-X87-NEXT: xorl %edx, %edx
; X86-X87-NEXT: movb %bh, %ah
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $-8, %ebx
; X86-X87-NEXT: jb .LBB28_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-X87-NEXT: .LBB28_2:
; X86-X87-NEXT: movl $0, %ecx
; X86-X87-NEXT: movl $0, %ebp
; X86-X87-NEXT: jb .LBB28_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-X87-NEXT: .LBB28_4:
; X86-X87-NEXT: movl $0, %edi
; X86-X87-NEXT: jb .LBB28_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-X87-NEXT: .LBB28_6:
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptosi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-X87-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $-1, %eax
; X86-X87-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT: movl $-1, %esi
; X86-X87-NEXT: ja .LBB28_8
; X86-X87-NEXT: # %bb.7:
; X86-X87-NEXT: movl %edi, %eax
; X86-X87-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl %ecx, %esi
; X86-X87-NEXT: .LBB28_8:
; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: movl $7, %edi
; X86-X87-NEXT: ja .LBB28_10
; X86-X87-NEXT: # %bb.9:
; X86-X87-NEXT: movl %ebx, %edi
; X86-X87-NEXT: .LBB28_10:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %eax
; X86-X87-NEXT: movl $0, %ebp
; X86-X87-NEXT: movl $0, %ebx
; X86-X87-NEXT: jp .LBB28_12
; X86-X87-NEXT: # %bb.11:
; X86-X87-NEXT: movl %edi, %edx
; X86-X87-NEXT: movl %esi, %eax
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-X87-NEXT: .LBB28_12:
; X86-X87-NEXT: movl %ebx, 8(%ecx)
; X86-X87-NEXT: movl %ebp, 4(%ecx)
; X86-X87-NEXT: movl %eax, (%ecx)
; X86-X87-NEXT: andl $15, %edx
; X86-X87-NEXT: movb %dl, 12(%ecx)
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $44, %esp
; X86-X87-NEXT: popl %esi
; X86-X87-NEXT: popl %edi
; X86-X87-NEXT: popl %ebx
; X86-X87-NEXT: popl %ebp
; X86-X87-NEXT: retl $4
;
; X86-SSE-LABEL: test_signed_i100_f16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %ebp
; X86-SSE-NEXT: pushl %ebx
; X86-SSE-NEXT: pushl %edi
; X86-SSE-NEXT: pushl %esi
; X86-SSE-NEXT: subl $44, %esp
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl %eax, (%esp)
; X86-SSE-NEXT: calll __gnu_h2f_ieee
; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl %eax, (%esp)
; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SSE-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: calll __fixsfti
; X86-SSE-NEXT: subl $4, %esp
; X86-SSE-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: xorl %ebp, %ebp
; X86-SSE-NEXT: ucomiss {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-SSE-NEXT: movl $-8, %ebx
; X86-SSE-NEXT: movl $0, %ecx
; X86-SSE-NEXT: movl $0, %edx
; X86-SSE-NEXT: movl $0, %edi
; X86-SSE-NEXT: jb .LBB28_2
; X86-SSE-NEXT: # %bb.1:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-SSE-NEXT: .LBB28_2:
; X86-SSE-NEXT: ucomiss {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-SSE-NEXT: movl $-1, %eax
; X86-SSE-NEXT: cmoval %eax, %edi
; X86-SSE-NEXT: cmoval %eax, %edx
; X86-SSE-NEXT: cmoval %eax, %ecx
; X86-SSE-NEXT: movl $7, %eax
; X86-SSE-NEXT: cmovbel %ebx, %eax
; X86-SSE-NEXT: ucomiss %xmm0, %xmm0
; X86-SSE-NEXT: cmovpl %ebp, %eax
; X86-SSE-NEXT: cmovpl %ebp, %ecx
; X86-SSE-NEXT: cmovpl %ebp, %edx
; X86-SSE-NEXT: cmovpl %ebp, %edi
; X86-SSE-NEXT: movl %edi, 8(%esi)
; X86-SSE-NEXT: movl %edx, 4(%esi)
; X86-SSE-NEXT: movl %ecx, (%esi)
; X86-SSE-NEXT: andl $15, %eax
; X86-SSE-NEXT: movb %al, 12(%esi)
; X86-SSE-NEXT: movl %esi, %eax
; X86-SSE-NEXT: addl $44, %esp
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: popl %edi
; X86-SSE-NEXT: popl %ebx
; X86-SSE-NEXT: popl %ebp
; X86-SSE-NEXT: retl $4
;
; X64-LABEL: test_signed_i100_f16:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: movzwl %di, %edi
; X64-NEXT: callq __gnu_h2f_ieee@PLT
; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: callq __fixsfti@PLT
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0
; X64-NEXT: cmovbq %rcx, %rax
; X64-NEXT: movabsq $-34359738368, %rsi # imm = 0xFFFFFFF800000000
; X64-NEXT: cmovbq %rsi, %rdx
; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0
; X64-NEXT: movabsq $34359738367, %rsi # imm = 0x7FFFFFFFF
; X64-NEXT: cmovaq %rsi, %rdx
; X64-NEXT: movq $-1, %rsi
; X64-NEXT: cmovaq %rsi, %rax
; X64-NEXT: ucomiss %xmm0, %xmm0
; X64-NEXT: cmovpq %rcx, %rax
; X64-NEXT: cmovpq %rcx, %rdx
; X64-NEXT: popq %rcx
; X64-NEXT: retq
%x = call i100 @llvm.fptosi.sat.i100.f16(half %f)
ret i100 %x
}
; Saturating half -> i128 signed conversion.
; For each of the three check prefixes used below (X86-X87, X86-SSE and X64)
; the expected code widens the half argument with the __gnu_h2f_ieee libcall,
; converts it with the __fixsfti libcall, and then patches the result:
;   * input below the first constant-pool bound  -> 0x8000...0 (most negative i128)
;   * input above the second constant-pool bound -> 0x7FFF...F (most positive i128)
;   * input unordered with itself (NaN, parity flag set) -> 0
; The 32-bit variants return the value through a hidden pointer argument
; (note the "retl $4"), the 64-bit variant returns it in rax/rdx.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i128 @test_signed_i128_f16(half %f) nounwind {
; X86-X87-LABEL: test_signed_i128_f16:
; X86-X87: # %bb.0:
; X86-X87-NEXT: pushl %ebp
; X86-X87-NEXT: pushl %ebx
; X86-X87-NEXT: pushl %edi
; X86-X87-NEXT: pushl %esi
; X86-X87-NEXT: subl $44, %esp
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: movl %eax, (%esp)
; X86-X87-NEXT: calll __gnu_h2f_ieee
; X86-X87-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: movl %eax, (%esp)
; X86-X87-NEXT: fsts {{[0-9]+}}(%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fsts {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT: fucompp
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: movl %eax, %ebx
; X86-X87-NEXT: calll __fixsfti
; X86-X87-NEXT: subl $4, %esp
; X86-X87-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT: movb %bh, %ah
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %eax
; X86-X87-NEXT: movl $0, %ebx
; X86-X87-NEXT: jae .LBB29_1
; X86-X87-NEXT: # %bb.2:
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: jae .LBB29_3
; X86-X87-NEXT: .LBB29_4:
; X86-X87-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
; X86-X87-NEXT: jb .LBB29_6
; X86-X87-NEXT: .LBB29_5:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: .LBB29_6:
; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
; X86-X87-NEXT: ja .LBB29_8
; X86-X87-NEXT: # %bb.7:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: .LBB29_8:
; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: movl $-1, %ebp
; X86-X87-NEXT: movl $-1, %edi
; X86-X87-NEXT: movl $-1, %esi
; X86-X87-NEXT: ja .LBB29_10
; X86-X87-NEXT: # %bb.9:
; X86-X87-NEXT: movl %edx, %ebp
; X86-X87-NEXT: movl %ebx, %edi
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-X87-NEXT: .LBB29_10:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %eax
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: movl $0, %ebx
; X86-X87-NEXT: jp .LBB29_12
; X86-X87-NEXT: # %bb.11:
; X86-X87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl %edi, %eax
; X86-X87-NEXT: movl %ebp, %edx
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-X87-NEXT: .LBB29_12:
; X86-X87-NEXT: movl %ebx, 12(%ecx)
; X86-X87-NEXT: movl %edx, 8(%ecx)
; X86-X87-NEXT: movl %eax, 4(%ecx)
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-X87-NEXT: movl %eax, (%ecx)
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $44, %esp
; X86-X87-NEXT: popl %esi
; X86-X87-NEXT: popl %edi
; X86-X87-NEXT: popl %ebx
; X86-X87-NEXT: popl %ebp
; X86-X87-NEXT: retl $4
; X86-X87-NEXT: .LBB29_1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: jb .LBB29_4
; X86-X87-NEXT: .LBB29_3:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-X87-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
; X86-X87-NEXT: jae .LBB29_5
; X86-X87-NEXT: jmp .LBB29_6
;
; X86-SSE-LABEL: test_signed_i128_f16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %ebp
; X86-SSE-NEXT: pushl %ebx
; X86-SSE-NEXT: pushl %edi
; X86-SSE-NEXT: pushl %esi
; X86-SSE-NEXT: subl $44, %esp
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl %eax, (%esp)
; X86-SSE-NEXT: calll __gnu_h2f_ieee
; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl %eax, (%esp)
; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SSE-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: calll __fixsfti
; X86-SSE-NEXT: subl $4, %esp
; X86-SSE-NEXT: xorl %ecx, %ecx
; X86-SSE-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: ucomiss {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: cmovbl %ecx, %eax
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE-NEXT: cmovbl %ecx, %edx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-SSE-NEXT: cmovbl %ecx, %edi
; X86-SSE-NEXT: movl $-2147483648, %ebx # imm = 0x80000000
; X86-SSE-NEXT: cmovael {{[0-9]+}}(%esp), %ebx
; X86-SSE-NEXT: ucomiss {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: movl $2147483647, %ebp # imm = 0x7FFFFFFF
; X86-SSE-NEXT: cmovbel %ebx, %ebp
; X86-SSE-NEXT: movl $-1, %ebx
; X86-SSE-NEXT: cmoval %ebx, %edi
; X86-SSE-NEXT: cmoval %ebx, %edx
; X86-SSE-NEXT: cmoval %ebx, %eax
; X86-SSE-NEXT: ucomiss %xmm0, %xmm0
; X86-SSE-NEXT: cmovpl %ecx, %eax
; X86-SSE-NEXT: cmovpl %ecx, %edx
; X86-SSE-NEXT: cmovpl %ecx, %edi
; X86-SSE-NEXT: cmovpl %ecx, %ebp
; X86-SSE-NEXT: movl %ebp, 12(%esi)
; X86-SSE-NEXT: movl %edi, 8(%esi)
; X86-SSE-NEXT: movl %edx, 4(%esi)
; X86-SSE-NEXT: movl %eax, (%esi)
; X86-SSE-NEXT: movl %esi, %eax
; X86-SSE-NEXT: addl $44, %esp
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: popl %edi
; X86-SSE-NEXT: popl %ebx
; X86-SSE-NEXT: popl %ebp
; X86-SSE-NEXT: retl $4
;
; X64-LABEL: test_signed_i128_f16:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: movzwl %di, %edi
; X64-NEXT: callq __gnu_h2f_ieee@PLT
; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: callq __fixsfti@PLT
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0
; X64-NEXT: cmovbq %rcx, %rax
; X64-NEXT: movabsq $-9223372036854775808, %rsi # imm = 0x8000000000000000
; X64-NEXT: cmovbq %rsi, %rdx
; X64-NEXT: ucomiss {{.*}}(%rip), %xmm0
; X64-NEXT: movabsq $9223372036854775807, %rsi # imm = 0x7FFFFFFFFFFFFFFF
; X64-NEXT: cmovaq %rsi, %rdx
; X64-NEXT: movq $-1, %rsi
; X64-NEXT: cmovaq %rsi, %rax
; X64-NEXT: ucomiss %xmm0, %xmm0
; X64-NEXT: cmovpq %rcx, %rax
; X64-NEXT: cmovpq %rcx, %rdx
; X64-NEXT: popq %rcx
; X64-NEXT: retq
%x = call i128 @llvm.fptosi.sat.i128.f16(half %f)
ret i128 %x
}
;
; 80-bit float to signed integer
;
; Declarations of the llvm.fptosi.sat overloads that take an x86_fp80
; operand, one per integer result width from i1 up to i128. The list
; deliberately includes widths that are not a power of two (i13, i19, i50,
; i100) next to the usual i8/i16/i32/i64/i128 ones.
;
declare i1 @llvm.fptosi.sat.i1.f80 (x86_fp80)
declare i8 @llvm.fptosi.sat.i8.f80 (x86_fp80)
declare i13 @llvm.fptosi.sat.i13.f80 (x86_fp80)
declare i16 @llvm.fptosi.sat.i16.f80 (x86_fp80)
declare i19 @llvm.fptosi.sat.i19.f80 (x86_fp80)
declare i32 @llvm.fptosi.sat.i32.f80 (x86_fp80)
declare i50 @llvm.fptosi.sat.i50.f80 (x86_fp80)
declare i64 @llvm.fptosi.sat.i64.f80 (x86_fp80)
declare i100 @llvm.fptosi.sat.i100.f80(x86_fp80)
declare i128 @llvm.fptosi.sat.i128.f80(x86_fp80)
; Saturating x86_fp80 -> i1 signed conversion.
; In all three configurations checked below (X86-X87, X86-SSE and X64) the
; expected code:
;   * saves the x87 control word (fnstcw), loads a copy with bits 0xC00 set,
;     stores the integer with fists, then issues a second fldcw (presumably
;     restoring the saved word -- the stack offsets are hidden by the regexes);
;   * replaces the stored byte with -1 (0xFF) when the input compares below
;     -1.0, which is materialised as fld1 + fchs;
;   * replaces it with 0 when the input compares above +0.0 (fldz);
;   * replaces it with 0 when the input is unordered with itself (NaN), which
;     is detected through the parity flag.
; The X86-X87 variant transfers the FPU status word with fnstsw/sahf and uses
; branches; the other two use fucomi/fucompi together with cmov.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define i1 @test_signed_i1_f80(x86_fp80 %f) nounwind {
; X86-X87-LABEL: test_signed_i1_f80:
; X86-X87: # %bb.0:
; X86-X87-NEXT: pushl %ebx
; X86-X87-NEXT: subl $8, %esp
; X86-X87-NEXT: fldt {{[0-9]+}}(%esp)
; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fists {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fld1
; X86-X87-NEXT: fchs
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movb $-1, %dl
; X86-X87-NEXT: jb .LBB30_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %dl
; X86-X87-NEXT: .LBB30_2:
; X86-X87-NEXT: fldz
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %ebx
; X86-X87-NEXT: ja .LBB30_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl %edx, %ebx
; X86-X87-NEXT: .LBB30_4:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jp .LBB30_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl %ebx, %ecx
; X86-X87-NEXT: .LBB30_6:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $8, %esp
; X86-X87-NEXT: popl %ebx
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i1_f80:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: subl $8, %esp
; X86-SSE-NEXT: fldt {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: orl $3072, %eax # imm = 0xC00
; X86-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fists {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT: fld1
; X86-SSE-NEXT: fchs
; X86-SSE-NEXT: fxch %st(1)
; X86-SSE-NEXT: fucomi %st(1), %st
; X86-SSE-NEXT: fstp %st(1)
; X86-SSE-NEXT: movl $255, %eax
; X86-SSE-NEXT: cmovael %ecx, %eax
; X86-SSE-NEXT: xorl %ecx, %ecx
; X86-SSE-NEXT: fldz
; X86-SSE-NEXT: fxch %st(1)
; X86-SSE-NEXT: fucomi %st(1), %st
; X86-SSE-NEXT: fstp %st(1)
; X86-SSE-NEXT: cmoval %ecx, %eax
; X86-SSE-NEXT: fucompi %st(0), %st
; X86-SSE-NEXT: cmovpl %ecx, %eax
; X86-SSE-NEXT: # kill: def $al killed $al killed $eax
; X86-SSE-NEXT: addl $8, %esp
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i1_f80:
; X64: # %bb.0:
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: orl $3072, %eax # imm = 0xC00
; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-NEXT: fists -{{[0-9]+}}(%rsp)
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; X64-NEXT: fld1
; X64-NEXT: fchs
; X64-NEXT: fxch %st(1)
; X64-NEXT: fucomi %st(1), %st
; X64-NEXT: fstp %st(1)
; X64-NEXT: movl $255, %eax
; X64-NEXT: cmovael %ecx, %eax
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: fldz
; X64-NEXT: fxch %st(1)
; X64-NEXT: fucomi %st(1), %st
; X64-NEXT: fstp %st(1)
; X64-NEXT: cmoval %ecx, %eax
; X64-NEXT: fucompi %st(0), %st
; X64-NEXT: cmovpl %ecx, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%x = call i1 @llvm.fptosi.sat.i1.f80(x86_fp80 %f)
ret i1 %x
}
define i8 @test_signed_i8_f80(x86_fp80 %f) nounwind {
; X86-X87-LABEL: test_signed_i8_f80:
; X86-X87: # %bb.0:
; X86-X87-NEXT: subl $8, %esp
; X86-X87-NEXT: fldt {{[0-9]+}}(%esp)
; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fists {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movb $-128, %dl
; X86-X87-NEXT: jb .LBB31_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %dl
; X86-X87-NEXT: .LBB31_2:
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movb $127, %cl
; X86-X87-NEXT: ja .LBB31_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl %edx, %ecx
; X86-X87-NEXT: .LBB31_4:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jnp .LBB31_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: .LBB31_6:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $8, %esp
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i8_f80:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: subl $8, %esp
; X86-SSE-NEXT: fldt {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: orl $3072, %eax # imm = 0xC00
; X86-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fists {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptosi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-SSE-NEXT: fxch %st(1)
; X86-SSE-NEXT: fucomi %st(1), %st
; X86-SSE-NEXT: fstp %st(1)
; X86-SSE-NEXT: movl $128, %ecx
; X86-SSE-NEXT: cmovael %eax, %ecx
; X86-SSE-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptosi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-SSE-NEXT: fxch %st(1)
; X86-SSE-NEXT: fucomi %st(1), %st
; X86-SSE-NEXT: fstp %st(1)
; X86-SSE-NEXT: movl $127, %edx
; X86-SSE-NEXT: cmovbel %ecx, %edx
; X86-SSE-NEXT: xorl %eax, %eax
; X86-SSE-NEXT: fucompi %st(0), %st
; X86-SSE-NEXT: cmovnpl %edx, %eax
; X86-SSE-NEXT: # kill: def $al killed $al killed $eax
; X86-SSE-NEXT: addl $8, %esp
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i8_f80:
; X64: # %bb.0:
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: orl $3072, %eax # imm = 0xC00
; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-NEXT: fists -{{[0-9]+}}(%rsp)
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: flds {{.*}}(%rip)
; X64-NEXT: fxch %st(1)
; X64-NEXT: fucomi %st(1), %st
; X64-NEXT: fstp %st(1)
; X64-NEXT: movl $128, %ecx
; X64-NEXT: cmovael %eax, %ecx
; X64-NEXT: flds {{.*}}(%rip)
; X64-NEXT: fxch %st(1)
; X64-NEXT: fucomi %st(1), %st
; X64-NEXT: fstp %st(1)
; X64-NEXT: movl $127, %edx
; X64-NEXT: cmovbel %ecx, %edx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: fucompi %st(0), %st
; X64-NEXT: cmovnpl %edx, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%x = call i8 @llvm.fptosi.sat.i8.f80(x86_fp80 %f)
ret i8 %x
}
; Saturating signed conversion from x86_fp80 to the non-power-of-two type i13,
; via @llvm.fptosi.sat.i13.f80. In every configuration checked below, the
; expected code ORs 0xC00 into the x87 control word (rounding control set to
; truncate) around a 16-bit integer store (fists), then picks the result:
;   * -4096 (0xF000 as a 16-bit immediate) when the input compares below the
;     first constant loaded from the constant pool,
;   * 4095 (0xFFF) when the input compares above the second constant,
;   * 0 when the input is unordered with itself, i.e. NaN.
; NOTE(review): the assertions look autogenerated (update_llc_test_checks.py
; style) - regenerate them rather than editing them by hand.
define i13 @test_signed_i13_f80(x86_fp80 %f) nounwind {
; X86-X87-LABEL: test_signed_i13_f80:
; X86-X87: # %bb.0:
; X86-X87-NEXT: subl $8, %esp
; X86-X87-NEXT: fldt {{[0-9]+}}(%esp)
; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fists {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movw $-4096, %cx # imm = 0xF000
; X86-X87-NEXT: jb .LBB32_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: .LBB32_2:
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $4095, %edx # imm = 0xFFF
; X86-X87-NEXT: ja .LBB32_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl %ecx, %edx
; X86-X87-NEXT: .LBB32_4:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jp .LBB32_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl %edx, %ecx
; X86-X87-NEXT: .LBB32_6:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $8, %esp
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i13_f80:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: subl $8, %esp
; X86-SSE-NEXT: fldt {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: orl $3072, %eax # imm = 0xC00
; X86-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fists {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-SSE-NEXT: fxch %st(1)
; X86-SSE-NEXT: fucomi %st(1), %st
; X86-SSE-NEXT: fstp %st(1)
; X86-SSE-NEXT: movw $-4096, %ax # imm = 0xF000
; X86-SSE-NEXT: jb .LBB32_2
; X86-SSE-NEXT: # %bb.1:
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: .LBB32_2:
; X86-SSE-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-SSE-NEXT: fxch %st(1)
; X86-SSE-NEXT: fucomi %st(1), %st
; X86-SSE-NEXT: fstp %st(1)
; X86-SSE-NEXT: movl $4095, %ecx # imm = 0xFFF
; X86-SSE-NEXT: cmovbel %eax, %ecx
; X86-SSE-NEXT: xorl %eax, %eax
; X86-SSE-NEXT: fucompi %st(0), %st
; X86-SSE-NEXT: cmovnpl %ecx, %eax
; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax
; X86-SSE-NEXT: addl $8, %esp
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i13_f80:
; X64: # %bb.0:
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: orl $3072, %eax # imm = 0xC00
; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-NEXT: fists -{{[0-9]+}}(%rsp)
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-NEXT: flds {{.*}}(%rip)
; X64-NEXT: fxch %st(1)
; X64-NEXT: fucomi %st(1), %st
; X64-NEXT: fstp %st(1)
; X64-NEXT: movw $-4096, %ax # imm = 0xF000
; X64-NEXT: jb .LBB32_2
; X64-NEXT: # %bb.1:
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: .LBB32_2:
; X64-NEXT: flds {{.*}}(%rip)
; X64-NEXT: fxch %st(1)
; X64-NEXT: fucomi %st(1), %st
; X64-NEXT: fstp %st(1)
; X64-NEXT: movl $4095, %ecx # imm = 0xFFF
; X64-NEXT: cmovbel %eax, %ecx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: fucompi %st(0), %st
; X64-NEXT: cmovnpl %ecx, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
  %x = call i13 @llvm.fptosi.sat.i13.f80(x86_fp80 %f)
  ret i13 %x
}
define i16 @test_signed_i16_f80(x86_fp80 %f) nounwind {
; X86-X87-LABEL: test_signed_i16_f80:
; X86-X87: # %bb.0:
; X86-X87-NEXT: subl $8, %esp
; X86-X87-NEXT: fldt {{[0-9]+}}(%esp)
; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fists {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptosi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movw $-32768, %cx # imm = 0x8000
; X86-X87-NEXT: jb .LBB33_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: .LBB33_2:
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptosi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $32767, %edx # imm = 0x7FFF
; X86-X87-NEXT: ja .LBB33_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl %ecx, %edx
; X86-X87-NEXT: .LBB33_4:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jp .LBB33_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl %edx, %ecx
; X86-X87-NEXT: .LBB33_6:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $8, %esp
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i16_f80:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: subl $8, %esp
; X86-SSE-NEXT: fldt {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: orl $3072, %eax # imm = 0xC00
; X86-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fists {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptosi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-SSE-NEXT: fxch %st(1)
; X86-SSE-NEXT: fucomi %st(1), %st
; X86-SSE-NEXT: fstp %st(1)
; X86-SSE-NEXT: movw $-32768, %ax # imm = 0x8000
; X86-SSE-NEXT: jb .LBB33_2
; X86-SSE-NEXT: # %bb.1:
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: .LBB33_2:
; X86-SSE-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptosi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-SSE-NEXT: fxch %st(1)
; X86-SSE-NEXT: fucomi %st(1), %st
; X86-SSE-NEXT: fstp %st(1)
; X86-SSE-NEXT: movl $32767, %ecx # imm = 0x7FFF
; X86-SSE-NEXT: cmovbel %eax, %ecx
; X86-SSE-NEXT: xorl %eax, %eax
; X86-SSE-NEXT: fucompi %st(0), %st
; X86-SSE-NEXT: cmovnpl %ecx, %eax
; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax
; X86-SSE-NEXT: addl $8, %esp
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i16_f80:
; X64: # %bb.0:
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: orl $3072, %eax # imm = 0xC00
; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-NEXT: fists -{{[0-9]+}}(%rsp)
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-NEXT: flds {{.*}}(%rip)
; X64-NEXT: fxch %st(1)
; X64-NEXT: fucomi %st(1), %st
; X64-NEXT: fstp %st(1)
; X64-NEXT: movw $-32768, %ax # imm = 0x8000
; X64-NEXT: jb .LBB33_2
; X64-NEXT: # %bb.1:
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: .LBB33_2:
; X64-NEXT: flds {{.*}}(%rip)
; X64-NEXT: fxch %st(1)
; X64-NEXT: fucomi %st(1), %st
; X64-NEXT: fstp %st(1)
; X64-NEXT: movl $32767, %ecx # imm = 0x7FFF
; X64-NEXT: cmovbel %eax, %ecx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: fucompi %st(0), %st
; X64-NEXT: cmovnpl %ecx, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%x = call i16 @llvm.fptosi.sat.i16.f80(x86_fp80 %f)
ret i16 %x
}
; test_signed_i19_f80: saturating signed conversion of an x86_fp80 argument to
; i19 through @llvm.fptosi.sat.i19.f80; the call result is returned unchanged.
;
; Expected code, checked separately for the X86-X87, X86-SSE and X64 prefixes:
;  * The value is stored as a 32-bit integer with fistl while a modified copy
;    of the x87 control word (saved word OR 0xC00) is loaded; the saved control
;    word is reloaded right after the store.
;  * First compare, against a constant loaded with flds: when the input is
;    below it the result is -262144 (0xFFFC0000, i.e. -2^18), otherwise the
;    fistl result is used.
;  * Second compare, against another flds constant: when the input is above it
;    the result is 262143 (0x3FFFF, i.e. 2^18-1), otherwise the value from the
;    previous step is kept.
;  * Final self-compare (fucomp/fucompi against %st(0)): when the parity flag
;    is set the result is 0, otherwise the clamped value is returned.
; X86-X87 implements every select with fucom + fnstsw/sahf and a branch;
; X86-SSE and X64 use fucomi, a branch for the lower bound, and
; cmovbel/cmovnpl for the upper-bound and self-compare selects.
;
; NOTE(review): the assertion layout looks like update_llc_test_checks.py
; output - presumably these lines should be regenerated, not hand-edited;
; confirm against the file header, which is not visible from here.
; NOTE(review): the long prose lines and the "2020-12-18 ..." lines inside this
; body do not start with ';' and look like blame-view residue rather than test
; content - confirm against the upstream file before relying on them.
define i19 @test_signed_i19_f80(x86_fp80 %f) nounwind {
; X86-X87-LABEL: test_signed_i19_f80:
; X86-X87: # %bb.0:
; X86-X87-NEXT: subl $8, %esp
; X86-X87-NEXT: fldt {{[0-9]+}}(%esp)
; X86-X87-NEXT: fnstcw (%esp)
; X86-X87-NEXT: movzwl (%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fistl {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw (%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $-262144, %ecx # imm = 0xFFFC0000
; X86-X87-NEXT: jb .LBB34_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: .LBB34_2:
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $262143, %edx # imm = 0x3FFFF
; X86-X87-NEXT: ja .LBB34_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl %ecx, %edx
; X86-X87-NEXT: .LBB34_4:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jp .LBB34_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl %edx, %ecx
; X86-X87-NEXT: .LBB34_6:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $8, %esp
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i19_f80:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: subl $8, %esp
; X86-SSE-NEXT: fldt {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fnstcw (%esp)
; X86-SSE-NEXT: movzwl (%esp), %eax
; X86-SSE-NEXT: orl $3072, %eax # imm = 0xC00
; X86-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fistl {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw (%esp)
; X86-SSE-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-SSE-NEXT: fxch %st(1)
; X86-SSE-NEXT: fucomi %st(1), %st
; X86-SSE-NEXT: fstp %st(1)
; X86-SSE-NEXT: movl $-262144, %eax # imm = 0xFFFC0000
; X86-SSE-NEXT: jb .LBB34_2
; X86-SSE-NEXT: # %bb.1:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: .LBB34_2:
; X86-SSE-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-SSE-NEXT: fxch %st(1)
; X86-SSE-NEXT: fucomi %st(1), %st
; X86-SSE-NEXT: fstp %st(1)
; X86-SSE-NEXT: movl $262143, %ecx # imm = 0x3FFFF
; X86-SSE-NEXT: cmovbel %eax, %ecx
; X86-SSE-NEXT: xorl %eax, %eax
; X86-SSE-NEXT: fucompi %st(0), %st
; X86-SSE-NEXT: cmovnpl %ecx, %eax
; X86-SSE-NEXT: addl $8, %esp
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i19_f80:
; X64: # %bb.0:
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: orl $3072, %eax # imm = 0xC00
; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-NEXT: fistl -{{[0-9]+}}(%rsp)
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-NEXT: flds {{.*}}(%rip)
; X64-NEXT: fxch %st(1)
; X64-NEXT: fucomi %st(1), %st
; X64-NEXT: fstp %st(1)
; X64-NEXT: movl $-262144, %eax # imm = 0xFFFC0000
; X64-NEXT: jb .LBB34_2
; X64-NEXT: # %bb.1:
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: .LBB34_2:
; X64-NEXT: flds {{.*}}(%rip)
; X64-NEXT: fxch %st(1)
; X64-NEXT: fucomi %st(1), %st
; X64-NEXT: fstp %st(1)
; X64-NEXT: movl $262143, %ecx # imm = 0x3FFFF
; X64-NEXT: cmovbel %eax, %ecx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: fucompi %st(0), %st
; X64-NEXT: cmovnpl %ecx, %eax
; X64-NEXT: retq
%x = call i19 @llvm.fptosi.sat.i19.f80(x86_fp80 %f)
ret i19 %x
}
; test_signed_i32_f80: saturating signed conversion of an x86_fp80 argument to
; i32 through @llvm.fptosi.sat.i32.f80; the call result is returned unchanged.
;
; Expected code, checked separately for the X86-X87, X86-SSE and X64 prefixes:
;  * The value is stored as a 32-bit integer with fistl while a modified copy
;    of the x87 control word (saved word OR 0xC00) is loaded; the saved control
;    word is reloaded right after the store.
;  * First compare, against a constant loaded with flds: when the input is
;    below it the result is -2147483648 (0x80000000), otherwise the fistl
;    result is used.
;  * Second compare, against a constant loaded with fldl (not flds as for the
;    first bound): when the input is above it the result is 2147483647
;    (0x7FFFFFFF), otherwise the value from the previous step is kept.
;    NOTE(review): presumably the wider load is needed because the upper bound
;    is not exactly representable in single precision - confirm.
;  * Final self-compare (fucomp/fucompi against %st(0)): when the parity flag
;    is set the result is 0, otherwise the clamped value is returned.
; X86-X87 implements every select with fucom + fnstsw/sahf and a branch;
; X86-SSE and X64 use fucomi, a branch for the lower bound, and
; cmovbel/cmovnpl for the upper-bound and self-compare selects.
;
; NOTE(review): the assertion layout looks like update_llc_test_checks.py
; output - presumably these lines should be regenerated, not hand-edited;
; confirm against the file header, which is not visible from here.
; NOTE(review): the long prose lines and the "2020-12-18 ..." lines inside this
; body do not start with ';' and look like blame-view residue rather than test
; content - confirm against the upstream file before relying on them.
define i32 @test_signed_i32_f80(x86_fp80 %f) nounwind {
; X86-X87-LABEL: test_signed_i32_f80:
; X86-X87: # %bb.0:
; X86-X87-NEXT: subl $8, %esp
; X86-X87-NEXT: fldt {{[0-9]+}}(%esp)
; X86-X87-NEXT: fnstcw (%esp)
; X86-X87-NEXT: movzwl (%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fistl {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw (%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
; X86-X87-NEXT: jb .LBB35_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: .LBB35_2:
; X86-X87-NEXT: fldl {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF
; X86-X87-NEXT: ja .LBB35_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl %ecx, %edx
; X86-X87-NEXT: .LBB35_4:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: jp .LBB35_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl %edx, %ecx
; X86-X87-NEXT: .LBB35_6:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $8, %esp
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i32_f80:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: subl $8, %esp
; X86-SSE-NEXT: fldt {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fnstcw (%esp)
; X86-SSE-NEXT: movzwl (%esp), %eax
; X86-SSE-NEXT: orl $3072, %eax # imm = 0xC00
; X86-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fistl {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw (%esp)
; X86-SSE-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-SSE-NEXT: fxch %st(1)
; X86-SSE-NEXT: fucomi %st(1), %st
; X86-SSE-NEXT: fstp %st(1)
; X86-SSE-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; X86-SSE-NEXT: jb .LBB35_2
; X86-SSE-NEXT: # %bb.1:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: .LBB35_2:
; X86-SSE-NEXT: fldl {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-SSE-NEXT: fxch %st(1)
; X86-SSE-NEXT: fucomi %st(1), %st
; X86-SSE-NEXT: fstp %st(1)
; X86-SSE-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF
; X86-SSE-NEXT: cmovbel %eax, %ecx
; X86-SSE-NEXT: xorl %eax, %eax
; X86-SSE-NEXT: fucompi %st(0), %st
; X86-SSE-NEXT: cmovnpl %ecx, %eax
; X86-SSE-NEXT: addl $8, %esp
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i32_f80:
; X64: # %bb.0:
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: orl $3072, %eax # imm = 0xC00
; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-NEXT: fistl -{{[0-9]+}}(%rsp)
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-NEXT: flds {{.*}}(%rip)
; X64-NEXT: fxch %st(1)
; X64-NEXT: fucomi %st(1), %st
; X64-NEXT: fstp %st(1)
; X64-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; X64-NEXT: jb .LBB35_2
; X64-NEXT: # %bb.1:
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: .LBB35_2:
; X64-NEXT: fldl {{.*}}(%rip)
; X64-NEXT: fxch %st(1)
; X64-NEXT: fucomi %st(1), %st
; X64-NEXT: fstp %st(1)
; X64-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF
; X64-NEXT: cmovbel %eax, %ecx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: fucompi %st(0), %st
; X64-NEXT: cmovnpl %ecx, %eax
; X64-NEXT: retq
%x = call i32 @llvm.fptosi.sat.i32.f80(x86_fp80 %f)
ret i32 %x
}
define i50 @test_signed_i50_f80(x86_fp80 %f) nounwind {
; X86-X87-LABEL: test_signed_i50_f80:
; X86-X87: # %bb.0:
; X86-X87-NEXT: pushl %edi
; X86-X87-NEXT: pushl %esi
; X86-X87-NEXT: subl $20, %esp
; X86-X87-NEXT: fldt {{[0-9]+}}(%esp)
; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fld %st(0)
; X86-X87-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: jb .LBB36_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-X87-NEXT: .LBB36_2:
; X86-X87-NEXT: movl $-131072, %edi # imm = 0xFFFE0000
; X86-X87-NEXT: jb .LBB36_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-X87-NEXT: .LBB36_4:
; X86-X87-NEXT: fldl {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $131071, %esi # imm = 0x1FFFF
; X86-X87-NEXT: ja .LBB36_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl %edi, %esi
; X86-X87-NEXT: .LBB36_6:
; X86-X87-NEXT: movl $-1, %edi
; X86-X87-NEXT: ja .LBB36_8
; X86-X87-NEXT: # %bb.7:
; X86-X87-NEXT: movl %edx, %edi
; X86-X87-NEXT: .LBB36_8:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: jp .LBB36_10
; X86-X87-NEXT: # %bb.9:
; X86-X87-NEXT: movl %edi, %ecx
; X86-X87-NEXT: movl %esi, %edx
; X86-X87-NEXT: .LBB36_10:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $20, %esp
; X86-X87-NEXT: popl %esi
; X86-X87-NEXT: popl %edi
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i50_f80:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %esi
; X86-SSE-NEXT: subl $16, %esp
; X86-SSE-NEXT: fldt {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: orl $3072, %eax # imm = 0xC00
; X86-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fld %st(0)
; X86-SSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: xorl %ecx, %ecx
; X86-SSE-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-SSE-NEXT: fxch %st(1)
; X86-SSE-NEXT: fucomi %st(1), %st
; X86-SSE-NEXT: fstp %st(1)
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE-NEXT: cmovbl %ecx, %esi
; X86-SSE-NEXT: movl $-131072, %eax # imm = 0xFFFE0000
; X86-SSE-NEXT: cmovael {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: fldl {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-SSE-NEXT: fxch %st(1)
; X86-SSE-NEXT: fucomi %st(1), %st
; X86-SSE-NEXT: fstp %st(1)
; X86-SSE-NEXT: movl $131071, %edx # imm = 0x1FFFF
; X86-SSE-NEXT: cmovbel %eax, %edx
; X86-SSE-NEXT: movl $-1, %eax
; X86-SSE-NEXT: cmovbel %esi, %eax
; X86-SSE-NEXT: fucompi %st(0), %st
; X86-SSE-NEXT: cmovpl %ecx, %eax
; X86-SSE-NEXT: cmovpl %ecx, %edx
; X86-SSE-NEXT: addl $16, %esp
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i50_f80:
; X64: # %bb.0:
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: orl $3072, %eax # imm = 0xC00
; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-NEXT: fld %st(0)
; X64-NEXT: fistpll -{{[0-9]+}}(%rsp)
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-NEXT: flds {{.*}}(%rip)
; X64-NEXT: fxch %st(1)
; X64-NEXT: fucomi %st(1), %st
; X64-NEXT: fstp %st(1)
; X64-NEXT: jb .LBB36_1
; X64-NEXT: # %bb.2:
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; X64-NEXT: jmp .LBB36_3
; X64-NEXT: .LBB36_1:
; X64-NEXT: movabsq $-562949953421312, %rax # imm = 0xFFFE000000000000
; X64-NEXT: .LBB36_3:
; X64-NEXT: fldl {{.*}}(%rip)
; X64-NEXT: fxch %st(1)
; X64-NEXT: fucomi %st(1), %st
; X64-NEXT: fstp %st(1)
; X64-NEXT: movabsq $562949953421311, %rcx # imm = 0x1FFFFFFFFFFFF
; X64-NEXT: cmovbeq %rax, %rcx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: fucompi %st(0), %st
; X64-NEXT: cmovnpq %rcx, %rax
; X64-NEXT: retq
%x = call i50 @llvm.fptosi.sat.i50.f80(x86_fp80 %f)
ret i50 %x
}
; Saturating signed conversion of an x86_fp80 value to i64 through
; @llvm.fptosi.sat.i64.f80, checked for the X86-X87, X86-SSE and X64 prefixes.
; The expected code picks the i64 bounds (0x8000000000000000 and
; 0x7FFFFFFFFFFFFFFF) after the two range compares and yields zero when the
; final self-compare is unordered (NaN input).
; NOTE(review): the CHECK lines look autogenerated; regenerate them with the
; update script rather than editing them by hand - confirm against file header.
define i64 @test_signed_i64_f80(x86_fp80 %f) nounwind {
; X86-X87-LABEL: test_signed_i64_f80:
; X86-X87: # %bb.0:
; X86-X87-NEXT: pushl %edi
; X86-X87-NEXT: pushl %esi
; X86-X87-NEXT: subl $20, %esp
; X86-X87-NEXT: fldt {{[0-9]+}}(%esp)
; X86-X87-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X86-X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: fld %st(0)
; X86-X87-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-X87-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: xorl %ecx, %ecx
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: jb .LBB37_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-X87-NEXT: .LBB37_2:
; X86-X87-NEXT: movl $-2147483648, %edi # imm = 0x80000000
; X86-X87-NEXT: jb .LBB37_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-X87-NEXT: .LBB37_4:
; X86-X87-NEXT: fldt {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $2147483647, %esi # imm = 0x7FFFFFFF
; X86-X87-NEXT: ja .LBB37_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl %edi, %esi
; X86-X87-NEXT: .LBB37_6:
; X86-X87-NEXT: movl $-1, %edi
; X86-X87-NEXT: ja .LBB37_8
; X86-X87-NEXT: # %bb.7:
; X86-X87-NEXT: movl %edx, %edi
; X86-X87-NEXT: .LBB37_8:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: jp .LBB37_10
; X86-X87-NEXT: # %bb.9:
; X86-X87-NEXT: movl %edi, %ecx
; X86-X87-NEXT: movl %esi, %edx
; X86-X87-NEXT: .LBB37_10:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $20, %esp
; X86-X87-NEXT: popl %esi
; X86-X87-NEXT: popl %edi
; X86-X87-NEXT: retl
;
; X86-SSE-LABEL: test_signed_i64_f80:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %esi
; X86-SSE-NEXT: subl $16, %esp
; X86-SSE-NEXT: fldt {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: orl $3072, %eax # imm = 0xC00
; X86-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fld %st(0)
; X86-SSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: xorl %ecx, %ecx
; X86-SSE-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-SSE-NEXT: fxch %st(1)
; X86-SSE-NEXT: fucomi %st(1), %st
; X86-SSE-NEXT: fstp %st(1)
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE-NEXT: cmovbl %ecx, %esi
; X86-SSE-NEXT: movl $-2147483648, %eax # imm = 0x80000000
; X86-SSE-NEXT: cmovael {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: fldt {{\.LCPI[0-9]+_[0-9]+}}
; X86-SSE-NEXT: fxch %st(1)
; X86-SSE-NEXT: fucomi %st(1), %st
; X86-SSE-NEXT: fstp %st(1)
; X86-SSE-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF
; X86-SSE-NEXT: cmovbel %eax, %edx
; X86-SSE-NEXT: movl $-1, %eax
; X86-SSE-NEXT: cmovbel %esi, %eax
; X86-SSE-NEXT: fucompi %st(0), %st
; X86-SSE-NEXT: cmovpl %ecx, %eax
; X86-SSE-NEXT: cmovpl %ecx, %edx
; X86-SSE-NEXT: addl $16, %esp
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i64_f80:
; X64: # %bb.0:
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: orl $3072, %eax # imm = 0xC00
; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-NEXT: fld %st(0)
; X64-NEXT: fistpll -{{[0-9]+}}(%rsp)
; X64-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-NEXT: flds {{.*}}(%rip)
; X64-NEXT: fxch %st(1)
; X64-NEXT: fucomi %st(1), %st
; X64-NEXT: fstp %st(1)
; X64-NEXT: jb .LBB37_1
; X64-NEXT: # %bb.2:
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; X64-NEXT: jmp .LBB37_3
; X64-NEXT: .LBB37_1:
; X64-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-NEXT: .LBB37_3:
; X64-NEXT: fldt {{.*}}(%rip)
; X64-NEXT: fxch %st(1)
; X64-NEXT: fucomi %st(1), %st
; X64-NEXT: fstp %st(1)
; X64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
; X64-NEXT: cmovbeq %rax, %rcx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: fucompi %st(0), %st
; X64-NEXT: cmovnpq %rcx, %rax
; X64-NEXT: retq
%x = call i64 @llvm.fptosi.sat.i64.f80(x86_fp80 %f)
ret i64 %x
}
define i100 @test_signed_i100_f80(x86_fp80 %f) nounwind {
; X86-X87-LABEL: test_signed_i100_f80:
; X86-X87: # %bb.0:
; X86-X87-NEXT: pushl %ebp
; X86-X87-NEXT: pushl %ebx
; X86-X87-NEXT: pushl %edi
; X86-X87-NEXT: pushl %esi
; X86-X87-NEXT: subl $60, %esp
; X86-X87-NEXT: fldt {{[0-9]+}}(%esp)
; X86-X87-NEXT: fld %st(0)
; X86-X87-NEXT: fstpt {{[0-9]+}}(%esp)
; X86-X87-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: movl %eax, (%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-X87-NEXT: fld %st(1)
; X86-X87-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucompp
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: movl %eax, %ebx
; X86-X87-NEXT: calll __fixxfti
; X86-X87-NEXT: subl $4, %esp
; X86-X87-NEXT: xorl %edx, %edx
; X86-X87-NEXT: movb %bh, %ah
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $-8, %ebx
; X86-X87-NEXT: jb .LBB38_2
; X86-X87-NEXT: # %bb.1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-X87-NEXT: .LBB38_2:
; X86-X87-NEXT: movl $0, %ecx
; X86-X87-NEXT: movl $0, %ebp
; X86-X87-NEXT: jb .LBB38_4
; X86-X87-NEXT: # %bb.3:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-X87-NEXT: .LBB38_4:
; X86-X87-NEXT: movl $0, %edi
; X86-X87-NEXT: jb .LBB38_6
; X86-X87-NEXT: # %bb.5:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-X87-NEXT: .LBB38_6:
; X86-X87-NEXT: fldt {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-X87-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $-1, %eax
; X86-X87-NEXT: movl $-1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT: movl $-1, %esi
; X86-X87-NEXT: ja .LBB38_8
; X86-X87-NEXT: # %bb.7:
; X86-X87-NEXT: movl %edi, %eax
; X86-X87-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl %ecx, %esi
; X86-X87-NEXT: .LBB38_8:
; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: movl $7, %edi
; X86-X87-NEXT: ja .LBB38_10
; X86-X87-NEXT: # %bb.9:
; X86-X87-NEXT: movl %ebx, %edi
; X86-X87-NEXT: .LBB38_10:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %eax
; X86-X87-NEXT: movl $0, %ebp
; X86-X87-NEXT: movl $0, %ebx
; X86-X87-NEXT: jp .LBB38_12
; X86-X87-NEXT: # %bb.11:
; X86-X87-NEXT: movl %edi, %edx
; X86-X87-NEXT: movl %esi, %eax
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-X87-NEXT: .LBB38_12:
; X86-X87-NEXT: movl %ebx, 8(%ecx)
; X86-X87-NEXT: movl %ebp, 4(%ecx)
; X86-X87-NEXT: movl %eax, (%ecx)
; X86-X87-NEXT: andl $15, %edx
; X86-X87-NEXT: movb %dl, 12(%ecx)
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $60, %esp
; X86-X87-NEXT: popl %esi
; X86-X87-NEXT: popl %edi
; X86-X87-NEXT: popl %ebx
; X86-X87-NEXT: popl %ebp
; X86-X87-NEXT: retl $4
;
; X86-SSE-LABEL: test_signed_i100_f80:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %ebp
; X86-SSE-NEXT: pushl %ebx
; X86-SSE-NEXT: pushl %edi
; X86-SSE-NEXT: pushl %esi
; X86-SSE-NEXT: subl $44, %esp
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE-NEXT: fldt {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fld %st(0)
; X86-SSE-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; X86-SSE-NEXT: fstpt {{[0-9]+}}(%esp)
; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl %eax, (%esp)
; X86-SSE-NEXT: calll __fixxfti
; X86-SSE-NEXT: subl $4, %esp
; X86-SSE-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; X86-SSE-NEXT: xorl %ebp, %ebp
; X86-SSE-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-SSE-NEXT: fxch %st(1)
; X86-SSE-NEXT: fucomi %st(1), %st
; X86-SSE-NEXT: fstp %st(1)
; X86-SSE-NEXT: movl $-8, %ebx
; X86-SSE-NEXT: movl $0, %ecx
; X86-SSE-NEXT: movl $0, %edx
; X86-SSE-NEXT: movl $0, %edi
; X86-SSE-NEXT: jb .LBB38_2
; X86-SSE-NEXT: # %bb.1:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-SSE-NEXT: .LBB38_2:
; X86-SSE-NEXT: fldt {{\.LCPI[0-9]+_[0-9]+}}
Add intrinsics for saturating float to int casts This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ The intrinsics have overloaded source and result type and support vector operands: i32 @llvm.fptoui.sat.i32.f32(float %f) i100 @llvm.fptoui.sat.i100.f64(double %f) <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f) // etc On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example: i19 @llvm.fptsi.sat.i19.f32(float %f) // builds i19 fp_to_sint_sat f, 19 // type legalizes (through integer result promotion) i32 fp_to_sint_sat f, 19 I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width. There are two baseline expansions for the fp_to_xint_sat opcodes. 
If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like: f = fmaxnum f, FP(MIN) f = fminnum f, FP(MAX) i = fptoxi f i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN If the bounds cannot be exactly represented, we expand to something like this instead: i = fptoxi f i = select f ult FP(MIN), MIN, i i = select f ogt FP(MAX), MAX, i i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN It should be noted that this expansion assumes a non-trapping fptoxi. Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening. Original patch by @nikic and @ebevhan (based on D54696). Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; X86-SSE-NEXT: fxch %st(1)
; X86-SSE-NEXT: fucomi %st(1), %st
; X86-SSE-NEXT: fstp %st(1)
; X86-SSE-NEXT: movl $-1, %eax
; X86-SSE-NEXT: cmoval %eax, %edi
; X86-SSE-NEXT: cmoval %eax, %edx
; X86-SSE-NEXT: cmoval %eax, %ecx
; X86-SSE-NEXT: movl $7, %eax
; X86-SSE-NEXT: cmovbel %ebx, %eax
; X86-SSE-NEXT: fucompi %st(0), %st
; X86-SSE-NEXT: cmovpl %ebp, %eax
; X86-SSE-NEXT: cmovpl %ebp, %ecx
; X86-SSE-NEXT: cmovpl %ebp, %edx
; X86-SSE-NEXT: cmovpl %ebp, %edi
; X86-SSE-NEXT: movl %edi, 8(%esi)
; X86-SSE-NEXT: movl %edx, 4(%esi)
; X86-SSE-NEXT: movl %ecx, (%esi)
; X86-SSE-NEXT: andl $15, %eax
; X86-SSE-NEXT: movb %al, 12(%esi)
; X86-SSE-NEXT: movl %esi, %eax
; X86-SSE-NEXT: addl $44, %esp
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: popl %edi
; X86-SSE-NEXT: popl %ebx
; X86-SSE-NEXT: popl %ebp
; X86-SSE-NEXT: retl $4
;
; X64-LABEL: test_signed_i100_f80:
; X64: # %bb.0:
; X64-NEXT: subq $40, %rsp
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fld %st(0)
; X64-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill
; X64-NEXT: fstpt (%rsp)
; X64-NEXT: callq __fixxfti@PLT
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: flds {{.*}}(%rip)
; X64-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload
; X64-NEXT: fucomi %st(1), %st
; X64-NEXT: fstp %st(1)
; X64-NEXT: cmovbq %rcx, %rax
; X64-NEXT: movabsq $-34359738368, %rsi # imm = 0xFFFFFFF800000000
; X64-NEXT: cmovbq %rsi, %rdx
; X64-NEXT: fldt {{.*}}(%rip)
; X64-NEXT: fxch %st(1)
; X64-NEXT: fucomi %st(1), %st
; X64-NEXT: fstp %st(1)
; X64-NEXT: movabsq $34359738367, %rsi # imm = 0x7FFFFFFFF
; X64-NEXT: cmovaq %rsi, %rdx
; X64-NEXT: movq $-1, %rsi
; X64-NEXT: cmovaq %rsi, %rax
; X64-NEXT: fucompi %st(0), %st
; X64-NEXT: cmovpq %rcx, %rax
; X64-NEXT: cmovpq %rcx, %rdx
; X64-NEXT: addq $40, %rsp
; X64-NEXT: retq
%x = call i100 @llvm.fptosi.sat.i100.f80(x86_fp80 %f)
ret i100 %x
}
; Signed saturating conversion from x86_fp80 to i128 via llvm.fptosi.sat.
; In each check block below the expected lowering calls __fixxfti and then
; patches the returned words with three compare-and-select steps (cmov or
; branches) on the original input:
;   - compare against a single-precision constant; "below" selects the
;     minimum pattern (top word 0x80000000..., remaining words zero),
;   - compare against an x86_fp80 constant; "above" selects the maximum
;     pattern (top word 0x7FFF..., remaining words all-ones),
;   - self-compare; an unordered (parity) result zeroes every word.
; Review note - the check lines look autogenerated (update_llc_test_checks.py
; style); regenerate instead of hand-editing, after confirming the file header.
define i128 @test_signed_i128_f80(x86_fp80 %f) nounwind {
; X86-X87-LABEL: test_signed_i128_f80:
; X86-X87: # %bb.0:
; X86-X87-NEXT: pushl %ebp
; X86-X87-NEXT: pushl %ebx
; X86-X87-NEXT: pushl %edi
; X86-X87-NEXT: pushl %esi
; X86-X87-NEXT: subl $60, %esp
; X86-X87-NEXT: fldt {{[0-9]+}}(%esp)
; X86-X87-NEXT: fld %st(0)
; X86-X87-NEXT: fstpt {{[0-9]+}}(%esp)
; X86-X87-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: movl %eax, (%esp)
; X86-X87-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fld %st(1)
; X86-X87-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; X86-X87-NEXT: fxch %st(1)
; X86-X87-NEXT: fucompp
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: movl %eax, %ebx
; X86-X87-NEXT: calll __fixxfti
; X86-X87-NEXT: subl $4, %esp
; X86-X87-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT: movb %bh, %ah
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %eax
; X86-X87-NEXT: movl $0, %ebx
; X86-X87-NEXT: jae .LBB39_1
; X86-X87-NEXT: # %bb.2:
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: jae .LBB39_3
; X86-X87-NEXT: .LBB39_4:
; X86-X87-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
; X86-X87-NEXT: jb .LBB39_6
; X86-X87-NEXT: .LBB39_5:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: .LBB39_6:
; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: fldt {{\.LCPI[0-9]+_[0-9]+}}
; X86-X87-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; X86-X87-NEXT: fucom %st(1)
; X86-X87-NEXT: fstp %st(1)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
; X86-X87-NEXT: ja .LBB39_8
; X86-X87-NEXT: # %bb.7:
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: .LBB39_8:
; X86-X87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-X87-NEXT: movl $-1, %ebp
; X86-X87-NEXT: movl $-1, %edi
; X86-X87-NEXT: movl $-1, %esi
; X86-X87-NEXT: ja .LBB39_10
; X86-X87-NEXT: # %bb.9:
; X86-X87-NEXT: movl %edx, %ebp
; X86-X87-NEXT: movl %ebx, %edi
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-X87-NEXT: .LBB39_10:
; X86-X87-NEXT: fucomp %st(0)
; X86-X87-NEXT: fnstsw %ax
; X86-X87-NEXT: # kill: def $ah killed $ah killed $ax
; X86-X87-NEXT: sahf
; X86-X87-NEXT: movl $0, %eax
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: movl $0, %ebx
; X86-X87-NEXT: jp .LBB39_12
; X86-X87-NEXT: # %bb.11:
; X86-X87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-X87-NEXT: movl %edi, %eax
; X86-X87-NEXT: movl %ebp, %edx
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-X87-NEXT: .LBB39_12:
; X86-X87-NEXT: movl %ebx, 12(%ecx)
; X86-X87-NEXT: movl %edx, 8(%ecx)
; X86-X87-NEXT: movl %eax, 4(%ecx)
; X86-X87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-X87-NEXT: movl %eax, (%ecx)
; X86-X87-NEXT: movl %ecx, %eax
; X86-X87-NEXT: addl $60, %esp
; X86-X87-NEXT: popl %esi
; X86-X87-NEXT: popl %edi
; X86-X87-NEXT: popl %ebx
; X86-X87-NEXT: popl %ebp
; X86-X87-NEXT: retl $4
; X86-X87-NEXT: .LBB39_1:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-X87-NEXT: movl $0, %edx
; X86-X87-NEXT: jb .LBB39_4
; X86-X87-NEXT: .LBB39_3:
; X86-X87-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-X87-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
; X86-X87-NEXT: jae .LBB39_5
; X86-X87-NEXT: jmp .LBB39_6
;
; X86-SSE-LABEL: test_signed_i128_f80:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %ebp
; X86-SSE-NEXT: pushl %ebx
; X86-SSE-NEXT: pushl %edi
; X86-SSE-NEXT: pushl %esi
; X86-SSE-NEXT: subl $44, %esp
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SSE-NEXT: fldt {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fld %st(0)
; X86-SSE-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; X86-SSE-NEXT: fstpt {{[0-9]+}}(%esp)
; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl %eax, (%esp)
; X86-SSE-NEXT: calll __fixxfti
; X86-SSE-NEXT: subl $4, %esp
; X86-SSE-NEXT: xorl %ecx, %ecx
; X86-SSE-NEXT: flds {{\.LCPI[0-9]+_[0-9]+}}
; X86-SSE-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; X86-SSE-NEXT: fucomi %st(1), %st
; X86-SSE-NEXT: fstp %st(1)
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: cmovbl %ecx, %eax
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE-NEXT: cmovbl %ecx, %edx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-SSE-NEXT: cmovbl %ecx, %edi
; X86-SSE-NEXT: movl $-2147483648, %ebx # imm = 0x80000000
; X86-SSE-NEXT: cmovael {{[0-9]+}}(%esp), %ebx
; X86-SSE-NEXT: fldt {{\.LCPI[0-9]+_[0-9]+}}
; X86-SSE-NEXT: fxch %st(1)
; X86-SSE-NEXT: fucomi %st(1), %st
; X86-SSE-NEXT: fstp %st(1)
; X86-SSE-NEXT: movl $2147483647, %ebp # imm = 0x7FFFFFFF
; X86-SSE-NEXT: cmovbel %ebx, %ebp
; X86-SSE-NEXT: movl $-1, %ebx
; X86-SSE-NEXT: cmoval %ebx, %edi
; X86-SSE-NEXT: cmoval %ebx, %edx
; X86-SSE-NEXT: cmoval %ebx, %eax
; X86-SSE-NEXT: fucompi %st(0), %st
; X86-SSE-NEXT: cmovpl %ecx, %eax
; X86-SSE-NEXT: cmovpl %ecx, %edx
; X86-SSE-NEXT: cmovpl %ecx, %edi
; X86-SSE-NEXT: cmovpl %ecx, %ebp
; X86-SSE-NEXT: movl %ebp, 12(%esi)
; X86-SSE-NEXT: movl %edi, 8(%esi)
; X86-SSE-NEXT: movl %edx, 4(%esi)
; X86-SSE-NEXT: movl %eax, (%esi)
; X86-SSE-NEXT: movl %esi, %eax
; X86-SSE-NEXT: addl $44, %esp
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: popl %edi
; X86-SSE-NEXT: popl %ebx
; X86-SSE-NEXT: popl %ebp
; X86-SSE-NEXT: retl $4
;
; X64-LABEL: test_signed_i128_f80:
; X64: # %bb.0:
; X64-NEXT: subq $40, %rsp
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fld %st(0)
; X64-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill
; X64-NEXT: fstpt (%rsp)
; X64-NEXT: callq __fixxfti@PLT
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: flds {{.*}}(%rip)
; X64-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload
; X64-NEXT: fucomi %st(1), %st
; X64-NEXT: fstp %st(1)
; X64-NEXT: cmovbq %rcx, %rax
; X64-NEXT: movabsq $-9223372036854775808, %rsi # imm = 0x8000000000000000
; X64-NEXT: cmovbq %rsi, %rdx
; X64-NEXT: fldt {{.*}}(%rip)
; X64-NEXT: fxch %st(1)
; X64-NEXT: fucomi %st(1), %st
; X64-NEXT: fstp %st(1)
; X64-NEXT: movabsq $9223372036854775807, %rsi # imm = 0x7FFFFFFFFFFFFFFF
; X64-NEXT: cmovaq %rsi, %rdx
; X64-NEXT: movq $-1, %rsi
; X64-NEXT: cmovaq %rsi, %rax
; X64-NEXT: fucompi %st(0), %st
; X64-NEXT: cmovpq %rcx, %rax
; X64-NEXT: cmovpq %rcx, %rdx
; X64-NEXT: addq $40, %rsp
; X64-NEXT: retq
%x = call i128 @llvm.fptosi.sat.i128.f80(x86_fp80 %f)
ret i128 %x
}