Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
|
|
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
|
|
|
|
|
|
|
|
;
|
|
|
|
; Float to unsigned 32-bit -- Vector size variation
|
|
|
|
;
|
|
|
|
|
|
|
|
declare <1 x i32> @llvm.fptoui.sat.v1f32.v1i32 (<1 x float>)
|
|
|
|
declare <2 x i32> @llvm.fptoui.sat.v2f32.v2i32 (<2 x float>)
|
|
|
|
declare <3 x i32> @llvm.fptoui.sat.v3f32.v3i32 (<3 x float>)
|
|
|
|
declare <4 x i32> @llvm.fptoui.sat.v4f32.v4i32 (<4 x float>)
|
|
|
|
declare <5 x i32> @llvm.fptoui.sat.v5f32.v5i32 (<5 x float>)
|
|
|
|
declare <6 x i32> @llvm.fptoui.sat.v6f32.v6i32 (<6 x float>)
|
|
|
|
declare <7 x i32> @llvm.fptoui.sat.v7f32.v7i32 (<7 x float>)
|
|
|
|
declare <8 x i32> @llvm.fptoui.sat.v8f32.v8i32 (<8 x float>)
|
|
|
|
|
|
|
|
define <1 x i32> @test_unsigned_v1f32_v1i32(<1 x float> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v1f32_v1i32:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w8, s0
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: mov s1, v0.s[1]
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fmov s0, w8
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w8, s1
|
|
|
|
; CHECK-NEXT: mov v0.s[1], w8
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <1 x i32> @llvm.fptoui.sat.v1f32.v1i32(<1 x float> %f)
|
|
|
|
ret <1 x i32> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <2 x i32> @test_unsigned_v2f32_v2i32(<2 x float> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v2f32_v2i32:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w8, s0
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: mov s1, v0.s[1]
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fmov s0, w8
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w8, s1
|
|
|
|
; CHECK-NEXT: mov v0.s[1], w8
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <2 x i32> @llvm.fptoui.sat.v2f32.v2i32(<2 x float> %f)
|
|
|
|
ret <2 x i32> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <3 x i32> @test_unsigned_v3f32_v3i32(<3 x float> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v3f32_v3i32:
|
|
|
|
; CHECK: // %bb.0:
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w8, s0
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: mov s1, v0.s[1]
|
|
|
|
; CHECK-NEXT: mov s2, v0.s[2]
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: mov s3, v0.s[3]
|
|
|
|
; CHECK-NEXT: fmov s0, w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s1
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w9, s2
|
|
|
|
; CHECK-NEXT: mov v0.s[1], w8
|
|
|
|
; CHECK-NEXT: mov v0.s[2], w9
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w8, s3
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: mov v0.s[3], w8
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <3 x i32> @llvm.fptoui.sat.v3f32.v3i32(<3 x float> %f)
|
|
|
|
ret <3 x i32> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @test_unsigned_v4f32_v4i32(<4 x float> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v4f32_v4i32:
|
|
|
|
; CHECK: // %bb.0:
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w8, s0
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: mov s1, v0.s[1]
|
|
|
|
; CHECK-NEXT: mov s2, v0.s[2]
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: mov s3, v0.s[3]
|
|
|
|
; CHECK-NEXT: fmov s0, w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s1
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w9, s2
|
|
|
|
; CHECK-NEXT: mov v0.s[1], w8
|
|
|
|
; CHECK-NEXT: mov v0.s[2], w9
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w8, s3
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: mov v0.s[3], w8
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <4 x i32> @llvm.fptoui.sat.v4f32.v4i32(<4 x float> %f)
|
|
|
|
ret <4 x i32> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <5 x i32> @test_unsigned_v5f32_v5i32(<5 x float> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v5f32_v5i32:
|
|
|
|
; CHECK: // %bb.0:
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w0, s0
|
|
|
|
; CHECK-NEXT: fcvtzu w1, s1
|
|
|
|
; CHECK-NEXT: fcvtzu w2, s2
|
|
|
|
; CHECK-NEXT: fcvtzu w3, s3
|
|
|
|
; CHECK-NEXT: fcvtzu w4, s4
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <5 x i32> @llvm.fptoui.sat.v5f32.v5i32(<5 x float> %f)
|
|
|
|
ret <5 x i32> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <6 x i32> @test_unsigned_v6f32_v6i32(<6 x float> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v6f32_v6i32:
|
|
|
|
; CHECK: // %bb.0:
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w8, s4
|
|
|
|
; CHECK-NEXT: fcvtzu w5, s5
|
|
|
|
; CHECK-NEXT: fcvtzu w0, s0
|
|
|
|
; CHECK-NEXT: fmov s0, w8
|
|
|
|
; CHECK-NEXT: mov v0.s[1], w5
|
|
|
|
; CHECK-NEXT: fcvtzu w1, s1
|
|
|
|
; CHECK-NEXT: fcvtzu w2, s2
|
|
|
|
; CHECK-NEXT: fcvtzu w3, s3
|
|
|
|
; CHECK-NEXT: fmov w4, s0
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <6 x i32> @llvm.fptoui.sat.v6f32.v6i32(<6 x float> %f)
|
|
|
|
ret <6 x i32> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <7 x i32> @test_unsigned_v7f32_v7i32(<7 x float> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v7f32_v7i32:
|
|
|
|
; CHECK: // %bb.0:
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w8, s4
|
|
|
|
; CHECK-NEXT: fcvtzu w9, s5
|
|
|
|
; CHECK-NEXT: fcvtzu w0, s0
|
|
|
|
; CHECK-NEXT: fmov s0, w8
|
|
|
|
; CHECK-NEXT: fcvtzu w6, s6
|
|
|
|
; CHECK-NEXT: mov v0.s[1], w9
|
|
|
|
; CHECK-NEXT: mov v0.s[2], w6
|
|
|
|
; CHECK-NEXT: fcvtzu w1, s1
|
|
|
|
; CHECK-NEXT: fcvtzu w2, s2
|
|
|
|
; CHECK-NEXT: fcvtzu w3, s3
|
|
|
|
; CHECK-NEXT: mov w5, v0.s[1]
|
|
|
|
; CHECK-NEXT: fmov w4, s0
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <7 x i32> @llvm.fptoui.sat.v7f32.v7i32(<7 x float> %f)
|
|
|
|
ret <7 x i32> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <8 x i32> @test_unsigned_v8f32_v8i32(<8 x float> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v8f32_v8i32:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: mov s2, v0.s[1]
|
|
|
|
; CHECK-NEXT: mov s3, v0.s[2]
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w8, s0
|
|
|
|
; CHECK-NEXT: fcvtzu w9, s1
|
|
|
|
; CHECK-NEXT: fcvtzu w10, s2
|
|
|
|
; CHECK-NEXT: mov s2, v1.s[1]
|
|
|
|
; CHECK-NEXT: fcvtzu w11, s3
|
|
|
|
; CHECK-NEXT: mov s3, v1.s[2]
|
|
|
|
; CHECK-NEXT: fcvtzu w12, s2
|
|
|
|
; CHECK-NEXT: fmov s2, w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s3
|
|
|
|
; CHECK-NEXT: fmov s3, w9
|
|
|
|
; CHECK-NEXT: mov v2.s[1], w10
|
|
|
|
; CHECK-NEXT: mov v3.s[1], w12
|
|
|
|
; CHECK-NEXT: mov s0, v0.s[3]
|
|
|
|
; CHECK-NEXT: mov v2.s[2], w11
|
|
|
|
; CHECK-NEXT: mov s1, v1.s[3]
|
|
|
|
; CHECK-NEXT: mov v3.s[2], w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s0
|
|
|
|
; CHECK-NEXT: mov v2.s[3], w8
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w8, s1
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: mov v3.s[3], w8
|
|
|
|
; CHECK-NEXT: mov v0.16b, v2.16b
|
|
|
|
; CHECK-NEXT: mov v1.16b, v3.16b
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <8 x i32> @llvm.fptoui.sat.v8f32.v8i32(<8 x float> %f)
|
|
|
|
ret <8 x i32> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
;
|
|
|
|
; Double to unsigned 32-bit -- Vector size variation
|
|
|
|
;
|
|
|
|
|
|
|
|
; Declarations of the saturating unsigned float-to-int intrinsic used by the
; tests in this section, one per vector width from 1 to 6 lanes. Each takes a
; vector of double and returns a vector of i32 with the same lane count.
declare <1 x i32> @llvm.fptoui.sat.v1f64.v1i32 (<1 x double>)
|
|
|
|
declare <2 x i32> @llvm.fptoui.sat.v2f64.v2i32 (<2 x double>)
|
|
|
|
declare <3 x i32> @llvm.fptoui.sat.v3f64.v3i32 (<3 x double>)
|
|
|
|
declare <4 x i32> @llvm.fptoui.sat.v4f64.v4i32 (<4 x double>)
|
|
|
|
declare <5 x i32> @llvm.fptoui.sat.v5f64.v5i32 (<5 x double>)
|
|
|
|
declare <6 x i32> @llvm.fptoui.sat.v6f64.v6i32 (<6 x double>)
|
|
|
|
|
|
|
|
; Single-lane case: passes the <1 x double> argument to the saturating
; unsigned conversion intrinsic and returns the <1 x i32> result.
; The expected output is one scalar fcvtzu from d0 into w8, followed by an
; fmov that places the integer result in s0.
define <1 x i32> @test_unsigned_v1f64_v1i32(<1 x double> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v1f64_v1i32:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: fcvtzu w8, d0
|
|
|
|
; CHECK-NEXT: fmov s0, w8
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <1 x i32> @llvm.fptoui.sat.v1f64.v1i32(<1 x double> %f)
|
|
|
|
ret <1 x i32> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <2 x i32> @test_unsigned_v2f64_v2i32(<2 x double> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v2f64_v2i32:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: fcvtzu w8, d0
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: mov d1, v0.d[1]
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fmov s0, w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, d1
|
|
|
|
; CHECK-NEXT: mov v0.s[1], w8
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <2 x i32> @llvm.fptoui.sat.v2f64.v2i32(<2 x double> %f)
|
|
|
|
ret <2 x i32> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <3 x i32> @test_unsigned_v3f64_v3i32(<3 x double> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v3f64_v3i32:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: fcvtzu w8, d0
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w9, d1
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fmov s0, w8
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w10, d2
|
|
|
|
; CHECK-NEXT: mov v0.s[1], w9
|
|
|
|
; CHECK-NEXT: mov v0.s[2], w10
|
|
|
|
; CHECK-NEXT: fcvtzu w8, d0
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: mov v0.s[3], w8
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <3 x i32> @llvm.fptoui.sat.v3f64.v3i32(<3 x double> %f)
|
|
|
|
ret <3 x i32> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @test_unsigned_v4f64_v4i32(<4 x double> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v4f64_v4i32:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: fcvtzu w8, d0
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: mov d2, v0.d[1]
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fmov s0, w8
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w8, d2
|
|
|
|
; CHECK-NEXT: fcvtzu w9, d1
|
|
|
|
; CHECK-NEXT: mov d1, v1.d[1]
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: mov v0.s[1], w8
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: mov v0.s[2], w9
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w8, d1
|
|
|
|
; CHECK-NEXT: mov v0.s[3], w8
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <4 x i32> @llvm.fptoui.sat.v4f64.v4i32(<4 x double> %f)
|
|
|
|
ret <4 x i32> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <5 x i32> @test_unsigned_v5f64_v5i32(<5 x double> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v5f64_v5i32:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: fcvtzu w0, d0
|
|
|
|
; CHECK-NEXT: fcvtzu w1, d1
|
|
|
|
; CHECK-NEXT: fcvtzu w2, d2
|
|
|
|
; CHECK-NEXT: fcvtzu w3, d3
|
|
|
|
; CHECK-NEXT: fcvtzu w4, d4
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <5 x i32> @llvm.fptoui.sat.v5f64.v5i32(<5 x double> %f)
|
|
|
|
ret <5 x i32> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <6 x i32> @test_unsigned_v6f64_v6i32(<6 x double> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v6f64_v6i32:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: fcvtzu w0, d0
|
|
|
|
; CHECK-NEXT: fcvtzu w1, d1
|
|
|
|
; CHECK-NEXT: fcvtzu w2, d2
|
|
|
|
; CHECK-NEXT: fcvtzu w3, d3
|
|
|
|
; CHECK-NEXT: fcvtzu w4, d4
|
|
|
|
; CHECK-NEXT: fcvtzu w5, d5
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <6 x i32> @llvm.fptoui.sat.v6f64.v6i32(<6 x double> %f)
|
|
|
|
ret <6 x i32> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
;
|
|
|
|
; FP128 to unsigned 32-bit -- Vector size variation
|
|
|
|
;
|
|
|
|
|
|
|
|
declare <1 x i32> @llvm.fptoui.sat.v1f128.v1i32 (<1 x fp128>)
|
|
|
|
declare <2 x i32> @llvm.fptoui.sat.v2f128.v2i32 (<2 x fp128>)
|
|
|
|
declare <3 x i32> @llvm.fptoui.sat.v3f128.v3i32 (<3 x fp128>)
|
|
|
|
declare <4 x i32> @llvm.fptoui.sat.v4f128.v4i32 (<4 x fp128>)
|
|
|
|
|
|
|
|
define <1 x i32> @test_unsigned_v1f128_v1i32(<1 x fp128> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v1f128_v1i32:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: sub sp, sp, #32 // =32
|
|
|
|
; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 32
|
|
|
|
; CHECK-NEXT: .cfi_offset w19, -8
|
|
|
|
; CHECK-NEXT: .cfi_offset w30, -16
|
|
|
|
; CHECK-NEXT: adrp x8, .LCPI14_0
|
|
|
|
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_0]
|
|
|
|
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: bl __getf2
|
|
|
|
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: mov w19, w0
|
|
|
|
; CHECK-NEXT: bl __fixunstfsi
|
|
|
|
; CHECK-NEXT: adrp x8, .LCPI14_1
|
|
|
|
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_1]
|
|
|
|
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: cmp w19, #0 // =0
|
|
|
|
; CHECK-NEXT: csel w19, wzr, w0, lt
|
|
|
|
; CHECK-NEXT: bl __gttf2
|
|
|
|
; CHECK-NEXT: cmp w0, #0 // =0
|
|
|
|
; CHECK-NEXT: csinv w8, w19, wzr, le
|
|
|
|
; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: fmov s0, w8
|
|
|
|
; CHECK-NEXT: add sp, sp, #32 // =32
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <1 x i32> @llvm.fptoui.sat.v1f128.v1i32(<1 x fp128> %f)
|
|
|
|
ret <1 x i32> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <2 x i32> @test_unsigned_v2f128_v2i32(<2 x fp128> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v2f128_v2i32:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: sub sp, sp, #96 // =96
|
|
|
|
; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
|
|
|
|
; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 96
|
|
|
|
; CHECK-NEXT: .cfi_offset w19, -8
|
|
|
|
; CHECK-NEXT: .cfi_offset w20, -16
|
|
|
|
; CHECK-NEXT: .cfi_offset w30, -32
|
|
|
|
; CHECK-NEXT: adrp x8, .LCPI15_0
|
|
|
|
; CHECK-NEXT: mov v2.16b, v1.16b
|
|
|
|
; CHECK-NEXT: stp q1, q0, [sp, #32] // 32-byte Folded Spill
|
|
|
|
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0]
|
|
|
|
; CHECK-NEXT: mov v0.16b, v2.16b
|
|
|
|
; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: bl __getf2
|
|
|
|
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: mov w19, w0
|
|
|
|
; CHECK-NEXT: bl __fixunstfsi
|
|
|
|
; CHECK-NEXT: adrp x8, .LCPI15_1
|
|
|
|
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_1]
|
|
|
|
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: cmp w19, #0 // =0
|
|
|
|
; CHECK-NEXT: csel w19, wzr, w0, lt
|
|
|
|
; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: bl __gttf2
|
|
|
|
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: cmp w0, #0 // =0
|
|
|
|
; CHECK-NEXT: csinv w20, w19, wzr, le
|
|
|
|
; CHECK-NEXT: bl __getf2
|
|
|
|
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: mov w19, w0
|
|
|
|
; CHECK-NEXT: bl __fixunstfsi
|
|
|
|
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: cmp w19, #0 // =0
|
|
|
|
; CHECK-NEXT: csel w19, wzr, w0, lt
|
|
|
|
; CHECK-NEXT: bl __gttf2
|
|
|
|
; CHECK-NEXT: cmp w0, #0 // =0
|
|
|
|
; CHECK-NEXT: csinv w8, w19, wzr, le
|
|
|
|
; CHECK-NEXT: fmov s0, w8
|
|
|
|
; CHECK-NEXT: mov v0.s[1], w20
|
|
|
|
; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
|
|
|
; CHECK-NEXT: add sp, sp, #96 // =96
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <2 x i32> @llvm.fptoui.sat.v2f128.v2i32(<2 x fp128> %f)
|
|
|
|
ret <2 x i32> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v3f128_v3i32:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: sub sp, sp, #112 // =112
|
|
|
|
; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
|
|
|
|
; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 112
|
|
|
|
; CHECK-NEXT: .cfi_offset w19, -8
|
|
|
|
; CHECK-NEXT: .cfi_offset w20, -16
|
|
|
|
; CHECK-NEXT: .cfi_offset w30, -32
|
|
|
|
; CHECK-NEXT: adrp x8, .LCPI16_0
|
|
|
|
; CHECK-NEXT: stp q0, q2, [sp, #48] // 32-byte Folded Spill
|
|
|
|
; CHECK-NEXT: mov v2.16b, v1.16b
|
|
|
|
; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0]
|
|
|
|
; CHECK-NEXT: mov v0.16b, v2.16b
|
|
|
|
; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: bl __getf2
|
|
|
|
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: mov w19, w0
|
|
|
|
; CHECK-NEXT: bl __fixunstfsi
|
|
|
|
; CHECK-NEXT: adrp x8, .LCPI16_1
|
|
|
|
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_1]
|
|
|
|
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: cmp w19, #0 // =0
|
|
|
|
; CHECK-NEXT: csel w19, wzr, w0, lt
|
|
|
|
; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: bl __gttf2
|
|
|
|
; CHECK-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload
|
|
|
|
; CHECK-NEXT: cmp w0, #0 // =0
|
|
|
|
; CHECK-NEXT: csinv w20, w19, wzr, le
|
|
|
|
; CHECK-NEXT: bl __getf2
|
|
|
|
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: mov w19, w0
|
|
|
|
; CHECK-NEXT: bl __fixunstfsi
|
|
|
|
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: cmp w19, #0 // =0
|
|
|
|
; CHECK-NEXT: csel w19, wzr, w0, lt
|
|
|
|
; CHECK-NEXT: bl __gttf2
|
|
|
|
; CHECK-NEXT: cmp w0, #0 // =0
|
|
|
|
; CHECK-NEXT: csinv w8, w19, wzr, le
|
|
|
|
; CHECK-NEXT: fmov s0, w8
|
|
|
|
; CHECK-NEXT: mov v0.s[1], w20
|
|
|
|
; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: bl __getf2
|
|
|
|
; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: mov w19, w0
|
|
|
|
; CHECK-NEXT: bl __fixunstfsi
|
|
|
|
; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: cmp w19, #0 // =0
|
|
|
|
; CHECK-NEXT: csel w19, wzr, w0, lt
|
|
|
|
; CHECK-NEXT: bl __gttf2
|
|
|
|
; CHECK-NEXT: cmp w0, #0 // =0
|
|
|
|
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: csinv w8, w19, wzr, le
|
|
|
|
; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
|
|
|
|
; CHECK-NEXT: mov v0.s[2], w8
|
|
|
|
; CHECK-NEXT: add sp, sp, #112 // =112
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <3 x i32> @llvm.fptoui.sat.v3f128.v3i32(<3 x fp128> %f)
|
|
|
|
ret <3 x i32> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @test_unsigned_v4f128_v4i32(<4 x fp128> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v4f128_v4i32:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: sub sp, sp, #128 // =128
|
|
|
|
; CHECK-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
|
|
|
|
; CHECK-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 128
|
|
|
|
; CHECK-NEXT: .cfi_offset w19, -8
|
|
|
|
; CHECK-NEXT: .cfi_offset w20, -16
|
|
|
|
; CHECK-NEXT: .cfi_offset w30, -32
|
|
|
|
; CHECK-NEXT: adrp x8, .LCPI17_0
|
|
|
|
; CHECK-NEXT: stp q0, q2, [sp, #16] // 32-byte Folded Spill
|
|
|
|
; CHECK-NEXT: mov v2.16b, v1.16b
|
|
|
|
; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0]
|
|
|
|
; CHECK-NEXT: mov v0.16b, v2.16b
|
|
|
|
; CHECK-NEXT: str q3, [sp, #80] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: str q1, [sp, #64] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: bl __getf2
|
|
|
|
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: mov w19, w0
|
|
|
|
; CHECK-NEXT: bl __fixunstfsi
|
|
|
|
; CHECK-NEXT: adrp x8, .LCPI17_1
|
|
|
|
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_1]
|
|
|
|
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: cmp w19, #0 // =0
|
|
|
|
; CHECK-NEXT: csel w19, wzr, w0, lt
|
|
|
|
; CHECK-NEXT: str q1, [sp, #48] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: bl __gttf2
|
|
|
|
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: cmp w0, #0 // =0
|
|
|
|
; CHECK-NEXT: csinv w20, w19, wzr, le
|
|
|
|
; CHECK-NEXT: bl __getf2
|
|
|
|
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: mov w19, w0
|
|
|
|
; CHECK-NEXT: bl __fixunstfsi
|
|
|
|
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: cmp w19, #0 // =0
|
|
|
|
; CHECK-NEXT: csel w19, wzr, w0, lt
|
|
|
|
; CHECK-NEXT: bl __gttf2
|
|
|
|
; CHECK-NEXT: cmp w0, #0 // =0
|
|
|
|
; CHECK-NEXT: csinv w8, w19, wzr, le
|
|
|
|
; CHECK-NEXT: fmov s0, w8
|
|
|
|
; CHECK-NEXT: mov v0.s[1], w20
|
|
|
|
; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: bl __getf2
|
|
|
|
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: mov w19, w0
|
|
|
|
; CHECK-NEXT: bl __fixunstfsi
|
|
|
|
; CHECK-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload
|
|
|
|
; CHECK-NEXT: cmp w19, #0 // =0
|
|
|
|
; CHECK-NEXT: csel w19, wzr, w0, lt
|
|
|
|
; CHECK-NEXT: bl __gttf2
|
|
|
|
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: cmp w0, #0 // =0
|
|
|
|
; CHECK-NEXT: csinv w8, w19, wzr, le
|
|
|
|
; CHECK-NEXT: mov v0.s[2], w8
|
|
|
|
; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: ldp q1, q0, [sp, #64] // 32-byte Folded Reload
|
|
|
|
; CHECK-NEXT: bl __getf2
|
|
|
|
; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: mov w19, w0
|
|
|
|
; CHECK-NEXT: bl __fixunstfsi
|
|
|
|
; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: cmp w19, #0 // =0
|
|
|
|
; CHECK-NEXT: csel w19, wzr, w0, lt
|
|
|
|
; CHECK-NEXT: bl __gttf2
|
|
|
|
; CHECK-NEXT: cmp w0, #0 // =0
|
|
|
|
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: csinv w8, w19, wzr, le
|
|
|
|
; CHECK-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
|
|
|
|
; CHECK-NEXT: mov v0.s[3], w8
|
|
|
|
; CHECK-NEXT: add sp, sp, #128 // =128
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <4 x i32> @llvm.fptoui.sat.v4f128.v4i32(<4 x fp128> %f)
|
|
|
|
ret <4 x i32> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
;
|
|
|
|
; FP16 to unsigned 32-bit -- Vector size variation
|
|
|
|
;
|
|
|
|
|
|
|
|
declare <1 x i32> @llvm.fptoui.sat.v1f16.v1i32 (<1 x half>)
|
|
|
|
declare <2 x i32> @llvm.fptoui.sat.v2f16.v2i32 (<2 x half>)
|
|
|
|
declare <3 x i32> @llvm.fptoui.sat.v3f16.v3i32 (<3 x half>)
|
|
|
|
declare <4 x i32> @llvm.fptoui.sat.v4f16.v4i32 (<4 x half>)
|
|
|
|
declare <5 x i32> @llvm.fptoui.sat.v5f16.v5i32 (<5 x half>)
|
|
|
|
declare <6 x i32> @llvm.fptoui.sat.v6f16.v6i32 (<6 x half>)
|
|
|
|
declare <7 x i32> @llvm.fptoui.sat.v7f16.v7i32 (<7 x half>)
|
|
|
|
declare <8 x i32> @llvm.fptoui.sat.v8f16.v8i32 (<8 x half>)
|
|
|
|
|
|
|
|
define <1 x i32> @test_unsigned_v1f16_v1i32(<1 x half> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v1f16_v1i32:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: fcvt s0, h0
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w8, s0
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fmov s0, w8
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <1 x i32> @llvm.fptoui.sat.v1f16.v1i32(<1 x half> %f)
|
|
|
|
ret <1 x i32> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <2 x i32> @test_unsigned_v2f16_v2i32(<2 x half> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v2f16_v2i32:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvt s1, h0
|
|
|
|
; CHECK-NEXT: mov h0, v0.h[1]
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s1
|
|
|
|
; CHECK-NEXT: fcvt s1, h0
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fmov s0, w8
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w8, s1
|
|
|
|
; CHECK-NEXT: mov v0.s[1], w8
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <2 x i32> @llvm.fptoui.sat.v2f16.v2i32(<2 x half> %f)
|
|
|
|
ret <2 x i32> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <3 x i32> @test_unsigned_v3f16_v3i32(<3 x half> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v3f16_v3i32:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvt s1, h0
|
|
|
|
; CHECK-NEXT: mov h2, v0.h[1]
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w8, s1
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvt s2, h2
|
|
|
|
; CHECK-NEXT: fmov s1, w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s2
|
|
|
|
; CHECK-NEXT: mov h2, v0.h[2]
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: mov h0, v0.h[3]
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvt s2, h2
|
|
|
|
; CHECK-NEXT: fcvt s0, h0
|
|
|
|
; CHECK-NEXT: mov v1.s[1], w8
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w8, s2
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: mov v1.s[2], w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s0
|
|
|
|
; CHECK-NEXT: mov v1.s[3], w8
|
|
|
|
; CHECK-NEXT: mov v0.16b, v1.16b
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <3 x i32> @llvm.fptoui.sat.v3f16.v3i32(<3 x half> %f)
|
|
|
|
ret <3 x i32> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @test_unsigned_v4f16_v4i32(<4 x half> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v4f16_v4i32:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvt s1, h0
|
|
|
|
; CHECK-NEXT: mov h2, v0.h[1]
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w8, s1
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvt s2, h2
|
|
|
|
; CHECK-NEXT: fmov s1, w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s2
|
|
|
|
; CHECK-NEXT: mov h2, v0.h[2]
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: mov h0, v0.h[3]
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvt s2, h2
|
|
|
|
; CHECK-NEXT: fcvt s0, h0
|
|
|
|
; CHECK-NEXT: mov v1.s[1], w8
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w8, s2
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: mov v1.s[2], w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s0
|
|
|
|
; CHECK-NEXT: mov v1.s[3], w8
|
|
|
|
; CHECK-NEXT: mov v0.16b, v1.16b
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <4 x i32> @llvm.fptoui.sat.v4f16.v4i32(<4 x half> %f)
|
|
|
|
ret <4 x i32> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <5 x i32> @test_unsigned_v5f16_v5i32(<5 x half> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v5f16_v5i32:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: fcvt s1, h0
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w0, s1
|
|
|
|
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
|
|
|
|
; CHECK-NEXT: fcvt s1, h1
|
|
|
|
; CHECK-NEXT: fcvtzu w4, s1
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: mov h1, v0.h[1]
|
|
|
|
; CHECK-NEXT: fcvt s1, h1
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w1, s1
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: mov h1, v0.h[2]
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: mov h0, v0.h[3]
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fcvt s1, h1
|
|
|
|
; CHECK-NEXT: fcvt s0, h0
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w2, s1
|
|
|
|
; CHECK-NEXT: fcvtzu w3, s0
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <5 x i32> @llvm.fptoui.sat.v5f16.v5i32(<5 x half> %f)
|
|
|
|
ret <5 x i32> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <6 x i32> @test_unsigned_v6f16_v6i32(<6 x half> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v6f16_v6i32:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
|
|
|
|
; CHECK-NEXT: fcvt s2, h0
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w0, s2
|
|
|
|
; CHECK-NEXT: fcvt s2, h1
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s2
|
|
|
|
; CHECK-NEXT: mov h2, v0.h[1]
|
|
|
|
; CHECK-NEXT: fcvt s2, h2
|
|
|
|
; CHECK-NEXT: fcvtzu w1, s2
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: mov h2, v0.h[2]
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: mov h1, v1.h[1]
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fcvt s2, h2
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvt s1, h1
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: mov h0, v0.h[3]
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w2, s2
|
|
|
|
; CHECK-NEXT: fmov s2, w8
|
|
|
|
; CHECK-NEXT: fcvtzu w5, s1
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fcvt s0, h0
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: mov v2.s[1], w5
|
|
|
|
; CHECK-NEXT: fcvtzu w3, s0
|
|
|
|
; CHECK-NEXT: fmov w4, s2
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <6 x i32> @llvm.fptoui.sat.v6f16.v6i32(<6 x half> %f)
|
|
|
|
ret <6 x i32> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <7 x i32> @test_unsigned_v7f16_v7i32(<7 x half> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v7f16_v7i32:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
|
|
|
|
; CHECK-NEXT: fcvt s2, h0
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: mov h3, v0.h[1]
|
|
|
|
; CHECK-NEXT: fcvtzu w0, s2
|
|
|
|
; CHECK-NEXT: fcvt s2, h1
|
|
|
|
; CHECK-NEXT: fcvt s3, h3
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s2
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: mov h2, v0.h[2]
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w1, s3
|
|
|
|
; CHECK-NEXT: mov h3, v1.h[1]
|
|
|
|
; CHECK-NEXT: mov h1, v1.h[2]
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fcvt s2, h2
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvt s3, h3
|
|
|
|
; CHECK-NEXT: fcvt s1, h1
|
|
|
|
; CHECK-NEXT: fcvtzu w2, s2
|
|
|
|
; CHECK-NEXT: fmov s2, w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s3
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: mov h0, v0.h[3]
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w6, s1
|
|
|
|
; CHECK-NEXT: mov v2.s[1], w8
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fcvt s0, h0
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: mov v2.s[2], w6
|
|
|
|
; CHECK-NEXT: fcvtzu w3, s0
|
|
|
|
; CHECK-NEXT: mov w5, v2.s[1]
|
|
|
|
; CHECK-NEXT: fmov w4, s2
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <7 x i32> @llvm.fptoui.sat.v7f16.v7i32(<7 x half> %f)
|
|
|
|
ret <7 x i32> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <8 x i32> @test_unsigned_v8f16_v8i32(<8 x half> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v8f16_v8i32:
|
|
|
|
; CHECK: // %bb.0:
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: ext v3.16b, v0.16b, v0.16b, #8
|
|
|
|
; CHECK-NEXT: fcvt s1, h0
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w8, s1
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvt s1, h3
|
|
|
|
; CHECK-NEXT: mov h2, v0.h[1]
|
|
|
|
; CHECK-NEXT: fcvtzu w9, s1
|
|
|
|
; CHECK-NEXT: mov h1, v0.h[2]
|
|
|
|
; CHECK-NEXT: fcvt s2, h2
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fcvt s1, h1
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w10, s2
|
|
|
|
; CHECK-NEXT: mov h2, v3.h[1]
|
|
|
|
; CHECK-NEXT: fcvtzu w11, s1
|
|
|
|
; CHECK-NEXT: mov h1, v3.h[2]
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fcvt s2, h2
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvt s1, h1
|
|
|
|
; CHECK-NEXT: fcvtzu w12, s2
|
|
|
|
; CHECK-NEXT: fmov s2, w8
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w8, s1
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fmov s1, w9
|
|
|
|
; CHECK-NEXT: mov h3, v3.h[3]
|
|
|
|
; CHECK-NEXT: mov v1.s[1], w12
|
|
|
|
; CHECK-NEXT: mov h0, v0.h[3]
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fcvt s3, h3
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: mov v2.s[1], w10
|
|
|
|
; CHECK-NEXT: fcvt s0, h0
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: mov v1.s[2], w8
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w8, s3
|
|
|
|
; CHECK-NEXT: mov v2.s[2], w11
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: mov v1.s[3], w8
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w8, s0
|
|
|
|
; CHECK-NEXT: mov v2.s[3], w8
|
|
|
|
; CHECK-NEXT: mov v0.16b, v2.16b
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <8 x i32> @llvm.fptoui.sat.v8f16.v8i32(<8 x half> %f)
|
|
|
|
ret <8 x i32> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
;
|
|
|
|
; 2-Vector float to unsigned integer -- result size variation
|
|
|
|
;
|
|
|
|
|
|
|
|
; Declarations of the saturating float-to-unsigned-integer intrinsic taking a
; <2 x float> operand, one overload per result element width
; (i1, i8, i13, i16, i19, i50, i64, i100, i128).
; NOTE(review): no v2i32 overload is declared in this group, yet
; test_unsigned_v2f32_v2i32_duplicate calls @llvm.fptoui.sat.v2f32.v2i32 --
; presumably it is declared earlier in the file; confirm.
; NOTE(review): the name suffixes here are ordered source-then-result
; (v2f32.v2iN), while the examples in the commit description use
; result-then-source (e.g. llvm.fptoui.sat.i32.f32); confirm the intended
; mangling.
declare <2 x i1> @llvm.fptoui.sat.v2f32.v2i1 (<2 x float>)
|
|
|
|
declare <2 x i8> @llvm.fptoui.sat.v2f32.v2i8 (<2 x float>)
|
|
|
|
declare <2 x i13> @llvm.fptoui.sat.v2f32.v2i13 (<2 x float>)
|
|
|
|
declare <2 x i16> @llvm.fptoui.sat.v2f32.v2i16 (<2 x float>)
|
|
|
|
declare <2 x i19> @llvm.fptoui.sat.v2f32.v2i19 (<2 x float>)
|
|
|
|
declare <2 x i50> @llvm.fptoui.sat.v2f32.v2i50 (<2 x float>)
|
|
|
|
declare <2 x i64> @llvm.fptoui.sat.v2f32.v2i64 (<2 x float>)
|
|
|
|
declare <2 x i100> @llvm.fptoui.sat.v2f32.v2i100(<2 x float>)
|
|
|
|
declare <2 x i128> @llvm.fptoui.sat.v2f32.v2i128(<2 x float>)
|
|
|
|
|
|
|
|
define <2 x i1> @test_unsigned_v2f32_v2i1(<2 x float> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v2f32_v2i1:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
2021-04-06 15:53:42 +08:00
|
|
|
; CHECK-NEXT: movi d1, #0000000000000000
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fmov s2, #1.00000000
|
|
|
|
; CHECK-NEXT: mov s3, v0.s[1]
|
|
|
|
; CHECK-NEXT: fmaxnm s0, s0, s1
|
|
|
|
; CHECK-NEXT: fmaxnm s1, s3, s1
|
|
|
|
; CHECK-NEXT: fminnm s0, s0, s2
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s0
|
|
|
|
; CHECK-NEXT: fminnm s1, s1, s2
|
|
|
|
; CHECK-NEXT: fmov s0, w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s1
|
|
|
|
; CHECK-NEXT: mov v0.s[1], w8
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <2 x i1> @llvm.fptoui.sat.v2f32.v2i1(<2 x float> %f)
|
|
|
|
ret <2 x i1> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <2 x i8> @test_unsigned_v2f32_v2i8(<2 x float> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v2f32_v2i8:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
2021-04-06 15:53:42 +08:00
|
|
|
; CHECK-NEXT: movi d1, #0000000000000000
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: mov w8, #1132396544
|
|
|
|
; CHECK-NEXT: mov s2, v0.s[1]
|
|
|
|
; CHECK-NEXT: fmaxnm s0, s0, s1
|
|
|
|
; CHECK-NEXT: fmov s3, w8
|
|
|
|
; CHECK-NEXT: fmaxnm s1, s2, s1
|
|
|
|
; CHECK-NEXT: fminnm s0, s0, s3
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s0
|
|
|
|
; CHECK-NEXT: fminnm s1, s1, s3
|
|
|
|
; CHECK-NEXT: fmov s0, w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s1
|
|
|
|
; CHECK-NEXT: mov v0.s[1], w8
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <2 x i8> @llvm.fptoui.sat.v2f32.v2i8(<2 x float> %f)
|
|
|
|
ret <2 x i8> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <2 x i13> @test_unsigned_v2f32_v2i13(<2 x float> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v2f32_v2i13:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: mov w8, #63488
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
2021-04-06 15:53:42 +08:00
|
|
|
; CHECK-NEXT: movi d1, #0000000000000000
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: movk w8, #17919, lsl #16
|
|
|
|
; CHECK-NEXT: mov s2, v0.s[1]
|
|
|
|
; CHECK-NEXT: fmaxnm s0, s0, s1
|
|
|
|
; CHECK-NEXT: fmov s3, w8
|
|
|
|
; CHECK-NEXT: fmaxnm s1, s2, s1
|
|
|
|
; CHECK-NEXT: fminnm s0, s0, s3
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s0
|
|
|
|
; CHECK-NEXT: fminnm s1, s1, s3
|
|
|
|
; CHECK-NEXT: fmov s0, w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s1
|
|
|
|
; CHECK-NEXT: mov v0.s[1], w8
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <2 x i13> @llvm.fptoui.sat.v2f32.v2i13(<2 x float> %f)
|
|
|
|
ret <2 x i13> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <2 x i16> @test_unsigned_v2f32_v2i16(<2 x float> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v2f32_v2i16:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: mov w8, #65280
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
2021-04-06 15:53:42 +08:00
|
|
|
; CHECK-NEXT: movi d1, #0000000000000000
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: movk w8, #18303, lsl #16
|
|
|
|
; CHECK-NEXT: mov s2, v0.s[1]
|
|
|
|
; CHECK-NEXT: fmaxnm s0, s0, s1
|
|
|
|
; CHECK-NEXT: fmov s3, w8
|
|
|
|
; CHECK-NEXT: fmaxnm s1, s2, s1
|
|
|
|
; CHECK-NEXT: fminnm s0, s0, s3
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s0
|
|
|
|
; CHECK-NEXT: fminnm s1, s1, s3
|
|
|
|
; CHECK-NEXT: fmov s0, w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s1
|
|
|
|
; CHECK-NEXT: mov v0.s[1], w8
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <2 x i16> @llvm.fptoui.sat.v2f32.v2i16(<2 x float> %f)
|
|
|
|
ret <2 x i16> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <2 x i19> @test_unsigned_v2f32_v2i19(<2 x float> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v2f32_v2i19:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: mov w8, #65504
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
2021-04-06 15:53:42 +08:00
|
|
|
; CHECK-NEXT: movi d1, #0000000000000000
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: movk w8, #18687, lsl #16
|
|
|
|
; CHECK-NEXT: mov s2, v0.s[1]
|
|
|
|
; CHECK-NEXT: fmaxnm s0, s0, s1
|
|
|
|
; CHECK-NEXT: fmov s3, w8
|
|
|
|
; CHECK-NEXT: fmaxnm s1, s2, s1
|
|
|
|
; CHECK-NEXT: fminnm s0, s0, s3
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s0
|
|
|
|
; CHECK-NEXT: fminnm s1, s1, s3
|
|
|
|
; CHECK-NEXT: fmov s0, w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s1
|
|
|
|
; CHECK-NEXT: mov v0.s[1], w8
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <2 x i19> @llvm.fptoui.sat.v2f32.v2i19(<2 x float> %f)
|
|
|
|
ret <2 x i19> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <2 x i32> @test_unsigned_v2f32_v2i32_duplicate(<2 x float> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v2f32_v2i32_duplicate:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w8, s0
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: mov s1, v0.s[1]
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fmov s0, w8
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w8, s1
|
|
|
|
; CHECK-NEXT: mov v0.s[1], w8
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <2 x i32> @llvm.fptoui.sat.v2f32.v2i32(<2 x float> %f)
|
|
|
|
ret <2 x i32> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
; Saturating unsigned conversion of <2 x float> to <2 x i50>.
; The expected AArch64 output handles each lane separately: it converts the
; lane with fcvtzu into a 64-bit register, selects zero (xzr) when the
; "fcmp lane, #0.0" result satisfies lt, and selects 1125899906842623
; (2^50 - 1, the largest i50 value) when the lane compares greater than the
; float whose bit pattern is 1484783615 (0x587FFFFF). The two results are then
; packed into the d[0] and d[1] elements of v0.
define <2 x i50> @test_unsigned_v2f32_v2i50(<2 x float> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v2f32_v2i50:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
|
|
|
; CHECK-NEXT: mov s1, v0.s[1]
|
|
|
|
; CHECK-NEXT: mov w8, #1484783615
|
|
|
|
; CHECK-NEXT: fmov s2, w8
|
|
|
|
; CHECK-NEXT: fcvtzu x8, s1
|
|
|
|
; CHECK-NEXT: fcmp s1, #0.0
|
|
|
|
; CHECK-NEXT: mov x9, #1125899906842623
|
|
|
|
; CHECK-NEXT: csel x8, xzr, x8, lt
|
|
|
|
; CHECK-NEXT: fcmp s1, s2
|
|
|
|
; CHECK-NEXT: fcvtzu x10, s0
|
|
|
|
; CHECK-NEXT: csel x8, x9, x8, gt
|
|
|
|
; CHECK-NEXT: fcmp s0, #0.0
|
|
|
|
; CHECK-NEXT: csel x10, xzr, x10, lt
|
|
|
|
; CHECK-NEXT: fcmp s0, s2
|
|
|
|
; CHECK-NEXT: csel x9, x9, x10, gt
|
|
|
|
; CHECK-NEXT: fmov d0, x9
|
|
|
|
; CHECK-NEXT: mov v0.d[1], x8
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <2 x i50> @llvm.fptoui.sat.v2f32.v2i50(<2 x float> %f)
|
|
|
|
ret <2 x i50> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <2 x i64> @test_unsigned_v2f32_v2i64(<2 x float> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v2f32_v2i64:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvtzu x8, s0
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: mov s1, v0.s[1]
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fmov d0, x8
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptsi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fcvtzu x8, s1
|
|
|
|
; CHECK-NEXT: mov v0.d[1], x8
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <2 x i64> @llvm.fptoui.sat.v2f32.v2i64(<2 x float> %f)
|
|
|
|
ret <2 x i64> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
; Saturating unsigned conversion <2 x float> -> <2 x i100>.
; The result is wider than 64 bits, so each lane is expected to be converted
; through a call to __fixunssfti, followed by fcmp/csel/csinv sequences that
; select zero, the libcall result, or the saturated maximum.
define <2 x i100> @test_unsigned_v2f32_v2i100(<2 x float> %f) {
; CHECK-LABEL: test_unsigned_v2f32_v2i100:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #64 // =64
; CHECK-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x21, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w30, -32
; CHECK-NEXT: .cfi_offset b8, -40
; CHECK-NEXT: .cfi_offset b9, -48
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov s8, v0.s[1]
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: mov w8, #1904214015
; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: fmov s9, w8
; CHECK-NEXT: mov x21, #68719476735
; CHECK-NEXT: csel x9, xzr, x0, lt
; CHECK-NEXT: csel x10, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: csel x19, x21, x10, gt
; CHECK-NEXT: csinv x20, x9, xzr, le
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov x2, x20
; CHECK-NEXT: mov x3, x19
; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s0, s9
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: csel x1, x21, x9, gt
; CHECK-NEXT: ldp x30, x21, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: add sp, sp, #64 // =64
; CHECK-NEXT: ret
  %sat = call <2 x i100> @llvm.fptoui.sat.v2f32.v2i100(<2 x float> %f)
  ret <2 x i100> %sat
}
|
|
|
|
|
|
|
|
; Saturating unsigned conversion <2 x float> -> <2 x i128>.
; As in the i100 case, each lane is expected to go through __fixunssfti; the
; fcmp/csel/csinv sequences afterwards pick zero, the libcall result, or
; all-ones for both 64-bit halves.
define <2 x i128> @test_unsigned_v2f32_v2i128(<2 x float> %f) {
; CHECK-LABEL: test_unsigned_v2f32_v2i128:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #64 // =64
; CHECK-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w30, -32
; CHECK-NEXT: .cfi_offset b8, -40
; CHECK-NEXT: .cfi_offset b9, -48
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov s8, v0.s[1]
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: mov w8, #2139095039
; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: fmov s9, w8
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: csel x10, xzr, x0, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: csinv x19, x10, xzr, le
; CHECK-NEXT: csinv x20, x9, xzr, le
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov x2, x19
; CHECK-NEXT: mov x3, x20
; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s0, s9
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: csinv x1, x9, xzr, le
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: add sp, sp, #64 // =64
; CHECK-NEXT: ret
  %sat = call <2 x i128> @llvm.fptoui.sat.v2f32.v2i128(<2 x float> %f)
  ret <2 x i128> %sat
}
|
|
|
|
|
|
|
|
;
|
|
|
|
; 2-Vector double to unsigned integer -- result size variation
|
|
|
|
;
|
|
|
|
|
|
|
|
; Intrinsic declarations for the <2 x double> source tests that follow, one per
; result element width.
; NOTE(review): the v2i32 variant called by test_unsigned_v2f64_v2i32_duplicate
; is not declared in this group -- presumably it is declared earlier in the
; file; confirm.
declare <2 x i1> @llvm.fptoui.sat.v2f64.v2i1(<2 x double>)
declare <2 x i8> @llvm.fptoui.sat.v2f64.v2i8(<2 x double>)
declare <2 x i13> @llvm.fptoui.sat.v2f64.v2i13(<2 x double>)
declare <2 x i16> @llvm.fptoui.sat.v2f64.v2i16(<2 x double>)
declare <2 x i19> @llvm.fptoui.sat.v2f64.v2i19(<2 x double>)
declare <2 x i50> @llvm.fptoui.sat.v2f64.v2i50(<2 x double>)
declare <2 x i64> @llvm.fptoui.sat.v2f64.v2i64(<2 x double>)
declare <2 x i100> @llvm.fptoui.sat.v2f64.v2i100(<2 x double>)
declare <2 x i128> @llvm.fptoui.sat.v2f64.v2i128(<2 x double>)
|
|
|
|
|
|
|
|
; Saturating unsigned conversion <2 x double> -> <2 x i1>.
; Each lane is expected to be clamped in floating point first (fmaxnm against
; zero, fminnm against 1.0) and then converted with a 32-bit fcvtzu.
define <2 x i1> @test_unsigned_v2f64_v2i1(<2 x double> %f) {
; CHECK-LABEL: test_unsigned_v2f64_v2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: fmov d2, #1.00000000
; CHECK-NEXT: mov d3, v0.d[1]
; CHECK-NEXT: fmaxnm d0, d0, d1
; CHECK-NEXT: fmaxnm d1, d3, d1
; CHECK-NEXT: fminnm d0, d0, d2
; CHECK-NEXT: fcvtzu w8, d0
; CHECK-NEXT: fminnm d1, d1, d2
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fcvtzu w8, d1
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %sat = call <2 x i1> @llvm.fptoui.sat.v2f64.v2i1(<2 x double> %f)
  ret <2 x i1> %sat
}
|
|
|
|
|
|
|
|
; Saturating unsigned conversion <2 x double> -> <2 x i8>.
; Each lane is expected to be clamped with fmaxnm against zero and fminnm
; against a constant materialised with mov/movk, then converted with a 32-bit
; fcvtzu.
define <2 x i8> @test_unsigned_v2f64_v2i8(<2 x double> %f) {
; CHECK-LABEL: test_unsigned_v2f64_v2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #246290604621824
; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: movk x8, #16495, lsl #48
; CHECK-NEXT: mov d2, v0.d[1]
; CHECK-NEXT: fmaxnm d0, d0, d1
; CHECK-NEXT: fmov d3, x8
; CHECK-NEXT: fmaxnm d1, d2, d1
; CHECK-NEXT: fminnm d0, d0, d3
; CHECK-NEXT: fcvtzu w8, d0
; CHECK-NEXT: fminnm d1, d1, d3
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fcvtzu w8, d1
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %sat = call <2 x i8> @llvm.fptoui.sat.v2f64.v2i8(<2 x double> %f)
  ret <2 x i8> %sat
}
|
|
|
|
|
|
|
|
; Saturating unsigned conversion <2 x double> -> <2 x i13> (non-power-of-two
; result width).
; Each lane is expected to be clamped with fmaxnm against zero and fminnm
; against a constant materialised with mov/movk, then converted with a 32-bit
; fcvtzu.
define <2 x i13> @test_unsigned_v2f64_v2i13(<2 x double> %f) {
; CHECK-LABEL: test_unsigned_v2f64_v2i13:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #280375465082880
; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: movk x8, #16575, lsl #48
; CHECK-NEXT: mov d2, v0.d[1]
; CHECK-NEXT: fmaxnm d0, d0, d1
; CHECK-NEXT: fmov d3, x8
; CHECK-NEXT: fmaxnm d1, d2, d1
; CHECK-NEXT: fminnm d0, d0, d3
; CHECK-NEXT: fcvtzu w8, d0
; CHECK-NEXT: fminnm d1, d1, d3
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fcvtzu w8, d1
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %sat = call <2 x i13> @llvm.fptoui.sat.v2f64.v2i13(<2 x double> %f)
  ret <2 x i13> %sat
}
|
|
|
|
|
|
|
|
; Saturating unsigned conversion <2 x double> -> <2 x i16>.
; Each lane is expected to be clamped with fmaxnm against zero and fminnm
; against a constant materialised with mov/movk, then converted with a 32-bit
; fcvtzu.
define <2 x i16> @test_unsigned_v2f64_v2i16(<2 x double> %f) {
; CHECK-LABEL: test_unsigned_v2f64_v2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #281337537757184
; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: movk x8, #16623, lsl #48
; CHECK-NEXT: mov d2, v0.d[1]
; CHECK-NEXT: fmaxnm d0, d0, d1
; CHECK-NEXT: fmov d3, x8
; CHECK-NEXT: fmaxnm d1, d2, d1
; CHECK-NEXT: fminnm d0, d0, d3
; CHECK-NEXT: fcvtzu w8, d0
; CHECK-NEXT: fminnm d1, d1, d3
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fcvtzu w8, d1
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %sat = call <2 x i16> @llvm.fptoui.sat.v2f64.v2i16(<2 x double> %f)
  ret <2 x i16> %sat
}
|
|
|
|
|
|
|
|
; Saturating unsigned conversion <2 x double> -> <2 x i19> (non-power-of-two
; result width).
; Each lane is expected to be clamped with fmaxnm against zero and fminnm
; against a constant materialised with mov/movk, then converted with a 32-bit
; fcvtzu.
define <2 x i19> @test_unsigned_v2f64_v2i19(<2 x double> %f) {
; CHECK-LABEL: test_unsigned_v2f64_v2i19:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #281457796841472
; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: movk x8, #16671, lsl #48
; CHECK-NEXT: mov d2, v0.d[1]
; CHECK-NEXT: fmaxnm d0, d0, d1
; CHECK-NEXT: fmov d3, x8
; CHECK-NEXT: fmaxnm d1, d2, d1
; CHECK-NEXT: fminnm d0, d0, d3
; CHECK-NEXT: fcvtzu w8, d0
; CHECK-NEXT: fminnm d1, d1, d3
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fcvtzu w8, d1
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %sat = call <2 x i19> @llvm.fptoui.sat.v2f64.v2i19(<2 x double> %f)
  ret <2 x i19> %sat
}
|
|
|
|
|
|
|
|
; Saturating unsigned conversion <2 x double> -> <2 x i32>.
; The expected code converts each lane with a 32-bit fcvtzu and rebuilds the
; vector; no separate clamp instructions are expected.
define <2 x i32> @test_unsigned_v2f64_v2i32_duplicate(<2 x double> %f) {
; CHECK-LABEL: test_unsigned_v2f64_v2i32_duplicate:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu w8, d0
; CHECK-NEXT: mov d1, v0.d[1]
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fcvtzu w8, d1
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %sat = call <2 x i32> @llvm.fptoui.sat.v2f64.v2i32(<2 x double> %f)
  ret <2 x i32> %sat
}
|
|
|
|
|
|
|
|
; Saturating unsigned conversion <2 x double> -> <2 x i50>.
; Each lane is expected to be clamped with fmaxnm against zero and fminnm
; against a constant materialised with mov/movk, then converted with a 64-bit
; fcvtzu.
define <2 x i50> @test_unsigned_v2f64_v2i50(<2 x double> %f) {
; CHECK-LABEL: test_unsigned_v2f64_v2i50:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #-8
; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: movk x8, #17167, lsl #48
; CHECK-NEXT: mov d2, v0.d[1]
; CHECK-NEXT: fmaxnm d0, d0, d1
; CHECK-NEXT: fmov d3, x8
; CHECK-NEXT: fmaxnm d1, d2, d1
; CHECK-NEXT: fminnm d0, d0, d3
; CHECK-NEXT: fcvtzu x8, d0
; CHECK-NEXT: fminnm d1, d1, d3
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: fcvtzu x8, d1
; CHECK-NEXT: mov v0.d[1], x8
; CHECK-NEXT: ret
  %sat = call <2 x i50> @llvm.fptoui.sat.v2f64.v2i50(<2 x double> %f)
  ret <2 x i50> %sat
}
|
|
|
|
|
|
|
|
define <2 x i64> @test_unsigned_v2f64_v2i64(<2 x double> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v2f64_v2i64:
|
|
|
|
; CHECK: // %bb.0:
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvtzu x8, d0
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: mov d1, v0.d[1]
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fmov d0, x8
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fcvtzu x8, d1
|
|
|
|
; CHECK-NEXT: mov v0.d[1], x8
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <2 x i64> @llvm.fptoui.sat.v2f64.v2i64(<2 x double> %f)
|
|
|
|
ret <2 x i64> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
; Unsigned saturating conversion of <2 x double> to <2 x i100>.
; The expected AArch64 output calls __fixunsdfti once per lane. After each
; call the x0/x1 results go through fcmp + csel/csinv sequences: one compare
; against #0.0 and one against the constant moved into d9.
; NOTE(review): x21 holds 68719476735 (2^36 - 1), presumably the all-ones
; upper 36 bits of the i100 maximum -- confirm.
define <2 x i100> @test_unsigned_v2f64_v2i100(<2 x double> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v2f64_v2i100:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: sub sp, sp, #64 // =64
|
|
|
|
; CHECK-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: stp x30, x21, [sp, #32] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 64
|
|
|
|
; CHECK-NEXT: .cfi_offset w19, -8
|
|
|
|
; CHECK-NEXT: .cfi_offset w20, -16
|
|
|
|
; CHECK-NEXT: .cfi_offset w21, -24
|
|
|
|
; CHECK-NEXT: .cfi_offset w30, -32
|
|
|
|
; CHECK-NEXT: .cfi_offset b8, -40
|
|
|
|
; CHECK-NEXT: .cfi_offset b9, -48
|
|
|
|
; CHECK-NEXT: mov d8, v0.d[1]
|
|
|
|
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: mov v0.16b, v8.16b
|
|
|
|
; CHECK-NEXT: bl __fixunsdfti
|
|
|
|
; CHECK-NEXT: mov x8, #5057542381537067007
|
|
|
|
; CHECK-NEXT: fcmp d8, #0.0
|
|
|
|
; CHECK-NEXT: fmov d9, x8
|
|
|
|
; CHECK-NEXT: mov x21, #68719476735
|
|
|
|
; CHECK-NEXT: csel x9, xzr, x0, lt
|
|
|
|
; CHECK-NEXT: csel x10, xzr, x1, lt
|
|
|
|
; CHECK-NEXT: fcmp d8, d9
|
|
|
|
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: csel x19, x21, x10, gt
|
|
|
|
; CHECK-NEXT: csinv x20, x9, xzr, le
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
|
|
|
; CHECK-NEXT: bl __fixunsdfti
|
|
|
|
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: mov x2, x20
|
|
|
|
; CHECK-NEXT: mov x3, x19
|
|
|
|
; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: fcmp d0, #0.0
|
|
|
|
; CHECK-NEXT: csel x8, xzr, x0, lt
|
|
|
|
; CHECK-NEXT: csel x9, xzr, x1, lt
|
|
|
|
; CHECK-NEXT: fcmp d0, d9
|
|
|
|
; CHECK-NEXT: csinv x8, x8, xzr, le
|
|
|
|
; CHECK-NEXT: csel x1, x21, x9, gt
|
|
|
|
; CHECK-NEXT: ldp x30, x21, [sp, #32] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: fmov d0, x8
|
|
|
|
; CHECK-NEXT: mov v0.d[1], x1
|
|
|
|
; CHECK-NEXT: fmov x0, d0
|
|
|
|
; CHECK-NEXT: add sp, sp, #64 // =64
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <2 x i100> @llvm.fptoui.sat.v2f64.v2i100(<2 x double> %f)
|
|
|
|
ret <2 x i100> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
; Unsigned saturating conversion of <2 x double> to <2 x i128>.
; The expected AArch64 output calls __fixunsdfti once per lane. After each
; call both result registers (x0 and x1) go through a csel on the compare
; against #0.0 and a csinv on the compare against the constant moved into d9.
define <2 x i128> @test_unsigned_v2f64_v2i128(<2 x double> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v2f64_v2i128:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: sub sp, sp, #64 // =64
|
|
|
|
; CHECK-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
|
|
|
|
; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 64
|
|
|
|
; CHECK-NEXT: .cfi_offset w19, -8
|
|
|
|
; CHECK-NEXT: .cfi_offset w20, -16
|
|
|
|
; CHECK-NEXT: .cfi_offset w30, -32
|
|
|
|
; CHECK-NEXT: .cfi_offset b8, -40
|
|
|
|
; CHECK-NEXT: .cfi_offset b9, -48
|
|
|
|
; CHECK-NEXT: mov d8, v0.d[1]
|
|
|
|
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: mov v0.16b, v8.16b
|
|
|
|
; CHECK-NEXT: bl __fixunsdfti
|
|
|
|
; CHECK-NEXT: mov x8, #5183643171103440895
|
|
|
|
; CHECK-NEXT: fcmp d8, #0.0
|
|
|
|
; CHECK-NEXT: fmov d9, x8
|
|
|
|
; CHECK-NEXT: csel x9, xzr, x1, lt
|
|
|
|
; CHECK-NEXT: csel x10, xzr, x0, lt
|
|
|
|
; CHECK-NEXT: fcmp d8, d9
|
|
|
|
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: csinv x19, x10, xzr, le
|
|
|
|
; CHECK-NEXT: csinv x20, x9, xzr, le
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
|
|
|
; CHECK-NEXT: bl __fixunsdfti
|
|
|
|
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: mov x2, x19
|
|
|
|
; CHECK-NEXT: mov x3, x20
|
|
|
|
; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: fcmp d0, #0.0
|
|
|
|
; CHECK-NEXT: csel x8, xzr, x0, lt
|
|
|
|
; CHECK-NEXT: csel x9, xzr, x1, lt
|
|
|
|
; CHECK-NEXT: fcmp d0, d9
|
|
|
|
; CHECK-NEXT: csinv x8, x8, xzr, le
|
|
|
|
; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
|
|
|
|
; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: csinv x1, x9, xzr, le
|
|
|
|
; CHECK-NEXT: fmov d0, x8
|
|
|
|
; CHECK-NEXT: mov v0.d[1], x1
|
|
|
|
; CHECK-NEXT: fmov x0, d0
|
|
|
|
; CHECK-NEXT: add sp, sp, #64 // =64
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <2 x i128> @llvm.fptoui.sat.v2f64.v2i128(<2 x double> %f)
|
|
|
|
ret <2 x i128> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
;
|
|
|
|
; 4-Vector half to unsigned integer -- result size variation
|
|
|
|
;
|
|
|
|
|
|
|
|
; Declarations of the unsigned saturating conversion intrinsic taking
; <4 x half>, one per result element width used in this section
; (i1, i8, i13, i16, i19, i50, i64, i100, i128).
; NOTE(review): the name suffixes below are written source type first
; (.v4f16.v4iN), whereas the examples in the commit description put the
; result type first (e.g. llvm.fptoui.sat.i32.f32) -- confirm which order
; is the intended mangling.
declare <4 x i1> @llvm.fptoui.sat.v4f16.v4i1 (<4 x half>)
|
|
|
|
declare <4 x i8> @llvm.fptoui.sat.v4f16.v4i8 (<4 x half>)
|
|
|
|
declare <4 x i13> @llvm.fptoui.sat.v4f16.v4i13 (<4 x half>)
|
|
|
|
declare <4 x i16> @llvm.fptoui.sat.v4f16.v4i16 (<4 x half>)
|
|
|
|
declare <4 x i19> @llvm.fptoui.sat.v4f16.v4i19 (<4 x half>)
|
|
|
|
declare <4 x i50> @llvm.fptoui.sat.v4f16.v4i50 (<4 x half>)
|
|
|
|
declare <4 x i64> @llvm.fptoui.sat.v4f16.v4i64 (<4 x half>)
|
|
|
|
declare <4 x i100> @llvm.fptoui.sat.v4f16.v4i100(<4 x half>)
|
|
|
|
declare <4 x i128> @llvm.fptoui.sat.v4f16.v4i128(<4 x half>)
|
|
|
|
|
|
|
|
define <4 x i1> @test_unsigned_v4f16_v4i1(<4 x half> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v4f16_v4i1:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
|
|
|
; CHECK-NEXT: fcvt s1, h0
|
|
|
|
; CHECK-NEXT: mov h3, v0.h[1]
|
|
|
|
; CHECK-NEXT: mov h4, v0.h[2]
|
|
|
|
; CHECK-NEXT: mov h0, v0.h[3]
|
2021-04-06 15:53:42 +08:00
|
|
|
; CHECK-NEXT: movi d2, #0000000000000000
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fcvt s3, h3
|
|
|
|
; CHECK-NEXT: fcvt s4, h4
|
|
|
|
; CHECK-NEXT: fcvt s0, h0
|
|
|
|
; CHECK-NEXT: fmaxnm s1, s1, s2
|
|
|
|
; CHECK-NEXT: fmaxnm s3, s3, s2
|
|
|
|
; CHECK-NEXT: fmaxnm s4, s4, s2
|
|
|
|
; CHECK-NEXT: fmaxnm s0, s0, s2
|
|
|
|
; CHECK-NEXT: fmov s2, #1.00000000
|
|
|
|
; CHECK-NEXT: fminnm s1, s1, s2
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s1
|
|
|
|
; CHECK-NEXT: fminnm s1, s3, s2
|
|
|
|
; CHECK-NEXT: fminnm s3, s4, s2
|
|
|
|
; CHECK-NEXT: fminnm s2, s0, s2
|
|
|
|
; CHECK-NEXT: fmov s0, w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s1
|
|
|
|
; CHECK-NEXT: mov v0.h[1], w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s3
|
|
|
|
; CHECK-NEXT: mov v0.h[2], w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s2
|
|
|
|
; CHECK-NEXT: mov v0.h[3], w8
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <4 x i1> @llvm.fptoui.sat.v4f16.v4i1(<4 x half> %f)
|
|
|
|
ret <4 x i1> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i8> @test_unsigned_v4f16_v4i8(<4 x half> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v4f16_v4i8:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
|
|
|
; CHECK-NEXT: fcvt s1, h0
|
|
|
|
; CHECK-NEXT: mov h3, v0.h[1]
|
|
|
|
; CHECK-NEXT: mov h4, v0.h[2]
|
|
|
|
; CHECK-NEXT: mov h0, v0.h[3]
|
2021-04-06 15:53:42 +08:00
|
|
|
; CHECK-NEXT: movi d2, #0000000000000000
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: mov w8, #1132396544
|
|
|
|
; CHECK-NEXT: fcvt s3, h3
|
|
|
|
; CHECK-NEXT: fcvt s4, h4
|
|
|
|
; CHECK-NEXT: fcvt s0, h0
|
|
|
|
; CHECK-NEXT: fmaxnm s1, s1, s2
|
|
|
|
; CHECK-NEXT: fmaxnm s3, s3, s2
|
|
|
|
; CHECK-NEXT: fmaxnm s4, s4, s2
|
|
|
|
; CHECK-NEXT: fmaxnm s0, s0, s2
|
|
|
|
; CHECK-NEXT: fmov s2, w8
|
|
|
|
; CHECK-NEXT: fminnm s1, s1, s2
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s1
|
|
|
|
; CHECK-NEXT: fminnm s1, s3, s2
|
|
|
|
; CHECK-NEXT: fminnm s3, s4, s2
|
|
|
|
; CHECK-NEXT: fminnm s2, s0, s2
|
|
|
|
; CHECK-NEXT: fmov s0, w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s1
|
|
|
|
; CHECK-NEXT: mov v0.h[1], w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s3
|
|
|
|
; CHECK-NEXT: mov v0.h[2], w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s2
|
|
|
|
; CHECK-NEXT: mov v0.h[3], w8
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <4 x i8> @llvm.fptoui.sat.v4f16.v4i8(<4 x half> %f)
|
|
|
|
ret <4 x i8> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i13> @test_unsigned_v4f16_v4i13(<4 x half> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v4f16_v4i13:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
|
|
|
; CHECK-NEXT: fcvt s1, h0
|
|
|
|
; CHECK-NEXT: mov w8, #63488
|
|
|
|
; CHECK-NEXT: mov h3, v0.h[1]
|
|
|
|
; CHECK-NEXT: mov h4, v0.h[2]
|
|
|
|
; CHECK-NEXT: mov h0, v0.h[3]
|
2021-04-06 15:53:42 +08:00
|
|
|
; CHECK-NEXT: movi d2, #0000000000000000
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: movk w8, #17919, lsl #16
|
|
|
|
; CHECK-NEXT: fcvt s3, h3
|
|
|
|
; CHECK-NEXT: fcvt s4, h4
|
|
|
|
; CHECK-NEXT: fcvt s0, h0
|
|
|
|
; CHECK-NEXT: fmaxnm s1, s1, s2
|
|
|
|
; CHECK-NEXT: fmaxnm s3, s3, s2
|
|
|
|
; CHECK-NEXT: fmaxnm s4, s4, s2
|
|
|
|
; CHECK-NEXT: fmaxnm s0, s0, s2
|
|
|
|
; CHECK-NEXT: fmov s2, w8
|
|
|
|
; CHECK-NEXT: fminnm s1, s1, s2
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s1
|
|
|
|
; CHECK-NEXT: fminnm s1, s3, s2
|
|
|
|
; CHECK-NEXT: fminnm s3, s4, s2
|
|
|
|
; CHECK-NEXT: fminnm s2, s0, s2
|
|
|
|
; CHECK-NEXT: fmov s0, w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s1
|
|
|
|
; CHECK-NEXT: mov v0.h[1], w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s3
|
|
|
|
; CHECK-NEXT: mov v0.h[2], w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s2
|
|
|
|
; CHECK-NEXT: mov v0.h[3], w8
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <4 x i13> @llvm.fptoui.sat.v4f16.v4i13(<4 x half> %f)
|
|
|
|
ret <4 x i13> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i16> @test_unsigned_v4f16_v4i16(<4 x half> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v4f16_v4i16:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
|
|
|
; CHECK-NEXT: fcvt s1, h0
|
|
|
|
; CHECK-NEXT: mov w8, #65280
|
|
|
|
; CHECK-NEXT: mov h3, v0.h[1]
|
|
|
|
; CHECK-NEXT: mov h4, v0.h[2]
|
|
|
|
; CHECK-NEXT: mov h0, v0.h[3]
|
2021-04-06 15:53:42 +08:00
|
|
|
; CHECK-NEXT: movi d2, #0000000000000000
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: movk w8, #18303, lsl #16
|
|
|
|
; CHECK-NEXT: fcvt s3, h3
|
|
|
|
; CHECK-NEXT: fcvt s4, h4
|
|
|
|
; CHECK-NEXT: fcvt s0, h0
|
|
|
|
; CHECK-NEXT: fmaxnm s1, s1, s2
|
|
|
|
; CHECK-NEXT: fmaxnm s3, s3, s2
|
|
|
|
; CHECK-NEXT: fmaxnm s4, s4, s2
|
|
|
|
; CHECK-NEXT: fmaxnm s0, s0, s2
|
|
|
|
; CHECK-NEXT: fmov s2, w8
|
|
|
|
; CHECK-NEXT: fminnm s1, s1, s2
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s1
|
|
|
|
; CHECK-NEXT: fminnm s1, s3, s2
|
|
|
|
; CHECK-NEXT: fminnm s3, s4, s2
|
|
|
|
; CHECK-NEXT: fminnm s2, s0, s2
|
|
|
|
; CHECK-NEXT: fmov s0, w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s1
|
|
|
|
; CHECK-NEXT: mov v0.h[1], w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s3
|
|
|
|
; CHECK-NEXT: mov v0.h[2], w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s2
|
|
|
|
; CHECK-NEXT: mov v0.h[3], w8
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <4 x i16> @llvm.fptoui.sat.v4f16.v4i16(<4 x half> %f)
|
|
|
|
ret <4 x i16> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i19> @test_unsigned_v4f16_v4i19(<4 x half> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v4f16_v4i19:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
|
|
|
; CHECK-NEXT: fcvt s1, h0
|
|
|
|
; CHECK-NEXT: mov w8, #65504
|
|
|
|
; CHECK-NEXT: mov h3, v0.h[1]
|
|
|
|
; CHECK-NEXT: mov h4, v0.h[2]
|
|
|
|
; CHECK-NEXT: mov h0, v0.h[3]
|
2021-04-06 15:53:42 +08:00
|
|
|
; CHECK-NEXT: movi d2, #0000000000000000
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: movk w8, #18687, lsl #16
|
|
|
|
; CHECK-NEXT: fcvt s3, h3
|
|
|
|
; CHECK-NEXT: fcvt s4, h4
|
|
|
|
; CHECK-NEXT: fcvt s0, h0
|
|
|
|
; CHECK-NEXT: fmaxnm s1, s1, s2
|
|
|
|
; CHECK-NEXT: fmaxnm s3, s3, s2
|
|
|
|
; CHECK-NEXT: fmaxnm s4, s4, s2
|
|
|
|
; CHECK-NEXT: fmaxnm s0, s0, s2
|
|
|
|
; CHECK-NEXT: fmov s2, w8
|
|
|
|
; CHECK-NEXT: fminnm s1, s1, s2
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s1
|
|
|
|
; CHECK-NEXT: fminnm s1, s3, s2
|
|
|
|
; CHECK-NEXT: fminnm s3, s4, s2
|
|
|
|
; CHECK-NEXT: fminnm s2, s0, s2
|
|
|
|
; CHECK-NEXT: fmov s0, w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s1
|
|
|
|
; CHECK-NEXT: mov v0.s[1], w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s3
|
|
|
|
; CHECK-NEXT: mov v0.s[2], w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s2
|
|
|
|
; CHECK-NEXT: mov v0.s[3], w8
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <4 x i19> @llvm.fptoui.sat.v4f16.v4i19(<4 x half> %f)
|
|
|
|
ret <4 x i19> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
define <4 x i32> @test_unsigned_v4f16_v4i32_duplicate(<4 x half> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v4f16_v4i32_duplicate:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvt s1, h0
|
|
|
|
; CHECK-NEXT: mov h2, v0.h[1]
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w8, s1
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvt s2, h2
|
|
|
|
; CHECK-NEXT: fmov s1, w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s2
|
|
|
|
; CHECK-NEXT: mov h2, v0.h[2]
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: mov h0, v0.h[3]
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvt s2, h2
|
|
|
|
; CHECK-NEXT: fcvt s0, h0
|
|
|
|
; CHECK-NEXT: mov v1.s[1], w8
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fcvtzu w8, s2
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: mov v1.s[2], w8
|
|
|
|
; CHECK-NEXT: fcvtzu w8, s0
|
|
|
|
; CHECK-NEXT: mov v1.s[3], w8
|
|
|
|
; CHECK-NEXT: mov v0.16b, v1.16b
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptosi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <4 x i32> @llvm.fptoui.sat.v4f16.v4i32(<4 x half> %f)
|
|
|
|
ret <4 x i32> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
; Unsigned saturating conversion of a <4 x half> vector to <4 x i50> through
; @llvm.fptoui.sat.v4f16.v4i50.
;
; The autogenerated assertions below pin the expected assembly (AArch64
; register names). For every lane the expected sequence is:
;   - fcvt widens the half lane to single precision and fcvtzu converts it
;     into a 64-bit integer register;
;   - fcmp against #0.0 followed by "csel ..., lt" selects xzr (zero) for
;     negative inputs;
;   - fcmp against s2 followed by "csel ..., gt" selects
;     x9 = 1125899906842623 (2^50 - 1, the largest i50 value) for inputs
;     above the upper bound.
; s2 is materialized from the bit pattern 1484783615 (0x587FFFFF), which
; decodes to the largest single-precision value below 2^50.
; The four lane results are expected in x0, x1, x2 and x3.
; Do not hand-edit the assertion lines; they depend on the exact instruction
; order emitted by the backend.
define <4 x i50> @test_unsigned_v4f16_v4i50(<4 x half> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v4f16_v4i50:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
|
|
|
; CHECK-NEXT: fcvt s1, h0
|
|
|
|
; CHECK-NEXT: mov w8, #1484783615
|
|
|
|
; CHECK-NEXT: fcvtzu x10, s1
|
|
|
|
; CHECK-NEXT: fcmp s1, #0.0
|
|
|
|
; CHECK-NEXT: fmov s2, w8
|
|
|
|
; CHECK-NEXT: csel x8, xzr, x10, lt
|
|
|
|
; CHECK-NEXT: fcmp s1, s2
|
|
|
|
; CHECK-NEXT: mov h1, v0.h[1]
|
|
|
|
; CHECK-NEXT: mov x9, #1125899906842623
|
|
|
|
; CHECK-NEXT: fcvt s1, h1
|
|
|
|
; CHECK-NEXT: fcvtzu x10, s1
|
|
|
|
; CHECK-NEXT: csel x0, x9, x8, gt
|
|
|
|
; CHECK-NEXT: fcmp s1, #0.0
|
|
|
|
; CHECK-NEXT: csel x8, xzr, x10, lt
|
|
|
|
; CHECK-NEXT: fcmp s1, s2
|
|
|
|
; CHECK-NEXT: mov h1, v0.h[2]
|
|
|
|
; CHECK-NEXT: fcvt s1, h1
|
|
|
|
; CHECK-NEXT: mov h0, v0.h[3]
|
|
|
|
; CHECK-NEXT: fcvtzu x10, s1
|
|
|
|
; CHECK-NEXT: csel x1, x9, x8, gt
|
|
|
|
; CHECK-NEXT: fcmp s1, #0.0
|
|
|
|
; CHECK-NEXT: fcvt s0, h0
|
|
|
|
; CHECK-NEXT: csel x8, xzr, x10, lt
|
|
|
|
; CHECK-NEXT: fcmp s1, s2
|
|
|
|
; CHECK-NEXT: fcvtzu x11, s0
|
|
|
|
; CHECK-NEXT: csel x2, x9, x8, gt
|
|
|
|
; CHECK-NEXT: fcmp s0, #0.0
|
|
|
|
; CHECK-NEXT: csel x8, xzr, x11, lt
|
|
|
|
; CHECK-NEXT: fcmp s0, s2
|
|
|
|
; CHECK-NEXT: csel x3, x9, x8, gt
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <4 x i50> @llvm.fptoui.sat.v4f16.v4i50(<4 x half> %f)
|
|
|
|
ret <4 x i50> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
; Unsigned saturating conversion of a <4 x half> vector to <4 x i64> through
; @llvm.fptoui.sat.v4f16.v4i64.
;
; Expected assembly per lane: fcvt widens the half lane to single precision
; and fcvtzu converts it into x8. Lanes 0 and 1 are packed into v2
; (fmov d2 / mov v2.d[1]) and lanes 2 and 3 into v1 (fmov d1 / mov v1.d[1]);
; v2 is finally copied to v0, so the result is expected in v0 and v1.
; No separate compare/select clamping appears in the expected output.
; NOTE(review): presumably fcvtzu to a 64-bit register already provides the
; required saturation -- confirm against the Arm architecture reference.
; The non-IR lines interleaved below (timestamps, commit text, "|"
; separators) are kept exactly as found.
define <4 x i64> @test_unsigned_v4f16_v4i64(<4 x half> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v4f16_v4i64:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvt s1, h0
|
|
|
|
; CHECK-NEXT: mov h2, v0.h[1]
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptsi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fcvtzu x8, s1
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: fcvt s1, h2
|
|
|
|
; CHECK-NEXT: fmov d2, x8
|
|
|
|
; CHECK-NEXT: fcvtzu x8, s1
|
|
|
|
; CHECK-NEXT: mov h1, v0.h[2]
|
|
|
|
; CHECK-NEXT: mov h0, v0.h[3]
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptsi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: fcvt s1, h1
|
2021-04-27 17:12:11 +08:00
|
|
|
; CHECK-NEXT: mov v2.d[1], x8
|
|
|
|
; CHECK-NEXT: fcvtzu x8, s1
|
|
|
|
; CHECK-NEXT: fcvt s0, h0
|
|
|
|
; CHECK-NEXT: fmov d1, x8
|
|
|
|
; CHECK-NEXT: fcvtzu x8, s0
|
|
|
|
; CHECK-NEXT: mov v1.d[1], x8
|
|
|
|
; CHECK-NEXT: mov v0.16b, v2.16b
|
Add intrinsics for saturating float to int casts
This patch adds support for the fptoui.sat and fptosi.sat intrinsics,
which provide basically the same functionality as the existing fptoui
and fptosi instructions, but will saturate (or return 0 for NaN) on
values unrepresentable in the target type, instead of returning
poison. Related mailing list discussion can be found at:
https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ
The intrinsics have overloaded source and result type and support
vector operands:
i32 @llvm.fptoui.sat.i32.f32(float %f)
i100 @llvm.fptoui.sat.i100.f64(double %f)
<4 x i32> @llvm.fptoui.sat.v4i32.v4f16(half %f)
// etc
On the SelectionDAG layer two new ISD opcodes are added,
FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands
and one result. The second operand is an integer constant specifying
the scalar saturation width. The idea here is that initially the
second operand and the scalar width of the result type are the same,
but they may change during type legalization. For example:
i19 @llvm.fptsi.sat.i19.f32(float %f)
// builds
i19 fp_to_sint_sat f, 19
// type legalizes (through integer result promotion)
i32 fp_to_sint_sat f, 19
I went for this approach, because saturated conversion does not
compose well. There is no good way of "adjusting" a saturating
conversion to i32 into one to i19 short of saturating twice.
Specifying the saturation width separately allows directly saturating
to the correct width.
There are two baseline expansions for the fp_to_xint_sat opcodes. If
the integer bounds can be exactly represented in the float type and
fminnum/fmaxnum are legal, we can expand to something like:
f = fmaxnum f, FP(MIN)
f = fminnum f, FP(MAX)
i = fptoxi f
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
If the bounds cannot be exactly represented, we expand to something
like this instead:
i = fptoxi f
i = select f ult FP(MIN), MIN, i
i = select f ogt FP(MAX), MAX, i
i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN
It should be noted that this expansion assumes a non-trapping fptoxi.
Initial tests are for AArch64, x86_64 and ARM. This exercises all of
the scalar and vector legalization. ARM is included to test float
softening.
Original patch by @nikic and @ebevhan (based on D54696).
Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <4 x i64> @llvm.fptoui.sat.v4f16.v4i64(<4 x half> %f)
|
|
|
|
ret <4 x i64> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
; Unsigned saturating conversion of a <4 x half> vector to <4 x i100> through
; @llvm.fptoui.sat.v4f16.v4i100.
;
; Expected assembly (AArch64 register names): every lane is widened with fcvt
; and converted by a call to the __fixunssfti library routine; the result is
; read back from x0 and x1. After each call:
;   - fcmp against #0.0 followed by "csel ..., lt" zeroes both result
;     registers for negative inputs;
;   - fcmp against s9 followed by "csel ..., x25, ..., gt" and
;     "csinv ..., xzr, le" replaces the x1 part with
;     x25 = 68719476735 (2^36 - 1) and the x0 part with all ones for inputs
;     above the upper bound, which together form 2^100 - 1.
; s9 is materialized from the bit pattern 1904214015 (0x717FFFFF), which
; decodes to the largest single-precision value below 2^100.
; Lanes are processed in the order 2, 1, 3, 0. The input vector is spilled to
; [sp] and reloaded around the calls, and intermediate results are kept in
; x19-x25 and s8/s9, which is why those registers are saved in the prologue
; and restored in the epilogue. The final moves place lane k in the register
; pair x(2k)/x(2k+1), i.e. x0/x1 through x6/x7.
; Do not hand-edit the assertion lines; they depend on the exact instruction
; order emitted by the backend.
define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v4f16_v4i100:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: sub sp, sp, #96 // =96
|
|
|
|
; CHECK-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: stp x30, x25, [sp, #32] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 96
|
|
|
|
; CHECK-NEXT: .cfi_offset w19, -8
|
|
|
|
; CHECK-NEXT: .cfi_offset w20, -16
|
|
|
|
; CHECK-NEXT: .cfi_offset w21, -24
|
|
|
|
; CHECK-NEXT: .cfi_offset w22, -32
|
|
|
|
; CHECK-NEXT: .cfi_offset w23, -40
|
|
|
|
; CHECK-NEXT: .cfi_offset w24, -48
|
|
|
|
; CHECK-NEXT: .cfi_offset w25, -56
|
|
|
|
; CHECK-NEXT: .cfi_offset w30, -64
|
|
|
|
; CHECK-NEXT: .cfi_offset b8, -72
|
|
|
|
; CHECK-NEXT: .cfi_offset b9, -80
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
|
|
|
; CHECK-NEXT: mov h1, v0.h[2]
|
|
|
|
; CHECK-NEXT: fcvt s8, h1
|
|
|
|
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: mov v0.16b, v8.16b
|
|
|
|
; CHECK-NEXT: bl __fixunssfti
|
|
|
|
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: mov w8, #1904214015
|
|
|
|
; CHECK-NEXT: fcmp s8, #0.0
|
|
|
|
; CHECK-NEXT: fmov s9, w8
|
|
|
|
; CHECK-NEXT: mov h0, v0.h[1]
|
|
|
|
; CHECK-NEXT: csel x9, xzr, x0, lt
|
|
|
|
; CHECK-NEXT: csel x10, xzr, x1, lt
|
|
|
|
; CHECK-NEXT: fcmp s8, s9
|
|
|
|
; CHECK-NEXT: fcvt s8, h0
|
|
|
|
; CHECK-NEXT: mov x25, #68719476735
|
|
|
|
; CHECK-NEXT: mov v0.16b, v8.16b
|
|
|
|
; CHECK-NEXT: csel x19, x25, x10, gt
|
|
|
|
; CHECK-NEXT: csinv x20, x9, xzr, le
|
|
|
|
; CHECK-NEXT: bl __fixunssfti
|
|
|
|
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: fcmp s8, #0.0
|
|
|
|
; CHECK-NEXT: csel x8, xzr, x0, lt
|
|
|
|
; CHECK-NEXT: csel x9, xzr, x1, lt
|
|
|
|
; CHECK-NEXT: mov h0, v0.h[3]
|
|
|
|
; CHECK-NEXT: fcmp s8, s9
|
|
|
|
; CHECK-NEXT: fcvt s8, h0
|
|
|
|
; CHECK-NEXT: mov v0.16b, v8.16b
|
|
|
|
; CHECK-NEXT: csel x21, x25, x9, gt
|
|
|
|
; CHECK-NEXT: csinv x22, x8, xzr, le
|
|
|
|
; CHECK-NEXT: bl __fixunssfti
|
|
|
|
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: fcmp s8, #0.0
|
|
|
|
; CHECK-NEXT: csel x8, xzr, x0, lt
|
|
|
|
; CHECK-NEXT: csel x9, xzr, x1, lt
|
|
|
|
; CHECK-NEXT: fcmp s8, s9
|
|
|
|
; CHECK-NEXT: fcvt s8, h0
|
|
|
|
; CHECK-NEXT: mov v0.16b, v8.16b
|
|
|
|
; CHECK-NEXT: csel x23, x25, x9, gt
|
|
|
|
; CHECK-NEXT: csinv x24, x8, xzr, le
|
|
|
|
; CHECK-NEXT: bl __fixunssfti
|
|
|
|
; CHECK-NEXT: fcmp s8, #0.0
|
|
|
|
; CHECK-NEXT: csel x8, xzr, x0, lt
|
|
|
|
; CHECK-NEXT: csel x9, xzr, x1, lt
|
|
|
|
; CHECK-NEXT: fcmp s8, s9
|
|
|
|
; CHECK-NEXT: csinv x8, x8, xzr, le
|
|
|
|
; CHECK-NEXT: csel x1, x25, x9, gt
|
|
|
|
; CHECK-NEXT: mov x2, x22
|
|
|
|
; CHECK-NEXT: mov x3, x21
|
|
|
|
; CHECK-NEXT: mov x4, x20
|
|
|
|
; CHECK-NEXT: mov x5, x19
|
|
|
|
; CHECK-NEXT: mov x6, x24
|
|
|
|
; CHECK-NEXT: mov x7, x23
|
|
|
|
; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: ldp x30, x25, [sp, #32] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: fmov d0, x8
|
|
|
|
; CHECK-NEXT: mov v0.d[1], x1
|
|
|
|
; CHECK-NEXT: fmov x0, d0
|
|
|
|
; CHECK-NEXT: add sp, sp, #96 // =96
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <4 x i100> @llvm.fptoui.sat.v4f16.v4i100(<4 x half> %f)
|
|
|
|
ret <4 x i100> %x
|
|
|
|
}
|
|
|
|
|
|
|
|
; Unsigned saturating conversion of a <4 x half> vector to <4 x i128> through
; @llvm.fptoui.sat.v4f16.v4i128.
;
; Expected assembly (AArch64 register names): every lane is widened with fcvt
; and converted by a call to the __fixunssfti library routine; the result is
; read back from x0 and x1. After each call:
;   - fcmp against #0.0 followed by "csel ..., lt" zeroes both result
;     registers for negative inputs;
;   - fcmp against s9 followed by "csinv ..., xzr, le" on both registers
;     replaces them with all ones (2^128 - 1) for inputs above the bound.
; s9 is materialized from the bit pattern 2139095039 (0x7F7FFFFF), which
; decodes to the largest finite single-precision value.
; Lanes are processed in the order 1, 2, 3, 0. The input vector is spilled to
; [sp] and reloaded around the calls, and intermediate results are kept in
; x19-x24 and s8/s9, which is why those registers are saved in the prologue
; and restored in the epilogue. The final moves place lane k in the register
; pair x(2k)/x(2k+1), i.e. x0/x1 through x6/x7.
; Do not hand-edit the assertion lines; they depend on the exact instruction
; order emitted by the backend.
define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) {
|
|
|
|
; CHECK-LABEL: test_unsigned_v4f16_v4i128:
|
|
|
|
; CHECK: // %bb.0:
|
|
|
|
; CHECK-NEXT: sub sp, sp, #96 // =96
|
|
|
|
; CHECK-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
|
|
|
|
; CHECK-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 96
|
|
|
|
; CHECK-NEXT: .cfi_offset w19, -8
|
|
|
|
; CHECK-NEXT: .cfi_offset w20, -16
|
|
|
|
; CHECK-NEXT: .cfi_offset w21, -24
|
|
|
|
; CHECK-NEXT: .cfi_offset w22, -32
|
|
|
|
; CHECK-NEXT: .cfi_offset w23, -40
|
|
|
|
; CHECK-NEXT: .cfi_offset w24, -48
|
|
|
|
; CHECK-NEXT: .cfi_offset w30, -64
|
|
|
|
; CHECK-NEXT: .cfi_offset b8, -72
|
|
|
|
; CHECK-NEXT: .cfi_offset b9, -80
|
|
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
|
|
|
; CHECK-NEXT: mov h1, v0.h[1]
|
|
|
|
; CHECK-NEXT: fcvt s8, h1
|
|
|
|
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
|
|
|
|
; CHECK-NEXT: mov v0.16b, v8.16b
|
|
|
|
; CHECK-NEXT: bl __fixunssfti
|
|
|
|
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: mov w8, #2139095039
|
|
|
|
; CHECK-NEXT: fcmp s8, #0.0
|
|
|
|
; CHECK-NEXT: fmov s9, w8
|
|
|
|
; CHECK-NEXT: mov h0, v0.h[2]
|
|
|
|
; CHECK-NEXT: csel x9, xzr, x1, lt
|
|
|
|
; CHECK-NEXT: csel x10, xzr, x0, lt
|
|
|
|
; CHECK-NEXT: fcmp s8, s9
|
|
|
|
; CHECK-NEXT: fcvt s8, h0
|
|
|
|
; CHECK-NEXT: mov v0.16b, v8.16b
|
|
|
|
; CHECK-NEXT: csinv x19, x10, xzr, le
|
|
|
|
; CHECK-NEXT: csinv x20, x9, xzr, le
|
|
|
|
; CHECK-NEXT: bl __fixunssfti
|
|
|
|
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: fcmp s8, #0.0
|
|
|
|
; CHECK-NEXT: csel x8, xzr, x1, lt
|
|
|
|
; CHECK-NEXT: csel x9, xzr, x0, lt
|
|
|
|
; CHECK-NEXT: mov h0, v0.h[3]
|
|
|
|
; CHECK-NEXT: fcmp s8, s9
|
|
|
|
; CHECK-NEXT: fcvt s8, h0
|
|
|
|
; CHECK-NEXT: mov v0.16b, v8.16b
|
|
|
|
; CHECK-NEXT: csinv x21, x9, xzr, le
|
|
|
|
; CHECK-NEXT: csinv x22, x8, xzr, le
|
|
|
|
; CHECK-NEXT: bl __fixunssfti
|
|
|
|
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: fcmp s8, #0.0
|
|
|
|
; CHECK-NEXT: csel x8, xzr, x1, lt
|
|
|
|
; CHECK-NEXT: csel x9, xzr, x0, lt
|
|
|
|
; CHECK-NEXT: fcmp s8, s9
|
|
|
|
; CHECK-NEXT: fcvt s8, h0
|
|
|
|
; CHECK-NEXT: mov v0.16b, v8.16b
|
|
|
|
; CHECK-NEXT: csinv x23, x9, xzr, le
|
|
|
|
; CHECK-NEXT: csinv x24, x8, xzr, le
|
|
|
|
; CHECK-NEXT: bl __fixunssfti
|
|
|
|
; CHECK-NEXT: fcmp s8, #0.0
|
|
|
|
; CHECK-NEXT: csel x8, xzr, x0, lt
|
|
|
|
; CHECK-NEXT: csel x9, xzr, x1, lt
|
|
|
|
; CHECK-NEXT: fcmp s8, s9
|
|
|
|
; CHECK-NEXT: csinv x8, x8, xzr, le
|
|
|
|
; CHECK-NEXT: mov x2, x19
|
|
|
|
; CHECK-NEXT: mov x3, x20
|
|
|
|
; CHECK-NEXT: mov x4, x21
|
|
|
|
; CHECK-NEXT: mov x5, x22
|
|
|
|
; CHECK-NEXT: mov x6, x23
|
|
|
|
; CHECK-NEXT: mov x7, x24
|
|
|
|
; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
|
|
|
|
; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
|
|
|
|
; CHECK-NEXT: csinv x1, x9, xzr, le
|
|
|
|
; CHECK-NEXT: fmov d0, x8
|
|
|
|
; CHECK-NEXT: mov v0.d[1], x1
|
|
|
|
; CHECK-NEXT: fmov x0, d0
|
|
|
|
; CHECK-NEXT: add sp, sp, #96 // =96
|
|
|
|
; CHECK-NEXT: ret
|
|
|
|
%x = call <4 x i128> @llvm.fptoui.sat.v4f16.v4i128(<4 x half> %f)
|
|
|
|
ret <4 x i128> %x
|
|
|
|
}
|
|
|
|
|