llvm-project/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

2197 lines
76 KiB
LLVM
Raw Normal View History

Add intrinsics for saturating float to int casts

This patch adds support for the fptoui.sat and fptosi.sat intrinsics, which provide basically the same functionality as the existing fptoui and fptosi instructions, but will saturate (or return 0 for NaN) on values unrepresentable in the target type, instead of returning poison. Related mailing list discussion can be found at: https://groups.google.com/d/msg/llvm-dev/cgDFaBmCnDQ/CZAIMj4IBAAJ

The intrinsics have overloaded source and result type and support vector operands:

    i32 @llvm.fptoui.sat.i32.f32(float %f)
    i100 @llvm.fptoui.sat.i100.f64(double %f)
    <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> %f)
    // etc

On the SelectionDAG layer two new ISD opcodes are added, FP_TO_UINT_SAT and FP_TO_SINT_SAT. These opcodes have two operands and one result. The second operand is an integer constant specifying the scalar saturation width. The idea here is that initially the second operand and the scalar width of the result type are the same, but they may change during type legalization. For example:

    i19 @llvm.fptosi.sat.i19.f32(float %f)
    // builds
    i19 fp_to_sint_sat f, 19
    // type legalizes (through integer result promotion)
    i32 fp_to_sint_sat f, 19

I went for this approach, because saturated conversion does not compose well. There is no good way of "adjusting" a saturating conversion to i32 into one to i19 short of saturating twice. Specifying the saturation width separately allows directly saturating to the correct width.

There are two baseline expansions for the fp_to_xint_sat opcodes. If the integer bounds can be exactly represented in the float type and fminnum/fmaxnum are legal, we can expand to something like:

    f = fmaxnum f, FP(MIN)
    f = fminnum f, FP(MAX)
    i = fptoxi f
    i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN

If the bounds cannot be exactly represented, we expand to something like this instead:

    i = fptoxi f
    i = select f ult FP(MIN), MIN, i
    i = select f ogt FP(MAX), MAX, i
    i = select f uo f, 0, i # unnecessary if unsigned as 0 = MIN

It should be noted that this expansion assumes a non-trapping fptoxi.

Initial tests are for AArch64, x86_64 and ARM. This exercises all of the scalar and vector legalization. ARM is included to test float softening.

Original patch by @nikic and @ebevhan (based on D54696).

Differential Revision: https://reviews.llvm.org/D54749
2020-12-18 04:33:32 +08:00
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
;
; Float to unsigned 32-bit -- Vector size variation
;
; Declarations of the saturating float-to-unsigned intrinsic for <1 x float>
; through <8 x float> sources, each returning i32 elements of the same count.
; NOTE(review): the overload suffix here lists the source vector type before
; the result type (e.g. v1f32.v1i32); LangRef examples spell it result-first.
; Presumably the IR parser remangles the name -- TODO confirm.
declare <1 x i32> @llvm.fptoui.sat.v1f32.v1i32 (<1 x float>)
declare <2 x i32> @llvm.fptoui.sat.v2f32.v2i32 (<2 x float>)
declare <3 x i32> @llvm.fptoui.sat.v3f32.v3i32 (<3 x float>)
declare <4 x i32> @llvm.fptoui.sat.v4f32.v4i32 (<4 x float>)
declare <5 x i32> @llvm.fptoui.sat.v5f32.v5i32 (<5 x float>)
declare <6 x i32> @llvm.fptoui.sat.v6f32.v6i32 (<6 x float>)
declare <7 x i32> @llvm.fptoui.sat.v7f32.v7i32 (<7 x float>)
declare <8 x i32> @llvm.fptoui.sat.v8f32.v8i32 (<8 x float>)
; <1 x float> -> <1 x i32> saturating unsigned conversion.
; The expected code handles two lanes (s0 and v0.s[1]), the same sequence as
; the <2 x float> test. Per lane: fcvtzu converts, csel picks wzr when the lane
; compares lt 0.0, and csinv picks the inverse of wzr (all ones) when the lane
; does not compare le the bound built from #1333788671 (0x4F7FFFFF, i.e. the
; single-precision value 4294967040.0).
define <1 x i32> @test_unsigned_v1f32_v1i32(<1 x float> %f) {
; CHECK-LABEL: test_unsigned_v1f32_v1i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov s1, v0.s[1]
; CHECK-NEXT: mov w8, #1333788671
; CHECK-NEXT: fmov s2, w8
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: csel w8, wzr, w8, lt
; CHECK-NEXT: fcmp s1, s2
; CHECK-NEXT: fcvtzu w9, s0
; CHECK-NEXT: csinv w8, w8, wzr, le
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: csel w9, wzr, w9, lt
; CHECK-NEXT: fcmp s0, s2
; CHECK-NEXT: csinv w9, w9, wzr, le
; CHECK-NEXT: fmov s0, w9
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%x = call <1 x i32> @llvm.fptoui.sat.v1f32.v1i32(<1 x float> %f)
ret <1 x i32> %x
}
; <2 x float> -> <2 x i32> saturating unsigned conversion.
; The expected code is scalarized: each lane (s0 and v0.s[1]) goes through
; fcvtzu, then csel selects wzr when the lane compares lt 0.0, and csinv
; selects all ones when the lane does not compare le the bound built from
; #1333788671 (0x4F7FFFFF). The two results are reassembled in v0.
define <2 x i32> @test_unsigned_v2f32_v2i32(<2 x float> %f) {
; CHECK-LABEL: test_unsigned_v2f32_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov s1, v0.s[1]
; CHECK-NEXT: mov w8, #1333788671
; CHECK-NEXT: fmov s2, w8
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: csel w8, wzr, w8, lt
; CHECK-NEXT: fcmp s1, s2
; CHECK-NEXT: fcvtzu w9, s0
; CHECK-NEXT: csinv w8, w8, wzr, le
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: csel w9, wzr, w9, lt
; CHECK-NEXT: fcmp s0, s2
; CHECK-NEXT: csinv w9, w9, wzr, le
; CHECK-NEXT: fmov s0, w9
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%x = call <2 x i32> @llvm.fptoui.sat.v2f32.v2i32(<2 x float> %f)
ret <2 x i32> %x
}
; <3 x float> -> <3 x i32> saturating unsigned conversion.
; The expected code converts four lanes (s0 and v0.s[1..3]) and fills
; v0.s[0..3], the same sequence as the <4 x float> test. Each lane uses
; fcvtzu, a csel to wzr on lt 0.0, and a csinv to all ones when not le the
; bound built from #1333788671 (0x4F7FFFFF).
define <3 x i32> @test_unsigned_v3f32_v3i32(<3 x float> %f) {
; CHECK-LABEL: test_unsigned_v3f32_v3i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov s1, v0.s[1]
; CHECK-NEXT: mov w8, #1333788671
; CHECK-NEXT: fmov s3, w8
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: csel w8, wzr, w8, lt
; CHECK-NEXT: fcmp s1, s3
; CHECK-NEXT: fcvtzu w9, s0
; CHECK-NEXT: csinv w8, w8, wzr, le
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: csel w9, wzr, w9, lt
; CHECK-NEXT: fcmp s0, s3
; CHECK-NEXT: mov s2, v0.s[2]
; CHECK-NEXT: csinv w9, w9, wzr, le
; CHECK-NEXT: mov s1, v0.s[3]
; CHECK-NEXT: fmov s0, w9
; CHECK-NEXT: fcvtzu w9, s2
; CHECK-NEXT: fcmp s2, #0.0
; CHECK-NEXT: csel w9, wzr, w9, lt
; CHECK-NEXT: fcmp s2, s3
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: csinv w9, w9, wzr, le
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: csel w8, wzr, w8, lt
; CHECK-NEXT: fcmp s1, s3
; CHECK-NEXT: mov v0.s[2], w9
; CHECK-NEXT: csinv w8, w8, wzr, le
; CHECK-NEXT: mov v0.s[3], w8
; CHECK-NEXT: ret
%x = call <3 x i32> @llvm.fptoui.sat.v3f32.v3i32(<3 x float> %f)
ret <3 x i32> %x
}
; <4 x float> -> <4 x i32> saturating unsigned conversion.
; The expected code is scalarized over the four lanes of v0: fcvtzu per lane,
; csel to wzr when the lane compares lt 0.0, csinv to all ones when it does not
; compare le the bound built from #1333788671 (0x4F7FFFFF), then each result
; is inserted back into v0.s[0..3].
define <4 x i32> @test_unsigned_v4f32_v4i32(<4 x float> %f) {
; CHECK-LABEL: test_unsigned_v4f32_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov s1, v0.s[1]
; CHECK-NEXT: mov w8, #1333788671
; CHECK-NEXT: fmov s3, w8
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: csel w8, wzr, w8, lt
; CHECK-NEXT: fcmp s1, s3
; CHECK-NEXT: fcvtzu w9, s0
; CHECK-NEXT: csinv w8, w8, wzr, le
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: csel w9, wzr, w9, lt
; CHECK-NEXT: fcmp s0, s3
; CHECK-NEXT: mov s2, v0.s[2]
; CHECK-NEXT: csinv w9, w9, wzr, le
; CHECK-NEXT: mov s1, v0.s[3]
; CHECK-NEXT: fmov s0, w9
; CHECK-NEXT: fcvtzu w9, s2
; CHECK-NEXT: fcmp s2, #0.0
; CHECK-NEXT: csel w9, wzr, w9, lt
; CHECK-NEXT: fcmp s2, s3
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: csinv w9, w9, wzr, le
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: csel w8, wzr, w8, lt
; CHECK-NEXT: fcmp s1, s3
; CHECK-NEXT: mov v0.s[2], w9
; CHECK-NEXT: csinv w8, w8, wzr, le
; CHECK-NEXT: mov v0.s[3], w8
; CHECK-NEXT: ret
%x = call <4 x i32> @llvm.fptoui.sat.v4f32.v4i32(<4 x float> %f)
ret <4 x i32> %x
}
; <5 x float> -> <5 x i32> saturating unsigned conversion.
; In the expected code the five elements are read from s0..s4 and the five
; results are written to w0..w4. Each element uses fcvtzu, a csel to wzr on
; lt 0.0, and a csinv to all ones when not le the bound in s5 (built from
; #1333788671, 0x4F7FFFFF).
define <5 x i32> @test_unsigned_v5f32_v5i32(<5 x float> %f) {
; CHECK-LABEL: test_unsigned_v5f32_v5i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w9, #1333788671
; CHECK-NEXT: fcvtzu w8, s0
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: fmov s5, w9
; CHECK-NEXT: csel w8, wzr, w8, lt
; CHECK-NEXT: fcmp s0, s5
; CHECK-NEXT: fcvtzu w10, s1
; CHECK-NEXT: csinv w0, w8, wzr, le
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: csel w8, wzr, w10, lt
; CHECK-NEXT: fcmp s1, s5
; CHECK-NEXT: fcvtzu w11, s2
; CHECK-NEXT: csinv w1, w8, wzr, le
; CHECK-NEXT: fcmp s2, #0.0
; CHECK-NEXT: csel w8, wzr, w11, lt
; CHECK-NEXT: fcmp s2, s5
; CHECK-NEXT: fcvtzu w12, s3
; CHECK-NEXT: csinv w2, w8, wzr, le
; CHECK-NEXT: fcmp s3, #0.0
; CHECK-NEXT: csel w8, wzr, w12, lt
; CHECK-NEXT: fcmp s3, s5
; CHECK-NEXT: fcvtzu w9, s4
; CHECK-NEXT: csinv w3, w8, wzr, le
; CHECK-NEXT: fcmp s4, #0.0
; CHECK-NEXT: csel w8, wzr, w9, lt
; CHECK-NEXT: fcmp s4, s5
; CHECK-NEXT: csinv w4, w8, wzr, le
; CHECK-NEXT: ret
%x = call <5 x i32> @llvm.fptoui.sat.v5f32.v5i32(<5 x float> %f)
ret <5 x i32> %x
}
; <6 x float> -> <6 x i32> saturating unsigned conversion.
; In the expected code the elements are read from s0..s5. Results for
; elements 0-3 and 5 are written by csinv into w0-w3 and w5; the result for
; element 4 passes through s4 (fmov s4, w8) and reaches w4 via fmov w4, s4.
; Each element uses the fcvtzu / csel-to-wzr / csinv-to-all-ones sequence
; against the bound in s6 (built from #1333788671, 0x4F7FFFFF).
define <6 x i32> @test_unsigned_v6f32_v6i32(<6 x float> %f) {
; CHECK-LABEL: test_unsigned_v6f32_v6i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w9, #1333788671
; CHECK-NEXT: fcvtzu w8, s5
; CHECK-NEXT: fcmp s5, #0.0
; CHECK-NEXT: fmov s6, w9
; CHECK-NEXT: csel w8, wzr, w8, lt
; CHECK-NEXT: fcmp s5, s6
; CHECK-NEXT: fcvtzu w10, s4
; CHECK-NEXT: csinv w5, w8, wzr, le
; CHECK-NEXT: fcmp s4, #0.0
; CHECK-NEXT: csel w8, wzr, w10, lt
; CHECK-NEXT: fcmp s4, s6
; CHECK-NEXT: fcvtzu w11, s0
; CHECK-NEXT: csinv w8, w8, wzr, le
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: fmov s4, w8
; CHECK-NEXT: csel w8, wzr, w11, lt
; CHECK-NEXT: fcmp s0, s6
; CHECK-NEXT: fcvtzu w12, s1
; CHECK-NEXT: csinv w0, w8, wzr, le
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: csel w8, wzr, w12, lt
; CHECK-NEXT: fcmp s1, s6
; CHECK-NEXT: fcvtzu w13, s2
; CHECK-NEXT: csinv w1, w8, wzr, le
; CHECK-NEXT: fcmp s2, #0.0
; CHECK-NEXT: csel w8, wzr, w13, lt
; CHECK-NEXT: fcmp s2, s6
; CHECK-NEXT: fcvtzu w9, s3
; CHECK-NEXT: csinv w2, w8, wzr, le
; CHECK-NEXT: fcmp s3, #0.0
; CHECK-NEXT: mov v4.s[1], w5
; CHECK-NEXT: csel w8, wzr, w9, lt
; CHECK-NEXT: fcmp s3, s6
; CHECK-NEXT: csinv w3, w8, wzr, le
; CHECK-NEXT: fmov w4, s4
; CHECK-NEXT: ret
%x = call <6 x i32> @llvm.fptoui.sat.v6f32.v6i32(<6 x float> %f)
ret <6 x i32> %x
}
; <7 x float> -> <7 x i32> saturating unsigned conversion.
; In the expected code the elements are read from s0..s6. Results for
; elements 0-3 and 6 are written by csinv into w0-w3 and w6; elements 4 and 5
; are staged in v4 and extracted at the end (fmov w4, s4 / mov w5, v4.s[1]).
; Each element uses the fcvtzu / csel-to-wzr / csinv-to-all-ones sequence
; against the bound in s7 (built from #1333788671, 0x4F7FFFFF).
define <7 x i32> @test_unsigned_v7f32_v7i32(<7 x float> %f) {
; CHECK-LABEL: test_unsigned_v7f32_v7i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w9, #1333788671
; CHECK-NEXT: fcvtzu w8, s5
; CHECK-NEXT: fcmp s5, #0.0
; CHECK-NEXT: fmov s7, w9
; CHECK-NEXT: csel w8, wzr, w8, lt
; CHECK-NEXT: fcmp s5, s7
; CHECK-NEXT: fcvtzu w10, s4
; CHECK-NEXT: csinv w8, w8, wzr, le
; CHECK-NEXT: fcmp s4, #0.0
; CHECK-NEXT: csel w10, wzr, w10, lt
; CHECK-NEXT: fcmp s4, s7
; CHECK-NEXT: fcvtzu w11, s6
; CHECK-NEXT: csinv w10, w10, wzr, le
; CHECK-NEXT: fcmp s6, #0.0
; CHECK-NEXT: fmov s4, w10
; CHECK-NEXT: csel w10, wzr, w11, lt
; CHECK-NEXT: fcmp s6, s7
; CHECK-NEXT: fcvtzu w12, s0
; CHECK-NEXT: csinv w6, w10, wzr, le
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: mov v4.s[1], w8
; CHECK-NEXT: csel w8, wzr, w12, lt
; CHECK-NEXT: fcmp s0, s7
; CHECK-NEXT: fcvtzu w13, s1
; CHECK-NEXT: csinv w0, w8, wzr, le
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: csel w8, wzr, w13, lt
; CHECK-NEXT: fcmp s1, s7
; CHECK-NEXT: fcvtzu w14, s2
; CHECK-NEXT: csinv w1, w8, wzr, le
; CHECK-NEXT: fcmp s2, #0.0
; CHECK-NEXT: csel w8, wzr, w14, lt
; CHECK-NEXT: fcmp s2, s7
; CHECK-NEXT: fcvtzu w9, s3
; CHECK-NEXT: csinv w2, w8, wzr, le
; CHECK-NEXT: fcmp s3, #0.0
; CHECK-NEXT: mov v4.s[2], w6
; CHECK-NEXT: csel w8, wzr, w9, lt
; CHECK-NEXT: fcmp s3, s7
; CHECK-NEXT: csinv w3, w8, wzr, le
; CHECK-NEXT: mov w5, v4.s[1]
; CHECK-NEXT: fmov w4, s4
; CHECK-NEXT: ret
%x = call <7 x i32> @llvm.fptoui.sat.v7f32.v7i32(<7 x float> %f)
ret <7 x i32> %x
}
; <8 x float> -> <8 x i32> saturating unsigned conversion.
; In the expected code the input occupies v0 and v1 (four lanes each) and the
; results are rebuilt lane by lane into v0.s[0..3] and v1.s[0..3]. Each lane
; uses fcvtzu, a csel to wzr on lt 0.0, and a csinv to all ones when not le
; the bound in s4 (built from #1333788671, 0x4F7FFFFF).
define <8 x i32> @test_unsigned_v8f32_v8i32(<8 x float> %f) {
; CHECK-LABEL: test_unsigned_v8f32_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov s2, v0.s[1]
; CHECK-NEXT: mov w8, #1333788671
; CHECK-NEXT: fmov s4, w8
; CHECK-NEXT: fcvtzu w8, s2
; CHECK-NEXT: fcmp s2, #0.0
; CHECK-NEXT: csel w8, wzr, w8, lt
; CHECK-NEXT: fcmp s2, s4
; CHECK-NEXT: fcvtzu w9, s0
; CHECK-NEXT: csinv w8, w8, wzr, le
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: csel w9, wzr, w9, lt
; CHECK-NEXT: fcmp s0, s4
; CHECK-NEXT: mov s3, v0.s[2]
; CHECK-NEXT: csinv w9, w9, wzr, le
; CHECK-NEXT: mov s2, v0.s[3]
; CHECK-NEXT: fmov s0, w9
; CHECK-NEXT: fcvtzu w9, s3
; CHECK-NEXT: fcmp s3, #0.0
; CHECK-NEXT: csel w9, wzr, w9, lt
; CHECK-NEXT: fcmp s3, s4
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: csinv w9, w9, wzr, le
; CHECK-NEXT: mov v0.s[2], w9
; CHECK-NEXT: fcvtzu w9, s2
; CHECK-NEXT: fcmp s2, #0.0
; CHECK-NEXT: csel w9, wzr, w9, lt
; CHECK-NEXT: fcmp s2, s4
; CHECK-NEXT: mov s3, v1.s[1]
; CHECK-NEXT: csinv w9, w9, wzr, le
; CHECK-NEXT: mov v0.s[3], w9
; CHECK-NEXT: fcvtzu w9, s3
; CHECK-NEXT: fcmp s3, #0.0
; CHECK-NEXT: csel w9, wzr, w9, lt
; CHECK-NEXT: fcmp s3, s4
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: csinv w9, w9, wzr, le
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: csel w8, wzr, w8, lt
; CHECK-NEXT: fcmp s1, s4
; CHECK-NEXT: mov s2, v1.s[2]
; CHECK-NEXT: csinv w8, w8, wzr, le
; CHECK-NEXT: mov s3, v1.s[3]
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: fcvtzu w8, s2
; CHECK-NEXT: fcmp s2, #0.0
; CHECK-NEXT: csel w8, wzr, w8, lt
; CHECK-NEXT: fcmp s2, s4
; CHECK-NEXT: mov v1.s[1], w9
; CHECK-NEXT: fcvtzu w9, s3
; CHECK-NEXT: csinv w8, w8, wzr, le
; CHECK-NEXT: fcmp s3, #0.0
; CHECK-NEXT: mov v1.s[2], w8
; CHECK-NEXT: csel w8, wzr, w9, lt
; CHECK-NEXT: fcmp s3, s4
; CHECK-NEXT: csinv w8, w8, wzr, le
; CHECK-NEXT: mov v1.s[3], w8
; CHECK-NEXT: ret
%x = call <8 x i32> @llvm.fptoui.sat.v8f32.v8i32(<8 x float> %f)
ret <8 x i32> %x
}
;
; Double to unsigned 32-bit -- Vector size variation
;
; Declarations of the saturating float-to-unsigned intrinsic for <1 x double>
; through <6 x double> sources, each returning i32 elements of the same count.
; NOTE(review): the overload suffix lists the source vector type before the
; result type (e.g. v1f64.v1i32); LangRef examples spell it result-first.
; Presumably the IR parser remangles the name -- TODO confirm.
declare <1 x i32> @llvm.fptoui.sat.v1f64.v1i32 (<1 x double>)
declare <2 x i32> @llvm.fptoui.sat.v2f64.v2i32 (<2 x double>)
declare <3 x i32> @llvm.fptoui.sat.v3f64.v3i32 (<3 x double>)
declare <4 x i32> @llvm.fptoui.sat.v4f64.v4i32 (<4 x double>)
declare <5 x i32> @llvm.fptoui.sat.v5f64.v5i32 (<5 x double>)
declare <6 x i32> @llvm.fptoui.sat.v6f64.v6i32 (<6 x double>)
; <1 x double> -> <1 x i32> saturating unsigned conversion.
; The expected code clamps instead of selecting: fmaxnm against zero
; (fmov d1, xzr), fminnm against the constant built by the mov/movk pair
; (0x41EFFFFFFFE00000, i.e. the double 4294967295.0), then a single fcvtzu.
define <1 x i32> @test_unsigned_v1f64_v1i32(<1 x double> %f) {
; CHECK-LABEL: test_unsigned_v1f64_v1i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #281474974613504
; CHECK-NEXT: fmov d1, xzr
; CHECK-NEXT: movk x8, #16879, lsl #48
; CHECK-NEXT: fmaxnm d0, d0, d1
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: fminnm d0, d0, d1
; CHECK-NEXT: fcvtzu w8, d0
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: ret
%x = call <1 x i32> @llvm.fptoui.sat.v1f64.v1i32(<1 x double> %f)
ret <1 x i32> %x
}
; <2 x double> -> <2 x i32> saturating unsigned conversion.
; The expected code handles the two lanes (d0 and v0.d[1]) separately: each is
; clamped with fmaxnm against zero and fminnm against the mov/movk constant
; (0x41EFFFFFFFE00000, the double 4294967295.0), converted with fcvtzu, and
; placed into v0.s[0] / v0.s[1].
define <2 x i32> @test_unsigned_v2f64_v2i32(<2 x double> %f) {
; CHECK-LABEL: test_unsigned_v2f64_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #281474974613504
; CHECK-NEXT: fmov d1, xzr
; CHECK-NEXT: movk x8, #16879, lsl #48
; CHECK-NEXT: mov d2, v0.d[1]
; CHECK-NEXT: fmaxnm d0, d0, d1
; CHECK-NEXT: fmov d3, x8
; CHECK-NEXT: fmaxnm d1, d2, d1
; CHECK-NEXT: fminnm d0, d0, d3
; CHECK-NEXT: fcvtzu w8, d0
; CHECK-NEXT: fminnm d1, d1, d3
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fcvtzu w8, d1
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%x = call <2 x i32> @llvm.fptoui.sat.v2f64.v2i32(<2 x double> %f)
ret <2 x i32> %x
}
; <3 x double> -> <3 x i32> saturating unsigned conversion.
; In the expected code the three elements are read from d0..d2, clamped with
; fmaxnm against zero (d3) and fminnm against the mov/movk constant in d4
; (0x41EFFFFFFFE00000, the double 4294967295.0), converted with fcvtzu, and
; inserted into v0.s[0..2].
; NOTE(review): a fourth lane is also produced (fmaxnm d3, d3, d0 ... mov
; v0.s[3], w8). It is not derived from a fourth input element; presumably this
; is the padding lane of a widened 4-element result -- TODO confirm.
define <3 x i32> @test_unsigned_v3f64_v3i32(<3 x double> %f) {
; CHECK-LABEL: test_unsigned_v3f64_v3i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #281474974613504
; CHECK-NEXT: fmov d3, xzr
; CHECK-NEXT: movk x8, #16879, lsl #48
; CHECK-NEXT: fmaxnm d0, d0, d3
; CHECK-NEXT: fmov d4, x8
; CHECK-NEXT: fmaxnm d1, d1, d3
; CHECK-NEXT: fmaxnm d2, d2, d3
; CHECK-NEXT: fmaxnm d3, d3, d0
; CHECK-NEXT: fminnm d0, d0, d4
; CHECK-NEXT: fminnm d1, d1, d4
; CHECK-NEXT: fcvtzu w8, d0
; CHECK-NEXT: fminnm d2, d2, d4
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fcvtzu w8, d1
; CHECK-NEXT: fminnm d3, d3, d4
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: fcvtzu w8, d2
; CHECK-NEXT: mov v0.s[2], w8
; CHECK-NEXT: fcvtzu w8, d3
; CHECK-NEXT: mov v0.s[3], w8
; CHECK-NEXT: ret
%x = call <3 x i32> @llvm.fptoui.sat.v3f64.v3i32(<3 x double> %f)
ret <3 x i32> %x
}
; <4 x double> -> <4 x i32> saturating unsigned conversion.
; In the expected code the input occupies v0 and v1 (two lanes each). Each of
; the four lanes is clamped with fmaxnm against zero and fminnm against the
; mov/movk constant (0x41EFFFFFFFE00000, the double 4294967295.0), converted
; with fcvtzu, and inserted into v0.s[0..3].
define <4 x i32> @test_unsigned_v4f64_v4i32(<4 x double> %f) {
; CHECK-LABEL: test_unsigned_v4f64_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #281474974613504
; CHECK-NEXT: fmov d2, xzr
; CHECK-NEXT: movk x8, #16879, lsl #48
; CHECK-NEXT: mov d3, v0.d[1]
; CHECK-NEXT: mov d4, v1.d[1]
; CHECK-NEXT: fmaxnm d0, d0, d2
; CHECK-NEXT: fmaxnm d3, d3, d2
; CHECK-NEXT: fmaxnm d1, d1, d2
; CHECK-NEXT: fmaxnm d2, d4, d2
; CHECK-NEXT: fmov d4, x8
; CHECK-NEXT: fminnm d0, d0, d4
; CHECK-NEXT: fminnm d3, d3, d4
; CHECK-NEXT: fcvtzu w8, d0
; CHECK-NEXT: fminnm d1, d1, d4
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fcvtzu w8, d3
; CHECK-NEXT: fminnm d2, d2, d4
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: fcvtzu w8, d1
; CHECK-NEXT: mov v0.s[2], w8
; CHECK-NEXT: fcvtzu w8, d2
; CHECK-NEXT: mov v0.s[3], w8
; CHECK-NEXT: ret
%x = call <4 x i32> @llvm.fptoui.sat.v4f64.v4i32(<4 x double> %f)
ret <4 x i32> %x
}
; <5 x double> -> <5 x i32> saturating unsigned conversion.
; In the expected code the five elements are read from d0..d4, clamped with
; fmaxnm against zero (d5) and fminnm against the mov/movk constant in d6
; (0x41EFFFFFFFE00000, the double 4294967295.0), and converted with fcvtzu
; directly into w0..w4.
define <5 x i32> @test_unsigned_v5f64_v5i32(<5 x double> %f) {
; CHECK-LABEL: test_unsigned_v5f64_v5i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #281474974613504
; CHECK-NEXT: fmov d5, xzr
; CHECK-NEXT: movk x8, #16879, lsl #48
; CHECK-NEXT: fmaxnm d0, d0, d5
; CHECK-NEXT: fmov d6, x8
; CHECK-NEXT: fmaxnm d1, d1, d5
; CHECK-NEXT: fmaxnm d2, d2, d5
; CHECK-NEXT: fmaxnm d3, d3, d5
; CHECK-NEXT: fmaxnm d4, d4, d5
; CHECK-NEXT: fminnm d0, d0, d6
; CHECK-NEXT: fminnm d1, d1, d6
; CHECK-NEXT: fminnm d2, d2, d6
; CHECK-NEXT: fminnm d3, d3, d6
; CHECK-NEXT: fminnm d4, d4, d6
; CHECK-NEXT: fcvtzu w0, d0
; CHECK-NEXT: fcvtzu w1, d1
; CHECK-NEXT: fcvtzu w2, d2
; CHECK-NEXT: fcvtzu w3, d3
; CHECK-NEXT: fcvtzu w4, d4
; CHECK-NEXT: ret
%x = call <5 x i32> @llvm.fptoui.sat.v5f64.v5i32(<5 x double> %f)
ret <5 x i32> %x
}
; <6 x double> -> <6 x i32> saturating unsigned conversion.
; In the expected code the six elements are read from d0..d5, clamped with
; fmaxnm against zero (d6) and fminnm against the mov/movk constant in d7
; (0x41EFFFFFFFE00000, the double 4294967295.0), and converted with fcvtzu
; directly into w0..w5.
define <6 x i32> @test_unsigned_v6f64_v6i32(<6 x double> %f) {
; CHECK-LABEL: test_unsigned_v6f64_v6i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #281474974613504
; CHECK-NEXT: fmov d6, xzr
; CHECK-NEXT: movk x8, #16879, lsl #48
; CHECK-NEXT: fmaxnm d0, d0, d6
; CHECK-NEXT: fmov d7, x8
; CHECK-NEXT: fmaxnm d1, d1, d6
; CHECK-NEXT: fmaxnm d2, d2, d6
; CHECK-NEXT: fmaxnm d3, d3, d6
; CHECK-NEXT: fmaxnm d4, d4, d6
; CHECK-NEXT: fmaxnm d5, d5, d6
; CHECK-NEXT: fminnm d0, d0, d7
; CHECK-NEXT: fminnm d1, d1, d7
; CHECK-NEXT: fminnm d2, d2, d7
; CHECK-NEXT: fminnm d3, d3, d7
; CHECK-NEXT: fminnm d4, d4, d7
; CHECK-NEXT: fminnm d5, d5, d7
; CHECK-NEXT: fcvtzu w0, d0
; CHECK-NEXT: fcvtzu w1, d1
; CHECK-NEXT: fcvtzu w2, d2
; CHECK-NEXT: fcvtzu w3, d3
; CHECK-NEXT: fcvtzu w4, d4
; CHECK-NEXT: fcvtzu w5, d5
; CHECK-NEXT: ret
%x = call <6 x i32> @llvm.fptoui.sat.v6f64.v6i32(<6 x double> %f)
ret <6 x i32> %x
}
;
; FP128 to unsigned 32-bit -- Vector size variation
;
; Declarations of the saturating float-to-unsigned intrinsic for <1 x fp128>
; through <4 x fp128> sources, each returning i32 elements of the same count.
; NOTE(review): the overload suffix lists the source vector type before the
; result type (e.g. v1f128.v1i32); LangRef examples spell it result-first.
; Presumably the IR parser remangles the name -- TODO confirm.
declare <1 x i32> @llvm.fptoui.sat.v1f128.v1i32 (<1 x fp128>)
declare <2 x i32> @llvm.fptoui.sat.v2f128.v2i32 (<2 x fp128>)
declare <3 x i32> @llvm.fptoui.sat.v3f128.v3i32 (<3 x fp128>)
declare <4 x i32> @llvm.fptoui.sat.v4f128.v4i32 (<4 x fp128>)
; <1 x fp128> -> <1 x i32> saturating unsigned conversion.
; The expected code goes through three calls on the spilled input:
;   __getf2 against the constant at .LCPI14_0 (result kept in w19),
;   __fixunstfsi for the conversion itself, and
;   __gttf2 against the constant at .LCPI14_1.
; csel replaces the converted value with wzr when the __getf2 result is lt 0,
; and csinv replaces it with all ones when the __gttf2 result is not le 0.
define <1 x i32> @test_unsigned_v1f128_v1i32(<1 x fp128> %f) {
; CHECK-LABEL: test_unsigned_v1f128_v1i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #32 // =32
; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: adrp x8, .LCPI14_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_0]
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: bl __getf2
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: bl __fixunstfsi
; CHECK-NEXT: adrp x8, .LCPI14_1
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_1]
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: cmp w19, #0 // =0
; CHECK-NEXT: csel w19, wzr, w0, lt
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: cmp w0, #0 // =0
; CHECK-NEXT: csinv w8, w19, wzr, le
; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: add sp, sp, #32 // =32
; CHECK-NEXT: ret
%x = call <1 x i32> @llvm.fptoui.sat.v1f128.v1i32(<1 x fp128> %f)
ret <1 x i32> %x
}
; <2 x fp128> -> <2 x i32> saturating unsigned conversion.
; The expected code repeats the __getf2 / __fixunstfsi / __gttf2 call sequence
; once per element, processing the element from q1 first and the element from
; q0 second. The two comparison constants (.LCPI15_0 and .LCPI15_1) are loaded
; once, spilled, and reloaded for the second element. Each converted value is
; replaced by wzr (csel, lt) or all ones (csinv, not le) based on the compare
; results; the first result is held in w20 until both are merged into v0.
define <2 x i32> @test_unsigned_v2f128_v2i32(<2 x fp128> %f) {
; CHECK-LABEL: test_unsigned_v2f128_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #96 // =96
; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 96
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w30, -32
; CHECK-NEXT: adrp x8, .LCPI15_0
; CHECK-NEXT: mov v2.16b, v1.16b
; CHECK-NEXT: stp q1, q0, [sp, #32] // 32-byte Folded Spill
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0]
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: bl __getf2
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: bl __fixunstfsi
; CHECK-NEXT: adrp x8, .LCPI15_1
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_1]
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: cmp w19, #0 // =0
; CHECK-NEXT: csel w19, wzr, w0, lt
; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: cmp w0, #0 // =0
; CHECK-NEXT: csinv w20, w19, wzr, le
; CHECK-NEXT: bl __getf2
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: bl __fixunstfsi
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT: cmp w19, #0 // =0
; CHECK-NEXT: csel w19, wzr, w0, lt
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: cmp w0, #0 // =0
; CHECK-NEXT: csinv w8, w19, wzr, le
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: mov v0.s[1], w20
; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: add sp, sp, #96 // =96
; CHECK-NEXT: ret
%x = call <2 x i32> @llvm.fptoui.sat.v2f128.v2i32(<2 x fp128> %f)
ret <2 x i32> %x
}
; <3 x fp128> -> <3 x i32> saturating unsigned conversion.
; The expected code repeats the __getf2 / __fixunstfsi / __gttf2 call sequence
; three times, in the order q1, q0, q2. The comparison constants (.LCPI16_0
; and .LCPI16_1) are loaded once and kept in stack slots. After each sequence,
; csel (lt) and csinv (le) substitute wzr or all ones for the converted value.
; The partially built result vector is spilled across the third sequence and
; completed with mov v0.s[2], w8.
define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) {
; CHECK-LABEL: test_unsigned_v3f128_v3i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #112 // =112
; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 112
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w30, -32
; CHECK-NEXT: adrp x8, .LCPI16_0
; CHECK-NEXT: stp q0, q2, [sp, #48] // 32-byte Folded Spill
; CHECK-NEXT: mov v2.16b, v1.16b
; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0]
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: bl __getf2
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: bl __fixunstfsi
; CHECK-NEXT: adrp x8, .LCPI16_1
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_1]
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: cmp w19, #0 // =0
; CHECK-NEXT: csel w19, wzr, w0, lt
; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload
; CHECK-NEXT: cmp w0, #0 // =0
; CHECK-NEXT: csinv w20, w19, wzr, le
; CHECK-NEXT: bl __getf2
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: bl __fixunstfsi
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: cmp w19, #0 // =0
; CHECK-NEXT: csel w19, wzr, w0, lt
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: cmp w0, #0 // =0
; CHECK-NEXT: csinv w8, w19, wzr, le
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: mov v0.s[1], w20
; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: bl __getf2
; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: bl __fixunstfsi
; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: cmp w19, #0 // =0
; CHECK-NEXT: csel w19, wzr, w0, lt
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: cmp w0, #0 // =0
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: csinv w8, w19, wzr, le
; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: mov v0.s[2], w8
; CHECK-NEXT: add sp, sp, #112 // =112
; CHECK-NEXT: ret
%x = call <3 x i32> @llvm.fptoui.sat.v3f128.v3i32(<3 x fp128> %f)
ret <3 x i32> %x
}
; <4 x fp128> -> <4 x i32> saturating unsigned conversion.
; The expected code repeats the __getf2 / __fixunstfsi / __gttf2 call sequence
; four times, in the order q1, q0, q2, q3. The comparison constants
; (.LCPI17_0 and .LCPI17_1) are loaded once and kept in stack slots. After
; each sequence, csel (lt) and csinv (le) substitute wzr or all ones for the
; converted value. The partially built result vector is spilled and reloaded
; around the later sequences and completed with mov v0.s[3], w8.
define <4 x i32> @test_unsigned_v4f128_v4i32(<4 x fp128> %f) {
; CHECK-LABEL: test_unsigned_v4f128_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #128 // =128
; CHECK-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 128
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w30, -32
; CHECK-NEXT: adrp x8, .LCPI17_0
; CHECK-NEXT: stp q0, q2, [sp, #16] // 32-byte Folded Spill
; CHECK-NEXT: mov v2.16b, v1.16b
; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0]
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: str q3, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: str q1, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: bl __getf2
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: bl __fixunstfsi
; CHECK-NEXT: adrp x8, .LCPI17_1
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_1]
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: cmp w19, #0 // =0
; CHECK-NEXT: csel w19, wzr, w0, lt
; CHECK-NEXT: str q1, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: cmp w0, #0 // =0
; CHECK-NEXT: csinv w20, w19, wzr, le
; CHECK-NEXT: bl __getf2
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: bl __fixunstfsi
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: cmp w19, #0 // =0
; CHECK-NEXT: csel w19, wzr, w0, lt
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: cmp w0, #0 // =0
; CHECK-NEXT: csinv w8, w19, wzr, le
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: mov v0.s[1], w20
; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: bl __getf2
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: bl __fixunstfsi
; CHECK-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload
; CHECK-NEXT: cmp w19, #0 // =0
; CHECK-NEXT: csel w19, wzr, w0, lt
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: cmp w0, #0 // =0
; CHECK-NEXT: csinv w8, w19, wzr, le
; CHECK-NEXT: mov v0.s[2], w8
; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: ldp q1, q0, [sp, #64] // 32-byte Folded Reload
; CHECK-NEXT: bl __getf2
; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: bl __fixunstfsi
; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: cmp w19, #0 // =0
; CHECK-NEXT: csel w19, wzr, w0, lt
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: cmp w0, #0 // =0
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: csinv w8, w19, wzr, le
; CHECK-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
; CHECK-NEXT: mov v0.s[3], w8
; CHECK-NEXT: add sp, sp, #128 // =128
; CHECK-NEXT: ret
%x = call <4 x i32> @llvm.fptoui.sat.v4f128.v4i32(<4 x fp128> %f)
ret <4 x i32> %x
}
;
; FP16 to unsigned 32-bit -- Vector size variation
;
; Declarations of the saturating float-to-unsigned intrinsic for <1 x half>
; through <8 x half> sources, each returning i32 elements of the same count.
; NOTE(review): the overload suffix lists the source vector type before the
; result type (e.g. v1f16.v1i32); LangRef examples spell it result-first.
; Presumably the IR parser remangles the name -- TODO confirm.
declare <1 x i32> @llvm.fptoui.sat.v1f16.v1i32 (<1 x half>)
declare <2 x i32> @llvm.fptoui.sat.v2f16.v2i32 (<2 x half>)
declare <3 x i32> @llvm.fptoui.sat.v3f16.v3i32 (<3 x half>)
declare <4 x i32> @llvm.fptoui.sat.v4f16.v4i32 (<4 x half>)
declare <5 x i32> @llvm.fptoui.sat.v5f16.v5i32 (<5 x half>)
declare <6 x i32> @llvm.fptoui.sat.v6f16.v6i32 (<6 x half>)
declare <7 x i32> @llvm.fptoui.sat.v7f16.v7i32 (<7 x half>)
declare <8 x i32> @llvm.fptoui.sat.v8f16.v8i32 (<8 x half>)
; <1 x half> -> <1 x i32> saturating unsigned conversion.
; The expected code first widens the half to single precision (fcvt s0, h0),
; then converts with fcvtzu, selects wzr when the value compares lt 0.0, and
; selects all ones (csinv of wzr) when it does not compare le the bound built
; from #1333788671 (0x4F7FFFFF).
define <1 x i32> @test_unsigned_v1f16_v1i32(<1 x half> %f) {
; CHECK-LABEL: test_unsigned_v1f16_v1i32:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvt s0, h0
; CHECK-NEXT: mov w8, #1333788671
; CHECK-NEXT: fcvtzu w9, s0
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: csel w8, wzr, w9, lt
; CHECK-NEXT: fcmp s0, s1
; CHECK-NEXT: csinv w8, w8, wzr, le
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: ret
%x = call <1 x i32> @llvm.fptoui.sat.v1f16.v1i32(<1 x half> %f)
ret <1 x i32> %x
}
; <2 x half> -> <2 x i32> saturating unsigned conversion.
; In the expected code each lane (h0 and v0.h[1]) is widened with fcvt to
; single precision, converted with fcvtzu, replaced by wzr when it compares
; lt 0.0, and replaced by all ones when it does not compare le the bound
; built from #1333788671 (0x4F7FFFFF). Results land in v0.s[0] / v0.s[1].
define <2 x i32> @test_unsigned_v2f16_v2i32(<2 x half> %f) {
; CHECK-LABEL: test_unsigned_v2f16_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov h1, v0.h[1]
; CHECK-NEXT: mov w8, #1333788671
; CHECK-NEXT: fcvt s1, h1
; CHECK-NEXT: fmov s2, w8
; CHECK-NEXT: fcvtzu w9, s1
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: fcvt s0, h0
; CHECK-NEXT: csel w9, wzr, w9, lt
; CHECK-NEXT: fcmp s1, s2
; CHECK-NEXT: fcvtzu w8, s0
; CHECK-NEXT: csinv w9, w9, wzr, le
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: csel w8, wzr, w8, lt
; CHECK-NEXT: fcmp s0, s2
; CHECK-NEXT: csinv w8, w8, wzr, le
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: mov v0.s[1], w9
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%x = call <2 x i32> @llvm.fptoui.sat.v2f16.v2i32(<2 x half> %f)
ret <2 x i32> %x
}
; <3 x half> -> <3 x i32> saturating unsigned conversion.
; The expected code processes four half lanes (h0 and v0.h[1..3]) and fills
; v0.s[0..3], the same sequence as the <4 x half> test. Each lane is widened
; with fcvt, converted with fcvtzu, replaced by wzr on lt 0.0, and replaced by
; all ones when not le the bound built from #1333788671 (0x4F7FFFFF).
define <3 x i32> @test_unsigned_v3f16_v3i32(<3 x half> %f) {
; CHECK-LABEL: test_unsigned_v3f16_v3i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov h1, v0.h[1]
; CHECK-NEXT: mov w8, #1333788671
; CHECK-NEXT: fcvt s1, h1
; CHECK-NEXT: fmov s3, w8
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: fcvt s2, h0
; CHECK-NEXT: csel w8, wzr, w8, lt
; CHECK-NEXT: fcmp s1, s3
; CHECK-NEXT: fcvtzu w9, s2
; CHECK-NEXT: csinv w8, w8, wzr, le
; CHECK-NEXT: fcmp s2, #0.0
; CHECK-NEXT: mov h1, v0.h[2]
; CHECK-NEXT: csel w9, wzr, w9, lt
; CHECK-NEXT: fcmp s2, s3
; CHECK-NEXT: mov h0, v0.h[3]
; CHECK-NEXT: fcvt s1, h1
; CHECK-NEXT: csinv w9, w9, wzr, le
; CHECK-NEXT: fcvt s2, h0
; CHECK-NEXT: fmov s0, w9
; CHECK-NEXT: fcvtzu w9, s1
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: csel w9, wzr, w9, lt
; CHECK-NEXT: fcmp s1, s3
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: fcvtzu w8, s2
; CHECK-NEXT: csinv w9, w9, wzr, le
; CHECK-NEXT: fcmp s2, #0.0
; CHECK-NEXT: csel w8, wzr, w8, lt
; CHECK-NEXT: fcmp s2, s3
; CHECK-NEXT: mov v0.s[2], w9
; CHECK-NEXT: csinv w8, w8, wzr, le
; CHECK-NEXT: mov v0.s[3], w8
; CHECK-NEXT: ret
%x = call <3 x i32> @llvm.fptoui.sat.v3f16.v3i32(<3 x half> %f)
ret <3 x i32> %x
}
; <4 x half> -> <4 x i32> saturating unsigned conversion.
; In the expected code each of the four half lanes of v0 is widened with fcvt
; to single precision, converted with fcvtzu, replaced by wzr when it compares
; lt 0.0, and replaced by all ones when it does not compare le the bound built
; from #1333788671 (0x4F7FFFFF). Results are inserted into v0.s[0..3].
define <4 x i32> @test_unsigned_v4f16_v4i32(<4 x half> %f) {
; CHECK-LABEL: test_unsigned_v4f16_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov h1, v0.h[1]
; CHECK-NEXT: mov w8, #1333788671
; CHECK-NEXT: fcvt s1, h1
; CHECK-NEXT: fmov s3, w8
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: fcvt s2, h0
; CHECK-NEXT: csel w8, wzr, w8, lt
; CHECK-NEXT: fcmp s1, s3
; CHECK-NEXT: fcvtzu w9, s2
; CHECK-NEXT: csinv w8, w8, wzr, le
; CHECK-NEXT: fcmp s2, #0.0
; CHECK-NEXT: mov h1, v0.h[2]
; CHECK-NEXT: csel w9, wzr, w9, lt
; CHECK-NEXT: fcmp s2, s3
; CHECK-NEXT: mov h0, v0.h[3]
; CHECK-NEXT: fcvt s1, h1
; CHECK-NEXT: csinv w9, w9, wzr, le
; CHECK-NEXT: fcvt s2, h0
; CHECK-NEXT: fmov s0, w9
; CHECK-NEXT: fcvtzu w9, s1
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: csel w9, wzr, w9, lt
; CHECK-NEXT: fcmp s1, s3
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: fcvtzu w8, s2
; CHECK-NEXT: csinv w9, w9, wzr, le
; CHECK-NEXT: fcmp s2, #0.0
; CHECK-NEXT: csel w8, wzr, w8, lt
; CHECK-NEXT: fcmp s2, s3
; CHECK-NEXT: mov v0.s[2], w9
; CHECK-NEXT: csinv w8, w8, wzr, le
; CHECK-NEXT: mov v0.s[3], w8
; CHECK-NEXT: ret
%x = call <4 x i32> @llvm.fptoui.sat.v4f16.v4i32(<4 x half> %f)
ret <4 x i32> %x
}
; <5 x half> -> <5 x i32> saturating unsigned conversion.
; In the expected code lanes 0-3 are taken from h0 and v0.h[1..3]; the fifth
; is reached by rotating v0 with ext ... #8 and reading h0 again. Each lane is
; widened with fcvt, converted with fcvtzu, replaced by wzr on lt 0.0, and
; replaced by all ones when not le the bound in s2 (built from #1333788671,
; 0x4F7FFFFF). The five results are written to w0..w4.
define <5 x i32> @test_unsigned_v5f16_v5i32(<5 x half> %f) {
; CHECK-LABEL: test_unsigned_v5f16_v5i32:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvt s1, h0
; CHECK-NEXT: mov w8, #1333788671
; CHECK-NEXT: fcvtzu w9, s1
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: fmov s2, w8
; CHECK-NEXT: csel w8, wzr, w9, lt
; CHECK-NEXT: fcmp s1, s2
; CHECK-NEXT: mov h1, v0.h[1]
; CHECK-NEXT: fcvt s1, h1
; CHECK-NEXT: fcvtzu w9, s1
; CHECK-NEXT: csinv w0, w8, wzr, le
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: csel w8, wzr, w9, lt
; CHECK-NEXT: fcmp s1, s2
; CHECK-NEXT: mov h1, v0.h[2]
; CHECK-NEXT: fcvt s1, h1
; CHECK-NEXT: fcvtzu w9, s1
; CHECK-NEXT: csinv w1, w8, wzr, le
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: csel w8, wzr, w9, lt
; CHECK-NEXT: fcmp s1, s2
; CHECK-NEXT: mov h1, v0.h[3]
; CHECK-NEXT: fcvt s1, h1
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: fcvtzu w9, s1
; CHECK-NEXT: csinv w2, w8, wzr, le
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: fcvt s0, h0
; CHECK-NEXT: csel w8, wzr, w9, lt
; CHECK-NEXT: fcmp s1, s2
; CHECK-NEXT: fcvtzu w10, s0
; CHECK-NEXT: csinv w3, w8, wzr, le
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: csel w8, wzr, w10, lt
; CHECK-NEXT: fcmp s0, s2
; CHECK-NEXT: csinv w4, w8, wzr, le
; CHECK-NEXT: ret
%x = call <5 x i32> @llvm.fptoui.sat.v5f16.v5i32(<5 x half> %f)
ret <5 x i32> %x
}
; Saturating unsigned conversion of <6 x half> to <6 x i32>.
; The expected assembly converts lane by lane (fcvt, fcvtzu, csel to zero on
; "lt" vs 0.0, csinv to all-ones when not "le" vs the bound in s3) and leaves
; the six results in w0-w5. Lanes 4 and 5 come from the upper half of the
; input, extracted with ext into v1.
; #1333788671 is 0x4F7FFFFF, the largest single-precision value below 2^32.
define <6 x i32> @test_unsigned_v6f16_v6i32(<6 x half> %f) {
; CHECK-LABEL: test_unsigned_v6f16_v6i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: mov h2, v1.h[1]
; CHECK-NEXT: mov w8, #1333788671
; CHECK-NEXT: fcvt s2, h2
; CHECK-NEXT: fmov s3, w8
; CHECK-NEXT: fcvtzu w8, s2
; CHECK-NEXT: fcmp s2, #0.0
; CHECK-NEXT: csel w8, wzr, w8, lt
; CHECK-NEXT: fcmp s2, s3
; CHECK-NEXT: fcvt s1, h1
; CHECK-NEXT: fcvtzu w9, s1
; CHECK-NEXT: csinv w5, w8, wzr, le
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: fcvt s2, h0
; CHECK-NEXT: csel w8, wzr, w9, lt
; CHECK-NEXT: fcmp s1, s3
; CHECK-NEXT: mov h1, v0.h[1]
; CHECK-NEXT: fcvtzu w9, s2
; CHECK-NEXT: csinv w8, w8, wzr, le
; CHECK-NEXT: fcmp s2, #0.0
; CHECK-NEXT: csel w9, wzr, w9, lt
; CHECK-NEXT: fcmp s2, s3
; CHECK-NEXT: fcvt s1, h1
; CHECK-NEXT: mov h2, v0.h[2]
; CHECK-NEXT: fcvtzu w10, s1
; CHECK-NEXT: csinv w0, w9, wzr, le
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: fcvt s2, h2
; CHECK-NEXT: csel w9, wzr, w10, lt
; CHECK-NEXT: fcmp s1, s3
; CHECK-NEXT: mov h0, v0.h[3]
; CHECK-NEXT: fcvtzu w11, s2
; CHECK-NEXT: csinv w1, w9, wzr, le
; CHECK-NEXT: fcmp s2, #0.0
; CHECK-NEXT: fcvt s0, h0
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: csel w8, wzr, w11, lt
; CHECK-NEXT: fcmp s2, s3
; CHECK-NEXT: fcvtzu w12, s0
; CHECK-NEXT: csinv w2, w8, wzr, le
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: mov v1.s[1], w5
; CHECK-NEXT: csel w8, wzr, w12, lt
; CHECK-NEXT: fcmp s0, s3
; CHECK-NEXT: csinv w3, w8, wzr, le
; CHECK-NEXT: fmov w4, s1
; CHECK-NEXT: ret
%x = call <6 x i32> @llvm.fptoui.sat.v6f16.v6i32(<6 x half> %f)
ret <6 x i32> %x
}
; Saturating unsigned conversion of <7 x half> to <7 x i32>.
; The expected assembly converts lane by lane (fcvt, fcvtzu, csel to zero on
; "lt" vs 0.0, csinv to all-ones when not "le" vs the bound in s3) and leaves
; the seven results in w0-w6. Lanes 4-6 come from the upper half of the input,
; extracted with ext into v1; w4 and w5 are read back out of v1 at the end.
; #1333788671 is 0x4F7FFFFF, the largest single-precision value below 2^32.
define <7 x i32> @test_unsigned_v7f16_v7i32(<7 x half> %f) {
; CHECK-LABEL: test_unsigned_v7f16_v7i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: mov h2, v1.h[1]
; CHECK-NEXT: mov w8, #1333788671
; CHECK-NEXT: fcvt s2, h2
; CHECK-NEXT: fmov s3, w8
; CHECK-NEXT: fcvtzu w8, s2
; CHECK-NEXT: fcmp s2, #0.0
; CHECK-NEXT: csel w8, wzr, w8, lt
; CHECK-NEXT: fcmp s2, s3
; CHECK-NEXT: fcvt s2, h1
; CHECK-NEXT: fcvtzu w9, s2
; CHECK-NEXT: csinv w8, w8, wzr, le
; CHECK-NEXT: fcmp s2, #0.0
; CHECK-NEXT: mov h1, v1.h[2]
; CHECK-NEXT: csel w9, wzr, w9, lt
; CHECK-NEXT: fcmp s2, s3
; CHECK-NEXT: fcvt s1, h1
; CHECK-NEXT: fcvtzu w10, s1
; CHECK-NEXT: csinv w9, w9, wzr, le
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: fcvt s2, h0
; CHECK-NEXT: csel w10, wzr, w10, lt
; CHECK-NEXT: fcmp s1, s3
; CHECK-NEXT: mov h1, v0.h[1]
; CHECK-NEXT: fcvtzu w11, s2
; CHECK-NEXT: csinv w6, w10, wzr, le
; CHECK-NEXT: fcmp s2, #0.0
; CHECK-NEXT: csel w10, wzr, w11, lt
; CHECK-NEXT: fcmp s2, s3
; CHECK-NEXT: fcvt s1, h1
; CHECK-NEXT: mov h2, v0.h[2]
; CHECK-NEXT: fcvtzu w11, s1
; CHECK-NEXT: csinv w0, w10, wzr, le
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: fcvt s2, h2
; CHECK-NEXT: csel w10, wzr, w11, lt
; CHECK-NEXT: fcmp s1, s3
; CHECK-NEXT: mov h0, v0.h[3]
; CHECK-NEXT: fcvtzu w12, s2
; CHECK-NEXT: fmov s1, w9
; CHECK-NEXT: csinv w1, w10, wzr, le
; CHECK-NEXT: fcmp s2, #0.0
; CHECK-NEXT: fcvt s0, h0
; CHECK-NEXT: mov v1.s[1], w8
; CHECK-NEXT: csel w8, wzr, w12, lt
; CHECK-NEXT: fcmp s2, s3
; CHECK-NEXT: fcvtzu w13, s0
; CHECK-NEXT: csinv w2, w8, wzr, le
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: mov v1.s[2], w6
; CHECK-NEXT: csel w8, wzr, w13, lt
; CHECK-NEXT: fcmp s0, s3
; CHECK-NEXT: csinv w3, w8, wzr, le
; CHECK-NEXT: mov w5, v1.s[1]
; CHECK-NEXT: fmov w4, s1
; CHECK-NEXT: ret
%x = call <7 x i32> @llvm.fptoui.sat.v7f16.v7i32(<7 x half> %f)
ret <7 x i32> %x
}
; Saturating unsigned conversion of <8 x half> to <8 x i32>.
; The expected assembly converts lane by lane (fcvt, fcvtzu, csel to zero on
; "lt" vs 0.0, csinv to all-ones when not "le" vs the bound in s4). Results
; for lanes 0-3 are inserted into v0 and results for lanes 4-7 into v1; the
; upper input half is first copied to v5 with ext.
; #1333788671 is 0x4F7FFFFF, the largest single-precision value below 2^32.
define <8 x i32> @test_unsigned_v8f16_v8i32(<8 x half> %f) {
; CHECK-LABEL: test_unsigned_v8f16_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov h1, v0.h[1]
; CHECK-NEXT: mov w8, #1333788671
; CHECK-NEXT: fcvt s1, h1
; CHECK-NEXT: fmov s4, w8
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: fcvt s2, h0
; CHECK-NEXT: csel w8, wzr, w8, lt
; CHECK-NEXT: fcmp s1, s4
; CHECK-NEXT: fcvtzu w9, s2
; CHECK-NEXT: csinv w8, w8, wzr, le
; CHECK-NEXT: fcmp s2, #0.0
; CHECK-NEXT: mov h3, v0.h[2]
; CHECK-NEXT: csel w9, wzr, w9, lt
; CHECK-NEXT: fcmp s2, s4
; CHECK-NEXT: fcvt s3, h3
; CHECK-NEXT: csinv w9, w9, wzr, le
; CHECK-NEXT: mov h1, v0.h[3]
; CHECK-NEXT: ext v5.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: fmov s0, w9
; CHECK-NEXT: fcvtzu w9, s3
; CHECK-NEXT: fcmp s3, #0.0
; CHECK-NEXT: fcvt s1, h1
; CHECK-NEXT: csel w9, wzr, w9, lt
; CHECK-NEXT: fcmp s3, s4
; CHECK-NEXT: mov h2, v5.h[1]
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: csinv w9, w9, wzr, le
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: csel w8, wzr, w8, lt
; CHECK-NEXT: fcmp s1, s4
; CHECK-NEXT: fcvt s2, h2
; CHECK-NEXT: mov v0.s[2], w9
; CHECK-NEXT: fcvtzu w9, s2
; CHECK-NEXT: csinv w8, w8, wzr, le
; CHECK-NEXT: fcmp s2, #0.0
; CHECK-NEXT: fcvt s1, h5
; CHECK-NEXT: csel w9, wzr, w9, lt
; CHECK-NEXT: fcmp s2, s4
; CHECK-NEXT: mov v0.s[3], w8
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: csinv w9, w9, wzr, le
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: mov h2, v5.h[2]
; CHECK-NEXT: csel w8, wzr, w8, lt
; CHECK-NEXT: fcmp s1, s4
; CHECK-NEXT: fcvt s2, h2
; CHECK-NEXT: csinv w8, w8, wzr, le
; CHECK-NEXT: mov h3, v5.h[3]
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: fcvtzu w8, s2
; CHECK-NEXT: fcmp s2, #0.0
; CHECK-NEXT: fcvt s3, h3
; CHECK-NEXT: csel w8, wzr, w8, lt
; CHECK-NEXT: fcmp s2, s4
; CHECK-NEXT: mov v1.s[1], w9
; CHECK-NEXT: fcvtzu w9, s3
; CHECK-NEXT: csinv w8, w8, wzr, le
; CHECK-NEXT: fcmp s3, #0.0
; CHECK-NEXT: mov v1.s[2], w8
; CHECK-NEXT: csel w8, wzr, w9, lt
; CHECK-NEXT: fcmp s3, s4
; CHECK-NEXT: csinv w8, w8, wzr, le
; CHECK-NEXT: mov v1.s[3], w8
; CHECK-NEXT: ret
%x = call <8 x i32> @llvm.fptoui.sat.v8f16.v8i32(<8 x half> %f)
ret <8 x i32> %x
}
;
; 2-Vector float to unsigned integer -- result size variation
;
; Declarations of the saturating unsigned conversion intrinsic for a
; <2 x float> source, one per result element width exercised in this group
; (i1, i8, i13, i16, i19, i50, i64, i100, i128). There is no i32 entry here.
; NOTE(review): the name suffixes list the source vector type first and the
; result type second (e.g. v2f32.v2i1) -- confirm this is the intended
; mangling order for the overloaded intrinsic.
declare <2 x i1> @llvm.fptoui.sat.v2f32.v2i1 (<2 x float>)
declare <2 x i8> @llvm.fptoui.sat.v2f32.v2i8 (<2 x float>)
declare <2 x i13> @llvm.fptoui.sat.v2f32.v2i13 (<2 x float>)
declare <2 x i16> @llvm.fptoui.sat.v2f32.v2i16 (<2 x float>)
declare <2 x i19> @llvm.fptoui.sat.v2f32.v2i19 (<2 x float>)
declare <2 x i50> @llvm.fptoui.sat.v2f32.v2i50 (<2 x float>)
declare <2 x i64> @llvm.fptoui.sat.v2f32.v2i64 (<2 x float>)
declare <2 x i100> @llvm.fptoui.sat.v2f32.v2i100(<2 x float>)
declare <2 x i128> @llvm.fptoui.sat.v2f32.v2i128(<2 x float>)
; Saturating unsigned conversion of <2 x float> to <2 x i1>.
; The expected assembly clamps each lane in floating point before converting:
; fmaxnm against zero (s1 = wzr), fminnm against 1.0 (s2), then fcvtzu.
define <2 x i1> @test_unsigned_v2f32_v2i1(<2 x float> %f) {
; CHECK-LABEL: test_unsigned_v2f32_v2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov s1, wzr
; CHECK-NEXT: fmov s2, #1.00000000
; CHECK-NEXT: mov s3, v0.s[1]
; CHECK-NEXT: fmaxnm s0, s0, s1
; CHECK-NEXT: fmaxnm s1, s3, s1
; CHECK-NEXT: fminnm s0, s0, s2
; CHECK-NEXT: fcvtzu w8, s0
; CHECK-NEXT: fminnm s1, s1, s2
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%x = call <2 x i1> @llvm.fptoui.sat.v2f32.v2i1(<2 x float> %f)
ret <2 x i1> %x
}
; Saturating unsigned conversion of <2 x float> to <2 x i8>.
; The expected assembly clamps each lane with fmaxnm against zero and fminnm
; against the upper bound in s3, then converts with fcvtzu.
; #1132396544 is 0x437F0000, the single-precision encoding of 255.0.
define <2 x i8> @test_unsigned_v2f32_v2i8(<2 x float> %f) {
; CHECK-LABEL: test_unsigned_v2f32_v2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov s1, wzr
; CHECK-NEXT: mov w8, #1132396544
; CHECK-NEXT: mov s2, v0.s[1]
; CHECK-NEXT: fmaxnm s0, s0, s1
; CHECK-NEXT: fmov s3, w8
; CHECK-NEXT: fmaxnm s1, s2, s1
; CHECK-NEXT: fminnm s0, s0, s3
; CHECK-NEXT: fcvtzu w8, s0
; CHECK-NEXT: fminnm s1, s1, s3
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%x = call <2 x i8> @llvm.fptoui.sat.v2f32.v2i8(<2 x float> %f)
ret <2 x i8> %x
}
; Saturating unsigned conversion of <2 x float> to <2 x i13> (a result width
; that is not a power of two).
; The expected assembly clamps each lane with fmaxnm against zero and fminnm
; against the upper bound in s3, then converts with fcvtzu.
; The mov/movk pair builds 0x45FFF800, the single-precision encoding of
; 8191.0 (2^13 - 1).
define <2 x i13> @test_unsigned_v2f32_v2i13(<2 x float> %f) {
; CHECK-LABEL: test_unsigned_v2f32_v2i13:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #63488
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov s1, wzr
; CHECK-NEXT: movk w8, #17919, lsl #16
; CHECK-NEXT: mov s2, v0.s[1]
; CHECK-NEXT: fmaxnm s0, s0, s1
; CHECK-NEXT: fmov s3, w8
; CHECK-NEXT: fmaxnm s1, s2, s1
; CHECK-NEXT: fminnm s0, s0, s3
; CHECK-NEXT: fcvtzu w8, s0
; CHECK-NEXT: fminnm s1, s1, s3
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%x = call <2 x i13> @llvm.fptoui.sat.v2f32.v2i13(<2 x float> %f)
ret <2 x i13> %x
}
; Saturating unsigned conversion of <2 x float> to <2 x i16>.
; The expected assembly clamps each lane with fmaxnm against zero and fminnm
; against the upper bound in s3, then converts with fcvtzu.
; The mov/movk pair builds 0x477FFF00, the single-precision encoding of
; 65535.0 (2^16 - 1).
define <2 x i16> @test_unsigned_v2f32_v2i16(<2 x float> %f) {
; CHECK-LABEL: test_unsigned_v2f32_v2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #65280
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov s1, wzr
; CHECK-NEXT: movk w8, #18303, lsl #16
; CHECK-NEXT: mov s2, v0.s[1]
; CHECK-NEXT: fmaxnm s0, s0, s1
; CHECK-NEXT: fmov s3, w8
; CHECK-NEXT: fmaxnm s1, s2, s1
; CHECK-NEXT: fminnm s0, s0, s3
; CHECK-NEXT: fcvtzu w8, s0
; CHECK-NEXT: fminnm s1, s1, s3
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%x = call <2 x i16> @llvm.fptoui.sat.v2f32.v2i16(<2 x float> %f)
ret <2 x i16> %x
}
; Saturating unsigned conversion of <2 x float> to <2 x i19> (a result width
; that is not a power of two).
; The expected assembly clamps each lane with fmaxnm against zero and fminnm
; against the upper bound in s3, then converts with fcvtzu.
; The mov/movk pair builds 0x48FFFFE0, the single-precision encoding of
; 524287.0 (2^19 - 1).
define <2 x i19> @test_unsigned_v2f32_v2i19(<2 x float> %f) {
; CHECK-LABEL: test_unsigned_v2f32_v2i19:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #65504
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov s1, wzr
; CHECK-NEXT: movk w8, #18687, lsl #16
; CHECK-NEXT: mov s2, v0.s[1]
; CHECK-NEXT: fmaxnm s0, s0, s1
; CHECK-NEXT: fmov s3, w8
; CHECK-NEXT: fmaxnm s1, s2, s1
; CHECK-NEXT: fminnm s0, s0, s3
; CHECK-NEXT: fcvtzu w8, s0
; CHECK-NEXT: fminnm s1, s1, s3
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%x = call <2 x i19> @llvm.fptoui.sat.v2f32.v2i19(<2 x float> %f)
ret <2 x i19> %x
}
; Saturating unsigned conversion of <2 x float> to <2 x i32>, repeated inside
; the result-size-variation group (hence the "_duplicate" suffix).
; The v2i32 intrinsic is not among the declarations of this group; presumably
; it is declared earlier in the file -- verify when moving this test.
; The expected assembly uses compare-and-select instead of a floating-point
; clamp: fcvtzu, csel to zero on "lt" vs 0.0, csinv to all-ones when not "le"
; vs the bound in s2.
; #1333788671 is 0x4F7FFFFF, the largest single-precision value below 2^32.
define <2 x i32> @test_unsigned_v2f32_v2i32_duplicate(<2 x float> %f) {
; CHECK-LABEL: test_unsigned_v2f32_v2i32_duplicate:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov s1, v0.s[1]
; CHECK-NEXT: mov w8, #1333788671
; CHECK-NEXT: fmov s2, w8
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: csel w8, wzr, w8, lt
; CHECK-NEXT: fcmp s1, s2
; CHECK-NEXT: fcvtzu w9, s0
; CHECK-NEXT: csinv w8, w8, wzr, le
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: csel w9, wzr, w9, lt
; CHECK-NEXT: fcmp s0, s2
; CHECK-NEXT: csinv w9, w9, wzr, le
; CHECK-NEXT: fmov s0, w9
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%x = call <2 x i32> @llvm.fptoui.sat.v2f32.v2i32(<2 x float> %f)
ret <2 x i32> %x
}
; Saturating unsigned conversion of <2 x float> to <2 x i50>.
; The expected assembly converts each lane with a 64-bit fcvtzu, selects zero
; on "lt" vs 0.0, and selects the integer maximum held in x9 on "gt" vs the
; float bound in s2; results are packed into the two 64-bit lanes of v0.
; #1484783615 is 0x587FFFFF, the largest single-precision value below 2^50;
; #1125899906842623 is 2^50 - 1.
define <2 x i50> @test_unsigned_v2f32_v2i50(<2 x float> %f) {
; CHECK-LABEL: test_unsigned_v2f32_v2i50:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov s1, v0.s[1]
; CHECK-NEXT: mov w8, #1484783615
; CHECK-NEXT: fmov s2, w8
; CHECK-NEXT: fcvtzu x8, s1
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: mov x9, #1125899906842623
; CHECK-NEXT: csel x8, xzr, x8, lt
; CHECK-NEXT: fcmp s1, s2
; CHECK-NEXT: fcvtzu x10, s0
; CHECK-NEXT: csel x8, x9, x8, gt
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: csel x10, xzr, x10, lt
; CHECK-NEXT: fcmp s0, s2
; CHECK-NEXT: csel x9, x9, x10, gt
; CHECK-NEXT: fmov d0, x9
; CHECK-NEXT: mov v0.d[1], x8
; CHECK-NEXT: ret
%x = call <2 x i50> @llvm.fptoui.sat.v2f32.v2i50(<2 x float> %f)
ret <2 x i50> %x
}
; Saturating unsigned conversion of <2 x float> to <2 x i64>.
; The expected assembly converts each lane with a 64-bit fcvtzu, selects zero
; on "lt" vs 0.0, and produces all-ones (csinv with xzr) when the compare
; against the bound in s2 is not "le".
; #1602224127 is 0x5F7FFFFF, the largest single-precision value below 2^64.
define <2 x i64> @test_unsigned_v2f32_v2i64(<2 x float> %f) {
; CHECK-LABEL: test_unsigned_v2f32_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov s1, v0.s[1]
; CHECK-NEXT: mov w8, #1602224127
; CHECK-NEXT: fmov s2, w8
; CHECK-NEXT: fcvtzu x8, s1
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: csel x8, xzr, x8, lt
; CHECK-NEXT: fcmp s1, s2
; CHECK-NEXT: fcvtzu x9, s0
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: csel x9, xzr, x9, lt
; CHECK-NEXT: fcmp s0, s2
; CHECK-NEXT: csinv x9, x9, xzr, le
; CHECK-NEXT: fmov d0, x9
; CHECK-NEXT: mov v0.d[1], x8
; CHECK-NEXT: ret
%x = call <2 x i64> @llvm.fptoui.sat.v2f32.v2i64(<2 x float> %f)
ret <2 x i64> %x
}
; Saturating unsigned conversion of <2 x float> to <2 x i100>.
; No single instruction covers this width, so the expected assembly calls
; __fixunssfti once per lane (lane 1 first, lane 0 second) and saturates the
; x0/x1 result pair afterwards: both words become zero on "lt" vs 0.0, the low
; word becomes all-ones via csinv, and the high word is replaced by x21 on
; "gt" vs the bound in s9.
; The input vector is spilled to the stack and d8/d9, x19-x21 and x30 are
; saved because values must survive the calls.
; #1904214015 is 0x717FFFFF, the largest single-precision value below 2^100;
; #68719476735 is 2^36 - 1, i.e. the upper 36 bits of a saturated i100.
define <2 x i100> @test_unsigned_v2f32_v2i100(<2 x float> %f) {
; CHECK-LABEL: test_unsigned_v2f32_v2i100:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #64 // =64
; CHECK-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x21, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w30, -32
; CHECK-NEXT: .cfi_offset b8, -40
; CHECK-NEXT: .cfi_offset b9, -48
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov s8, v0.s[1]
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: mov w8, #1904214015
; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: fmov s9, w8
; CHECK-NEXT: mov x21, #68719476735
; CHECK-NEXT: csel x9, xzr, x0, lt
; CHECK-NEXT: csel x10, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: csel x19, x21, x10, gt
; CHECK-NEXT: csinv x20, x9, xzr, le
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov x2, x20
; CHECK-NEXT: mov x3, x19
; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s0, s9
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: csel x1, x21, x9, gt
; CHECK-NEXT: ldp x30, x21, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: add sp, sp, #64 // =64
; CHECK-NEXT: ret
%x = call <2 x i100> @llvm.fptoui.sat.v2f32.v2i100(<2 x float> %f)
ret <2 x i100> %x
}
; Saturating unsigned conversion of <2 x float> to <2 x i128>.
; The expected assembly calls __fixunssfti once per lane (lane 1 first, lane 0
; second) and saturates the x0/x1 result pair afterwards: both words become
; zero on "lt" vs 0.0 and both become all-ones (csinv with xzr) when the
; compare against the bound in s9 is not "le".
; The input vector is spilled to the stack and d8/d9, x19/x20 and x30 are
; saved because values must survive the calls.
; #2139095039 is 0x7F7FFFFF, the largest finite single-precision value.
define <2 x i128> @test_unsigned_v2f32_v2i128(<2 x float> %f) {
; CHECK-LABEL: test_unsigned_v2f32_v2i128:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #64 // =64
; CHECK-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w30, -32
; CHECK-NEXT: .cfi_offset b8, -40
; CHECK-NEXT: .cfi_offset b9, -48
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov s8, v0.s[1]
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: mov w8, #2139095039
; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: fmov s9, w8
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: csel x10, xzr, x0, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: csinv x19, x10, xzr, le
; CHECK-NEXT: csinv x20, x9, xzr, le
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov x2, x19
; CHECK-NEXT: mov x3, x20
; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s0, s9
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: csinv x1, x9, xzr, le
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: add sp, sp, #64 // =64
; CHECK-NEXT: ret
%x = call <2 x i128> @llvm.fptoui.sat.v2f32.v2i128(<2 x float> %f)
ret <2 x i128> %x
}
;
; 2-Vector double to unsigned integer -- result size variation
;
; Declarations of the saturating unsigned conversion intrinsic for a
; <2 x double> source, one per result element width exercised in this group
; (i1, i8, i13, i16, i19, i50, i64, i100, i128). There is no i32 entry here.
; NOTE(review): the name suffixes list the source vector type first and the
; result type second (e.g. v2f64.v2i1) -- confirm this is the intended
; mangling order for the overloaded intrinsic.
declare <2 x i1> @llvm.fptoui.sat.v2f64.v2i1 (<2 x double>)
declare <2 x i8> @llvm.fptoui.sat.v2f64.v2i8 (<2 x double>)
declare <2 x i13> @llvm.fptoui.sat.v2f64.v2i13 (<2 x double>)
declare <2 x i16> @llvm.fptoui.sat.v2f64.v2i16 (<2 x double>)
declare <2 x i19> @llvm.fptoui.sat.v2f64.v2i19 (<2 x double>)
declare <2 x i50> @llvm.fptoui.sat.v2f64.v2i50 (<2 x double>)
declare <2 x i64> @llvm.fptoui.sat.v2f64.v2i64 (<2 x double>)
declare <2 x i100> @llvm.fptoui.sat.v2f64.v2i100(<2 x double>)
declare <2 x i128> @llvm.fptoui.sat.v2f64.v2i128(<2 x double>)
; Saturating unsigned conversion of <2 x double> to <2 x i1>.
; The expected assembly clamps each lane in floating point before converting:
; fmaxnm against zero (d1 = xzr), fminnm against 1.0 (d2), then fcvtzu into a
; 32-bit register.
define <2 x i1> @test_unsigned_v2f64_v2i1(<2 x double> %f) {
; CHECK-LABEL: test_unsigned_v2f64_v2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d1, xzr
; CHECK-NEXT: fmov d2, #1.00000000
; CHECK-NEXT: mov d3, v0.d[1]
; CHECK-NEXT: fmaxnm d0, d0, d1
; CHECK-NEXT: fmaxnm d1, d3, d1
; CHECK-NEXT: fminnm d0, d0, d2
; CHECK-NEXT: fcvtzu w8, d0
; CHECK-NEXT: fminnm d1, d1, d2
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fcvtzu w8, d1
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%x = call <2 x i1> @llvm.fptoui.sat.v2f64.v2i1(<2 x double> %f)
ret <2 x i1> %x
}
; Saturating unsigned conversion of <2 x double> to <2 x i8>.
; The expected assembly clamps each lane with fmaxnm against zero and fminnm
; against the upper bound in d3, then converts with fcvtzu.
; The mov/movk pair builds 0x406FE00000000000, the double-precision encoding
; of 255.0.
define <2 x i8> @test_unsigned_v2f64_v2i8(<2 x double> %f) {
; CHECK-LABEL: test_unsigned_v2f64_v2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #246290604621824
; CHECK-NEXT: fmov d1, xzr
; CHECK-NEXT: movk x8, #16495, lsl #48
; CHECK-NEXT: mov d2, v0.d[1]
; CHECK-NEXT: fmaxnm d0, d0, d1
; CHECK-NEXT: fmov d3, x8
; CHECK-NEXT: fmaxnm d1, d2, d1
; CHECK-NEXT: fminnm d0, d0, d3
; CHECK-NEXT: fcvtzu w8, d0
; CHECK-NEXT: fminnm d1, d1, d3
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fcvtzu w8, d1
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%x = call <2 x i8> @llvm.fptoui.sat.v2f64.v2i8(<2 x double> %f)
ret <2 x i8> %x
}
; Saturating unsigned conversion of <2 x double> to <2 x i13> (a result width
; that is not a power of two).
; The expected assembly clamps each lane with fmaxnm against zero and fminnm
; against the upper bound in d3, then converts with fcvtzu.
; The mov/movk pair builds 0x40BFFF0000000000, the double-precision encoding
; of 8191.0 (2^13 - 1).
define <2 x i13> @test_unsigned_v2f64_v2i13(<2 x double> %f) {
; CHECK-LABEL: test_unsigned_v2f64_v2i13:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #280375465082880
; CHECK-NEXT: fmov d1, xzr
; CHECK-NEXT: movk x8, #16575, lsl #48
; CHECK-NEXT: mov d2, v0.d[1]
; CHECK-NEXT: fmaxnm d0, d0, d1
; CHECK-NEXT: fmov d3, x8
; CHECK-NEXT: fmaxnm d1, d2, d1
; CHECK-NEXT: fminnm d0, d0, d3
; CHECK-NEXT: fcvtzu w8, d0
; CHECK-NEXT: fminnm d1, d1, d3
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fcvtzu w8, d1
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%x = call <2 x i13> @llvm.fptoui.sat.v2f64.v2i13(<2 x double> %f)
ret <2 x i13> %x
}
; Saturating unsigned conversion of <2 x double> to <2 x i16>.
; The expected assembly clamps each lane with fmaxnm against zero and fminnm
; against the upper bound in d3, then converts with fcvtzu.
; The mov/movk pair builds 0x40EFFFE000000000, the double-precision encoding
; of 65535.0 (2^16 - 1).
define <2 x i16> @test_unsigned_v2f64_v2i16(<2 x double> %f) {
; CHECK-LABEL: test_unsigned_v2f64_v2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #281337537757184
; CHECK-NEXT: fmov d1, xzr
; CHECK-NEXT: movk x8, #16623, lsl #48
; CHECK-NEXT: mov d2, v0.d[1]
; CHECK-NEXT: fmaxnm d0, d0, d1
; CHECK-NEXT: fmov d3, x8
; CHECK-NEXT: fmaxnm d1, d2, d1
; CHECK-NEXT: fminnm d0, d0, d3
; CHECK-NEXT: fcvtzu w8, d0
; CHECK-NEXT: fminnm d1, d1, d3
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fcvtzu w8, d1
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%x = call <2 x i16> @llvm.fptoui.sat.v2f64.v2i16(<2 x double> %f)
ret <2 x i16> %x
}
; Saturating unsigned conversion of <2 x double> to <2 x i19> (a result width
; that is not a power of two).
; The expected assembly clamps each lane with fmaxnm against zero and fminnm
; against the upper bound in d3, then converts with fcvtzu.
; The mov/movk pair builds 0x411FFFFC00000000, the double-precision encoding
; of 524287.0 (2^19 - 1).
define <2 x i19> @test_unsigned_v2f64_v2i19(<2 x double> %f) {
; CHECK-LABEL: test_unsigned_v2f64_v2i19:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #281457796841472
; CHECK-NEXT: fmov d1, xzr
; CHECK-NEXT: movk x8, #16671, lsl #48
; CHECK-NEXT: mov d2, v0.d[1]
; CHECK-NEXT: fmaxnm d0, d0, d1
; CHECK-NEXT: fmov d3, x8
; CHECK-NEXT: fmaxnm d1, d2, d1
; CHECK-NEXT: fminnm d0, d0, d3
; CHECK-NEXT: fcvtzu w8, d0
; CHECK-NEXT: fminnm d1, d1, d3
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fcvtzu w8, d1
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%x = call <2 x i19> @llvm.fptoui.sat.v2f64.v2i19(<2 x double> %f)
ret <2 x i19> %x
}
; Saturating unsigned conversion of <2 x double> to <2 x i32>, repeated inside
; the result-size-variation group (hence the "_duplicate" suffix).
; The v2i32 intrinsic is not among the declarations of this group; presumably
; it is declared earlier in the file -- verify when moving this test.
; The expected assembly clamps each lane with fmaxnm against zero and fminnm
; against the upper bound in d3, then converts with fcvtzu.
; The mov/movk pair builds 0x41EFFFFFFFE00000, the double-precision encoding
; of 4294967295.0 (2^32 - 1).
define <2 x i32> @test_unsigned_v2f64_v2i32_duplicate(<2 x double> %f) {
; CHECK-LABEL: test_unsigned_v2f64_v2i32_duplicate:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #281474974613504
; CHECK-NEXT: fmov d1, xzr
; CHECK-NEXT: movk x8, #16879, lsl #48
; CHECK-NEXT: mov d2, v0.d[1]
; CHECK-NEXT: fmaxnm d0, d0, d1
; CHECK-NEXT: fmov d3, x8
; CHECK-NEXT: fmaxnm d1, d2, d1
; CHECK-NEXT: fminnm d0, d0, d3
; CHECK-NEXT: fcvtzu w8, d0
; CHECK-NEXT: fminnm d1, d1, d3
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fcvtzu w8, d1
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%x = call <2 x i32> @llvm.fptoui.sat.v2f64.v2i32(<2 x double> %f)
ret <2 x i32> %x
}
; Saturating unsigned conversion of <2 x double> to <2 x i50>.
; The expected assembly clamps each lane with fmaxnm against zero and fminnm
; against the upper bound in d3, then converts with a 64-bit fcvtzu and packs
; the results into the two 64-bit lanes of v0.
; The mov/movk pair builds 0x430FFFFFFFFFFFF8, the double-precision encoding
; of 2^50 - 1.
define <2 x i50> @test_unsigned_v2f64_v2i50(<2 x double> %f) {
; CHECK-LABEL: test_unsigned_v2f64_v2i50:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #-8
; CHECK-NEXT: fmov d1, xzr
; CHECK-NEXT: movk x8, #17167, lsl #48
; CHECK-NEXT: mov d2, v0.d[1]
; CHECK-NEXT: fmaxnm d0, d0, d1
; CHECK-NEXT: fmov d3, x8
; CHECK-NEXT: fmaxnm d1, d2, d1
; CHECK-NEXT: fminnm d0, d0, d3
; CHECK-NEXT: fcvtzu x8, d0
; CHECK-NEXT: fminnm d1, d1, d3
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: fcvtzu x8, d1
; CHECK-NEXT: mov v0.d[1], x8
; CHECK-NEXT: ret
%x = call <2 x i50> @llvm.fptoui.sat.v2f64.v2i50(<2 x double> %f)
ret <2 x i50> %x
}
; Saturating unsigned conversion of <2 x double> to <2 x i64>.
; The expected assembly uses compare-and-select instead of a floating-point
; clamp: 64-bit fcvtzu, csel to zero on "lt" vs 0.0, csinv to all-ones when
; the compare against the bound in d2 is not "le".
; #4895412794951729151 is 0x43EFFFFFFFFFFFFF, the largest double-precision
; value below 2^64.
define <2 x i64> @test_unsigned_v2f64_v2i64(<2 x double> %f) {
; CHECK-LABEL: test_unsigned_v2f64_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov d1, v0.d[1]
; CHECK-NEXT: mov x8, #4895412794951729151
; CHECK-NEXT: fmov d2, x8
; CHECK-NEXT: fcvtzu x8, d1
; CHECK-NEXT: fcmp d1, #0.0
; CHECK-NEXT: csel x8, xzr, x8, lt
; CHECK-NEXT: fcmp d1, d2
; CHECK-NEXT: fcvtzu x9, d0
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: fcmp d0, #0.0
; CHECK-NEXT: csel x9, xzr, x9, lt
; CHECK-NEXT: fcmp d0, d2
; CHECK-NEXT: csinv x9, x9, xzr, le
; CHECK-NEXT: fmov d0, x9
; CHECK-NEXT: mov v0.d[1], x8
; CHECK-NEXT: ret
%x = call <2 x i64> @llvm.fptoui.sat.v2f64.v2i64(<2 x double> %f)
ret <2 x i64> %x
}
; Saturating unsigned conversion of <2 x double> to <2 x i100>.
; The expected assembly calls __fixunsdfti once per lane (lane 1 first, lane 0
; second) and saturates the x0/x1 result pair afterwards: both words become
; zero on "lt" vs 0.0, the low word becomes all-ones via csinv, and the high
; word is replaced by x21 on "gt" vs the bound in d9.
; The input vector is spilled to the stack and d8/d9, x19-x21 and x30 are
; saved because values must survive the calls.
; #5057542381537067007 is 0x462FFFFFFFFFFFFF, the largest double-precision
; value below 2^100; #68719476735 is 2^36 - 1, i.e. the upper 36 bits of a
; saturated i100.
define <2 x i100> @test_unsigned_v2f64_v2i100(<2 x double> %f) {
; CHECK-LABEL: test_unsigned_v2f64_v2i100:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #64 // =64
; CHECK-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x21, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w30, -32
; CHECK-NEXT: .cfi_offset b8, -40
; CHECK-NEXT: .cfi_offset b9, -48
; CHECK-NEXT: mov d8, v0.d[1]
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: bl __fixunsdfti
; CHECK-NEXT: mov x8, #5057542381537067007
; CHECK-NEXT: fcmp d8, #0.0
; CHECK-NEXT: fmov d9, x8
; CHECK-NEXT: mov x21, #68719476735
; CHECK-NEXT: csel x9, xzr, x0, lt
; CHECK-NEXT: csel x10, xzr, x1, lt
; CHECK-NEXT: fcmp d8, d9
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: csel x19, x21, x10, gt
; CHECK-NEXT: csinv x20, x9, xzr, le
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: bl __fixunsdfti
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov x2, x20
; CHECK-NEXT: mov x3, x19
; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: fcmp d0, #0.0
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp d0, d9
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: csel x1, x21, x9, gt
; CHECK-NEXT: ldp x30, x21, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: add sp, sp, #64 // =64
; CHECK-NEXT: ret
%x = call <2 x i100> @llvm.fptoui.sat.v2f64.v2i100(<2 x double> %f)
ret <2 x i100> %x
}
; Saturating unsigned conversion of <2 x double> to <2 x i128>.
; The expected assembly calls __fixunsdfti once per lane (lane 1 first, lane 0
; second) and saturates the x0/x1 result pair afterwards: both words become
; zero on "lt" vs 0.0 and both become all-ones (csinv with xzr) when the
; compare against the bound in d9 is not "le".
; The input vector is spilled to the stack and d8/d9, x19/x20 and x30 are
; saved because values must survive the calls.
; #5183643171103440895 is 0x47EFFFFFFFFFFFFF, the largest double-precision
; value below 2^128.
define <2 x i128> @test_unsigned_v2f64_v2i128(<2 x double> %f) {
; CHECK-LABEL: test_unsigned_v2f64_v2i128:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #64 // =64
; CHECK-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w30, -32
; CHECK-NEXT: .cfi_offset b8, -40
; CHECK-NEXT: .cfi_offset b9, -48
; CHECK-NEXT: mov d8, v0.d[1]
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: bl __fixunsdfti
; CHECK-NEXT: mov x8, #5183643171103440895
; CHECK-NEXT: fcmp d8, #0.0
; CHECK-NEXT: fmov d9, x8
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: csel x10, xzr, x0, lt
; CHECK-NEXT: fcmp d8, d9
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: csinv x19, x10, xzr, le
; CHECK-NEXT: csinv x20, x9, xzr, le
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: bl __fixunsdfti
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov x2, x19
; CHECK-NEXT: mov x3, x20
; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: fcmp d0, #0.0
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp d0, d9
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: csinv x1, x9, xzr, le
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: add sp, sp, #64 // =64
; CHECK-NEXT: ret
%x = call <2 x i128> @llvm.fptoui.sat.v2f64.v2i128(<2 x double> %f)
ret <2 x i128> %x
}
;
; 4-Vector half to unsigned integer -- result size variation
;
; Declarations of the saturating unsigned conversion intrinsic for a
; <4 x half> source, one per result element width exercised in this group
; (i1, i8, i13, i16, i19, i50, i64, i100, i128). There is no i32 entry here.
; NOTE(review): the name suffixes list the source vector type first and the
; result type second (e.g. v4f16.v4i1) -- confirm this is the intended
; mangling order for the overloaded intrinsic.
declare <4 x i1> @llvm.fptoui.sat.v4f16.v4i1 (<4 x half>)
declare <4 x i8> @llvm.fptoui.sat.v4f16.v4i8 (<4 x half>)
declare <4 x i13> @llvm.fptoui.sat.v4f16.v4i13 (<4 x half>)
declare <4 x i16> @llvm.fptoui.sat.v4f16.v4i16 (<4 x half>)
declare <4 x i19> @llvm.fptoui.sat.v4f16.v4i19 (<4 x half>)
declare <4 x i50> @llvm.fptoui.sat.v4f16.v4i50 (<4 x half>)
declare <4 x i64> @llvm.fptoui.sat.v4f16.v4i64 (<4 x half>)
declare <4 x i100> @llvm.fptoui.sat.v4f16.v4i100(<4 x half>)
declare <4 x i128> @llvm.fptoui.sat.v4f16.v4i128(<4 x half>)
; Saturating unsigned conversion of <4 x half> to <4 x i1>.
; The expected assembly widens every lane to single precision (fcvt), clamps
; it with fmaxnm against zero and fminnm against 1.0, converts with fcvtzu and
; inserts the results into the 16-bit lanes of v0.
define <4 x i1> @test_unsigned_v4f16_v4i1(<4 x half> %f) {
; CHECK-LABEL: test_unsigned_v4f16_v4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fcvt s1, h0
; CHECK-NEXT: mov h3, v0.h[1]
; CHECK-NEXT: mov h4, v0.h[2]
; CHECK-NEXT: mov h0, v0.h[3]
; CHECK-NEXT: fmov s2, wzr
; CHECK-NEXT: fcvt s3, h3
; CHECK-NEXT: fcvt s4, h4
; CHECK-NEXT: fcvt s0, h0
; CHECK-NEXT: fmaxnm s1, s1, s2
; CHECK-NEXT: fmaxnm s3, s3, s2
; CHECK-NEXT: fmaxnm s4, s4, s2
; CHECK-NEXT: fmaxnm s0, s0, s2
; CHECK-NEXT: fmov s2, #1.00000000
; CHECK-NEXT: fminnm s1, s1, s2
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: fminnm s1, s3, s2
; CHECK-NEXT: fminnm s3, s4, s2
; CHECK-NEXT: fminnm s2, s0, s2
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: mov v0.h[1], w8
; CHECK-NEXT: fcvtzu w8, s3
; CHECK-NEXT: mov v0.h[2], w8
; CHECK-NEXT: fcvtzu w8, s2
; CHECK-NEXT: mov v0.h[3], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%x = call <4 x i1> @llvm.fptoui.sat.v4f16.v4i1(<4 x half> %f)
ret <4 x i1> %x
}
; Saturating unsigned conversion of <4 x half> to <4 x i8>.
; The expected assembly widens every lane to single precision (fcvt), clamps
; it with fmaxnm against zero and fminnm against the upper bound, converts
; with fcvtzu and inserts the results into the 16-bit lanes of v0.
; #1132396544 is 0x437F0000, the single-precision encoding of 255.0.
define <4 x i8> @test_unsigned_v4f16_v4i8(<4 x half> %f) {
; CHECK-LABEL: test_unsigned_v4f16_v4i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fcvt s1, h0
; CHECK-NEXT: mov h3, v0.h[1]
; CHECK-NEXT: mov h4, v0.h[2]
; CHECK-NEXT: mov h0, v0.h[3]
; CHECK-NEXT: fmov s2, wzr
; CHECK-NEXT: mov w8, #1132396544
; CHECK-NEXT: fcvt s3, h3
; CHECK-NEXT: fcvt s4, h4
; CHECK-NEXT: fcvt s0, h0
; CHECK-NEXT: fmaxnm s1, s1, s2
; CHECK-NEXT: fmaxnm s3, s3, s2
; CHECK-NEXT: fmaxnm s4, s4, s2
; CHECK-NEXT: fmaxnm s0, s0, s2
; CHECK-NEXT: fmov s2, w8
; CHECK-NEXT: fminnm s1, s1, s2
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: fminnm s1, s3, s2
; CHECK-NEXT: fminnm s3, s4, s2
; CHECK-NEXT: fminnm s2, s0, s2
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: mov v0.h[1], w8
; CHECK-NEXT: fcvtzu w8, s3
; CHECK-NEXT: mov v0.h[2], w8
; CHECK-NEXT: fcvtzu w8, s2
; CHECK-NEXT: mov v0.h[3], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%x = call <4 x i8> @llvm.fptoui.sat.v4f16.v4i8(<4 x half> %f)
ret <4 x i8> %x
}
; Saturating unsigned conversion of <4 x half> to <4 x i13> (a result width
; that is not a power of two).
; The expected assembly widens every lane to single precision (fcvt), clamps
; it with fmaxnm against zero and fminnm against the upper bound, converts
; with fcvtzu and inserts the results into the 16-bit lanes of v0.
; The mov/movk pair builds 0x45FFF800, the single-precision encoding of
; 8191.0 (2^13 - 1).
define <4 x i13> @test_unsigned_v4f16_v4i13(<4 x half> %f) {
; CHECK-LABEL: test_unsigned_v4f16_v4i13:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fcvt s1, h0
; CHECK-NEXT: mov w8, #63488
; CHECK-NEXT: mov h3, v0.h[1]
; CHECK-NEXT: mov h4, v0.h[2]
; CHECK-NEXT: mov h0, v0.h[3]
; CHECK-NEXT: fmov s2, wzr
; CHECK-NEXT: movk w8, #17919, lsl #16
; CHECK-NEXT: fcvt s3, h3
; CHECK-NEXT: fcvt s4, h4
; CHECK-NEXT: fcvt s0, h0
; CHECK-NEXT: fmaxnm s1, s1, s2
; CHECK-NEXT: fmaxnm s3, s3, s2
; CHECK-NEXT: fmaxnm s4, s4, s2
; CHECK-NEXT: fmaxnm s0, s0, s2
; CHECK-NEXT: fmov s2, w8
; CHECK-NEXT: fminnm s1, s1, s2
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: fminnm s1, s3, s2
; CHECK-NEXT: fminnm s3, s4, s2
; CHECK-NEXT: fminnm s2, s0, s2
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: mov v0.h[1], w8
; CHECK-NEXT: fcvtzu w8, s3
; CHECK-NEXT: mov v0.h[2], w8
; CHECK-NEXT: fcvtzu w8, s2
; CHECK-NEXT: mov v0.h[3], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%x = call <4 x i13> @llvm.fptoui.sat.v4f16.v4i13(<4 x half> %f)
ret <4 x i13> %x
}
; Saturating unsigned conversion of <4 x half> to <4 x i16>.
; The expected assembly widens every lane to single precision (fcvt), clamps
; it with fmaxnm against zero and fminnm against the upper bound, converts
; with fcvtzu and inserts the results into the 16-bit lanes of v0.
; The mov/movk pair builds 0x477FFF00, the single-precision encoding of
; 65535.0 (2^16 - 1).
define <4 x i16> @test_unsigned_v4f16_v4i16(<4 x half> %f) {
; CHECK-LABEL: test_unsigned_v4f16_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fcvt s1, h0
; CHECK-NEXT: mov w8, #65280
; CHECK-NEXT: mov h3, v0.h[1]
; CHECK-NEXT: mov h4, v0.h[2]
; CHECK-NEXT: mov h0, v0.h[3]
; CHECK-NEXT: fmov s2, wzr
; CHECK-NEXT: movk w8, #18303, lsl #16
; CHECK-NEXT: fcvt s3, h3
; CHECK-NEXT: fcvt s4, h4
; CHECK-NEXT: fcvt s0, h0
; CHECK-NEXT: fmaxnm s1, s1, s2
; CHECK-NEXT: fmaxnm s3, s3, s2
; CHECK-NEXT: fmaxnm s4, s4, s2
; CHECK-NEXT: fmaxnm s0, s0, s2
; CHECK-NEXT: fmov s2, w8
; CHECK-NEXT: fminnm s1, s1, s2
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: fminnm s1, s3, s2
; CHECK-NEXT: fminnm s3, s4, s2
; CHECK-NEXT: fminnm s2, s0, s2
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: mov v0.h[1], w8
; CHECK-NEXT: fcvtzu w8, s3
; CHECK-NEXT: mov v0.h[2], w8
; CHECK-NEXT: fcvtzu w8, s2
; CHECK-NEXT: mov v0.h[3], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%x = call <4 x i16> @llvm.fptoui.sat.v4f16.v4i16(<4 x half> %f)
ret <4 x i16> %x
}
; Saturating unsigned conversion of <4 x half> to <4 x i19> (a result width
; that is not a power of two).
; The expected assembly widens every lane to single precision (fcvt), clamps
; it with fmaxnm against zero and fminnm against the upper bound, and converts
; with fcvtzu. Unlike the narrower result widths of this group, the results
; are inserted into the 32-bit lanes of v0.
; The mov/movk pair builds 0x48FFFFE0, the single-precision encoding of
; 524287.0 (2^19 - 1).
define <4 x i19> @test_unsigned_v4f16_v4i19(<4 x half> %f) {
; CHECK-LABEL: test_unsigned_v4f16_v4i19:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fcvt s1, h0
; CHECK-NEXT: mov w8, #65504
; CHECK-NEXT: mov h3, v0.h[1]
; CHECK-NEXT: mov h4, v0.h[2]
; CHECK-NEXT: mov h0, v0.h[3]
; CHECK-NEXT: fmov s2, wzr
; CHECK-NEXT: movk w8, #18687, lsl #16
; CHECK-NEXT: fcvt s3, h3
; CHECK-NEXT: fcvt s4, h4
; CHECK-NEXT: fcvt s0, h0
; CHECK-NEXT: fmaxnm s1, s1, s2
; CHECK-NEXT: fmaxnm s3, s3, s2
; CHECK-NEXT: fmaxnm s4, s4, s2
; CHECK-NEXT: fmaxnm s0, s0, s2
; CHECK-NEXT: fmov s2, w8
; CHECK-NEXT: fminnm s1, s1, s2
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: fminnm s1, s3, s2
; CHECK-NEXT: fminnm s3, s4, s2
; CHECK-NEXT: fminnm s2, s0, s2
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: fcvtzu w8, s3
; CHECK-NEXT: mov v0.s[2], w8
; CHECK-NEXT: fcvtzu w8, s2
; CHECK-NEXT: mov v0.s[3], w8
; CHECK-NEXT: ret
%x = call <4 x i19> @llvm.fptoui.sat.v4f16.v4i19(<4 x half> %f)
ret <4 x i19> %x
}
; Saturating unsigned conversion of <4 x half> to <4 x i32>.
;
; Expected lowering uses the convert-then-select shape instead of fmin/fmax:
; each lane is widened with fcvt and converted with fcvtzu, then
;   * fcmp against #0.0 + csel ..., lt  replaces the result with wzr, and
;   * fcmp against s3   + csinv ..., le replaces the result with all-ones
;     (inverted wzr) when the "le" condition does not hold.
; s3 holds 1333788671 = 0x4F7FFFFF, the largest f32 value below 2^32.
;
; NOTE(review): the "_duplicate" suffix presumably avoids a clash with another
; v4f16 -> v4i32 test elsewhere in the file; that test is not visible from
; this chunk -- confirm.
define <4 x i32> @test_unsigned_v4f16_v4i32_duplicate(<4 x half> %f) {
; CHECK-LABEL: test_unsigned_v4f16_v4i32_duplicate:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov h1, v0.h[1]
; CHECK-NEXT: mov w8, #1333788671
; CHECK-NEXT: fcvt s1, h1
; CHECK-NEXT: fmov s3, w8
; CHECK-NEXT: fcvtzu w8, s1
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: fcvt s2, h0
; CHECK-NEXT: csel w8, wzr, w8, lt
; CHECK-NEXT: fcmp s1, s3
; CHECK-NEXT: fcvtzu w9, s2
; CHECK-NEXT: csinv w8, w8, wzr, le
; CHECK-NEXT: fcmp s2, #0.0
; CHECK-NEXT: mov h1, v0.h[2]
; CHECK-NEXT: csel w9, wzr, w9, lt
; CHECK-NEXT: fcmp s2, s3
; CHECK-NEXT: mov h0, v0.h[3]
; CHECK-NEXT: fcvt s1, h1
; CHECK-NEXT: csinv w9, w9, wzr, le
; CHECK-NEXT: fcvt s2, h0
; CHECK-NEXT: fmov s0, w9
; CHECK-NEXT: fcvtzu w9, s1
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: csel w9, wzr, w9, lt
; CHECK-NEXT: fcmp s1, s3
; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: fcvtzu w8, s2
; CHECK-NEXT: csinv w9, w9, wzr, le
; CHECK-NEXT: fcmp s2, #0.0
; CHECK-NEXT: csel w8, wzr, w8, lt
; CHECK-NEXT: fcmp s2, s3
; CHECK-NEXT: mov v0.s[2], w9
; CHECK-NEXT: csinv w8, w8, wzr, le
; CHECK-NEXT: mov v0.s[3], w8
; CHECK-NEXT: ret
%x = call <4 x i32> @llvm.fptoui.sat.v4f16.v4i32(<4 x half> %f)
ret <4 x i32> %x
}
; Saturating unsigned conversion of <4 x half> to <4 x i50>.
;
; Expected lowering converts each lane with a 64-bit fcvtzu and then selects:
; fcmp against #0.0 + csel ..., lt picks xzr, and fcmp against s2 +
; csel ..., gt picks x9. s2 holds 1484783615 = 0x587FFFFF (the largest f32
; value below 2^50) and x9 holds 1125899906842623 = 2^50-1, i.e. the saturated
; i50 value. Unlike the i32/i64 variants an explicit maximum register is
; needed because all-ones (csinv) would exceed 50 bits.
; The four lane results are produced in x0..x3 rather than in a vector register.
define <4 x i50> @test_unsigned_v4f16_v4i50(<4 x half> %f) {
; CHECK-LABEL: test_unsigned_v4f16_v4i50:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fcvt s1, h0
; CHECK-NEXT: mov w8, #1484783615
; CHECK-NEXT: fcvtzu x10, s1
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: fmov s2, w8
; CHECK-NEXT: csel x8, xzr, x10, lt
; CHECK-NEXT: fcmp s1, s2
; CHECK-NEXT: mov h1, v0.h[1]
; CHECK-NEXT: mov x9, #1125899906842623
; CHECK-NEXT: fcvt s1, h1
; CHECK-NEXT: fcvtzu x10, s1
; CHECK-NEXT: csel x0, x9, x8, gt
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: csel x8, xzr, x10, lt
; CHECK-NEXT: fcmp s1, s2
; CHECK-NEXT: mov h1, v0.h[2]
; CHECK-NEXT: fcvt s1, h1
; CHECK-NEXT: mov h0, v0.h[3]
; CHECK-NEXT: fcvtzu x10, s1
; CHECK-NEXT: csel x1, x9, x8, gt
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: fcvt s0, h0
; CHECK-NEXT: csel x8, xzr, x10, lt
; CHECK-NEXT: fcmp s1, s2
; CHECK-NEXT: fcvtzu x11, s0
; CHECK-NEXT: csel x2, x9, x8, gt
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: csel x8, xzr, x11, lt
; CHECK-NEXT: fcmp s0, s2
; CHECK-NEXT: csel x3, x9, x8, gt
; CHECK-NEXT: ret
%x = call <4 x i50> @llvm.fptoui.sat.v4f16.v4i50(<4 x half> %f)
ret <4 x i50> %x
}
; Saturating unsigned conversion of <4 x half> to <4 x i64>.
;
; Expected lowering: each lane is widened with fcvt, converted with a 64-bit
; fcvtzu, then fixed up with two selects -- csel ..., lt after fcmp against
; #0.0 substitutes xzr, and csinv ..., le after fcmp against s3 substitutes
; all-ones. s3 holds 1602224127 = 0x5F7FFFFF, the largest f32 value below 2^64.
; The result occupies two vector registers: lanes 0/1 are assembled in v0 and
; lanes 2/3 in v1 (fmov d<n>, x.. followed by mov v<n>.d[1], x..).
define <4 x i64> @test_unsigned_v4f16_v4i64(<4 x half> %f) {
; CHECK-LABEL: test_unsigned_v4f16_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov h1, v0.h[1]
; CHECK-NEXT: mov w8, #1602224127
; CHECK-NEXT: fcvt s1, h1
; CHECK-NEXT: fmov s3, w8
; CHECK-NEXT: fcvtzu x8, s1
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: fcvt s2, h0
; CHECK-NEXT: csel x8, xzr, x8, lt
; CHECK-NEXT: fcmp s1, s3
; CHECK-NEXT: mov h1, v0.h[3]
; CHECK-NEXT: fcvtzu x9, s2
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: fcmp s2, #0.0
; CHECK-NEXT: fcvt s1, h1
; CHECK-NEXT: csel x9, xzr, x9, lt
; CHECK-NEXT: fcmp s2, s3
; CHECK-NEXT: mov h0, v0.h[2]
; CHECK-NEXT: csinv x9, x9, xzr, le
; CHECK-NEXT: fcvtzu x10, s1
; CHECK-NEXT: fcmp s1, #0.0
; CHECK-NEXT: fcvt s4, h0
; CHECK-NEXT: csel x10, xzr, x10, lt
; CHECK-NEXT: fcmp s1, s3
; CHECK-NEXT: fmov d0, x9
; CHECK-NEXT: fcvtzu x9, s4
; CHECK-NEXT: csinv x10, x10, xzr, le
; CHECK-NEXT: fcmp s4, #0.0
; CHECK-NEXT: csel x9, xzr, x9, lt
; CHECK-NEXT: fcmp s4, s3
; CHECK-NEXT: csinv x9, x9, xzr, le
; CHECK-NEXT: fmov d1, x9
; CHECK-NEXT: mov v0.d[1], x8
; CHECK-NEXT: mov v1.d[1], x10
; CHECK-NEXT: ret
%x = call <4 x i64> @llvm.fptoui.sat.v4f16.v4i64(<4 x half> %f)
ret <4 x i64> %x
}
; Saturating unsigned conversion of <4 x half> to <4 x i100>.
;
; Expected lowering has no inline conversion: every lane is widened to f32 and
; passed to the __fixunssfti libcall (four calls, for lanes 2, 1, 3 and 0 in
; that order). Because of the calls the function needs a 96-byte frame: the
; input vector is spilled at [sp] and reloaded before each lane extract, the
; current f32 lane and the upper bound live in d8/d9, and the already-computed
; lane results are parked in x19..x24 until the final moves into x2..x7.
;
; After each call the x0/x1 pair is fixed up with:
;   * fcmp s8, #0.0 + csel ..., lt  -> both halves become xzr;
;   * fcmp s8, s9   + csinv ..., le -> the x0-derived half becomes all-ones,
;                   + csel x25, gt  -> the x1-derived half becomes x25.
; s9 holds 1904214015 = 0x717FFFFF (the largest f32 value below 2^100) and
; x25 holds 68719476735 = 2^36-1; 64 one-bits plus 36 one-bits form 2^100-1.
define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) {
; CHECK-LABEL: test_unsigned_v4f16_v4i100:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #96 // =96
; CHECK-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x25, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 96
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
; CHECK-NEXT: .cfi_offset w23, -40
; CHECK-NEXT: .cfi_offset w24, -48
; CHECK-NEXT: .cfi_offset w25, -56
; CHECK-NEXT: .cfi_offset w30, -64
; CHECK-NEXT: .cfi_offset b8, -72
; CHECK-NEXT: .cfi_offset b9, -80
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov h1, v0.h[2]
; CHECK-NEXT: fcvt s8, h1
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov w8, #1904214015
; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: fmov s9, w8
; CHECK-NEXT: mov h0, v0.h[1]
; CHECK-NEXT: csel x9, xzr, x0, lt
; CHECK-NEXT: csel x10, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: mov x25, #68719476735
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: csel x19, x25, x10, gt
; CHECK-NEXT: csinv x20, x9, xzr, le
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: mov h0, v0.h[3]
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: csel x21, x25, x9, gt
; CHECK-NEXT: csinv x22, x8, xzr, le
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: csel x23, x25, x9, gt
; CHECK-NEXT: csinv x24, x8, xzr, le
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: csel x1, x25, x9, gt
; CHECK-NEXT: mov x2, x22
; CHECK-NEXT: mov x3, x21
; CHECK-NEXT: mov x4, x20
; CHECK-NEXT: mov x5, x19
; CHECK-NEXT: mov x6, x24
; CHECK-NEXT: mov x7, x23
; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp x30, x25, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: add sp, sp, #96 // =96
; CHECK-NEXT: ret
%x = call <4 x i100> @llvm.fptoui.sat.v4f16.v4i100(<4 x half> %f)
ret <4 x i100> %x
}
; Saturating unsigned conversion of <4 x half> to <4 x i128>.
;
; Expected lowering calls the __fixunssfti libcall once per lane (lanes 1, 2,
; 3 and 0 in that order) on the f32-widened value. The function uses a 96-byte
; frame: the input vector is spilled at [sp] and reloaded before each lane
; extract, d8/d9 carry the current f32 lane and the upper bound across the
; calls, and x19..x24 hold finished lane results until they are moved into
; x2..x7 at the end.
;
; After each call both result halves (x0 and x1) get the same fix-up:
; fcmp s8, #0.0 + csel ..., lt substitutes xzr, and fcmp s8, s9 +
; csinv ..., le substitutes all-ones. No separate maximum register is needed
; since the saturated i128 value is all-ones in both halves.
; s9 holds 2139095039 = 0x7F7FFFFF, the largest finite f32 value.
define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) {
; CHECK-LABEL: test_unsigned_v4f16_v4i128:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #96 // =96
; CHECK-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 96
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
; CHECK-NEXT: .cfi_offset w23, -40
; CHECK-NEXT: .cfi_offset w24, -48
; CHECK-NEXT: .cfi_offset w30, -64
; CHECK-NEXT: .cfi_offset b8, -72
; CHECK-NEXT: .cfi_offset b9, -80
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov h1, v0.h[1]
; CHECK-NEXT: fcvt s8, h1
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov w8, #2139095039
; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: fmov s9, w8
; CHECK-NEXT: mov h0, v0.h[2]
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: csel x10, xzr, x0, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: csinv x19, x10, xzr, le
; CHECK-NEXT: csinv x20, x9, xzr, le
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: csel x8, xzr, x1, lt
; CHECK-NEXT: csel x9, xzr, x0, lt
; CHECK-NEXT: mov h0, v0.h[3]
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: csinv x21, x9, xzr, le
; CHECK-NEXT: csinv x22, x8, xzr, le
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: csel x8, xzr, x1, lt
; CHECK-NEXT: csel x9, xzr, x0, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: mov v0.16b, v8.16b
; CHECK-NEXT: csinv x23, x9, xzr, le
; CHECK-NEXT: csinv x24, x8, xzr, le
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: mov x2, x19
; CHECK-NEXT: mov x3, x20
; CHECK-NEXT: mov x4, x21
; CHECK-NEXT: mov x5, x22
; CHECK-NEXT: mov x6, x23
; CHECK-NEXT: mov x7, x24
; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: csinv x1, x9, xzr, le
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: add sp, sp, #96 // =96
; CHECK-NEXT: ret
%x = call <4 x i128> @llvm.fptoui.sat.v4f16.v4i128(<4 x half> %f)
ret <4 x i128> %x
}