; llvm-project/llvm/test/CodeGen/AArch64/fp16-v16-instructions.ll

; RUN: llc < %s -mtriple=aarch64-none-eabi | FileCheck %s


; Signed <16 x i32> -> <16 x half> conversion.
;
; Expected code, as described by the patterns below:
;   * the input is read four lanes at a time from v0-v3 and each quarter is
;     converted to single precision with scvtf;
;   * each single-precision quarter is narrowed to four halves with fcvtn;
;   * the quarters coming from v1 and v3 are inserted into the upper 64 bits
;     of v0 and v1 respectively.
define <16 x half> @sitofp_i32(<16 x i32> %a) #0 {
; CHECK-LABEL: sitofp_i32:
; Integer -> f32, one instruction per input register.
; CHECK-DAG: scvtf [[S0:v[0-9]+\.4s]], v0.4s
; CHECK-DAG: scvtf [[S1:v[0-9]+\.4s]], v1.4s
; CHECK-DAG: scvtf [[S2:v[0-9]+\.4s]], v2.4s
; CHECK-DAG: scvtf [[S3:v[0-9]+\.4s]], v3.4s
; f32 -> f16. The quarters converted from v0 and v2 are narrowed directly into
; v0 and v1; the other two go to registers captured as R1 and R3.
; CHECK-DAG: fcvtn v0.4h, [[S0]]
; CHECK-DAG: fcvtn v1.4h, [[S2]]
; CHECK-DAG: fcvtn v[[R1:[0-9]+]].4h, [[S1]]
; CHECK-DAG: fcvtn v[[R3:[0-9]+]].4h, [[S3]]
; Merge the captured halves into the upper lanes. Directive names are matched
; case-sensitively, so both lines must use the exact upper-case spelling or
; the matcher skips them without reporting anything.
; CHECK-DAG: ins v0.d[1], v[[R1]].d[0]
; CHECK-DAG: ins v1.d[1], v[[R3]].d[0]

  %1 = sitofp <16 x i32> %a to <16 x half>
  ret <16 x half> %1
}


; Signed <16 x i64> -> <16 x half> conversion.
;
; Expected code, as described by the patterns below: eight scvtf on the
; two-lane inputs v0-v7, a pairwise fcvtn/fcvtn2 narrowing to four
; single-precision vectors, a second fcvtn narrowing to halves, and two ins
; instructions that fill the upper 64 bits of v0 and v1.
define <16 x half> @sitofp_i64(<16 x i64> %a) #0 {
; CHECK-LABEL: sitofp_i64:
; Stage 1: i64 -> f64, one instruction per input register.
; CHECK-DAG: scvtf [[F64_0:v[0-9]+\.2d]], v0.2d
; CHECK-DAG: scvtf [[F64_1:v[0-9]+\.2d]], v1.2d
; CHECK-DAG: scvtf [[F64_2:v[0-9]+\.2d]], v2.2d
; CHECK-DAG: scvtf [[F64_3:v[0-9]+\.2d]], v3.2d
; CHECK-DAG: scvtf [[F64_4:v[0-9]+\.2d]], v4.2d
; CHECK-DAG: scvtf [[F64_5:v[0-9]+\.2d]], v5.2d
; CHECK-DAG: scvtf [[F64_6:v[0-9]+\.2d]], v6.2d
; CHECK-DAG: scvtf [[F64_7:v[0-9]+\.2d]], v7.2d
;
; Stage 2a: f64 -> f32, even-numbered inputs fill the lower two lanes.
; CHECK-DAG: fcvtn [[F32_0:v[0-9]+]].2s, [[F64_0]]
; CHECK-DAG: fcvtn [[F32_1:v[0-9]+]].2s, [[F64_2]]
; CHECK-DAG: fcvtn [[F32_2:v[0-9]+]].2s, [[F64_4]]
; CHECK-DAG: fcvtn [[F32_3:v[0-9]+]].2s, [[F64_6]]
;
; Stage 2b: odd-numbered inputs fill the upper two lanes of the same registers.
; CHECK-DAG: fcvtn2 [[F32_0]].4s, [[F64_1]]
; CHECK-DAG: fcvtn2 [[F32_1]].4s, [[F64_3]]
; CHECK-DAG: fcvtn2 [[F32_2]].4s, [[F64_5]]
; CHECK-DAG: fcvtn2 [[F32_3]].4s, [[F64_7]]
;
; Stage 3: f32 -> f16, then merge the captured halves into v0 and v1.
; CHECK-DAG: fcvtn v0.4h, [[F32_0]].4s
; CHECK-DAG: fcvtn v1.4h, [[F32_2]].4s
; CHECK-DAG: fcvtn v[[HI0:[0-9]+]].4h, [[F32_1]].4s
; CHECK-DAG: fcvtn v[[HI1:[0-9]+]].4h, [[F32_3]].4s
; CHECK-DAG: ins v0.d[1], v[[HI0]].d[0]
; CHECK-DAG: ins v1.d[1], v[[HI1]].d[0]

  %conv = sitofp <16 x i64> %a to <16 x half>
  ret <16 x half> %conv
}


; Unsigned <16 x i32> -> <16 x half> conversion.
;
; Expected code, as described by the patterns below:
;   * the input is read four lanes at a time from v0-v3 and each quarter is
;     converted to single precision with ucvtf;
;   * each single-precision quarter is narrowed to four halves with fcvtn;
;   * the quarters coming from v1 and v3 are inserted into the upper 64 bits
;     of v0 and v1 respectively.
define <16 x half> @uitofp_i32(<16 x i32> %a) #0 {
; CHECK-LABEL: uitofp_i32:
; Integer -> f32, one instruction per input register.
; CHECK-DAG: ucvtf [[S0:v[0-9]+\.4s]], v0.4s
; CHECK-DAG: ucvtf [[S1:v[0-9]+\.4s]], v1.4s
; CHECK-DAG: ucvtf [[S2:v[0-9]+\.4s]], v2.4s
; CHECK-DAG: ucvtf [[S3:v[0-9]+\.4s]], v3.4s
; f32 -> f16. The quarters converted from v0 and v2 are narrowed directly into
; v0 and v1; the other two go to registers captured as R1 and R3.
; CHECK-DAG: fcvtn v0.4h, [[S0]]
; CHECK-DAG: fcvtn v1.4h, [[S2]]
; CHECK-DAG: fcvtn v[[R1:[0-9]+]].4h, [[S1]]
; CHECK-DAG: fcvtn v[[R3:[0-9]+]].4h, [[S3]]
; Merge the captured halves into the upper lanes. Directive names are matched
; case-sensitively, so both lines must use the exact upper-case spelling or
; the matcher skips them without reporting anything.
; CHECK-DAG: ins v0.d[1], v[[R1]].d[0]
; CHECK-DAG: ins v1.d[1], v[[R3]].d[0]

  %1 = uitofp <16 x i32> %a to <16 x half>
  ret <16 x half> %1
}


; Unsigned <16 x i64> -> <16 x half> conversion.
;
; Expected code, as described by the patterns below: eight ucvtf on the
; two-lane inputs v0-v7, a pairwise fcvtn/fcvtn2 narrowing to four
; single-precision vectors, a second fcvtn narrowing to halves, and two ins
; instructions that fill the upper 64 bits of v0 and v1.
define <16 x half> @uitofp_i64(<16 x i64> %a) #0 {
; CHECK-LABEL: uitofp_i64:
; Stage 1: i64 -> f64, one instruction per input register.
; CHECK-DAG: ucvtf [[F64_0:v[0-9]+\.2d]], v0.2d
; CHECK-DAG: ucvtf [[F64_1:v[0-9]+\.2d]], v1.2d
; CHECK-DAG: ucvtf [[F64_2:v[0-9]+\.2d]], v2.2d
; CHECK-DAG: ucvtf [[F64_3:v[0-9]+\.2d]], v3.2d
; CHECK-DAG: ucvtf [[F64_4:v[0-9]+\.2d]], v4.2d
; CHECK-DAG: ucvtf [[F64_5:v[0-9]+\.2d]], v5.2d
; CHECK-DAG: ucvtf [[F64_6:v[0-9]+\.2d]], v6.2d
; CHECK-DAG: ucvtf [[F64_7:v[0-9]+\.2d]], v7.2d
;
; Stage 2a: f64 -> f32, even-numbered inputs fill the lower two lanes.
; CHECK-DAG: fcvtn [[F32_0:v[0-9]+]].2s, [[F64_0]]
; CHECK-DAG: fcvtn [[F32_1:v[0-9]+]].2s, [[F64_2]]
; CHECK-DAG: fcvtn [[F32_2:v[0-9]+]].2s, [[F64_4]]
; CHECK-DAG: fcvtn [[F32_3:v[0-9]+]].2s, [[F64_6]]
;
; Stage 2b: odd-numbered inputs fill the upper two lanes of the same registers.
; CHECK-DAG: fcvtn2 [[F32_0]].4s, [[F64_1]]
; CHECK-DAG: fcvtn2 [[F32_1]].4s, [[F64_3]]
; CHECK-DAG: fcvtn2 [[F32_2]].4s, [[F64_5]]
; CHECK-DAG: fcvtn2 [[F32_3]].4s, [[F64_7]]
;
; Stage 3: f32 -> f16, then merge the captured halves into v0 and v1.
; CHECK-DAG: fcvtn v0.4h, [[F32_0]].4s
; CHECK-DAG: fcvtn v1.4h, [[F32_2]].4s
; CHECK-DAG: fcvtn v[[HI0:[0-9]+]].4h, [[F32_1]].4s
; CHECK-DAG: fcvtn v[[HI1:[0-9]+]].4h, [[F32_3]].4s
; CHECK-DAG: ins v0.d[1], v[[HI0]].d[0]
; CHECK-DAG: ins v1.d[1], v[[HI1]].d[0]

  %conv = uitofp <16 x i64> %a to <16 x half>
  ret <16 x half> %conv
}

; Attribute group #0, referenced by each of the four conversion tests above.
; nounwind declares that those functions never unwind.
attributes #0 = { nounwind }
[AArch64] Handle vec4, vec8, vec16 *itofp for half Summary: Set operation action for SINT_TO_FP and UINT_TO_FP nodes with v4i32, v8i8, v8i16 inputs to allow promotion of v4f16 results. Add tests for sitofp and uitofp for vec4, vec8, vec16, and i8, i16, i32, and i64 vectors. Only missing tests are for v16i8 and v16i16 as the shift operations are too complicated to write a proper check sequence. The conversions from v4i64 to v4f16 do not depend on this patch - v4i64 is split and the conversion gets handled while lowering v2i64. I am adding a test here for completeness. Reviewers: aemerson, rengolin, ab, jmolloy, srhines Subscribers: rengolin, aemerson, llvm-commits Differential Revision: http://reviews.llvm.org/D9166 llvm-svn: 235609 2015-04-24 01:16:27 +08:00			`; RUN: llc < %s -mtriple=aarch64-none-eabi \| FileCheck %s`


			`define <16 x half> @sitofp_i32(<16 x i32> %a) #0 {`
			`; CHECK-LABEL: sitofp_i32:`
			`; CHECK-DAG: scvtf [[S0:v[0-9]+\.4s]], v0.4s`
			`; CHECK-DAG: scvtf [[S1:v[0-9]+\.4s]], v1.4s`
			`; CHECK-DAG: scvtf [[S2:v[0-9]+\.4s]], v2.4s`
			`; CHECK-DAG: scvtf [[S3:v[0-9]+\.4s]], v3.4s`
			`; CHECK-DAG: fcvtn v0.4h, [[S0]]`
			`; CHECK-DAG: fcvtn v1.4h, [[S2]]`
			`; CHECK-DAG: v[[R1:[0-9]+]].4h, [[S1]]`
			`; CHECK-DAG: v[[R3:[0-9]+]].4h, [[S3]]`
Revert r294437 as it broke an asan buildbot. llvm-svn: 294523 2017-02-09 05:41:16 +08:00			`; CHECK-DAg: ins v0.d[1], v[[R1]].d[0]`
			`; CHECK-DAG: ins v1.d[1], v[[R3]].d[0]`
[AArch64] Handle vec4, vec8, vec16 *itofp for half Summary: Set operation action for SINT_TO_FP and UINT_TO_FP nodes with v4i32, v8i8, v8i16 inputs to allow promotion of v4f16 results. Add tests for sitofp and uitofp for vec4, vec8, vec16, and i8, i16, i32, and i64 vectors. Only missing tests are for v16i8 and v16i16 as the shift operations are too complicated to write a proper check sequence. The conversions from v4i64 to v4f16 do not depend on this patch - v4i64 is split and the conversion gets handled while lowering v2i64. I am adding a test here for completeness. Reviewers: aemerson, rengolin, ab, jmolloy, srhines Subscribers: rengolin, aemerson, llvm-commits Differential Revision: http://reviews.llvm.org/D9166 llvm-svn: 235609 2015-04-24 01:16:27 +08:00
			`%1 = sitofp <16 x i32> %a to <16 x half>`
			`ret <16 x half> %1`
			`}`


			`define <16 x half> @sitofp_i64(<16 x i64> %a) #0 {`
			`; CHECK-LABEL: sitofp_i64:`
			`; CHECK-DAG: scvtf [[D0:v[0-9]+\.2d]], v0.2d`
			`; CHECK-DAG: scvtf [[D1:v[0-9]+\.2d]], v1.2d`
			`; CHECK-DAG: scvtf [[D2:v[0-9]+\.2d]], v2.2d`
			`; CHECK-DAG: scvtf [[D3:v[0-9]+\.2d]], v3.2d`
			`; CHECK-DAG: scvtf [[D4:v[0-9]+\.2d]], v4.2d`
			`; CHECK-DAG: scvtf [[D5:v[0-9]+\.2d]], v5.2d`
			`; CHECK-DAG: scvtf [[D6:v[0-9]+\.2d]], v6.2d`
			`; CHECK-DAG: scvtf [[D7:v[0-9]+\.2d]], v7.2d`

			`; CHECK-DAG: fcvtn [[S0:v[0-9]+]].2s, [[D0]]`
			`; CHECK-DAG: fcvtn [[S1:v[0-9]+]].2s, [[D2]]`
			`; CHECK-DAG: fcvtn [[S2:v[0-9]+]].2s, [[D4]]`
			`; CHECK-DAG: fcvtn [[S3:v[0-9]+]].2s, [[D6]]`

			`; CHECK-DAG: fcvtn2 [[S0]].4s, [[D1]]`
			`; CHECK-DAG: fcvtn2 [[S1]].4s, [[D3]]`
			`; CHECK-DAG: fcvtn2 [[S2]].4s, [[D5]]`
			`; CHECK-DAG: fcvtn2 [[S3]].4s, [[D7]]`

			`; CHECK-DAG: fcvtn v0.4h, [[S0]].4s`
			`; CHECK-DAG: fcvtn v1.4h, [[S2]].4s`
			`; CHECK-DAG: fcvtn v[[R1:[0-9]+]].4h, [[S1]].4s`
			`; CHECK-DAG: fcvtn v[[R3:[0-9]+]].4h, [[S3]].4s`
Revert r294437 as it broke an asan buildbot. llvm-svn: 294523 2017-02-09 05:41:16 +08:00			`; CHECK-DAG: ins v0.d[1], v[[R1]].d[0]`
			`; CHECK-DAG: ins v1.d[1], v[[R3]].d[0]`
[AArch64] Handle vec4, vec8, vec16 *itofp for half Summary: Set operation action for SINT_TO_FP and UINT_TO_FP nodes with v4i32, v8i8, v8i16 inputs to allow promotion of v4f16 results. Add tests for sitofp and uitofp for vec4, vec8, vec16, and i8, i16, i32, and i64 vectors. Only missing tests are for v16i8 and v16i16 as the shift operations are too complicated to write a proper check sequence. The conversions from v4i64 to v4f16 do not depend on this patch - v4i64 is split and the conversion gets handled while lowering v2i64. I am adding a test here for completeness. Reviewers: aemerson, rengolin, ab, jmolloy, srhines Subscribers: rengolin, aemerson, llvm-commits Differential Revision: http://reviews.llvm.org/D9166 llvm-svn: 235609 2015-04-24 01:16:27 +08:00
			`%1 = sitofp <16 x i64> %a to <16 x half>`
			`ret <16 x half> %1`
			`}`


			`define <16 x half> @uitofp_i32(<16 x i32> %a) #0 {`
			`; CHECK-LABEL: uitofp_i32:`
			`; CHECK-DAG: ucvtf [[S0:v[0-9]+\.4s]], v0.4s`
			`; CHECK-DAG: ucvtf [[S1:v[0-9]+\.4s]], v1.4s`
			`; CHECK-DAG: ucvtf [[S2:v[0-9]+\.4s]], v2.4s`
			`; CHECK-DAG: ucvtf [[S3:v[0-9]+\.4s]], v3.4s`
			`; CHECK-DAG: fcvtn v0.4h, [[S0]]`
			`; CHECK-DAG: fcvtn v1.4h, [[S2]]`
			`; CHECK-DAG: v[[R1:[0-9]+]].4h, [[S1]]`
			`; CHECK-DAG: v[[R3:[0-9]+]].4h, [[S3]]`
Revert r294437 as it broke an asan buildbot. llvm-svn: 294523 2017-02-09 05:41:16 +08:00			`; CHECK-DAg: ins v0.d[1], v[[R1]].d[0]`
			`; CHECK-DAG: ins v1.d[1], v[[R3]].d[0]`
[AArch64] Handle vec4, vec8, vec16 *itofp for half Summary: Set operation action for SINT_TO_FP and UINT_TO_FP nodes with v4i32, v8i8, v8i16 inputs to allow promotion of v4f16 results. Add tests for sitofp and uitofp for vec4, vec8, vec16, and i8, i16, i32, and i64 vectors. Only missing tests are for v16i8 and v16i16 as the shift operations are too complicated to write a proper check sequence. The conversions from v4i64 to v4f16 do not depend on this patch - v4i64 is split and the conversion gets handled while lowering v2i64. I am adding a test here for completeness. Reviewers: aemerson, rengolin, ab, jmolloy, srhines Subscribers: rengolin, aemerson, llvm-commits Differential Revision: http://reviews.llvm.org/D9166 llvm-svn: 235609 2015-04-24 01:16:27 +08:00
			`%1 = uitofp <16 x i32> %a to <16 x half>`
			`ret <16 x half> %1`
			`}`


			`define <16 x half> @uitofp_i64(<16 x i64> %a) #0 {`
			`; CHECK-LABEL: uitofp_i64:`
			`; CHECK-DAG: ucvtf [[D0:v[0-9]+\.2d]], v0.2d`
			`; CHECK-DAG: ucvtf [[D1:v[0-9]+\.2d]], v1.2d`
			`; CHECK-DAG: ucvtf [[D2:v[0-9]+\.2d]], v2.2d`
			`; CHECK-DAG: ucvtf [[D3:v[0-9]+\.2d]], v3.2d`
			`; CHECK-DAG: ucvtf [[D4:v[0-9]+\.2d]], v4.2d`
			`; CHECK-DAG: ucvtf [[D5:v[0-9]+\.2d]], v5.2d`
			`; CHECK-DAG: ucvtf [[D6:v[0-9]+\.2d]], v6.2d`
			`; CHECK-DAG: ucvtf [[D7:v[0-9]+\.2d]], v7.2d`

			`; CHECK-DAG: fcvtn [[S0:v[0-9]+]].2s, [[D0]]`
			`; CHECK-DAG: fcvtn [[S1:v[0-9]+]].2s, [[D2]]`
			`; CHECK-DAG: fcvtn [[S2:v[0-9]+]].2s, [[D4]]`
			`; CHECK-DAG: fcvtn [[S3:v[0-9]+]].2s, [[D6]]`

			`; CHECK-DAG: fcvtn2 [[S0]].4s, [[D1]]`
			`; CHECK-DAG: fcvtn2 [[S1]].4s, [[D3]]`
			`; CHECK-DAG: fcvtn2 [[S2]].4s, [[D5]]`
			`; CHECK-DAG: fcvtn2 [[S3]].4s, [[D7]]`

			`; CHECK-DAG: fcvtn v0.4h, [[S0]].4s`
			`; CHECK-DAG: fcvtn v1.4h, [[S2]].4s`
			`; CHECK-DAG: fcvtn v[[R1:[0-9]+]].4h, [[S1]].4s`
			`; CHECK-DAG: fcvtn v[[R3:[0-9]+]].4h, [[S3]].4s`
Revert r294437 as it broke an asan buildbot. llvm-svn: 294523 2017-02-09 05:41:16 +08:00			`; CHECK-DAG: ins v0.d[1], v[[R1]].d[0]`
			`; CHECK-DAG: ins v1.d[1], v[[R3]].d[0]`
[AArch64] Handle vec4, vec8, vec16 *itofp for half Summary: Set operation action for SINT_TO_FP and UINT_TO_FP nodes with v4i32, v8i8, v8i16 inputs to allow promotion of v4f16 results. Add tests for sitofp and uitofp for vec4, vec8, vec16, and i8, i16, i32, and i64 vectors. Only missing tests are for v16i8 and v16i16 as the shift operations are too complicated to write a proper check sequence. The conversions from v4i64 to v4f16 do not depend on this patch - v4i64 is split and the conversion gets handled while lowering v2i64. I am adding a test here for completeness. Reviewers: aemerson, rengolin, ab, jmolloy, srhines Subscribers: rengolin, aemerson, llvm-commits Differential Revision: http://reviews.llvm.org/D9166 llvm-svn: 235609 2015-04-24 01:16:27 +08:00
			`%1 = uitofp <16 x i64> %a to <16 x half>`
			`ret <16 x half> %1`
			`}`

			`attributes #0 = { nounwind }`