; llvm-project/llvm/test/CodeGen/X86/half.ll

; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=-f16c -asm-verbose=false \
; RUN:   | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LIBCALL
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+f16c -asm-verbose=false \
; RUN:    | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-F16C

; Copying a half through memory needs no conversion: both configurations are
; expected to load the 16 bits into one scratch register and store that same
; register back out.
define void @test_load_store(half* %in, half* %out) {
; CHECK-LABEL: test_load_store:
; CHECK: movw (%rdi), [[TMP:%[a-z0-9]+]]
; CHECK: movw [[TMP]], (%rsi)
  %h = load half, half* %in
  store half %h, half* %out
  ret void
}

; Reinterpreting a loaded half as i16 is expected to fold into a single
; zero-extending 16-bit load straight into the return register.
define i16 @test_bitcast_from_half(half* %addr) {
; CHECK-LABEL: test_bitcast_from_half:
; CHECK: movzwl (%rdi), %eax
  %h = load half, half* %addr
  %bits = bitcast half %h to i16
  ret i16 %bits
}

; Reinterpreting an i16 argument as half and storing it is expected to be a
; plain 16-bit store of the incoming integer register.
define void @test_bitcast_to_half(half* %addr, i16 %in) {
; CHECK-LABEL: test_bitcast_to_half:
; CHECK: movw %si, (%rdi)
  %h = bitcast i16 %in to half
  store half %h, half* %addr
  ret void
}

; half -> float extension.
;   -f16c : expected to tail-jump to the __gnu_h2f_ieee helper.
;   +f16c : expected to use the vcvtph2ps instruction.
define float @test_extend32(half* %addr) {
; CHECK-LABEL: test_extend32:

; CHECK-LIBCALL: jmp __gnu_h2f_ieee
; CHECK-F16C: vcvtph2ps
  %h = load half, half* %addr
  %f = fpext half %h to float
  ret float %f
}

; half -> double extension, expected to go through float in both configurations:
;   -f16c : call __gnu_h2f_ieee, then cvtss2sd.
;   +f16c : vcvtph2ps, then vcvtss2sd.
define double @test_extend64(half* %addr) {
; CHECK-LABEL: test_extend64:

; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL: cvtss2sd
; CHECK-F16C: vcvtph2ps
; CHECK-F16C: vcvtss2sd
  %h = load half, half* %addr
  %d = fpext half %h to double
  ret double %d
}

; float -> half truncation followed by a store.
;   -f16c : expected to call the __gnu_f2h_ieee helper.
;   +f16c : expected to use the vcvtps2ph instruction.
define void @test_trunc32(float %in, half* %addr) {
; CHECK-LABEL: test_trunc32:

; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-F16C: vcvtps2ph
  %h = fptrunc float %in to half
  store half %h, half* %addr
  ret void
}

; double -> half truncation followed by a store. Both configurations are
; expected to call __truncdfhf2, i.e. enabling F16C does not change this case.
define void @test_trunc64(double %in, half* %addr) {
; CHECK-LABEL: test_trunc64:

; CHECK-LIBCALL: callq __truncdfhf2
; CHECK-F16C: callq __truncdfhf2
  %h = fptrunc double %in to half
  store half %h, half* %addr
  ret void
}

; half -> signed i64. The whole function body is pinned instruction by
; instruction for each configuration.
define i64 @test_fptosi_i64(half* %p) #0 {
; CHECK-LABEL: test_fptosi_i64:

; -f16c: load the raw 16 bits as the helper's argument, extend to float via
; __gnu_h2f_ieee, then convert the float with truncation.
; CHECK-LIBCALL-NEXT: pushq %rax
; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax
; CHECK-LIBCALL-NEXT: popq %rcx
; CHECK-LIBCALL-NEXT: retq

; +f16c: move the loaded bits into a vector register, extend in hardware, then
; convert with truncation; no call is expected.
; CHECK-F16C-NEXT: movswl (%rdi), [[REG0:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vmovd [[REG0]], [[REG1:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvtph2ps [[REG1]], [[REG2:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvttss2si [[REG2]], %rax
; CHECK-F16C-NEXT: retq
  %h = load half, half* %p, align 2
  %res = fptosi half %h to i64
  ret i64 %res
}

; Signed i64 -> half, stored through %p. The whole function body is pinned
; instruction by instruction for each configuration.
define void @test_sitofp_i64(i64 %a, half* %p) #0 {
; CHECK-LABEL: test_sitofp_i64:

; -f16c: the destination pointer is parked in a pushed register across the
; __gnu_f2h_ieee call, and that same register must be popped before returning.
; CHECK-LIBCALL-NEXT: pushq [[ADDR:%[a-z]+]]
; CHECK-LIBCALL-NEXT: movq %rsi, [[ADDR]]
; CHECK-LIBCALL-NEXT: cvtsi2ssq %rdi, %xmm0
; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]])
; CHECK-LIBCALL-NEXT: popq [[ADDR]]
; CHECK-LIBCALL-NEXT: retq

; +f16c: convert to float, narrow to half in hardware, and store the low 16
; bits; no call is expected.
; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, [[REG0:%[a-z0-9]+]], [[REG0]]
; CHECK-F16C-NEXT: vcvtps2ph $0, [[REG0]], [[REG0]]
; CHECK-F16C-NEXT: vmovd [[REG0]], %eax
; CHECK-F16C-NEXT: movw %ax, (%rsi)
; CHECK-F16C-NEXT: retq
  %r = sitofp i64 %a to half
  store half %r, half* %p
  ret void
}

; half -> unsigned i64. The whole function body is pinned instruction by
; instruction for each configuration.
define i64 @test_fptoui_i64(half* %p) #0 {
; CHECK-LABEL: test_fptoui_i64:

; FP_TO_UINT is expanded using FP_TO_SINT
; Both configurations follow the same shape: subtract a constant-pool float
; from the extended value, convert the difference, xor it with the i64 sign
; bit, convert the unadjusted value too, then compare against the constant and
; pick one of the two results with cmovae.
; NOTE(review): the constant-pool float is presumably 2^63 -- confirm.
; CHECK-LIBCALL-NEXT: pushq %rax
; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT: movss {{.[A-Z_0-9]+}}(%rip), [[REG1:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: movaps %xmm0, [[REG2:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: subss [[REG1]], [[REG2]]
; CHECK-LIBCALL-NEXT: cvttss2si [[REG2]], [[REG3:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: movabsq  $-9223372036854775808, [[REG4:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: xorq [[REG3]], [[REG4]]
; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, [[REG5:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: ucomiss [[REG1]], %xmm0
; CHECK-LIBCALL-NEXT: cmovaeq [[REG4]], [[REG5]]
; CHECK-LIBCALL-NEXT: popq %rcx
; CHECK-LIBCALL-NEXT: retq

; The xorq destination reuses the register captured by the movabsq line, so the
; sign-bit constant and the xor result are required to share one register, as
; in the sequence above.
; CHECK-F16C-NEXT: movswl (%rdi), [[REG0:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vmovd [[REG0]], [[REG1:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvtph2ps [[REG1]], [[REG2:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vmovss {{.[A-Z_0-9]+}}(%rip), [[REG3:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vsubss [[REG3]], [[REG2]], [[REG4:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvttss2si [[REG4]], [[REG5:%[a-z0-9]+]]
; CHECK-F16C-NEXT: movabsq $-9223372036854775808, [[REG6:%[a-z0-9]+]]
; CHECK-F16C-NEXT: xorq [[REG5]], [[REG6]]
; CHECK-F16C-NEXT: vcvttss2si [[REG2]], [[REG7:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vucomiss [[REG3]], [[REG2]]
; CHECK-F16C-NEXT: cmovaeq [[REG6]], %rax
; CHECK-F16C-NEXT: retq
  %a = load half, half* %p, align 2
  %r = fptoui half %a to i64
  ret i64 %r
}

; Unsigned i64 -> half, stored through %p. The expected code branches on the
; sign bit of the input: non-negative values are converted directly, values
; with the top bit set take a shift+or path, and both paths meet at a shared
; float -> half tail. Lines common to both configurations use the shared
; prefix; only the conversion instructions differ.
define void @test_uitofp_i64(i64 %a, half* %p) #0 {
; CHECK-LABEL: test_uitofp_i64:
; CHECK-LIBCALL-NEXT: pushq [[ADDR:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: movq %rsi, [[ADDR]]
; CHECK-NEXT: movl %edi, [[REG0:%[a-z0-9]+]]
; CHECK-NEXT: andl $1, [[REG0]]
; CHECK-NEXT: testq %rdi, %rdi
; CHECK-NEXT: js [[LABEL1:.LBB[0-9_]+]]

; simple conversion to float if non-negative
; CHECK-LIBCALL-NEXT: cvtsi2ssq %rdi, [[REG1:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, [[REG1:%[a-z0-9]+]], [[REG1]]
; CHECK-NEXT: jmp [[LABEL2:.LBB[0-9_]+]]

; convert using shift+or if negative
; CHECK-NEXT: [[LABEL1]]:
; CHECK-NEXT: shrq %rdi
; CHECK-NEXT: orq %rdi, [[REG2:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: cvtsi2ssq [[REG2]], [[REG3:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: addss [[REG3]], [[REG1]]
; CHECK-F16C-NEXT: vcvtsi2ssq [[REG2]], [[REG3:%[a-z0-9]+]], [[REG3]]
; CHECK-F16C-NEXT: vaddss [[REG3]], [[REG3]], [[REG1:%[a-z0-9]+]]

; convert float to half
; CHECK-NEXT: [[LABEL2]]:
; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]])
; CHECK-LIBCALL-NEXT: popq [[ADDR]]
; CHECK-F16C-NEXT: vcvtps2ph $0, [[REG1]], [[REG4:%[a-z0-9]+]]
; CHECK-F16C-NEXT: vmovd [[REG4]], %eax
; CHECK-F16C-NEXT: movw %ax, (%rsi)
; CHECK-NEXT: retq

  %r = uitofp i64 %a to half
  store half %r, half* %p
  ret void
}

; <4 x half> -> <4 x float> extension. The expected output converts each of
; the four elements separately: four helper calls without F16C, four
; vcvtph2ps instructions with it.
define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 {
; CHECK-LABEL: test_extend32_vec4:

; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-F16C: vcvtph2ps
; CHECK-F16C: vcvtph2ps
; CHECK-F16C: vcvtph2ps
; CHECK-F16C: vcvtph2ps
  %halves = load <4 x half>, <4 x half>* %p, align 8
  %floats = fpext <4 x half> %halves to <4 x float>
  ret <4 x float> %floats
}

; <4 x half> -> <4 x double> extension. Each element is expected to be extended
; to float first (helper call without F16C, vcvtph2ps with it) and then widened
; to double. The first half->float conversion and the last float->double
; conversion are matched in order; the ones in between may interleave freely.
define <4 x double> @test_extend64_vec4(<4 x half>* %p) #0 {
; The trailing colon anchors the match on the function's label line, matching
; the convention used by the other tests in this file.
; CHECK-LABEL: test_extend64_vec4:

; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee
; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee
; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee
; CHECK-LIBCALL-DAG: cvtss2sd
; CHECK-LIBCALL-DAG: cvtss2sd
; CHECK-LIBCALL-DAG: cvtss2sd
; CHECK-LIBCALL: cvtss2sd
; CHECK-F16C: vcvtph2ps
; CHECK-F16C-DAG: vcvtph2ps
; CHECK-F16C-DAG: vcvtph2ps
; CHECK-F16C-DAG: vcvtph2ps
; CHECK-F16C-DAG: vcvtss2sd
; CHECK-F16C-DAG: vcvtss2sd
; CHECK-F16C-DAG: vcvtss2sd
; CHECK-F16C: vcvtss2sd
  %a = load <4 x half>, <4 x half>* %p, align 8
  %b = fpext <4 x half> %a to <4 x double>
  ret <4 x double> %b
}

; <4 x float> -> <4 x half> truncation followed by a vector store. The
; expected output truncates each element separately (four helper calls without
; F16C, four vcvtps2ph with it) and then issues four 16-bit stores in either
; configuration.
define void @test_trunc32_vec4(<4 x float> %a, <4 x half>* %p) {
; CHECK-LABEL: test_trunc32_vec4:

; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-LIBCALL: callq __gnu_f2h_ieee
; CHECK-F16C: vcvtps2ph
; CHECK-F16C: vcvtps2ph
; CHECK-F16C: vcvtps2ph
; CHECK-F16C: vcvtps2ph
; CHECK: movw
; CHECK: movw
; CHECK: movw
; CHECK: movw
  %halves = fptrunc <4 x float> %a to <4 x half>
  store <4 x half> %halves, <4 x half>* %p
  ret void
}

; <4 x double> -> <4 x half> truncation followed by a vector store. Both
; configurations are expected to call __truncdfhf2 once per element and then
; issue four 16-bit stores.
define void @test_trunc64_vec4(<4 x double> %a, <4 x half>* %p) {
; CHECK-LABEL: test_trunc64_vec4:
; CHECK: callq  __truncdfhf2
; CHECK: callq  __truncdfhf2
; CHECK: callq  __truncdfhf2
; CHECK: callq  __truncdfhf2
; CHECK: movw
; CHECK: movw
; CHECK: movw
; CHECK: movw
  %halves = fptrunc <4 x double> %a to <4 x half>
  store <4 x half> %halves, <4 x half>* %p
  ret void
}

; Attribute group #0 is referenced by the scalar i64 conversion tests and the
; two vector fpext tests above.
; NOTE(review): nounwind presumably keeps unwind (CFI) directives out of the
; emitted assembly so that the consecutive-line matches in those tests can
; start right after the function label -- confirm before removing it.
attributes #0 = { nounwind }
[X86] Updates to X86 backend for f16 promotion Summary: r235215 adds support for f16 to be considered as a load/store type and promote f16 operations to f32. This patch has miscellaneous fixes for the X86 backend so all f16 operations are handled: 1. Set loadextaction for f16 vectors to expand. 2. Handle FP_EXTEND in a switch statement when handling v2f32 3. Do not fold (FP_TO_SINT (load f16)) into FP_TO_INT*_IN_MEM or (store (SINT_TO_FP )) to a FILD. Tests included. Reviewers: ab, srhines, delena Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D9092 llvm-svn: 237004 2015-05-12 01:14:39 +08:00			`; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=-f16c -asm-verbose=false \`
			`; RUN: \| FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LIBCALL`
			`; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+f16c -asm-verbose=false \`
			`; RUN: \| FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-F16C`
CodeGen: soften f16 type by default instead of marking legal. Actual support for softening f16 operations is still limited, and can be added when it's needed. But Soften is much closer to being a useful thing to try than keeping it Legal when no registers can actually hold such values. Longer term, we probably want something between Soften and Promote semantics for most targets, it'll be more efficient to promote the 4 basic operations to f32 than libcall them. llvm-svn: 213372 2014-07-18 20:41:46 +08:00
			`define void @test_load_store(half* %in, half* %out) {`
			`; CHECK-LABEL: test_load_store:`
			`; CHECK: movw (%rdi), [[TMP:%[a-z0-9]+]]`
			`; CHECK: movw [[TMP]], (%rsi)`
[opaque pointer type] Add textual IR support for explicit type parameter to load instruction Essentially the same as the GEP change in r230786. A similar migration script can be used to update test cases, though a few more test case improvements/changes were required this time around: (r229269-r229278) import fileinput import sys import re pat = re.compile(r"((?:=\|:\|^)\sload (?:atomic )?(?:volatile )?(.?))(\| addrspace\(\d+\) )\($\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$)") for line in sys.stdin: sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line)) Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7649 llvm-svn: 230794 2015-02-28 05:17:42 +08:00			`%val = load half, half* %in`
CodeGen: soften f16 type by default instead of marking legal. Actual support for softening f16 operations is still limited, and can be added when it's needed. But Soften is much closer to being a useful thing to try than keeping it Legal when no registers can actually hold such values. Longer term, we probably want something between Soften and Promote semantics for most targets, it'll be more efficient to promote the 4 basic operations to f32 than libcall them. llvm-svn: 213372 2014-07-18 20:41:46 +08:00			`store half %val, half* %out`
			`ret void`
			`}`

			`define i16 @test_bitcast_from_half(half* %addr) {`
			`; CHECK-LABEL: test_bitcast_from_half:`
			`; CHECK: movzwl (%rdi), %eax`
[opaque pointer type] Add textual IR support for explicit type parameter to load instruction Essentially the same as the GEP change in r230786. A similar migration script can be used to update test cases, though a few more test case improvements/changes were required this time around: (r229269-r229278) import fileinput import sys import re pat = re.compile(r"((?:=\|:\|^)\sload (?:atomic )?(?:volatile )?(.?))(\| addrspace\(\d+\) )\($\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$)") for line in sys.stdin: sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line)) Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7649 llvm-svn: 230794 2015-02-28 05:17:42 +08:00			`%val = load half, half* %addr`
CodeGen: soften f16 type by default instead of marking legal. Actual support for softening f16 operations is still limited, and can be added when it's needed. But Soften is much closer to being a useful thing to try than keeping it Legal when no registers can actually hold such values. Longer term, we probably want something between Soften and Promote semantics for most targets, it'll be more efficient to promote the 4 basic operations to f32 than libcall them. llvm-svn: 213372 2014-07-18 20:41:46 +08:00			`%val_int = bitcast half %val to i16`
			`ret i16 %val_int`
			`}`

			`define void @test_bitcast_to_half(half* %addr, i16 %in) {`
			`; CHECK-LABEL: test_bitcast_to_half:`
			`; CHECK: movw %si, (%rdi)`
			`%val_fp = bitcast i16 %in to half`
			`store half %val_fp, half* %addr`
			`ret void`
			`}`
X86: support fpext/fptrunc operations to and from 16-bit floats. llvm-svn: 213374 2014-07-18 21:01:25 +08:00
			`define float @test_extend32(half* %addr) {`
			`; CHECK-LABEL: test_extend32:`

			`; CHECK-LIBCALL: jmp __gnu_h2f_ieee`
[X86] Updates to X86 backend for f16 promotion Summary: r235215 adds support for f16 to be considered as a load/store type and promote f16 operations to f32. This patch has miscellaneous fixes for the X86 backend so all f16 operations are handled: 1. Set loadextaction for f16 vectors to expand. 2. Handle FP_EXTEND in a switch statement when handling v2f32 3. Do not fold (FP_TO_SINT (load f16)) into FP_TO_INT*_IN_MEM or (store (SINT_TO_FP )) to a FILD. Tests included. Reviewers: ab, srhines, delena Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D9092 llvm-svn: 237004 2015-05-12 01:14:39 +08:00			`; CHECK-F16C: vcvtph2ps`
[opaque pointer type] Add textual IR support for explicit type parameter to load instruction Essentially the same as the GEP change in r230786. A similar migration script can be used to update test cases, though a few more test case improvements/changes were required this time around: (r229269-r229278) import fileinput import sys import re pat = re.compile(r"((?:=\|:\|^)\sload (?:atomic )?(?:volatile )?(.?))(\| addrspace\(\d+\) )\($\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$)") for line in sys.stdin: sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line)) Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7649 llvm-svn: 230794 2015-02-28 05:17:42 +08:00			`%val16 = load half, half* %addr`
X86: support fpext/fptrunc operations to and from 16-bit floats. llvm-svn: 213374 2014-07-18 21:01:25 +08:00			`%val32 = fpext half %val16 to float`
			`ret float %val32`
			`}`

			`define double @test_extend64(half* %addr) {`
			`; CHECK-LABEL: test_extend64:`

			`; CHECK-LIBCALL: callq __gnu_h2f_ieee`
			`; CHECK-LIBCALL: cvtss2sd`
[X86] Updates to X86 backend for f16 promotion Summary: r235215 adds support for f16 to be considered as a load/store type and promote f16 operations to f32. This patch has miscellaneous fixes for the X86 backend so all f16 operations are handled: 1. Set loadextaction for f16 vectors to expand. 2. Handle FP_EXTEND in a switch statement when handling v2f32 3. Do not fold (FP_TO_SINT (load f16)) into FP_TO_INT*_IN_MEM or (store (SINT_TO_FP )) to a FILD. Tests included. Reviewers: ab, srhines, delena Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D9092 llvm-svn: 237004 2015-05-12 01:14:39 +08:00			`; CHECK-F16C: vcvtph2ps`
			`; CHECK-F16C: vcvtss2sd`
[opaque pointer type] Add textual IR support for explicit type parameter to load instruction Essentially the same as the GEP change in r230786. A similar migration script can be used to update test cases, though a few more test case improvements/changes were required this time around: (r229269-r229278) import fileinput import sys import re pat = re.compile(r"((?:=\|:\|^)\sload (?:atomic )?(?:volatile )?(.?))(\| addrspace\(\d+\) )\($\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$)") for line in sys.stdin: sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line)) Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7649 llvm-svn: 230794 2015-02-28 05:17:42 +08:00			`%val16 = load half, half* %addr`
X86: support fpext/fptrunc operations to and from 16-bit floats. llvm-svn: 213374 2014-07-18 21:01:25 +08:00			`%val32 = fpext half %val16 to double`
			`ret double %val32`
			`}`

			`define void @test_trunc32(float %in, half* %addr) {`
			`; CHECK-LABEL: test_trunc32:`

			`; CHECK-LIBCALL: callq __gnu_f2h_ieee`
[X86] Updates to X86 backend for f16 promotion Summary: r235215 adds support for f16 to be considered as a load/store type and promote f16 operations to f32. This patch has miscellaneous fixes for the X86 backend so all f16 operations are handled: 1. Set loadextaction for f16 vectors to expand. 2. Handle FP_EXTEND in a switch statement when handling v2f32 3. Do not fold (FP_TO_SINT (load f16)) into FP_TO_INT*_IN_MEM or (store (SINT_TO_FP )) to a FILD. Tests included. Reviewers: ab, srhines, delena Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D9092 llvm-svn: 237004 2015-05-12 01:14:39 +08:00			`; CHECK-F16C: vcvtps2ph`
X86: support fpext/fptrunc operations to and from 16-bit floats. llvm-svn: 213374 2014-07-18 21:01:25 +08:00			`%val16 = fptrunc float %in to half`
			`store half %val16, half* %addr`
			`ret void`
			`}`

			`define void @test_trunc64(double %in, half* %addr) {`
			`; CHECK-LABEL: test_trunc64:`

			`; CHECK-LIBCALL: callq __truncdfhf2`
[X86] Updates to X86 backend for f16 promotion Summary: r235215 adds support for f16 to be considered as a load/store type and promote f16 operations to f32. This patch has miscellaneous fixes for the X86 backend so all f16 operations are handled: 1. Set loadextaction for f16 vectors to expand. 2. Handle FP_EXTEND in a switch statement when handling v2f32 3. Do not fold (FP_TO_SINT (load f16)) into FP_TO_INT*_IN_MEM or (store (SINT_TO_FP )) to a FILD. Tests included. Reviewers: ab, srhines, delena Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D9092 llvm-svn: 237004 2015-05-12 01:14:39 +08:00			`; CHECK-F16C: callq __truncdfhf2`
X86: support fpext/fptrunc operations to and from 16-bit floats. llvm-svn: 213374 2014-07-18 21:01:25 +08:00			`%val16 = fptrunc double %in to half`
			`store half %val16, half* %addr`
			`ret void`
			`}`
[X86] Updates to X86 backend for f16 promotion Summary: r235215 adds support for f16 to be considered as a load/store type and promote f16 operations to f32. This patch has miscellaneous fixes for the X86 backend so all f16 operations are handled: 1. Set loadextaction for f16 vectors to expand. 2. Handle FP_EXTEND in a switch statement when handling v2f32 3. Do not fold (FP_TO_SINT (load f16)) into FP_TO_INT*_IN_MEM or (store (SINT_TO_FP )) to a FILD. Tests included. Reviewers: ab, srhines, delena Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D9092 llvm-svn: 237004 2015-05-12 01:14:39 +08:00
			`define i64 @test_fptosi_i64(half* %p) #0 {`
			`; CHECK-LABEL: test_fptosi_i64:`

			`; CHECK-LIBCALL-NEXT: pushq %rax`
			`; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi`
			`; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee`
			`; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax`
findDeadCallerSavedReg needs to pay attention to calling convention Caller saved regs differ between SysV and Win64. Use the tail call available set to scavenge from. Refactor register info to create new helper to get at tail call GPRs. Added a new test case for windows. Fixed up a number of X64 tests since now RCX is preferred over RDX on SysV. Differential Revision: http://reviews.llvm.org/D14878 llvm-svn: 253927 2015-11-24 06:17:44 +08:00			`; CHECK-LIBCALL-NEXT: popq %rcx`
[X86] Updates to X86 backend for f16 promotion Summary: r235215 adds support for f16 to be considered as a load/store type and promote f16 operations to f32. This patch has miscellaneous fixes for the X86 backend so all f16 operations are handled: 1. Set loadextaction for f16 vectors to expand. 2. Handle FP_EXTEND in a switch statement when handling v2f32 3. Do not fold (FP_TO_SINT (load f16)) into FP_TO_INT*_IN_MEM or (store (SINT_TO_FP )) to a FILD. Tests included. Reviewers: ab, srhines, delena Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D9092 llvm-svn: 237004 2015-05-12 01:14:39 +08:00			`; CHECK-LIBCALL-NEXT: retq`

			`; CHECK-F16C-NEXT: movswl (%rdi), [[REG0:%[a-z0-9]+]]`
			`; CHECK-F16C-NEXT: vmovd [[REG0]], [[REG1:%[a-z0-9]+]]`
			`; CHECK-F16C-NEXT: vcvtph2ps [[REG1]], [[REG2:%[a-z0-9]+]]`
			`; CHECK-F16C-NEXT: vcvttss2si [[REG2]], %rax`
			`; CHECK-F16C-NEXT: retq`
			`%a = load half, half* %p, align 2`
			`%r = fptosi half %a to i64`
			`ret i64 %r`
			`}`

			`define void @test_sitofp_i64(i64 %a, half* %p) #0 {`
			`; CHECK-LABEL: test_sitofp_i64:`

			`; CHECK-LIBCALL-NEXT: pushq [[ADDR:%[a-z]+]]`
			`; CHECK-LIBCALL-NEXT: movq %rsi, [[ADDR]]`
			`; CHECK-LIBCALL-NEXT: cvtsi2ssq %rdi, %xmm0`
			`; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee`
			`; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]])`
			`; CHECK_LIBCALL-NEXT: popq [[ADDR]]`
			`; CHECK_LIBCALL-NEXT: retq`

			`; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, [[REG0:%[a-z0-9]+]], [[REG0]]`
			`; CHECK-F16C-NEXT: vcvtps2ph $0, [[REG0]], [[REG0]]`
			`; CHECK-F16C-NEXT: vmovd [[REG0]], %eax`
			`; CHECK-F16C-NEXT: movw %ax, (%rsi)`
			`; CHECK-F16C-NEXT: retq`
			`%r = sitofp i64 %a to half`
			`store half %r, half* %p`
			`ret void`
			`}`

			`define i64 @test_fptoui_i64(half* %p) #0 {`
			`; CHECK-LABEL: test_fptoui_i64:`

			`; FP_TO_UINT is expanded using FP_TO_SINT`
			`; CHECK-LIBCALL-NEXT: pushq %rax`
			`; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi`
			`; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee`
			`; CHECK-LIBCALL-NEXT: movss {{.[A-Z_0-9]+}}(%rip), [[REG1:%[a-z0-9]+]]`
			`; CHECK-LIBCALL-NEXT: movaps %xmm0, [[REG2:%[a-z0-9]+]]`
			`; CHECK-LIBCALL-NEXT: subss [[REG1]], [[REG2]]`
			`; CHECK-LIBCALL-NEXT: cvttss2si [[REG2]], [[REG3:%[a-z0-9]+]]`
			`; CHECK-LIBCALL-NEXT: movabsq $-9223372036854775808, [[REG4:%[a-z0-9]+]]`
			`; CHECK-LIBCALL-NEXT: xorq [[REG3]], [[REG4]]`
			`; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, [[REG5:%[a-z0-9]+]]`
			`; CHECK-LIBCALL-NEXT: ucomiss [[REG1]], %xmm0`
			`; CHECK-LIBCALL-NEXT: cmovaeq [[REG4]], [[REG5]]`
findDeadCallerSavedReg needs to pay attention to calling convention Caller saved regs differ between SysV and Win64. Use the tail call available set to scavenge from. Refactor register info to create new helper to get at tail call GPRs. Added a new test case for windows. Fixed up a number of X64 tests since now RCX is preferred over RDX on SysV. Differential Revision: http://reviews.llvm.org/D14878 llvm-svn: 253927 2015-11-24 06:17:44 +08:00			`; CHECK-LIBCALL-NEXT: popq %rcx`
[X86] Updates to X86 backend for f16 promotion Summary: r235215 adds support for f16 to be considered as a load/store type and promote f16 operations to f32. This patch has miscellaneous fixes for the X86 backend so all f16 operations are handled: 1. Set loadextaction for f16 vectors to expand. 2. Handle FP_EXTEND in a switch statement when handling v2f32 3. Do not fold (FP_TO_SINT (load f16)) into FP_TO_INT*_IN_MEM or (store (SINT_TO_FP )) to a FILD. Tests included. Reviewers: ab, srhines, delena Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D9092 llvm-svn: 237004 2015-05-12 01:14:39 +08:00			`; CHECK-LIBCALL-NEXT: retq`

			`; CHECK-F16C-NEXT: movswl (%rdi), [[REG0:%[a-z0-9]+]]`
			`; CHECK-F16C-NEXT: vmovd [[REG0]], [[REG1:%[a-z0-9]+]]`
			`; CHECK-F16C-NEXT: vcvtph2ps [[REG1]], [[REG2:%[a-z0-9]+]]`
			`; CHECK-F16C-NEXT: vmovss {{.[A-Z_0-9]+}}(%rip), [[REG3:%[a-z0-9]+]]`
			`; CHECK-F16C-NEXT: vsubss [[REG3]], [[REG2]], [[REG4:%[a-z0-9]+]]`
			`; CHECK-F16C-NEXT: vcvttss2si [[REG4]], [[REG5:%[a-z0-9]+]]`
			`; CHECK-F16C-NEXT: movabsq $-9223372036854775808, [[REG6:%[a-z0-9]+]]`
			`; CHECK-F16C-NEXT: xorq [[REG5]], [[REG6:%[a-z0-9]+]]`
			`; CHECK-F16C-NEXT: vcvttss2si [[REG2]], [[REG7:%[a-z0-9]+]]`
			`; CHECK-F16C-NEXT: vucomiss [[REG3]], [[REG2]]`
			`; CHECK-F16C-NEXT: cmovaeq [[REG6]], %rax`
			`; CHECK-F16C-NEXT: retq`
			`%a = load half, half* %p, align 2`
			`%r = fptoui half %a to i64`
			`ret i64 %r`
			`}`

			`define void @test_uitofp_i64(i64 %a, half* %p) #0 {`
			`; CHECK-LABEL: test_uitofp_i64:`
			`; CHECK-LIBCALL-NEXT: pushq [[ADDR:%[a-z0-9]+]]`
			`; CHECK-LIBCALL-NEXT: movq %rsi, [[ADDR]]`
			`; CHECK-NEXT: movl %edi, [[REG0:%[a-z0-9]+]]`
			`; CHECK-NEXT: andl $1, [[REG0]]`
			`; CHECK-NEXT: testq %rdi, %rdi`
			`; CHECK-NEXT: js [[LABEL1:.LBB[0-9_]+]]`

			`; simple conversion to float if non-negative`
			`; CHECK-LIBCALL-NEXT: cvtsi2ssq %rdi, [[REG1:%[a-z0-9]+]]`
			`; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, [[REG1:%[a-z0-9]+]], [[REG1]]`
			`; CHECK-NEXT: jmp [[LABEL2:.LBB[0-9_]+]]`

			`; convert using shift+or if negative`
			`; CHECK-NEXT: [[LABEL1]]:`
			`; CHECK-NEXT: shrq %rdi`
			`; CHECK-NEXT: orq %rdi, [[REG2:%[a-z0-9]+]]`
			`; CHECK-LIBCALL-NEXT: cvtsi2ssq [[REG2]], [[REG3:%[a-z0-9]+]]`
			`; CHECK-LIBCALL-NEXT: addss [[REG3]], [[REG1]]`
			`; CHECK-F16C-NEXT: vcvtsi2ssq [[REG2]], [[REG3:%[a-z0-9]+]], [[REG3]]`
			`; CHECK-F16C-NEXT: vaddss [[REG3]], [[REG3]], [[REG1:[%a-z0-9]+]]`

			`; convert float to half`
			`; CHECK-NEXT: [[LABEL2]]:`
			`; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee`
			`; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]])`
			`; CHECK-LIBCALL-NEXT: popq [[ADDR]]`
			`; CHECK-F16C-NEXT: vcvtps2ph $0, [[REG1]], [[REG4:%[a-z0-9]+]]`
			`; CHECK-F16C-NEXT: vmovd [[REG4]], %eax`
			`; CHECK-F16C-NEXT: movw %ax, (%rsi)`
			`; CHECK-NEXT: retq`

			`%r = uitofp i64 %a to half`
			`store half %r, half* %p`
			`ret void`
			`}`

			`define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 {`
			`; CHECK-LABEL: test_extend32_vec4:`

			`; CHECK-LIBCALL: callq __gnu_h2f_ieee`
			`; CHECK-LIBCALL: callq __gnu_h2f_ieee`
			`; CHECK-LIBCALL: callq __gnu_h2f_ieee`
			`; CHECK-LIBCALL: callq __gnu_h2f_ieee`
			`; CHECK-F16C: vcvtph2ps`
			`; CHECK-F16C: vcvtph2ps`
			`; CHECK-F16C: vcvtph2ps`
			`; CHECK-F16C: vcvtph2ps`
			`%a = load <4 x half>, <4 x half>* %p, align 8`
			`%b = fpext <4 x half> %a to <4 x float>`
			`ret <4 x float> %b`
			`}`

			`define <4 x double> @test_extend64_vec4(<4 x half>* %p) #0 {`
			`; CHECK-LABEL: test_extend64_vec4`

			`; CHECK-LIBCALL: callq __gnu_h2f_ieee`
			`; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee`
			`; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee`
			`; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee`
			`; CHECK-LIBCALL-DAG: cvtss2sd`
			`; CHECK-LIBCALL-DAG: cvtss2sd`
			`; CHECK-LIBCALL-DAG: cvtss2sd`
			`; CHECK-LIBCALL: cvtss2sd`
			`; CHECK-F16C: vcvtph2ps`
			`; CHECK-F16C-DAG: vcvtph2ps`
			`; CHECK-F16C-DAG: vcvtph2ps`
			`; CHECK-F16C-DAG: vcvtph2ps`
			`; CHECK-F16C-DAG: vcvtss2sd`
			`; CHECK-F16C-DAG: vcvtss2sd`
			`; CHECK-F16C-DAG: vcvtss2sd`
			`; CHECK-F16C: vcvtss2sd`
			`%a = load <4 x half>, <4 x half>* %p, align 8`
			`%b = fpext <4 x half> %a to <4 x double>`
			`ret <4 x double> %b`
			`}`

			`define void @test_trunc32_vec4(<4 x float> %a, <4 x half>* %p) {`
			`; CHECK-LABEL: test_trunc32_vec4:`

			`; CHECK-LIBCALL: callq __gnu_f2h_ieee`
			`; CHECK-LIBCALL: callq __gnu_f2h_ieee`
			`; CHECK-LIBCALL: callq __gnu_f2h_ieee`
			`; CHECK-LIBCALL: callq __gnu_f2h_ieee`
			`; CHECK-F16C: vcvtps2ph`
			`; CHECK-F16C: vcvtps2ph`
			`; CHECK-F16C: vcvtps2ph`
			`; CHECK-F16C: vcvtps2ph`
			`; CHECK: movw`
			`; CHECK: movw`
			`; CHECK: movw`
			`; CHECK: movw`
			`%v = fptrunc <4 x float> %a to <4 x half>`
			`store <4 x half> %v, <4 x half>* %p`
			`ret void`
			`}`

			`define void @test_trunc64_vec4(<4 x double> %a, <4 x half>* %p) {`
			`; CHECK-LABEL: test_trunc64_vec4:`
			`; CHECK: callq __truncdfhf2`
			`; CHECK: callq __truncdfhf2`
			`; CHECK: callq __truncdfhf2`
			`; CHECK: callq __truncdfhf2`
			`; CHECK: movw`
			`; CHECK: movw`
			`; CHECK: movw`
			`; CHECK: movw`
			`%v = fptrunc <4 x double> %a to <4 x half>`
			`store <4 x half> %v, <4 x half>* %p`
			`ret void`
			`}`

			`attributes #0 = { nounwind }`