llvm-project/llvm/test/CodeGen/X86/fp-cvt.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-unknown | FileCheck %s --check-prefixes=X86,X86-X87
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64,X64-X87
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=X64,X64-SSSE3
;
; fptosi
;
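; Note on the expected lowering (descriptive only): without SSE3 the code goes
; through the x87 unit, saving the FPU control word (fnstcw), forcing the
; rounding-control bits to round-toward-zero (orl $3072, i.e. 0xC00), storing
; with fistp, and then restoring the control word. The SSSE3 run uses fisttp,
; which always truncates, so the control-word save/restore disappears.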
define i16 @fptosi_i16_fp80(x86_fp80 %a0) nounwind {
; X86-LABEL: fptosi_i16_fp80:
; X86: # %bb.0:
; X86-NEXT: subl $8, %esp
; X86-NEXT: fldt {{[0-9]+}}(%esp)
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $3072, %eax # imm = 0xC00
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-NEXT: fistps {{[0-9]+}}(%esp)
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl $8, %esp
; X86-NEXT: retl
;
; X64-X87-LABEL: fptosi_i16_fp80:
; X64-X87: # %bb.0:
; X64-X87-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fistps -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-X87-NEXT: retq
;
; X64-SSSE3-LABEL: fptosi_i16_fp80:
; X64-SSSE3: # %bb.0:
; X64-SSSE3-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-SSSE3-NEXT: fisttps -{{[0-9]+}}(%rsp)
; X64-SSSE3-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-SSSE3-NEXT: retq
%1 = fptosi x86_fp80 %a0 to i16
ret i16 %1
}
define i16 @fptosi_i16_fp80_ld(x86_fp80 *%a0) nounwind {
; X86-LABEL: fptosi_i16_fp80_ld:
; X86: # %bb.0:
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: fldt (%eax)
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $3072, %eax # imm = 0xC00
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-NEXT: fistps {{[0-9]+}}(%esp)
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl $8, %esp
; X86-NEXT: retl
;
; X64-X87-LABEL: fptosi_i16_fp80_ld:
; X64-X87: # %bb.0:
; X64-X87-NEXT: fldt (%rdi)
; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fistps -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-X87-NEXT: retq
;
; X64-SSSE3-LABEL: fptosi_i16_fp80_ld:
; X64-SSSE3: # %bb.0:
; X64-SSSE3-NEXT: fldt (%rdi)
; X64-SSSE3-NEXT: fisttps -{{[0-9]+}}(%rsp)
; X64-SSSE3-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-SSSE3-NEXT: retq
%1 = load x86_fp80, x86_fp80 *%a0
%2 = fptosi x86_fp80 %1 to i16
ret i16 %2
}
define i32 @fptosi_i32_fp80(x86_fp80 %a0) nounwind {
; X86-LABEL: fptosi_i32_fp80:
; X86: # %bb.0:
; X86-NEXT: subl $8, %esp
; X86-NEXT: fldt {{[0-9]+}}(%esp)
; X86-NEXT: fnstcw (%esp)
; X86-NEXT: movzwl (%esp), %eax
; X86-NEXT: orl $3072, %eax # imm = 0xC00
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-NEXT: fistpl {{[0-9]+}}(%esp)
; X86-NEXT: fldcw (%esp)
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl $8, %esp
; X86-NEXT: retl
;
; X64-X87-LABEL: fptosi_i32_fp80:
; X64-X87: # %bb.0:
; X64-X87-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fistpl -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-X87-NEXT: retq
;
; X64-SSSE3-LABEL: fptosi_i32_fp80:
; X64-SSSE3: # %bb.0:
; X64-SSSE3-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-SSSE3-NEXT: fisttpl -{{[0-9]+}}(%rsp)
; X64-SSSE3-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-SSSE3-NEXT: retq
%1 = fptosi x86_fp80 %a0 to i32
ret i32 %1
}
define i32 @fptosi_i32_fp80_ld(x86_fp80 *%a0) nounwind {
; X86-LABEL: fptosi_i32_fp80_ld:
; X86: # %bb.0:
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: fldt (%eax)
; X86-NEXT: fnstcw (%esp)
; X86-NEXT: movzwl (%esp), %eax
; X86-NEXT: orl $3072, %eax # imm = 0xC00
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-NEXT: fistpl {{[0-9]+}}(%esp)
; X86-NEXT: fldcw (%esp)
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl $8, %esp
; X86-NEXT: retl
;
; X64-X87-LABEL: fptosi_i32_fp80_ld:
; X64-X87: # %bb.0:
; X64-X87-NEXT: fldt (%rdi)
; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fistpl -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-X87-NEXT: retq
;
; X64-SSSE3-LABEL: fptosi_i32_fp80_ld:
; X64-SSSE3: # %bb.0:
; X64-SSSE3-NEXT: fldt (%rdi)
; X64-SSSE3-NEXT: fisttpl -{{[0-9]+}}(%rsp)
; X64-SSSE3-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-SSSE3-NEXT: retq
%1 = load x86_fp80, x86_fp80 *%a0
%2 = fptosi x86_fp80 %1 to i32
ret i32 %2
}
define i64 @fptosi_i64_fp80(x86_fp80 %a0) nounwind {
; X86-LABEL: fptosi_i64_fp80:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: fldt 8(%ebp)
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $3072, %eax # imm = 0xC00
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-X87-LABEL: fptosi_i64_fp80:
; X64-X87: # %bb.0:
; X64-X87-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fistpll -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; X64-X87-NEXT: retq
;
; X64-SSSE3-LABEL: fptosi_i64_fp80:
; X64-SSSE3: # %bb.0:
; X64-SSSE3-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp)
; X64-SSSE3-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; X64-SSSE3-NEXT: retq
%1 = fptosi x86_fp80 %a0 to i64
ret i64 %1
}
define i64 @fptosi_i64_fp80_ld(x86_fp80 *%a0) nounwind {
; X86-LABEL: fptosi_i64_fp80_ld:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: fldt (%eax)
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $3072, %eax # imm = 0xC00
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-X87-LABEL: fptosi_i64_fp80_ld:
; X64-X87: # %bb.0:
; X64-X87-NEXT: fldt (%rdi)
; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fistpll -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; X64-X87-NEXT: retq
;
; X64-SSSE3-LABEL: fptosi_i64_fp80_ld:
; X64-SSSE3: # %bb.0:
; X64-SSSE3-NEXT: fldt (%rdi)
; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp)
; X64-SSSE3-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; X64-SSSE3-NEXT: retq
%1 = load x86_fp80, x86_fp80 *%a0
%2 = fptosi x86_fp80 %1 to i64
ret i64 %2
}
;
; fptoui
;
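; Note on the expected lowering (descriptive only): the x87 fist/fisttp
; instructions store signed integers, so fptoui is lowered via signed
; conversions. As the checks below show, i16 and i32 results are produced by
; converting to the next wider signed type (fistpl/fistpll, or fisttpl/fisttpll
; with SSE3) and truncating. For i64 the source may exceed the signed range, so
; the expansion from https://reviews.llvm.org/D67105 is used; roughly (a
; sketch, not text taken from this file):
;   Sel    = Src < 2^63
;   FltOfs = select Sel, 0.0, 2^63
;   IntOfs = select Sel, 0, 0x8000000000000000
;   Result = fp_to_sint(Src - FltOfs) ^ IntOfs
; which appears below as the fucom/fucomi + setbe compare, the fsubrp, the
; signed fistpll/fisttpll store, and the shift/xor that restores the top bit.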
define i16 @fptoui_i16_fp80(x86_fp80 %a0) nounwind {
; X86-LABEL: fptoui_i16_fp80:
; X86: # %bb.0:
; X86-NEXT: subl $8, %esp
; X86-NEXT: fldt {{[0-9]+}}(%esp)
; X86-NEXT: fnstcw (%esp)
; X86-NEXT: movzwl (%esp), %eax
; X86-NEXT: orl $3072, %eax # imm = 0xC00
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-NEXT: fistpl {{[0-9]+}}(%esp)
; X86-NEXT: fldcw (%esp)
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: addl $8, %esp
; X86-NEXT: retl
;
; X64-X87-LABEL: fptoui_i16_fp80:
; X64-X87: # %bb.0:
; X64-X87-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fistpl -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-X87-NEXT: # kill: def $ax killed $ax killed $eax
; X64-X87-NEXT: retq
;
; X64-SSSE3-LABEL: fptoui_i16_fp80:
; X64-SSSE3: # %bb.0:
; X64-SSSE3-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-SSSE3-NEXT: fisttpl -{{[0-9]+}}(%rsp)
; X64-SSSE3-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-SSSE3-NEXT: # kill: def $ax killed $ax killed $eax
; X64-SSSE3-NEXT: retq
%1 = fptoui x86_fp80 %a0 to i16
ret i16 %1
}
define i16 @fptoui_i16_fp80_ld(x86_fp80 *%a0) nounwind {
; X86-LABEL: fptoui_i16_fp80_ld:
; X86: # %bb.0:
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: fldt (%eax)
; X86-NEXT: fnstcw (%esp)
; X86-NEXT: movzwl (%esp), %eax
; X86-NEXT: orl $3072, %eax # imm = 0xC00
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-NEXT: fistpl {{[0-9]+}}(%esp)
; X86-NEXT: fldcw (%esp)
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: addl $8, %esp
; X86-NEXT: retl
;
; X64-X87-LABEL: fptoui_i16_fp80_ld:
; X64-X87: # %bb.0:
; X64-X87-NEXT: fldt (%rdi)
; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fistpl -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-X87-NEXT: # kill: def $ax killed $ax killed $eax
; X64-X87-NEXT: retq
;
; X64-SSSE3-LABEL: fptoui_i16_fp80_ld:
; X64-SSSE3: # %bb.0:
; X64-SSSE3-NEXT: fldt (%rdi)
; X64-SSSE3-NEXT: fisttpl -{{[0-9]+}}(%rsp)
; X64-SSSE3-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-SSSE3-NEXT: # kill: def $ax killed $ax killed $eax
; X64-SSSE3-NEXT: retq
%1 = load x86_fp80, x86_fp80 *%a0
%2 = fptoui x86_fp80 %1 to i16
ret i16 %2
}
define i32 @fptoui_i32_fp80(x86_fp80 %a0) nounwind {
; X86-LABEL: fptoui_i32_fp80:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: fldt 8(%ebp)
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $3072, %eax # imm = 0xC00
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-X87-LABEL: fptoui_i32_fp80:
; X64-X87: # %bb.0:
; X64-X87-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fistpll -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-X87-NEXT: retq
;
; X64-SSSE3-LABEL: fptoui_i32_fp80:
; X64-SSSE3: # %bb.0:
; X64-SSSE3-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp)
; X64-SSSE3-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-SSSE3-NEXT: retq
%1 = fptoui x86_fp80 %a0 to i32
ret i32 %1
}
define i32 @fptoui_i32_fp80_ld(x86_fp80 *%a0) nounwind {
; X86-LABEL: fptoui_i32_fp80_ld:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: fldt (%eax)
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: orl $3072, %eax # imm = 0xC00
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-X87-LABEL: fptoui_i32_fp80_ld:
; X64-X87: # %bb.0:
; X64-X87-NEXT: fldt (%rdi)
; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-X87-NEXT: orl $3072, %eax # imm = 0xC00
; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fistpll -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-X87-NEXT: retq
;
; X64-SSSE3-LABEL: fptoui_i32_fp80_ld:
; X64-SSSE3: # %bb.0:
; X64-SSSE3-NEXT: fldt (%rdi)
; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp)
; X64-SSSE3-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-SSSE3-NEXT: retq
%1 = load x86_fp80, x86_fp80 *%a0
%2 = fptoui x86_fp80 %1 to i32
ret i32 %2
}
define i64 @fptoui_i64_fp80(x86_fp80 %a0) nounwind {
; X86-LABEL: fptoui_i64_fp80:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: fldt 8(%ebp)
; X86-NEXT: flds {{\.LCPI.*}}
; X86-NEXT: fucom %st(1)
; X86-NEXT: fnstsw %ax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: # kill: def $ah killed $ah killed $ax
; X86-NEXT: sahf
; X86-NEXT: setbe %al
; X86-NEXT: fldz
; X86-NEXT: ja .LBB10_2
; X86-NEXT: # %bb.1:
; X86-NEXT: fstp %st(0)
; X86-NEXT: fldz
; X86-NEXT: fxch %st(1)
; X86-NEXT: .LBB10_2:
; X86-NEXT: fstp %st(1)
; X86-NEXT: fsubrp %st, %st(1)
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: orl $3072, %ecx # imm = 0xC00
; X86-NEXT: movw %cx, {{[0-9]+}}(%esp)
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-NEXT: movb %al, %dl
; X86-NEXT: shll $31, %edx
; X86-NEXT: xorl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-X87-LABEL: fptoui_i64_fp80:
; X64-X87: # %bb.0:
; X64-X87-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-X87-NEXT: flds {{.*}}(%rip)
; X64-X87-NEXT: xorl %eax, %eax
; X64-X87-NEXT: fucomi %st(1), %st
; X64-X87-NEXT: setbe %al
; X64-X87-NEXT: fldz
; X64-X87-NEXT: fxch %st(1)
; X64-X87-NEXT: fcmovnbe %st(1), %st
; X64-X87-NEXT: fstp %st(1)
; X64-X87-NEXT: fsubrp %st, %st(1)
; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
; X64-X87-NEXT: orl $3072, %ecx # imm = 0xC00
; X64-X87-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fistpll -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: shlq $63, %rax
; X64-X87-NEXT: xorq -{{[0-9]+}}(%rsp), %rax
; X64-X87-NEXT: retq
;
; X64-SSSE3-LABEL: fptoui_i64_fp80:
; X64-SSSE3: # %bb.0:
; X64-SSSE3-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-SSSE3-NEXT: flds {{.*}}(%rip)
; X64-SSSE3-NEXT: xorl %eax, %eax
; X64-SSSE3-NEXT: fucomi %st(1), %st
; X64-SSSE3-NEXT: fldz
; X64-SSSE3-NEXT: fxch %st(1)
; X64-SSSE3-NEXT: fcmovnbe %st(1), %st
; X64-SSSE3-NEXT: fstp %st(1)
; X64-SSSE3-NEXT: fsubrp %st, %st(1)
; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp)
; X64-SSSE3-NEXT: setbe %al
; X64-SSSE3-NEXT: shlq $63, %rax
; X64-SSSE3-NEXT: xorq -{{[0-9]+}}(%rsp), %rax
; X64-SSSE3-NEXT: retq
%1 = fptoui x86_fp80 %a0 to i64
ret i64 %1
}
define i64 @fptoui_i64_fp80_ld(x86_fp80 *%a0) nounwind {
; X86-LABEL: fptoui_i64_fp80_ld:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: fldt (%eax)
; X86-NEXT: flds {{\.LCPI.*}}
; X86-NEXT: fucom %st(1)
; X86-NEXT: fnstsw %ax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: # kill: def $ah killed $ah killed $ax
; X86-NEXT: sahf
; X86-NEXT: setbe %al
; X86-NEXT: fldz
; X86-NEXT: ja .LBB11_2
; X86-NEXT: # %bb.1:
; X86-NEXT: fstp %st(0)
; X86-NEXT: fldz
; X86-NEXT: fxch %st(1)
; X86-NEXT: .LBB11_2:
; X86-NEXT: fstp %st(1)
; X86-NEXT: fsubrp %st, %st(1)
; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: orl $3072, %ecx # imm = 0xC00
; X86-NEXT: movw %cx, {{[0-9]+}}(%esp)
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-NEXT: movb %al, %dl
; X86-NEXT: shll $31, %edx
; X86-NEXT: xorl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-X87-LABEL: fptoui_i64_fp80_ld:
; X64-X87: # %bb.0:
; X64-X87-NEXT: fldt (%rdi)
; X64-X87-NEXT: flds {{.*}}(%rip)
; X64-X87-NEXT: xorl %eax, %eax
; X64-X87-NEXT: fucomi %st(1), %st
; X64-X87-NEXT: setbe %al
; X64-X87-NEXT: fldz
; X64-X87-NEXT: fxch %st(1)
; X64-X87-NEXT: fcmovnbe %st(1), %st
; X64-X87-NEXT: fstp %st(1)
; X64-X87-NEXT: fsubrp %st, %st(1)
; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
; X64-X87-NEXT: orl $3072, %ecx # imm = 0xC00
; X64-X87-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fistpll -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: shlq $63, %rax
; X64-X87-NEXT: xorq -{{[0-9]+}}(%rsp), %rax
; X64-X87-NEXT: retq
;
; X64-SSSE3-LABEL: fptoui_i64_fp80_ld:
; X64-SSSE3: # %bb.0:
; X64-SSSE3-NEXT: fldt (%rdi)
; X64-SSSE3-NEXT: flds {{.*}}(%rip)
; X64-SSSE3-NEXT: xorl %eax, %eax
; X64-SSSE3-NEXT: fucomi %st(1), %st
; X64-SSSE3-NEXT: fldz
; X64-SSSE3-NEXT: fxch %st(1)
; X64-SSSE3-NEXT: fcmovnbe %st(1), %st
; X64-SSSE3-NEXT: fstp %st(1)
; X64-SSSE3-NEXT: fsubrp %st, %st(1)
; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp)
; X64-SSSE3-NEXT: setbe %al
; X64-SSSE3-NEXT: shlq $63, %rax
; X64-SSSE3-NEXT: xorq -{{[0-9]+}}(%rsp), %rax
; X64-SSSE3-NEXT: retq
%1 = load x86_fp80, x86_fp80 *%a0
%2 = fptoui x86_fp80 %1 to i64
ret i64 %2
}
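; Note: the fptoui tests above expand via fptosi. The input is compared against
; the 2^63 boundary constant loaded from the constant pool (the flds of an
; {{.*}}(%rip) operand, whose value is not checked literally here), the offset is
; conditionally subtracted before the fistpll/fisttpll store, and the sign bit of
; the integer result is restored with the shlq $63 / xorq pair. On the plain x87
; paths the rounding mode is additionally forced to truncation with the
; fnstcw / orl $3072 / fldcw sequence, while the SSSE3 paths use fisttpll directly.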
;
; sitofp
;
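; The x87 fild family only reads from memory, so these tests expect the integer
; argument to be spilled to a stack slot before the load-and-convert; an i64
; that is already in memory on i686 is converted in place with fildll.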
define x86_fp80 @sitofp_fp80_i16(i16 %a0) nounwind {
; X86-LABEL: sitofp_fp80_i16:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-NEXT: filds {{[0-9]+}}(%esp)
; X86-NEXT: popl %eax
; X86-NEXT: retl
;
; X64-LABEL: sitofp_fp80_i16:
; X64: # %bb.0:
; X64-NEXT: movw %di, -{{[0-9]+}}(%rsp)
; X64-NEXT: filds -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
%1 = sitofp i16 %a0 to x86_fp80
ret x86_fp80 %1
}
define x86_fp80 @sitofp_fp80_i16_ld(i16 *%a0) nounwind {
; X86-LABEL: sitofp_fp80_i16_ld:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
; X86-NEXT: filds {{[0-9]+}}(%esp)
; X86-NEXT: popl %eax
; X86-NEXT: retl
;
; X64-LABEL: sitofp_fp80_i16_ld:
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; X64-NEXT: filds -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
%1 = load i16, i16 *%a0
%2 = sitofp i16 %1 to x86_fp80
ret x86_fp80 %2
}
define x86_fp80 @sitofp_fp80_i32(i32 %a0) nounwind {
; X86-LABEL: sitofp_fp80_i32:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: fildl (%esp)
; X86-NEXT: popl %eax
; X86-NEXT: retl
;
; X64-LABEL: sitofp_fp80_i32:
; X64: # %bb.0:
; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp)
; X64-NEXT: fildl -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
%1 = sitofp i32 %a0 to x86_fp80
ret x86_fp80 %1
}
define x86_fp80 @sitofp_fp80_i32_ld(i32 *%a0) nounwind {
; X86-LABEL: sitofp_fp80_i32_ld:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %eax
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: fildl (%esp)
; X86-NEXT: popl %eax
; X86-NEXT: retl
;
; X64-LABEL: sitofp_fp80_i32_ld:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; X64-NEXT: fildl -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
%1 = load i32, i32 *%a0
%2 = sitofp i32 %1 to x86_fp80
ret x86_fp80 %2
}
define x86_fp80 @sitofp_fp80_i64(i64 %a0) nounwind {
; X86-LABEL: sitofp_fp80_i64:
; X86: # %bb.0:
; X86-NEXT: fildll {{[0-9]+}}(%esp)
; X86-NEXT: retl
;
; X64-LABEL: sitofp_fp80_i64:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
; X64-NEXT: fildll -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
%1 = sitofp i64 %a0 to x86_fp80
ret x86_fp80 %1
}
define x86_fp80 @sitofp_fp80_i64_ld(i64 *%a0) nounwind {
; X86-LABEL: sitofp_fp80_i64_ld:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: fildll (%eax)
; X86-NEXT: retl
;
; X64-LABEL: sitofp_fp80_i64_ld:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; X64-NEXT: fildll -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
%1 = load i64, i64 *%a0
%2 = sitofp i64 %1 to x86_fp80
ret x86_fp80 %2
}
;
; uitofp
;
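; There is no unsigned x87 load, so i16/i32 inputs are zero-extended into a wider
; slot and converted with a signed fild, while i64 inputs are reinterpreted as
; signed and then corrected with a fadds indexed by the sign bit (the constants
; live in the .LCPI pool and are not checked literally here).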
define x86_fp80 @uitofp_fp80_i16(i16 %a0) nounwind {
; X86-LABEL: uitofp_fp80_i16:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: fildl (%esp)
; X86-NEXT: popl %eax
; X86-NEXT: retl
;
; X64-LABEL: uitofp_fp80_i16:
; X64: # %bb.0:
; X64-NEXT: movzwl %di, %eax
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; X64-NEXT: fildl -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
%1 = uitofp i16 %a0 to x86_fp80
ret x86_fp80 %1
}
define x86_fp80 @uitofp_fp80_i16_ld(i16 *%a0) nounwind {
; X86-LABEL: uitofp_fp80_i16_ld:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: fildl (%esp)
; X86-NEXT: popl %eax
; X86-NEXT: retl
;
; X64-LABEL: uitofp_fp80_i16_ld:
; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; X64-NEXT: fildl -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
%1 = load i16, i16 *%a0
%2 = uitofp i16 %1 to x86_fp80
ret x86_fp80 %2
}
define x86_fp80 @uitofp_fp80_i32(i32 %a0) nounwind {
; X86-LABEL: uitofp_fp80_i32:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: fildll (%esp)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: uitofp_fp80_i32:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; X64-NEXT: fildll -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
%1 = uitofp i32 %a0 to x86_fp80
ret x86_fp80 %1
}
define x86_fp80 @uitofp_fp80_i32_ld(i32 *%a0) nounwind {
; X86-LABEL: uitofp_fp80_i32_ld:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl (%eax), %eax
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: fildll (%esp)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: uitofp_fp80_i32_ld:
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; X64-NEXT: fildll -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
%1 = load i32, i32 *%a0
%2 = uitofp i32 %1 to x86_fp80
ret x86_fp80 %2
}
define x86_fp80 @uitofp_fp80_i64(i64 %a0) nounwind {
; X86-LABEL: uitofp_fp80_i64:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, (%esp)
; X86-NEXT: shrl $31, %ecx
; X86-NEXT: fildll (%esp)
; X86-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: uitofp_fp80_i64:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: testq %rdi, %rdi
; X64-NEXT: sets %al
; X64-NEXT: fildll -{{[0-9]+}}(%rsp)
; X64-NEXT: fadds {{\.LCPI.*}}(,%rax,4)
; X64-NEXT: retq
%1 = uitofp i64 %a0 to x86_fp80
ret x86_fp80 %1
}
define x86_fp80 @uitofp_fp80_i64_ld(i64 *%a0) nounwind {
; X86-LABEL: uitofp_fp80_i64_ld:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl (%eax), %ecx
; X86-NEXT: movl 4(%eax), %eax
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl %ecx, (%esp)
; X86-NEXT: shrl $31, %eax
; X86-NEXT: fildll (%esp)
; X86-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: uitofp_fp80_i64_ld:
; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: testq %rax, %rax
; X64-NEXT: sets %cl
; X64-NEXT: fildll -{{[0-9]+}}(%rsp)
; X64-NEXT: fadds {{\.LCPI.*}}(,%rcx,4)
; X64-NEXT: retq
%1 = load i64, i64 *%a0
%2 = uitofp i64 %1 to x86_fp80
ret x86_fp80 %2
}
;
; floor
;
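; floor/ceil/trunc/rint on x86_fp80 are lowered to libcalls (floorl, ceill,
; truncl, rintl), with the 80-bit operand spilled to the outgoing argument area
; via fstpt before the call.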
define x86_fp80 @floor_fp80(x86_fp80 %a0) nounwind {
; X86-LABEL: floor_fp80:
; X86: # %bb.0:
; X86-NEXT: subl $12, %esp
; X86-NEXT: fldt {{[0-9]+}}(%esp)
; X86-NEXT: fstpt (%esp)
; X86-NEXT: calll floorl
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
;
; X64-LABEL: floor_fp80:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fstpt (%rsp)
; X64-NEXT: callq floorl
; X64-NEXT: addq $24, %rsp
; X64-NEXT: retq
%1 = call x86_fp80 @llvm.floor.f80(x86_fp80 %a0)
ret x86_fp80 %1
}
define x86_fp80 @floor_fp80_ld(x86_fp80 *%a0) nounwind {
; X86-LABEL: floor_fp80_ld:
; X86: # %bb.0:
; X86-NEXT: subl $12, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: fldt (%eax)
; X86-NEXT: fstpt (%esp)
; X86-NEXT: calll floorl
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
;
; X64-LABEL: floor_fp80_ld:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: fldt (%rdi)
; X64-NEXT: fstpt (%rsp)
; X64-NEXT: callq floorl
; X64-NEXT: addq $24, %rsp
; X64-NEXT: retq
%1 = load x86_fp80, x86_fp80 *%a0
%2 = call x86_fp80 @llvm.floor.f80(x86_fp80 %1)
ret x86_fp80 %2
}
declare x86_fp80 @llvm.floor.f80(x86_fp80 %p)
;
; ceil
;
define x86_fp80 @ceil_fp80(x86_fp80 %a0) nounwind {
; X86-LABEL: ceil_fp80:
; X86: # %bb.0:
; X86-NEXT: subl $12, %esp
; X86-NEXT: fldt {{[0-9]+}}(%esp)
; X86-NEXT: fstpt (%esp)
; X86-NEXT: calll ceill
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
;
; X64-LABEL: ceil_fp80:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fstpt (%rsp)
; X64-NEXT: callq ceill
; X64-NEXT: addq $24, %rsp
; X64-NEXT: retq
%1 = call x86_fp80 @llvm.ceil.f80(x86_fp80 %a0)
ret x86_fp80 %1
}
define x86_fp80 @ceil_fp80_ld(x86_fp80 *%a0) nounwind {
; X86-LABEL: ceil_fp80_ld:
; X86: # %bb.0:
; X86-NEXT: subl $12, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: fldt (%eax)
; X86-NEXT: fstpt (%esp)
; X86-NEXT: calll ceill
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
;
; X64-LABEL: ceil_fp80_ld:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: fldt (%rdi)
; X64-NEXT: fstpt (%rsp)
; X64-NEXT: callq ceill
; X64-NEXT: addq $24, %rsp
; X64-NEXT: retq
%1 = load x86_fp80, x86_fp80 *%a0
%2 = call x86_fp80 @llvm.ceil.f80(x86_fp80 %1)
ret x86_fp80 %2
}
declare x86_fp80 @llvm.ceil.f80(x86_fp80 %p)
;
; trunc
;
define x86_fp80 @trunc_fp80(x86_fp80 %a0) nounwind {
; X86-LABEL: trunc_fp80:
; X86: # %bb.0:
; X86-NEXT: subl $12, %esp
; X86-NEXT: fldt {{[0-9]+}}(%esp)
; X86-NEXT: fstpt (%esp)
; X86-NEXT: calll truncl
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
;
; X64-LABEL: trunc_fp80:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fstpt (%rsp)
; X64-NEXT: callq truncl
; X64-NEXT: addq $24, %rsp
; X64-NEXT: retq
%1 = call x86_fp80 @llvm.trunc.f80(x86_fp80 %a0)
ret x86_fp80 %1
}
define x86_fp80 @trunc_fp80_ld(x86_fp80 *%a0) nounwind {
; X86-LABEL: trunc_fp80_ld:
; X86: # %bb.0:
; X86-NEXT: subl $12, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: fldt (%eax)
; X86-NEXT: fstpt (%esp)
; X86-NEXT: calll truncl
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
;
; X64-LABEL: trunc_fp80_ld:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: fldt (%rdi)
; X64-NEXT: fstpt (%rsp)
; X64-NEXT: callq truncl
; X64-NEXT: addq $24, %rsp
; X64-NEXT: retq
%1 = load x86_fp80, x86_fp80 *%a0
%2 = call x86_fp80 @llvm.trunc.f80(x86_fp80 %1)
ret x86_fp80 %2
}
declare x86_fp80 @llvm.trunc.f80(x86_fp80 %p)
;
; rint
;
define x86_fp80 @rint_fp80(x86_fp80 %a0) nounwind {
; X86-LABEL: rint_fp80:
; X86: # %bb.0:
; X86-NEXT: subl $12, %esp
; X86-NEXT: fldt {{[0-9]+}}(%esp)
; X86-NEXT: fstpt (%esp)
; X86-NEXT: calll rintl
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
;
; X64-LABEL: rint_fp80:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: fstpt (%rsp)
; X64-NEXT: callq rintl
; X64-NEXT: addq $24, %rsp
; X64-NEXT: retq
%1 = call x86_fp80 @llvm.rint.f80(x86_fp80 %a0)
ret x86_fp80 %1
}
define x86_fp80 @rint_fp80_ld(x86_fp80 *%a0) nounwind {
; X86-LABEL: rint_fp80_ld:
; X86: # %bb.0:
; X86-NEXT: subl $12, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: fldt (%eax)
; X86-NEXT: fstpt (%esp)
; X86-NEXT: calll rintl
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
;
; X64-LABEL: rint_fp80_ld:
; X64: # %bb.0:
; X64-NEXT: subq $24, %rsp
; X64-NEXT: fldt (%rdi)
; X64-NEXT: fstpt (%rsp)
; X64-NEXT: callq rintl
; X64-NEXT: addq $24, %rsp
; X64-NEXT: retq
%1 = load x86_fp80, x86_fp80 *%a0
%2 = call x86_fp80 @llvm.rint.f80(x86_fp80 %1)
ret x86_fp80 %2
}
declare x86_fp80 @llvm.rint.f80(x86_fp80 %p)