[X86] Add 64-bit int to float/double conversion with AVX to X86FastISel::X86SelectSIToFP

Summary:
[X86] Teach fast isel to handle i64 sitofp with AVX.

Previously fast isel only handled i32 sitofp when AVX was enabled, even though the SSE-only path already supports i64. Handle i64 with AVX as well.

Also add i686 RUN lines to the existing i32 tests. The i64 tests are in a separate file to avoid a fast-isel abort failure in 32-bit mode.

Reviewers: RKSimon, zvi

Reviewed By: RKSimon

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D39450

llvm-svn: 317102
This commit is contained in:
Craig Topper 2017-11-01 16:23:06 +00:00
parent 3d971e39f8
commit 5ae677e102
3 changed files with 184 additions and 5 deletions

View File

@ -2410,7 +2410,8 @@ bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
if (!Subtarget->hasAVX())
return false;
if (!I->getOperand(0)->getType()->isIntegerTy(32))
Type *InTy = I->getOperand(0)->getType();
if (!InTy->isIntegerTy(32) && !InTy->isIntegerTy(64))
return false;
// Select integer to float/double conversion.
@ -2423,11 +2424,11 @@ bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
if (I->getType()->isDoubleTy()) {
// sitofp int -> double
Opcode = X86::VCVTSI2SDrr;
Opcode = InTy->isIntegerTy(64) ? X86::VCVTSI2SD64rr : X86::VCVTSI2SDrr;
RC = &X86::FR64RegClass;
} else if (I->getType()->isFloatTy()) {
// sitofp int -> float
Opcode = X86::VCVTSI2SSrr;
Opcode = InTy->isIntegerTy(64) ? X86::VCVTSI2SS64rr : X86::VCVTSI2SSrr;
RC = &X86::FR32RegClass;
} else
return false;

View File

@ -0,0 +1,66 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+sse2 -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+avx -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX
; Signed i64 -> double conversion where the source is the incoming i64
; argument. Both run configurations are expected to select one register-form
; (v)cvtsi2sdq followed directly by the return.
define double @long_to_double_rr(i64 %a) {
; SSE2-LABEL: long_to_double_rr:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: cvtsi2sdq %rdi, %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: long_to_double_rr:
; AVX: # BB#0: # %entry
; AVX-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%0 = sitofp i64 %a to double
ret double %0
}
; Signed i64 -> double conversion where the source is loaded through the
; pointer argument. The expected output folds the load into the memory operand
; of (v)cvtsi2sdq, so no separate load instruction appears.
define double @long_to_double_rm(i64* %a) {
; SSE2-LABEL: long_to_double_rm:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: cvtsi2sdq (%rdi), %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: long_to_double_rm:
; AVX: # BB#0: # %entry
; AVX-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%0 = load i64, i64* %a
%1 = sitofp i64 %0 to double
ret double %1
}
; Signed i64 -> float conversion where the source is the incoming i64
; argument. Both run configurations are expected to select one register-form
; (v)cvtsi2ssq followed directly by the return.
define float @long_to_float_rr(i64 %a) {
; SSE2-LABEL: long_to_float_rr:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: cvtsi2ssq %rdi, %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: long_to_float_rr:
; AVX: # BB#0: # %entry
; AVX-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%0 = sitofp i64 %a to float
ret float %0
}
; Signed i64 -> float conversion where the source is loaded through the
; pointer argument. The expected output folds the load into the memory operand
; of (v)cvtsi2ssq, so no separate load instruction appears.
define float @long_to_float_rm(i64* %a) {
; SSE2-LABEL: long_to_float_rm:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: cvtsi2ssq (%rdi), %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: long_to_float_rm:
; AVX: # BB#0: # %entry
; AVX-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%0 = load i64, i64* %a
%1 = sitofp i64 %0 to float
ret float %1
}

View File

@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+sse2 -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+avx -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+sse2 -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=SSE2
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+avx -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=AVX
; RUN: llc -mtriple=i686-unknown-unknown -mcpu=generic -mattr=+sse2 -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=SSE2_X86
; RUN: llc -mtriple=i686-unknown-unknown -mcpu=generic -mattr=+avx -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=AVX_X86
define double @int_to_double_rr(i32 %a) {
@ -13,6 +15,39 @@ define double @int_to_double_rr(i32 %a) {
; AVX: # BB#0: # %entry
; AVX-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0
; AVX-NEXT: retq
;
; SSE2_X86-LABEL: int_to_double_rr:
; SSE2_X86: # BB#0: # %entry
; SSE2_X86-NEXT: pushl %ebp
; SSE2_X86-NEXT: .cfi_def_cfa_offset 8
; SSE2_X86-NEXT: .cfi_offset %ebp, -8
; SSE2_X86-NEXT: movl %esp, %ebp
; SSE2_X86-NEXT: .cfi_def_cfa_register %ebp
; SSE2_X86-NEXT: andl $-8, %esp
; SSE2_X86-NEXT: subl $8, %esp
; SSE2_X86-NEXT: movl 8(%ebp), %eax
; SSE2_X86-NEXT: cvtsi2sdl %eax, %xmm0
; SSE2_X86-NEXT: movsd %xmm0, (%esp)
; SSE2_X86-NEXT: fldl (%esp)
; SSE2_X86-NEXT: movl %ebp, %esp
; SSE2_X86-NEXT: popl %ebp
; SSE2_X86-NEXT: retl
;
; AVX_X86-LABEL: int_to_double_rr:
; AVX_X86: # BB#0: # %entry
; AVX_X86-NEXT: pushl %ebp
; AVX_X86-NEXT: .cfi_def_cfa_offset 8
; AVX_X86-NEXT: .cfi_offset %ebp, -8
; AVX_X86-NEXT: movl %esp, %ebp
; AVX_X86-NEXT: .cfi_def_cfa_register %ebp
; AVX_X86-NEXT: andl $-8, %esp
; AVX_X86-NEXT: subl $8, %esp
; AVX_X86-NEXT: vcvtsi2sdl 8(%ebp), %xmm0, %xmm0
; AVX_X86-NEXT: vmovsd %xmm0, (%esp)
; AVX_X86-NEXT: fldl (%esp)
; AVX_X86-NEXT: movl %ebp, %esp
; AVX_X86-NEXT: popl %ebp
; AVX_X86-NEXT: retl
entry:
%0 = sitofp i32 %a to double
ret double %0
@ -28,6 +63,40 @@ define double @int_to_double_rm(i32* %a) {
; AVX: # BB#0: # %entry
; AVX-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
;
; SSE2_X86-LABEL: int_to_double_rm:
; SSE2_X86: # BB#0: # %entry
; SSE2_X86-NEXT: pushl %ebp
; SSE2_X86-NEXT: .cfi_def_cfa_offset 8
; SSE2_X86-NEXT: .cfi_offset %ebp, -8
; SSE2_X86-NEXT: movl %esp, %ebp
; SSE2_X86-NEXT: .cfi_def_cfa_register %ebp
; SSE2_X86-NEXT: andl $-8, %esp
; SSE2_X86-NEXT: subl $8, %esp
; SSE2_X86-NEXT: movl 8(%ebp), %eax
; SSE2_X86-NEXT: cvtsi2sdl (%eax), %xmm0
; SSE2_X86-NEXT: movsd %xmm0, (%esp)
; SSE2_X86-NEXT: fldl (%esp)
; SSE2_X86-NEXT: movl %ebp, %esp
; SSE2_X86-NEXT: popl %ebp
; SSE2_X86-NEXT: retl
;
; AVX_X86-LABEL: int_to_double_rm:
; AVX_X86: # BB#0: # %entry
; AVX_X86-NEXT: pushl %ebp
; AVX_X86-NEXT: .cfi_def_cfa_offset 8
; AVX_X86-NEXT: .cfi_offset %ebp, -8
; AVX_X86-NEXT: movl %esp, %ebp
; AVX_X86-NEXT: .cfi_def_cfa_register %ebp
; AVX_X86-NEXT: andl $-8, %esp
; AVX_X86-NEXT: subl $8, %esp
; AVX_X86-NEXT: movl 8(%ebp), %eax
; AVX_X86-NEXT: vcvtsi2sdl (%eax), %xmm0, %xmm0
; AVX_X86-NEXT: vmovsd %xmm0, (%esp)
; AVX_X86-NEXT: fldl (%esp)
; AVX_X86-NEXT: movl %ebp, %esp
; AVX_X86-NEXT: popl %ebp
; AVX_X86-NEXT: retl
entry:
%0 = load i32, i32* %a
%1 = sitofp i32 %0 to double
@ -44,6 +113,27 @@ define float @int_to_float_rr(i32 %a) {
; AVX: # BB#0: # %entry
; AVX-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0
; AVX-NEXT: retq
;
; SSE2_X86-LABEL: int_to_float_rr:
; SSE2_X86: # BB#0: # %entry
; SSE2_X86-NEXT: pushl %eax
; SSE2_X86-NEXT: .cfi_def_cfa_offset 8
; SSE2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE2_X86-NEXT: cvtsi2ssl %eax, %xmm0
; SSE2_X86-NEXT: movss %xmm0, (%esp)
; SSE2_X86-NEXT: flds (%esp)
; SSE2_X86-NEXT: popl %eax
; SSE2_X86-NEXT: retl
;
; AVX_X86-LABEL: int_to_float_rr:
; AVX_X86: # BB#0: # %entry
; AVX_X86-NEXT: pushl %eax
; AVX_X86-NEXT: .cfi_def_cfa_offset 8
; AVX_X86-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX_X86-NEXT: vmovss %xmm0, (%esp)
; AVX_X86-NEXT: flds (%esp)
; AVX_X86-NEXT: popl %eax
; AVX_X86-NEXT: retl
entry:
%0 = sitofp i32 %a to float
ret float %0
@ -59,6 +149,28 @@ define float @int_to_float_rm(i32* %a) {
; AVX: # BB#0: # %entry
; AVX-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
;
; SSE2_X86-LABEL: int_to_float_rm:
; SSE2_X86: # BB#0: # %entry
; SSE2_X86-NEXT: pushl %eax
; SSE2_X86-NEXT: .cfi_def_cfa_offset 8
; SSE2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE2_X86-NEXT: cvtsi2ssl (%eax), %xmm0
; SSE2_X86-NEXT: movss %xmm0, (%esp)
; SSE2_X86-NEXT: flds (%esp)
; SSE2_X86-NEXT: popl %eax
; SSE2_X86-NEXT: retl
;
; AVX_X86-LABEL: int_to_float_rm:
; AVX_X86: # BB#0: # %entry
; AVX_X86-NEXT: pushl %eax
; AVX_X86-NEXT: .cfi_def_cfa_offset 8
; AVX_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX_X86-NEXT: vcvtsi2ssl (%eax), %xmm0, %xmm0
; AVX_X86-NEXT: vmovss %xmm0, (%esp)
; AVX_X86-NEXT: flds (%esp)
; AVX_X86-NEXT: popl %eax
; AVX_X86-NEXT: retl
entry:
%0 = load i32, i32* %a
%1 = sitofp i32 %0 to float