[X86] Add some load folding patterns for cvtsi2ss/sd into intrinsic instructions.

llvm-svn: 332189
This commit is contained in:
Craig Topper 2018-05-13 01:54:33 +00:00
parent 28b85caea8
commit 38b713d4a7
3 changed files with 61 additions and 1 deletions

View File

@ -6936,20 +6936,40 @@ def : Pat<(v4f32 (X86Movss
(v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
(VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
(VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
(VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
(VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
(VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
(VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
(VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
(VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasAVX512]
// Convert float/double to signed/unsigned int 32/64 with truncation

View File

@ -1408,20 +1408,40 @@ def : Pat<(v4f32 (X86Movss
(v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
(VCVTSI642SSrr_Int VR128:$dst, GR64:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
(VCVTSI642SSrm_Int VR128:$dst, addr:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
(VCVTSI2SSrr_Int VR128:$dst, GR32:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
(VCVTSI2SSrm_Int VR128:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
(VCVTSI642SDrr_Int VR128:$dst, GR64:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
(VCVTSI642SDrm_Int VR128:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
(VCVTSI2SDrr_Int VR128:$dst, GR32:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
(VCVTSI2SDrm_Int VR128:$dst, addr:$src)>;
} // Predicates = [UseAVX]
let Predicates = [UseSSE2] in {
@ -1442,10 +1462,20 @@ def : Pat<(v2f64 (X86Movsd
(v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
(CVTSI642SDrr_Int VR128:$dst, GR64:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
(CVTSI642SDrm_Int VR128:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
(CVTSI2SDrr_Int VR128:$dst, GR32:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
(CVTSI2SDrm_Int VR128:$dst, addr:$src)>;
} // Predicates = [UseSSE2]
let Predicates = [UseSSE1] in {
@ -1454,10 +1484,20 @@ def : Pat<(v4f32 (X86Movss
(v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
(CVTSI642SSrr_Int VR128:$dst, GR64:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
(CVTSI642SSrm_Int VR128:$dst, addr:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
(CVTSI2SSrr_Int VR128:$dst, GR32:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
(CVTSI2SSrm_Int VR128:$dst, addr:$src)>;
} // Predicates = [UseSSE1]
let Predicates = [HasAVX, NoVLX] in {

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s
; RUN: llc < %s -disable-peephole -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s
define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_psll_dq_bs: