[X86] Add some load folding patterns for cvtsi2ss/sd into intrinsic instructions.

llvm-svn: 332189
This commit is contained in:
Craig Topper 2018-05-13 01:54:33 +00:00
parent 28b85caea8
commit 38b713d4a7
3 changed files with 61 additions and 1 deletions

View File

@ -6936,20 +6936,40 @@ def : Pat<(v4f32 (X86Movss
(v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))), (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
(VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>; (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
(VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v4f32 (X86Movss def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst), (v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))), (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
(VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>; (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
(VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst), (v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))), (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
(VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>; (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
(VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst), (v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))), (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
(VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>; (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
(VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasAVX512] } // Predicates = [HasAVX512]
// Convert float/double to signed/unsigned int 32/64 with truncation // Convert float/double to signed/unsigned int 32/64 with truncation

View File

@ -1408,20 +1408,40 @@ def : Pat<(v4f32 (X86Movss
(v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))), (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
(VCVTSI642SSrr_Int VR128:$dst, GR64:$src)>; (VCVTSI642SSrr_Int VR128:$dst, GR64:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
(VCVTSI642SSrm_Int VR128:$dst, addr:$src)>;
def : Pat<(v4f32 (X86Movss def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst), (v4f32 VR128:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))), (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
(VCVTSI2SSrr_Int VR128:$dst, GR32:$src)>; (VCVTSI2SSrr_Int VR128:$dst, GR32:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
(VCVTSI2SSrm_Int VR128:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst), (v2f64 VR128:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))), (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
(VCVTSI642SDrr_Int VR128:$dst, GR64:$src)>; (VCVTSI642SDrr_Int VR128:$dst, GR64:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
(VCVTSI642SDrm_Int VR128:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst), (v2f64 VR128:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))), (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
(VCVTSI2SDrr_Int VR128:$dst, GR32:$src)>; (VCVTSI2SDrr_Int VR128:$dst, GR32:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
(VCVTSI2SDrm_Int VR128:$dst, addr:$src)>;
} // Predicates = [UseAVX] } // Predicates = [UseAVX]
let Predicates = [UseSSE2] in { let Predicates = [UseSSE2] in {
@ -1442,10 +1462,20 @@ def : Pat<(v2f64 (X86Movsd
(v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))), (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
(CVTSI642SDrr_Int VR128:$dst, GR64:$src)>; (CVTSI642SDrr_Int VR128:$dst, GR64:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
(CVTSI642SDrm_Int VR128:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst), (v2f64 VR128:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))), (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
(CVTSI2SDrr_Int VR128:$dst, GR32:$src)>; (CVTSI2SDrr_Int VR128:$dst, GR32:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
(v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
(CVTSI2SDrm_Int VR128:$dst, addr:$src)>;
} // Predicates = [UseSSE2] } // Predicates = [UseSSE2]
let Predicates = [UseSSE1] in { let Predicates = [UseSSE1] in {
@ -1454,10 +1484,20 @@ def : Pat<(v4f32 (X86Movss
(v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))), (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
(CVTSI642SSrr_Int VR128:$dst, GR64:$src)>; (CVTSI642SSrr_Int VR128:$dst, GR64:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
(CVTSI642SSrm_Int VR128:$dst, addr:$src)>;
def : Pat<(v4f32 (X86Movss def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst), (v4f32 VR128:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))), (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
(CVTSI2SSrr_Int VR128:$dst, GR32:$src)>; (CVTSI2SSrr_Int VR128:$dst, GR32:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
(v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
(CVTSI2SSrm_Int VR128:$dst, addr:$src)>;
} // Predicates = [UseSSE1] } // Predicates = [UseSSE1]
let Predicates = [HasAVX, NoVLX] in { let Predicates = [HasAVX, NoVLX] in {

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s ; RUN: llc < %s -disable-peephole -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s
define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) { define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_psll_dq_bs: ; CHECK-LABEL: test_x86_sse2_psll_dq_bs: