From bbf2ab990f91a629ffa0d2b2fcc54398e522f36f Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Thu, 16 Jun 2011 07:03:21 +0000 Subject: [PATCH] Add AVX support for fpextend. Original patch by Syoyo Fujita with more comments by me. llvm-svn: 133153 --- llvm/lib/Target/X86/X86InstrSSE.td | 19 +++++++++++++++++++ llvm/test/CodeGen/X86/avx-128.ll | 10 ++++++++++ 2 files changed, 29 insertions(+) diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index a38e3721f350..7774057d3da8 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -2063,6 +2063,15 @@ def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>; def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; +// FIXME: According to the Intel manual, DEST[127:64] <- SRC1[127:64], while +// in the non-AVX version bits 127:64 aren't touched. Find a better way to +// represent this instead of always zeroing SRC1. One possible solution is +// to represent the instruction w/ something similar to the "$src1 = $dst" +// constraint but without the tied operands. +def : Pat<(extloadf32 addr:$src), + (VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)), addr:$src)>, + Requires<[HasAVX, OptForSpeed]>; + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Load/Store XCSR register //===----------------------------------------------------------------------===// @@ -3589,6 +3598,16 @@ let Predicates = [HasSSE2] in def : Pat<(fextend (loadf32 addr:$src)), (CVTSS2SDrm addr:$src)>; +// FIXME: According to the Intel manual, DEST[127:64] <- SRC1[127:64], while +// in the non-AVX version bits 127:64 aren't touched. Find a better way to +// represent this instead of always zeroing SRC1. One possible solution is +// to represent the instruction w/ something similar to the "$src1 = $dst" +// constraint but without the tied operands. 
+let Predicates = [HasAVX] in + def : Pat<(fextend (loadf32 addr:$src)), + (VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)), + addr:$src)>; + // bit_convert let Predicates = [HasXMMInt] in { def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>; diff --git a/llvm/test/CodeGen/X86/avx-128.ll b/llvm/test/CodeGen/X86/avx-128.ll index 2bd3b5dfedd6..c29cb5d36c33 100644 --- a/llvm/test/CodeGen/X86/avx-128.ll +++ b/llvm/test/CodeGen/X86/avx-128.ll @@ -10,3 +10,13 @@ entry: ret void } +define void @fpext() nounwind uwtable { +entry: + %f = alloca float, align 4 + %d = alloca double, align 8 + %tmp = load float* %f, align 4 + ; CHECK: vcvtss2sd + %conv = fpext float %tmp to double + store double %conv, double* %d, align 8 + ret void +}