Enhance PR11334 fix to support extload from v2f32/v4f32

- Fix an remaining issue of PR11674 as well

llvm-svn: 163528
This commit is contained in:
Michael Liao 2012-09-10 18:33:51 +00:00
parent 256ea4e4f3
commit 400f7ef871
4 changed files with 43 additions and 11 deletions

View File

@ -932,6 +932,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal);
}
if (Subtarget->hasSSE41()) {
@ -1043,6 +1045,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, Legal);
setOperationAction(ISD::SRL, MVT::v16i16, Custom);
setOperationAction(ISD::SRL, MVT::v32i8, Custom);

View File

@ -240,6 +240,10 @@ def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
// 128-/256-bit extload pattern fragments
def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>;
def extloadv4f32 : PatFrag<(ops node:$ptr), (v4f64 (extloadvf32 node:$ptr))>;
// Like 'store', but always requires 128-bit vector alignment.
def alignedstore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{

View File

@ -2007,10 +2007,10 @@ def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
IIC_SSE_CVT_PD_RR>, TB, VEX;
let neverHasSideEffects = 1, mayLoad = 1 in
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RM>, TB, VEX;
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
IIC_SSE_CVT_PD_RM>, TB, VEX;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
@ -2028,10 +2028,10 @@ def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
IIC_SSE_CVT_PD_RR>, TB;
let neverHasSideEffects = 1, mayLoad = 1 in
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}", [],
IIC_SSE_CVT_PD_RM>, TB;
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
IIC_SSE_CVT_PD_RM>, TB;
}
// Convert Packed DW Integers to Packed Double FP
@ -2134,7 +2134,7 @@ let Predicates = [HasAVX] in {
(VCVTPS2PDrr VR128:$src)>;
def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
(VCVTPS2PDYrr VR128:$src)>;
def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
def : Pat<(v4f64 (extloadv4f32 addr:$src)),
(VCVTPS2PDYrm addr:$src)>;
}

View File

@ -1,14 +1,38 @@
; RUN: llc < %s -march=x86 -mattr=+sse41,-avx | FileCheck %s
; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck --check-prefix=AVX %s
; PR11674
define void @fpext_frommem(<2 x float>* %in, <2 x double>* %out) {
entry:
; TODO: We should be able to generate cvtps2pd for the load.
; For now, just check that we generate something sane.
; CHECK: cvtss2sd
; CHECK: cvtss2sd
; CHECK: cvtps2pd (%{{.+}}), %xmm{{[0-9]+}}
; AVX: vcvtps2pd (%{{.+}}), %xmm{{[0-9]+}}
%0 = load <2 x float>* %in, align 8
%1 = fpext <2 x float> %0 to <2 x double>
store <2 x double> %1, <2 x double>* %out, align 1
ret void
}
define void @fpext_frommem4(<4 x float>* %in, <4 x double>* %out) {
entry:
; CHECK: cvtps2pd (%{{.+}}), %xmm{{[0-9]+}}
; CHECK: cvtps2pd 8(%{{.+}}), %xmm{{[0-9]+}}
; AVX: vcvtps2pd (%{{.+}}), %ymm{{[0-9]+}}
%0 = load <4 x float>* %in
%1 = fpext <4 x float> %0 to <4 x double>
store <4 x double> %1, <4 x double>* %out, align 1
ret void
}
define void @fpext_frommem8(<8 x float>* %in, <8 x double>* %out) {
entry:
; CHECK: cvtps2pd (%{{.+}}), %xmm{{[0-9]+}}
; CHECK: cvtps2pd 8(%{{.+}}), %xmm{{[0-9]+}}
; CHECK: cvtps2pd 16(%{{.+}}), %xmm{{[0-9]+}}
; CHECK: cvtps2pd 24(%{{.+}}), %xmm{{[0-9]+}}
; AVX: vcvtps2pd (%{{.+}}), %ymm{{[0-9]+}}
; AVX: vcvtps2pd 16(%{{.+}}), %ymm{{[0-9]+}}
%0 = load <8 x float>* %in
%1 = fpext <8 x float> %0 to <8 x double>
store <8 x double> %1, <8 x double>* %out, align 1
ret void
}