Enhance PR11334 fix to support extload from v2f32/v4f32

- Fix a remaining issue of PR11674 as well

llvm-svn: 163528
commit 400f7ef871 (parent 256ea4e4f3)
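The shape being optimized is an fpext fed by a vector load, which the DAG can represent as a single extending load (extload) once that fold is allowed. A minimal IR sketch of that shape (it mirrors the updated vec_fpext.ll test below; the function name is illustrative, not part of the patch):

; illustrative only -- same shape as fpext_frommem in the test below
define void @fpext2(<2 x float>* %in, <2 x double>* %out) {
entry:
  %v = load <2 x float>* %in, align 8
  %w = fpext <2 x float> %v to <2 x double>   ; load + fpext fuse into one extload node
  store <2 x double> %w, <2 x double>* %out, align 1
  ret void
}

Previously this scalarized into two cvtss2sd instructions (the old TODO in the test); with this commit it selects to a single cvtps2pd with the load folded into the memory operand.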
lib/Target/X86/X86ISelLowering.cpp

@@ -932,6 +932,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
     setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+
+    setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal);
   }
 
   if (Subtarget->hasSSE41()) {
@@ -1043,6 +1045,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
     setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
+
+    setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, Legal);
 
     setOperationAction(ISD::SRL, MVT::v16i16, Custom);
     setOperationAction(ISD::SRL, MVT::v32i8, Custom);
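The two setLoadExtAction calls are the core of the fix: an extending vector load is not legal by default and gets broken apart during legalization, so declaring EXTLOAD from v2f32 (under SSE2) and v4f32 (under AVX) Legal is what lets the fused load+extend node survive to instruction selection. A sketch of the AVX case, mirroring fpext_frommem4 in the test below (function name illustrative):

; illustrative only -- with -mattr=+avx this should select one vcvtps2pd (%mem), %ymm
define <4 x double> @fpext4(<4 x float>* %in) {
entry:
  %v = load <4 x float>* %in
  %w = fpext <4 x float> %v to <4 x double>
  ret <4 x double> %w
}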
lib/Target/X86/X86InstrFragmentsSIMD.td

@@ -240,6 +240,10 @@ def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
 def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
 def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
 
+// 128-/256-bit extload pattern fragments
+def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>;
+def extloadv4f32 : PatFrag<(ops node:$ptr), (v4f64 (extloadvf32 node:$ptr))>;
+
 // Like 'store', but always requires 128-bit vector alignment.
 def alignedstore : PatFrag<(ops node:$val, node:$ptr),
                            (store node:$val, node:$ptr), [{
lib/Target/X86/X86InstrSSE.td

@@ -2007,10 +2007,10 @@ def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "vcvtps2pd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
                      IIC_SSE_CVT_PD_RR>, TB, VEX;
-let neverHasSideEffects = 1, mayLoad = 1 in
 def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
-                    "vcvtps2pd\t{$src, $dst|$dst, $src}", [],
-                    IIC_SSE_CVT_PD_RM>, TB, VEX;
+                    "vcvtps2pd\t{$src, $dst|$dst, $src}",
+                    [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
+                    IIC_SSE_CVT_PD_RM>, TB, VEX;
 def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                      "vcvtps2pd\t{$src, $dst|$dst, $src}",
                      [(set VR256:$dst,
@@ -2028,10 +2028,10 @@ def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    "cvtps2pd\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
                    IIC_SSE_CVT_PD_RR>, TB;
-let neverHasSideEffects = 1, mayLoad = 1 in
 def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
-                   "cvtps2pd\t{$src, $dst|$dst, $src}", [],
-                   IIC_SSE_CVT_PD_RM>, TB;
+                   "cvtps2pd\t{$src, $dst|$dst, $src}",
+                   [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
+                   IIC_SSE_CVT_PD_RM>, TB;
 }
 
 // Convert Packed DW Integers to Packed Double FP
@@ -2134,7 +2134,7 @@ let Predicates = [HasAVX] in {
             (VCVTPS2PDrr VR128:$src)>;
   def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
             (VCVTPS2PDYrr VR128:$src)>;
-  def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
+  def : Pat<(v4f64 (extloadv4f32 addr:$src)),
            (VCVTPS2PDYrm addr:$src)>;
 }
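The pattern update in the hunk above is forced by the lowering change: once EXTLOAD from v4f32 is legal, the DAG combiner folds (fextend (load ...)) into a single extload node before instruction selection, so a pattern written as fextend (loadv4f32 ...) would no longer see anything to match; it has to match the extload directly. Wider sources still work by splitting, e.g. an <8 x float> extend becomes two v4f32 extloads, as fpext_frommem8 below exercises (sketch; function name illustrative):

; illustrative only -- with -mattr=+avx this should select two vcvtps2pd loads,
; one from (%mem) and one from 16(%mem)
define <8 x double> @fpext8(<8 x float>* %in) {
entry:
  %v = load <8 x float>* %in
  %w = fpext <8 x float> %v to <8 x double>
  ret <8 x double> %w
}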
test/CodeGen/X86/vec_fpext.ll

@@ -1,14 +1,38 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse41,-avx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck --check-prefix=AVX %s
 
 ; PR11674
 define void @fpext_frommem(<2 x float>* %in, <2 x double>* %out) {
 entry:
-; TODO: We should be able to generate cvtps2pd for the load.
-; For now, just check that we generate something sane.
-; CHECK: cvtss2sd
-; CHECK: cvtss2sd
+; CHECK: cvtps2pd (%{{.+}}), %xmm{{[0-9]+}}
+; AVX: vcvtps2pd (%{{.+}}), %xmm{{[0-9]+}}
   %0 = load <2 x float>* %in, align 8
   %1 = fpext <2 x float> %0 to <2 x double>
   store <2 x double> %1, <2 x double>* %out, align 1
   ret void
 }
+
+define void @fpext_frommem4(<4 x float>* %in, <4 x double>* %out) {
+entry:
+; CHECK: cvtps2pd (%{{.+}}), %xmm{{[0-9]+}}
+; CHECK: cvtps2pd 8(%{{.+}}), %xmm{{[0-9]+}}
+; AVX: vcvtps2pd (%{{.+}}), %ymm{{[0-9]+}}
+  %0 = load <4 x float>* %in
+  %1 = fpext <4 x float> %0 to <4 x double>
+  store <4 x double> %1, <4 x double>* %out, align 1
+  ret void
+}
+
+define void @fpext_frommem8(<8 x float>* %in, <8 x double>* %out) {
+entry:
+; CHECK: cvtps2pd (%{{.+}}), %xmm{{[0-9]+}}
+; CHECK: cvtps2pd 8(%{{.+}}), %xmm{{[0-9]+}}
+; CHECK: cvtps2pd 16(%{{.+}}), %xmm{{[0-9]+}}
+; CHECK: cvtps2pd 24(%{{.+}}), %xmm{{[0-9]+}}
+; AVX: vcvtps2pd (%{{.+}}), %ymm{{[0-9]+}}
+; AVX: vcvtps2pd 16(%{{.+}}), %ymm{{[0-9]+}}
+  %0 = load <8 x float>* %in
+  %1 = fpext <8 x float> %0 to <8 x double>
+  store <8 x double> %1, <8 x double>* %out, align 1
+  ret void
+}