forked from OSchip/llvm-project
AVX-512: SINT_TO_FP cost model and some bugfixes
Checked some corner cases, for example the translation of <8 x i1> to <8 x double>.
llvm-svn: 221883
This commit is contained in:
parent
3217c6a52c
commit
d5e95b57e0
|
@ -1410,6 +1410,10 @@ void X86TargetLowering::resetOperationActions() {
|
|||
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Promote);
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Promote);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
|
||||
|
@ -13209,10 +13213,18 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
|
|||
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
MVT SrcVT = Op.getOperand(0).getSimpleValueType();
|
||||
SDLoc dl(Op);
|
||||
|
||||
if (SrcVT.isVector())
|
||||
if (SrcVT.isVector()) {
|
||||
if (SrcVT.getVectorElementType() == MVT::i1) {
|
||||
MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
|
||||
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
|
||||
DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT,
|
||||
Op.getOperand(0)));
|
||||
}
|
||||
return SDValue();
|
||||
|
||||
}
|
||||
|
||||
assert(SrcVT <= MVT::i64 && SrcVT >= MVT::i16 &&
|
||||
"Unknown SINT_TO_FP to lower!");
|
||||
|
||||
|
@ -13225,7 +13237,6 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
|
|||
return Op;
|
||||
}
|
||||
|
||||
SDLoc dl(Op);
|
||||
unsigned Size = SrcVT.getSizeInBits()/8;
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false);
|
||||
|
@ -15455,8 +15466,11 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, const X86Subtarget *Subtarget
|
|||
if (NumElts != 8 && NumElts != 16)
|
||||
return SDValue();
|
||||
|
||||
if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
|
||||
if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) {
|
||||
if (In.getOpcode() == X86ISD::VSEXT || In.getOpcode() == X86ISD::VZEXT)
|
||||
return DAG.getNode(In.getOpcode(), dl, VT, In.getOperand(0));
|
||||
return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
|
||||
}
|
||||
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
assert (InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
|
||||
|
|
|
@ -618,6 +618,13 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
|
|||
{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v16i32, 3 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v16i32, 3 },
|
||||
|
||||
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 },
|
||||
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 },
|
||||
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 },
|
||||
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 },
|
||||
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 },
|
||||
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
|
||||
};
|
||||
|
||||
if (ST->hasAVX512()) {
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
|
||||
; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=knl -cost-model -analyze < %s | FileCheck --check-prefix=AVX512F %s
|
||||
|
||||
define <2 x double> @sitofpv2i8v2double(<2 x i8> %a) {
|
||||
; SSE2: sitofpv2i8v2double
|
||||
|
@ -279,3 +280,47 @@ define <32 x float> @sitofpv32i64v32float(<32 x i64> %a) {
|
|||
%1 = sitofp <32 x i64> %a to <32 x float>
|
||||
ret <32 x float> %1
|
||||
}
|
||||
|
||||
; AVX512F-LABEL: sitofp_16i8_float
|
||||
; AVX512F: cost of 2 {{.*}} sitofp
|
||||
define <16 x float> @sitofp_16i8_float(<16 x i8> %a) {
|
||||
%1 = sitofp <16 x i8> %a to <16 x float>
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
define <16 x float> @sitofp_16i16_float(<16 x i16> %a) {
|
||||
; AVX512F-LABEL: sitofp_16i16_float
|
||||
; AVX512F: cost of 2 {{.*}} sitofp
|
||||
%1 = sitofp <16 x i16> %a to <16 x float>
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
; AVX512F-LABEL: sitofp_8i8_double
|
||||
; AVX512F: cost of 2 {{.*}} sitofp
|
||||
define <8 x double> @sitofp_8i8_double(<8 x i8> %a) {
|
||||
%1 = sitofp <8 x i8> %a to <8 x double>
|
||||
ret <8 x double> %1
|
||||
}
|
||||
|
||||
; AVX512F-LABEL: sitofp_8i16_double
|
||||
; AVX512F: cost of 2 {{.*}} sitofp
|
||||
define <8 x double> @sitofp_8i16_double(<8 x i16> %a) {
|
||||
%1 = sitofp <8 x i16> %a to <8 x double>
|
||||
ret <8 x double> %1
|
||||
}
|
||||
|
||||
; AVX512F-LABEL: sitofp_8i1_double
|
||||
; AVX512F: cost of 4 {{.*}} sitofp
|
||||
define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
|
||||
%cmpres = fcmp ogt <8 x double> %a, zeroinitializer
|
||||
%1 = sitofp <8 x i1> %cmpres to <8 x double>
|
||||
ret <8 x double> %1
|
||||
}
|
||||
|
||||
; AVX512F-LABEL: sitofp_16i1_float
|
||||
; AVX512F: cost of 3 {{.*}} sitofp
|
||||
define <16 x float> @sitofp_16i1_float(<16 x float> %a) {
|
||||
%cmpres = fcmp ogt <16 x float> %a, zeroinitializer
|
||||
%1 = sitofp <16 x i1> %cmpres to <16 x float>
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
|
|
@ -255,3 +255,56 @@ define double @uitofp03(i32 %a) nounwind {
|
|||
%b = uitofp i32 %a to double
|
||||
ret double %b
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @sitofp_16i1_float
|
||||
; CHECK: vpbroadcastd
|
||||
; CHECK: vcvtdq2ps
|
||||
define <16 x float> @sitofp_16i1_float(<16 x i32> %a) {
|
||||
%mask = icmp slt <16 x i32> %a, zeroinitializer
|
||||
%1 = sitofp <16 x i1> %mask to <16 x float>
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @sitofp_16i8_float
|
||||
; CHECK: vpmovsxbd
|
||||
; CHECK: vcvtdq2ps
|
||||
define <16 x float> @sitofp_16i8_float(<16 x i8> %a) {
|
||||
%1 = sitofp <16 x i8> %a to <16 x float>
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @sitofp_16i16_float
|
||||
; CHECK: vpmovsxwd
|
||||
; CHECK: vcvtdq2ps
|
||||
define <16 x float> @sitofp_16i16_float(<16 x i16> %a) {
|
||||
%1 = sitofp <16 x i16> %a to <16 x float>
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @sitofp_8i16_double
|
||||
; CHECK: vpmovsxwd
|
||||
; CHECK: vcvtdq2pd
|
||||
define <8 x double> @sitofp_8i16_double(<8 x i16> %a) {
|
||||
%1 = sitofp <8 x i16> %a to <8 x double>
|
||||
ret <8 x double> %1
|
||||
}
|
||||
|
||||
; CHECK-LABEL: sitofp_8i8_double
|
||||
; CHECK: vpmovzxwd
|
||||
; CHECK: vpslld
|
||||
; CHECK: vpsrad
|
||||
; CHECK: vcvtdq2pd
|
||||
define <8 x double> @sitofp_8i8_double(<8 x i8> %a) {
|
||||
%1 = sitofp <8 x i8> %a to <8 x double>
|
||||
ret <8 x double> %1
|
||||
}
|
||||
|
||||
|
||||
; CHECK-LABEL: @sitofp_8i1_double
|
||||
; CHECK: vpbroadcastq
|
||||
; CHECK: vcvtdq2pd
|
||||
define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
|
||||
%cmpres = fcmp ogt <8 x double> %a, zeroinitializer
|
||||
%1 = sitofp <8 x i1> %cmpres to <8 x double>
|
||||
ret <8 x double> %1
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue