forked from OSchip/llvm-project
AArch64: improve vector [su]itofp handling.
This somehow got missed in the AArch64 merge, so this change should fix a performance regression present since 3.4. llvm-svn: 210984
This commit is contained in:
parent
e876f5b61e
commit
ef0d760cd9
|
@ -1440,32 +1440,23 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
|
|||
SDValue In = Op.getOperand(0);
|
||||
EVT InVT = In.getValueType();
|
||||
|
||||
// v2i32 to v2f32 is legal.
|
||||
if (VT == MVT::v2f32 && InVT == MVT::v2i32)
|
||||
return Op;
|
||||
|
||||
// This function only handles v2f64 outputs.
|
||||
if (VT == MVT::v2f64) {
|
||||
// Extend the input argument to a v2i64 that we can feed into the
|
||||
// floating point conversion. Zero or sign extend based on whether
|
||||
// we're doing a signed or unsigned float conversion.
|
||||
unsigned Opc =
|
||||
Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
|
||||
assert(Op.getNumOperands() == 1 && "FP conversions take one argument");
|
||||
SDValue Promoted = DAG.getNode(Opc, dl, MVT::v2i64, Op.getOperand(0));
|
||||
return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Promoted);
|
||||
if (VT.getSizeInBits() < InVT.getSizeInBits()) {
|
||||
MVT CastVT =
|
||||
MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
|
||||
InVT.getVectorNumElements());
|
||||
In = DAG.getNode(Op.getOpcode(), dl, CastVT, In);
|
||||
return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0));
|
||||
}
|
||||
|
||||
// Scalarize v2i64 to v2f32 conversions.
|
||||
std::vector<SDValue> BuildVectorOps;
|
||||
for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
|
||||
SDValue Sclr = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, In,
|
||||
DAG.getConstant(i, MVT::i64));
|
||||
Sclr = DAG.getNode(Op->getOpcode(), dl, MVT::f32, Sclr);
|
||||
BuildVectorOps.push_back(Sclr);
|
||||
if (VT.getSizeInBits() > InVT.getSizeInBits()) {
|
||||
unsigned CastOpc =
|
||||
Op.getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
|
||||
EVT CastVT = VT.changeVectorElementTypeToInteger();
|
||||
In = DAG.getNode(CastOpc, dl, CastVT, In);
|
||||
return DAG.getNode(Op.getOpcode(), dl, VT, In);
|
||||
}
|
||||
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, BuildVectorOps);
|
||||
return Op;
|
||||
}
|
||||
|
||||
SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
|
||||
|
|
|
@ -306,28 +306,47 @@ unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst,
|
|||
static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = {
|
||||
// LowerVectorINT_TO_FP:
|
||||
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
|
||||
|
||||
// Complex: to v2f32
|
||||
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
|
||||
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
|
||||
|
||||
// Complex: to v4f32
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
|
||||
|
||||
// Complex: to v2f64
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
|
||||
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
|
||||
|
||||
|
||||
// LowerVectorFP_TO_INT
|
||||
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
|
||||
{ ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
|
||||
{ ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 1 },
|
||||
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 },
|
||||
{ ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 4 },
|
||||
{ ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 4 },
|
||||
{ ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 4 },
|
||||
{ ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 4 },
|
||||
{ ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 4 },
|
||||
{ ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 4 },
|
||||
|
||||
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
|
||||
{ ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
|
||||
{ ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
|
||||
{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
|
||||
{ ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
|
||||
{ ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
|
||||
};
|
||||
|
||||
int Idx = ConvertCostTableLookup<MVT>(
|
||||
|
|
|
@ -1,29 +0,0 @@
|
|||
; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
|
||||
|
||||
define <2 x double> @f1(<2 x i32> %v) nounwind readnone {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: sshll.2d v0, v0, #0
|
||||
; CHECK-NEXT: scvtf.2d v0, v0
|
||||
; CHECK-NEXT: ret
|
||||
%conv = sitofp <2 x i32> %v to <2 x double>
|
||||
ret <2 x double> %conv
|
||||
}
|
||||
define <2 x double> @f2(<2 x i32> %v) nounwind readnone {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: ushll.2d v0, v0, #0
|
||||
; CHECK-NEXT: ucvtf.2d v0, v0
|
||||
; CHECK-NEXT: ret
|
||||
%conv = uitofp <2 x i32> %v to <2 x double>
|
||||
ret <2 x double> %conv
|
||||
}
|
||||
|
||||
; CHECK: autogen_SD19655
|
||||
; CHECK: scvtf
|
||||
; CHECK: ret
|
||||
define void @autogen_SD19655(<2 x i64>* %addr, <2 x float>* %addrfloat) {
|
||||
%T = load <2 x i64>* %addr
|
||||
%F = sitofp <2 x i64> %T to <2 x float>
|
||||
store <2 x float> %F, <2 x float>* %addrfloat
|
||||
ret void
|
||||
}
|
||||
|
|
@ -0,0 +1,164 @@
|
|||
; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
|
||||
|
||||
; Loads a <2 x i64>, converts it with sitofp to <2 x float> and stores the
; result. The output for this function is expected to contain an scvtf before
; its ret; the label pattern keeps those two patterns from matching the output
; of a later function.
; CHECK-LABEL: autogen_SD19655:
|
||||
; CHECK: scvtf
|
||||
; CHECK: ret
|
||||
define void @autogen_SD19655(<2 x i64>* %addr, <2 x float>* %addrfloat) {
|
||||
%T = load <2 x i64>* %addr
|
||||
%F = sitofp <2 x i64> %T to <2 x float>
|
||||
store <2 x float> %F, <2 x float>* %addrfloat
|
||||
ret void
|
||||
}
|
||||
|
||||
; Signed <2 x i32> to <2 x double>: the expected output widens the lanes with
; sshll.2d, converts with scvtf.2d on the very next line, and then returns.
define <2 x double> @test_signed_v2i32_to_v2f64(<2 x i32> %v) nounwind readnone {
|
||||
; CHECK-LABEL: test_signed_v2i32_to_v2f64:
|
||||
; CHECK: sshll.2d [[VAL64:v[0-9]+]], v0, #0
|
||||
; CHECK-NEXT: scvtf.2d v0, [[VAL64]]
|
||||
; CHECK-NEXT: ret
|
||||
%conv = sitofp <2 x i32> %v to <2 x double>
|
||||
ret <2 x double> %conv
|
||||
}
|
||||
|
||||
; Unsigned <2 x i32> to <2 x double>: the expected output widens the lanes with
; ushll.2d, converts with ucvtf.2d on the very next line, and then returns.
define <2 x double> @test_unsigned_v2i32_to_v2f64(<2 x i32> %v) nounwind readnone {
|
||||
; CHECK-LABEL: test_unsigned_v2i32_to_v2f64:
|
||||
; CHECK: ushll.2d [[VAL64:v[0-9]+]], v0, #0
|
||||
; CHECK-NEXT: ucvtf.2d v0, [[VAL64]]
|
||||
; CHECK-NEXT: ret
|
||||
%conv = uitofp <2 x i32> %v to <2 x double>
|
||||
ret <2 x double> %conv
|
||||
}
|
||||
|
||||
; Signed <2 x i16> to <2 x double>: the expected output sign-extends the low
; 16 bits of each 32-bit lane (shl.2s / sshr.2s by 16), widens with sshll.2d,
; and converts with scvtf.2d.
define <2 x double> @test_signed_v2i16_to_v2f64(<2 x i16> %v) nounwind readnone {
|
||||
; CHECK-LABEL: test_signed_v2i16_to_v2f64:
|
||||
; CHECK: shl.2s [[TMP:v[0-9]+]], v0, #16
|
||||
; CHECK: sshr.2s [[VAL32:v[0-9]+]], [[TMP]], #16
|
||||
; CHECK: sshll.2d [[VAL64:v[0-9]+]], [[VAL32]], #0
|
||||
; CHECK: scvtf.2d v0, [[VAL64]]
|
||||
|
||||
%conv = sitofp <2 x i16> %v to <2 x double>
|
||||
ret <2 x double> %conv
|
||||
}
|
||||
; Unsigned <2 x i16> to <2 x double>: the expected output masks each 32-bit
; lane down to its low 16 bits (movi + and.8b), widens with ushll.2d, and
; converts with ucvtf.2d.
define <2 x double> @test_unsigned_v2i16_to_v2f64(<2 x i16> %v) nounwind readnone {
|
||||
; CHECK-LABEL: test_unsigned_v2i16_to_v2f64:
|
||||
; CHECK: movi d[[MASK:[0-9]+]], #0x00ffff0000ffff
|
||||
; CHECK: and.8b [[VAL32:v[0-9]+]], v0, v[[MASK]]
|
||||
; CHECK: ushll.2d [[VAL64:v[0-9]+]], [[VAL32]], #0
|
||||
; CHECK: ucvtf.2d v0, [[VAL64]]
|
||||
|
||||
%conv = uitofp <2 x i16> %v to <2 x double>
|
||||
ret <2 x double> %conv
|
||||
}
|
||||
|
||||
; Signed <2 x i8> to <2 x double>: the expected output sign-extends the low
; 8 bits of each 32-bit lane (shl.2s / sshr.2s by 24), widens with sshll.2d,
; and converts with scvtf.2d.
define <2 x double> @test_signed_v2i8_to_v2f64(<2 x i8> %v) nounwind readnone {
|
||||
; CHECK-LABEL: test_signed_v2i8_to_v2f64:
|
||||
; CHECK: shl.2s [[TMP:v[0-9]+]], v0, #24
|
||||
; CHECK: sshr.2s [[VAL32:v[0-9]+]], [[TMP]], #24
|
||||
; CHECK: sshll.2d [[VAL64:v[0-9]+]], [[VAL32]], #0
|
||||
; CHECK: scvtf.2d v0, [[VAL64]]
|
||||
|
||||
%conv = sitofp <2 x i8> %v to <2 x double>
|
||||
ret <2 x double> %conv
|
||||
}
|
||||
; Unsigned <2 x i8> to <2 x double>: the expected output masks each 32-bit
; lane down to its low 8 bits (movi + and.8b), widens with ushll.2d, and
; converts with ucvtf.2d.
define <2 x double> @test_unsigned_v2i8_to_v2f64(<2 x i8> %v) nounwind readnone {
|
||||
; CHECK-LABEL: test_unsigned_v2i8_to_v2f64:
|
||||
; CHECK: movi d[[MASK:[0-9]+]], #0x0000ff000000ff
|
||||
; CHECK: and.8b [[VAL32:v[0-9]+]], v0, v[[MASK]]
|
||||
; CHECK: ushll.2d [[VAL64:v[0-9]+]], [[VAL32]], #0
|
||||
; CHECK: ucvtf.2d v0, [[VAL64]]
|
||||
|
||||
%conv = uitofp <2 x i8> %v to <2 x double>
|
||||
ret <2 x double> %conv
|
||||
}
|
||||
|
||||
; Signed <2 x i64> to <2 x float>: the expected output converts at 64-bit
; width with scvtf.2d and then narrows the result to .2s with fcvtn.
define <2 x float> @test_signed_v2i64_to_v2f32(<2 x i64> %v) nounwind readnone {
|
||||
; CHECK-LABEL: test_signed_v2i64_to_v2f32:
|
||||
; CHECK: scvtf.2d [[VAL64:v[0-9]+]], v0
|
||||
; CHECK: fcvtn v0.2s, [[VAL64]].2d
|
||||
|
||||
%conv = sitofp <2 x i64> %v to <2 x float>
|
||||
ret <2 x float> %conv
|
||||
}
|
||||
; Unsigned <2 x i64> to <2 x float>: the expected output converts at 64-bit
; width with ucvtf.2d and then narrows the result to .2s with fcvtn.
define <2 x float> @test_unsigned_v2i64_to_v2f32(<2 x i64> %v) nounwind readnone {
|
||||
; CHECK-LABEL: test_unsigned_v2i64_to_v2f32:
|
||||
; CHECK: ucvtf.2d [[VAL64:v[0-9]+]], v0
|
||||
; CHECK: fcvtn v0.2s, [[VAL64]].2d
|
||||
|
||||
%conv = uitofp <2 x i64> %v to <2 x float>
|
||||
ret <2 x float> %conv
|
||||
}
|
||||
|
||||
; Signed <2 x i16> to <2 x float>: the expected output sign-extends the low
; 16 bits of each 32-bit lane (shl.2s / sshr.2s by 16) and converts with
; scvtf.2s.
define <2 x float> @test_signed_v2i16_to_v2f32(<2 x i16> %v) nounwind readnone {
|
||||
; CHECK-LABEL: test_signed_v2i16_to_v2f32:
|
||||
; CHECK: shl.2s [[TMP:v[0-9]+]], v0, #16
|
||||
; CHECK: sshr.2s [[VAL32:v[0-9]+]], [[TMP]], #16
|
||||
; CHECK: scvtf.2s v0, [[VAL32]]
|
||||
|
||||
%conv = sitofp <2 x i16> %v to <2 x float>
|
||||
ret <2 x float> %conv
|
||||
}
|
||||
; Unsigned <2 x i16> to <2 x float>: the expected output masks each 32-bit
; lane down to its low 16 bits (movi + and.8b) and converts with ucvtf.2s.
define <2 x float> @test_unsigned_v2i16_to_v2f32(<2 x i16> %v) nounwind readnone {
|
||||
; CHECK-LABEL: test_unsigned_v2i16_to_v2f32:
|
||||
; CHECK: movi d[[MASK:[0-9]+]], #0x00ffff0000ffff
|
||||
; CHECK: and.8b [[VAL32:v[0-9]+]], v0, v[[MASK]]
|
||||
; CHECK: ucvtf.2s v0, [[VAL32]]
|
||||
|
||||
%conv = uitofp <2 x i16> %v to <2 x float>
|
||||
ret <2 x float> %conv
|
||||
}
|
||||
|
||||
; Signed <2 x i8> to <2 x float>: the expected output sign-extends the low
; 8 bits of each 32-bit lane (shl.2s / sshr.2s by 24) and converts with
; scvtf.2s.
define <2 x float> @test_signed_v2i8_to_v2f32(<2 x i8> %v) nounwind readnone {
|
||||
; CHECK-LABEL: test_signed_v2i8_to_v2f32:
|
||||
; CHECK: shl.2s [[TMP:v[0-9]+]], v0, #24
|
||||
; CHECK: sshr.2s [[VAL32:v[0-9]+]], [[TMP]], #24
|
||||
; CHECK: scvtf.2s v0, [[VAL32]]
|
||||
|
||||
%conv = sitofp <2 x i8> %v to <2 x float>
|
||||
ret <2 x float> %conv
|
||||
}
|
||||
; Unsigned <2 x i8> to <2 x float>: the expected output masks each 32-bit
; lane down to its low 8 bits (movi + and.8b) and converts with ucvtf.2s.
define <2 x float> @test_unsigned_v2i8_to_v2f32(<2 x i8> %v) nounwind readnone {
|
||||
; CHECK-LABEL: test_unsigned_v2i8_to_v2f32:
|
||||
; CHECK: movi d[[MASK:[0-9]+]], #0x0000ff000000ff
|
||||
; CHECK: and.8b [[VAL32:v[0-9]+]], v0, v[[MASK]]
|
||||
; CHECK: ucvtf.2s v0, [[VAL32]]
|
||||
|
||||
%conv = uitofp <2 x i8> %v to <2 x float>
|
||||
ret <2 x float> %conv
|
||||
}
|
||||
|
||||
; Signed <4 x i16> to <4 x float>: the expected output widens the lanes with
; sshll.4s and converts with scvtf.4s.
define <4 x float> @test_signed_v4i16_to_v4f32(<4 x i16> %v) nounwind readnone {
|
||||
; CHECK-LABEL: test_signed_v4i16_to_v4f32:
|
||||
; CHECK: sshll.4s [[VAL32:v[0-9]+]], v0, #0
|
||||
; CHECK: scvtf.4s v0, [[VAL32]]
|
||||
|
||||
%conv = sitofp <4 x i16> %v to <4 x float>
|
||||
ret <4 x float> %conv
|
||||
}
|
||||
|
||||
; Unsigned <4 x i16> to <4 x float>: the expected output widens the lanes with
; ushll.4s and converts with ucvtf.4s.
define <4 x float> @test_unsigned_v4i16_to_v4f32(<4 x i16> %v) nounwind readnone {
|
||||
; CHECK-LABEL: test_unsigned_v4i16_to_v4f32:
|
||||
; CHECK: ushll.4s [[VAL32:v[0-9]+]], v0, #0
|
||||
; CHECK: ucvtf.4s v0, [[VAL32]]
|
||||
|
||||
%conv = uitofp <4 x i16> %v to <4 x float>
|
||||
ret <4 x float> %conv
|
||||
}
|
||||
|
||||
; Signed <4 x i8> to <4 x float>: the expected output sign-extends the low
; 8 bits of each 16-bit lane (shl.4h / sshr.4h by 8), widens with sshll.4s,
; and converts with scvtf.4s.
define <4 x float> @test_signed_v4i8_to_v4f32(<4 x i8> %v) nounwind readnone {
|
||||
; CHECK-LABEL: test_signed_v4i8_to_v4f32:
|
||||
; CHECK: shl.4h [[TMP:v[0-9]+]], v0, #8
|
||||
; CHECK: sshr.4h [[VAL16:v[0-9]+]], [[TMP]], #8
|
||||
; CHECK: sshll.4s [[VAL32:v[0-9]+]], [[VAL16]], #0
|
||||
; CHECK: scvtf.4s v0, [[VAL32]]
|
||||
|
||||
%conv = sitofp <4 x i8> %v to <4 x float>
|
||||
ret <4 x float> %conv
|
||||
}
|
||||
; Unsigned <4 x i8> to <4 x float>: the expected output clears the high byte
; of each 16-bit lane with bic.4h, widens with ushll.4s, and converts with
; ucvtf.4s.
define <4 x float> @test_unsigned_v4i8_to_v4f32(<4 x i8> %v) nounwind readnone {
|
||||
; CHECK-LABEL: test_unsigned_v4i8_to_v4f32:
|
||||
; CHECK: bic.4h v0, #0xff, lsl #8
|
||||
; CHECK: ushll.4s [[VAL32:v[0-9]+]], v0, #0
|
||||
; CHECK: ucvtf.4s v0, [[VAL32]]
|
||||
|
||||
%conv = uitofp <4 x i8> %v to <4 x float>
|
||||
ret <4 x float> %conv
|
||||
}
|
Loading…
Reference in New Issue