AArch64: improve vector [su]itofp handling.

This somehow got missed in the AArch64 merge, so should fix a
performance regression since 3.4.

llvm-svn: 210984
This commit is contained in:
Tim Northover 2014-06-15 09:27:06 +00:00
parent e876f5b61e
commit ef0d760cd9
5 changed files with 210 additions and 65 deletions

View File

@ -1440,32 +1440,23 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
SDValue In = Op.getOperand(0);
EVT InVT = In.getValueType();
// v2i32 to v2f32 is legal.
if (VT == MVT::v2f32 && InVT == MVT::v2i32)
return Op;
// This function only handles v2f64 outputs.
if (VT == MVT::v2f64) {
// Extend the input argument to a v2i64 that we can feed into the
// floating point conversion. Zero or sign extend based on whether
// we're doing a signed or unsigned float conversion.
unsigned Opc =
Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
assert(Op.getNumOperands() == 1 && "FP conversions take one argument");
SDValue Promoted = DAG.getNode(Opc, dl, MVT::v2i64, Op.getOperand(0));
return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Promoted);
if (VT.getSizeInBits() < InVT.getSizeInBits()) {
MVT CastVT =
MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
InVT.getVectorNumElements());
In = DAG.getNode(Op.getOpcode(), dl, CastVT, In);
return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0));
}
// Scalarize v2i64 to v2f32 conversions.
std::vector<SDValue> BuildVectorOps;
for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
SDValue Sclr = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, In,
DAG.getConstant(i, MVT::i64));
Sclr = DAG.getNode(Op->getOpcode(), dl, MVT::f32, Sclr);
BuildVectorOps.push_back(Sclr);
if (VT.getSizeInBits() > InVT.getSizeInBits()) {
unsigned CastOpc =
Op.getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
EVT CastVT = VT.changeVectorElementTypeToInteger();
In = DAG.getNode(CastOpc, dl, CastVT, In);
return DAG.getNode(Op.getOpcode(), dl, VT, In);
}
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, BuildVectorOps);
return Op;
}
SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,

View File

@ -306,28 +306,47 @@ unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst,
static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = {
// LowerVectorINT_TO_FP:
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
// Complex: to v2f32
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
// Complex: to v4f32
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 },
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
// Complex: to v2f64
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
// LowerVectorFP_TO_INT
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
{ ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
{ ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
{ ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 1 },
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 },
{ ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 4 },
{ ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 4 },
{ ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 4 },
{ ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 4 },
{ ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 4 },
{ ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 4 },
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
{ ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
{ ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
{ ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
{ ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
};
int Idx = ConvertCostTableLookup<MVT>(

View File

@ -1,29 +0,0 @@
; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
define <2 x double> @f1(<2 x i32> %v) nounwind readnone {
; CHECK-LABEL: f1:
; CHECK: sshll.2d v0, v0, #0
; CHECK-NEXT: scvtf.2d v0, v0
; CHECK-NEXT: ret
%conv = sitofp <2 x i32> %v to <2 x double>
ret <2 x double> %conv
}
define <2 x double> @f2(<2 x i32> %v) nounwind readnone {
; CHECK-LABEL: f2:
; CHECK: ushll.2d v0, v0, #0
; CHECK-NEXT: ucvtf.2d v0, v0
; CHECK-NEXT: ret
%conv = uitofp <2 x i32> %v to <2 x double>
ret <2 x double> %conv
}
; CHECK: autogen_SD19655
; CHECK: scvtf
; CHECK: ret
define void @autogen_SD19655(<2 x i64>* %addr, <2 x float>* %addrfloat) {
%T = load <2 x i64>* %addr
%F = sitofp <2 x i64> %T to <2 x float>
store <2 x float> %F, <2 x float>* %addrfloat
ret void
}

View File

@ -0,0 +1,164 @@
; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
; CHECK: autogen_SD19655
; CHECK: scvtf
; CHECK: ret
define void @autogen_SD19655(<2 x i64>* %addr, <2 x float>* %addrfloat) {
%T = load <2 x i64>* %addr
%F = sitofp <2 x i64> %T to <2 x float>
store <2 x float> %F, <2 x float>* %addrfloat
ret void
}
define <2 x double> @test_signed_v2i32_to_v2f64(<2 x i32> %v) nounwind readnone {
; CHECK-LABEL: test_signed_v2i32_to_v2f64:
; CHECK: sshll.2d [[VAL64:v[0-9]+]], v0, #0
; CHECK-NEXT: scvtf.2d v0, [[VAL64]]
; CHECK-NEXT: ret
%conv = sitofp <2 x i32> %v to <2 x double>
ret <2 x double> %conv
}
define <2 x double> @test_unsigned_v2i32_to_v2f64(<2 x i32> %v) nounwind readnone {
; CHECK-LABEL: test_unsigned_v2i32_to_v2f64
; CHECK: ushll.2d [[VAL64:v[0-9]+]], v0, #0
; CHECK-NEXT: ucvtf.2d v0, [[VAL64]]
; CHECK-NEXT: ret
%conv = uitofp <2 x i32> %v to <2 x double>
ret <2 x double> %conv
}
define <2 x double> @test_signed_v2i16_to_v2f64(<2 x i16> %v) nounwind readnone {
; CHECK-LABEL: test_signed_v2i16_to_v2f64:
; CHECK: shl.2s [[TMP:v[0-9]+]], v0, #16
; CHECK: sshr.2s [[VAL32:v[0-9]+]], [[TMP]], #16
; CHECK: sshll.2d [[VAL64:v[0-9]+]], [[VAL32]], #0
; CHECK: scvtf.2d v0, [[VAL64]]
%conv = sitofp <2 x i16> %v to <2 x double>
ret <2 x double> %conv
}
define <2 x double> @test_unsigned_v2i16_to_v2f64(<2 x i16> %v) nounwind readnone {
; CHECK-LABEL: test_unsigned_v2i16_to_v2f64
; CHECK: movi d[[MASK:[0-9]+]], #0x00ffff0000ffff
; CHECK: and.8b [[VAL32:v[0-9]+]], v0, v[[MASK]]
; CHECK: ushll.2d [[VAL64:v[0-9]+]], [[VAL32]], #0
; CHECK: ucvtf.2d v0, [[VAL64]]
%conv = uitofp <2 x i16> %v to <2 x double>
ret <2 x double> %conv
}
define <2 x double> @test_signed_v2i8_to_v2f64(<2 x i8> %v) nounwind readnone {
; CHECK-LABEL: test_signed_v2i8_to_v2f64:
; CHECK: shl.2s [[TMP:v[0-9]+]], v0, #24
; CHECK: sshr.2s [[VAL32:v[0-9]+]], [[TMP]], #24
; CHECK: sshll.2d [[VAL64:v[0-9]+]], [[VAL32]], #0
; CHECK: scvtf.2d v0, [[VAL64]]
%conv = sitofp <2 x i8> %v to <2 x double>
ret <2 x double> %conv
}
define <2 x double> @test_unsigned_v2i8_to_v2f64(<2 x i8> %v) nounwind readnone {
; CHECK-LABEL: test_unsigned_v2i8_to_v2f64
; CHECK: movi d[[MASK:[0-9]+]], #0x0000ff000000ff
; CHECK: and.8b [[VAL32:v[0-9]+]], v0, v[[MASK]]
; CHECK: ushll.2d [[VAL64:v[0-9]+]], [[VAL32]], #0
; CHECK: ucvtf.2d v0, [[VAL64]]
%conv = uitofp <2 x i8> %v to <2 x double>
ret <2 x double> %conv
}
define <2 x float> @test_signed_v2i64_to_v2f32(<2 x i64> %v) nounwind readnone {
; CHECK-LABEL: test_signed_v2i64_to_v2f32:
; CHECK: scvtf.2d [[VAL64:v[0-9]+]], v0
; CHECK: fcvtn v0.2s, [[VAL64]].2d
%conv = sitofp <2 x i64> %v to <2 x float>
ret <2 x float> %conv
}
define <2 x float> @test_unsigned_v2i64_to_v2f32(<2 x i64> %v) nounwind readnone {
; CHECK-LABEL: test_unsigned_v2i64_to_v2f32
; CHECK: ucvtf.2d [[VAL64:v[0-9]+]], v0
; CHECK: fcvtn v0.2s, [[VAL64]].2d
%conv = uitofp <2 x i64> %v to <2 x float>
ret <2 x float> %conv
}
define <2 x float> @test_signed_v2i16_to_v2f32(<2 x i16> %v) nounwind readnone {
; CHECK-LABEL: test_signed_v2i16_to_v2f32:
; CHECK: shl.2s [[TMP:v[0-9]+]], v0, #16
; CHECK: sshr.2s [[VAL32:v[0-9]+]], [[TMP]], #16
; CHECK: scvtf.2s v0, [[VAL32]]
%conv = sitofp <2 x i16> %v to <2 x float>
ret <2 x float> %conv
}
define <2 x float> @test_unsigned_v2i16_to_v2f32(<2 x i16> %v) nounwind readnone {
; CHECK-LABEL: test_unsigned_v2i16_to_v2f32
; CHECK: movi d[[MASK:[0-9]+]], #0x00ffff0000ffff
; CHECK: and.8b [[VAL32:v[0-9]+]], v0, v[[MASK]]
; CHECK: ucvtf.2s v0, [[VAL32]]
%conv = uitofp <2 x i16> %v to <2 x float>
ret <2 x float> %conv
}
define <2 x float> @test_signed_v2i8_to_v2f32(<2 x i8> %v) nounwind readnone {
; CHECK-LABEL: test_signed_v2i8_to_v2f32:
; CHECK: shl.2s [[TMP:v[0-9]+]], v0, #24
; CHECK: sshr.2s [[VAL32:v[0-9]+]], [[TMP]], #24
; CHECK: scvtf.2s v0, [[VAL32]]
%conv = sitofp <2 x i8> %v to <2 x float>
ret <2 x float> %conv
}
define <2 x float> @test_unsigned_v2i8_to_v2f32(<2 x i8> %v) nounwind readnone {
; CHECK-LABEL: test_unsigned_v2i8_to_v2f32
; CHECK: movi d[[MASK:[0-9]+]], #0x0000ff000000ff
; CHECK: and.8b [[VAL32:v[0-9]+]], v0, v[[MASK]]
; CHECK: ucvtf.2s v0, [[VAL32]]
%conv = uitofp <2 x i8> %v to <2 x float>
ret <2 x float> %conv
}
define <4 x float> @test_signed_v4i16_to_v4f32(<4 x i16> %v) nounwind readnone {
; CHECK-LABEL: test_signed_v4i16_to_v4f32:
; CHECK: sshll.4s [[VAL32:v[0-9]+]], v0, #0
; CHECK: scvtf.4s v0, [[VAL32]]
%conv = sitofp <4 x i16> %v to <4 x float>
ret <4 x float> %conv
}
define <4 x float> @test_unsigned_v4i16_to_v4f32(<4 x i16> %v) nounwind readnone {
; CHECK-LABEL: test_unsigned_v4i16_to_v4f32
; CHECK: ushll.4s [[VAL32:v[0-9]+]], v0, #0
; CHECK: ucvtf.4s v0, [[VAL32]]
%conv = uitofp <4 x i16> %v to <4 x float>
ret <4 x float> %conv
}
define <4 x float> @test_signed_v4i8_to_v4f32(<4 x i8> %v) nounwind readnone {
; CHECK-LABEL: test_signed_v4i8_to_v4f32:
; CHECK: shl.4h [[TMP:v[0-9]+]], v0, #8
; CHECK: sshr.4h [[VAL16:v[0-9]+]], [[TMP]], #8
; CHECK: sshll.4s [[VAL32:v[0-9]+]], [[VAL16]], #0
; CHECK: scvtf.4s v0, [[VAL32]]
%conv = sitofp <4 x i8> %v to <4 x float>
ret <4 x float> %conv
}
define <4 x float> @test_unsigned_v4i8_to_v4f32(<4 x i8> %v) nounwind readnone {
; CHECK-LABEL: test_unsigned_v4i8_to_v4f32
; CHECK: bic.4h v0, #0xff, lsl #8
; CHECK: ushll.4s [[VAL32:v[0-9]+]], v0, #0
; CHECK: ucvtf.4s v0, [[VAL32]]
%conv = uitofp <4 x i8> %v to <4 x float>
ret <4 x float> %conv
}