[AArch64][SVE] Add fixed length codegen for FP_TO_{S,U}INT/{S,U}INT_TO_FP

Depends on D102607. Differential Revision: https://reviews.llvm.org/D102777
2021-05-18 13:49:27 +01:00 · 2021-05-18 13:49:27 +01:00 · f3c577ed38
parent c2c2be44ed
commit f3c577ed38
5 changed files with 3621 additions and 70 deletions
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@ -1482,6 +1482,8 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
  setOperationAction(ISD::FNEG, VT, Custom);
  setOperationAction(ISD::FP_EXTEND, VT, Custom);
  setOperationAction(ISD::FP_ROUND, VT, Custom);
  setOperationAction(ISD::FP_TO_SINT, VT, Custom);
  setOperationAction(ISD::FP_TO_UINT, VT, Custom);
  setOperationAction(ISD::FRINT, VT, Custom);
  setOperationAction(ISD::FROUND, VT, Custom);
  setOperationAction(ISD::FROUNDEVEN, VT, Custom);
@ -1501,6 +1503,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
  setOperationAction(ISD::SHL, VT, Custom);
  setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
  setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
  setOperationAction(ISD::SINT_TO_FP, VT, Custom);
  setOperationAction(ISD::SMAX, VT, Custom);
  setOperationAction(ISD::SMIN, VT, Custom);
  setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
@ -1510,6 +1513,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
  setOperationAction(ISD::SUB, VT, Custom);
  setOperationAction(ISD::TRUNCATE, VT, Custom);
  setOperationAction(ISD::UDIV, VT, Custom);
  setOperationAction(ISD::UINT_TO_FP, VT, Custom);
  setOperationAction(ISD::UMAX, VT, Custom);
  setOperationAction(ISD::UMIN, VT, Custom);
  setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
@ -3260,6 +3264,9 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
    return LowerToPredicatedOp(Op, DAG, Opcode);
  }
  if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
    return LowerFixedLengthFPToIntToSVE(Op, DAG);
  unsigned NumElts = InVT.getVectorNumElements();
  // f16 conversions are promoted to f32 when full fp16 is not supported.
@ -3384,6 +3391,9 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
    return LowerToPredicatedOp(Op, DAG, Opcode);
  }
  if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
    return LowerFixedLengthIntToFPToSVE(Op, DAG);
  uint64_t VTSize = VT.getFixedSizeInBits();
  uint64_t InVTSize = InVT.getFixedSizeInBits();
  if (VTSize < InVTSize) {
@ -17994,6 +18004,95 @@ AArch64TargetLowering::LowerFixedLengthFPRoundToSVE(SDValue Op,
  return DAG.getNode(ISD::BITCAST, DL, VT, Val);
 }
 /// Lower a fixed length SINT_TO_FP/UINT_TO_FP node to the matching predicated
 /// SVE conversion node, working on the scalable container types of the operand
 /// and result.
 ///
 /// \param Op  The SINT_TO_FP or UINT_TO_FP node. Its result type must be a
 ///            fixed length vector.
 /// \param DAG The SelectionDAG used to create the replacement nodes.
 /// \returns A value with the same fixed length type as \p Op.
 SDValue
 AArch64TargetLowering::LowerFixedLengthIntToFPToSVE(SDValue Op,
                                                     SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP;
  unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
                             : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
  SDLoc DL(Op);
  SDValue Val = Op.getOperand(0);
  EVT SrcVT = Val.getValueType();
  EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
  EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
  if (ContainerSrcVT.getVectorElementType().getSizeInBits() <=
      ContainerDstVT.getVectorElementType().getSizeInBits()) {
    // Source elements are no wider than the result elements: promote the
    // integer operand to the result element width and convert at that width.
    SDValue Pg = getPredicateForVector(DAG, DL, VT);
    Val = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
                      VT.changeTypeToInteger(), Val);
    // Val now has VT's integer type, so it must be placed in the integer form
    // of the destination container. ContainerSrcVT is derived from SrcVT and
    // may have a narrower element type than the promoted value.
    //
    // Safe to use a larger than specified operand because by promoting the
    // value nothing has changed from an arithmetic point of view.
    Val =
        convertToScalableVector(DAG, ContainerDstVT.changeTypeToInteger(), Val);
    Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
                      DAG.getUNDEF(ContainerDstVT));
    return convertFromScalableVector(DAG, VT, Val);
  }

  // Source elements are wider than the result elements: convert within the
  // source container layout, then narrow the fixed length result.
  EVT CvtVT = ContainerSrcVT.changeVectorElementType(
      ContainerDstVT.getVectorElementType());
  SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);
  Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
  Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
  // View the converted value through the integer source container so it can
  // be extracted as SrcVT and truncated to the result element width.
  Val = getSVESafeBitCast(ContainerSrcVT, Val, DAG);
  Val = convertFromScalableVector(DAG, SrcVT, Val);
  Val = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Val);
  return DAG.getNode(ISD::BITCAST, DL, VT, Val);
 }
 /// Lower a fixed length FP_TO_SINT/FP_TO_UINT node to the matching predicated
 /// SVE conversion node, working on the scalable container types of the operand
 /// and result.
 ///
 /// \param Op  The FP_TO_SINT or FP_TO_UINT node. Its result type must be a
 ///            fixed length vector.
 /// \param DAG The SelectionDAG used to create the replacement nodes.
 /// \returns A value with the same fixed length type as \p Op.
 SDValue
 AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(SDValue Op,
                                                     SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT;
  unsigned Opcode = IsSigned ? AArch64ISD::FCVTZS_MERGE_PASSTHRU
                             : AArch64ISD::FCVTZU_MERGE_PASSTHRU;
  SDLoc DL(Op);
  SDValue Val = Op.getOperand(0);
  EVT SrcVT = Val.getValueType();
  EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
  EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
  if (ContainerSrcVT.getVectorElementType().getSizeInBits() <=
      ContainerDstVT.getVectorElementType().getSizeInBits()) {
    // Source elements are no wider than the result elements: spread the FP
    // bit patterns out to the result element width, then convert.
    EVT CvtVT = ContainerDstVT.changeVectorElementType(
      ContainerSrcVT.getVectorElementType());
    SDValue Pg = getPredicateForVector(DAG, DL, VT);
    Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
    Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Val);
    // Val now has the integer type VT, so it must be placed in VT's own
    // container. ContainerSrcVT is a floating-point container and may have a
    // narrower element type. The cast below then reinterprets the data as the
    // FP type the conversion node consumes.
    Val = convertToScalableVector(DAG, ContainerDstVT, Val);
    Val = getSVESafeBitCast(CvtVT, Val, DAG);
    Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
                      DAG.getUNDEF(ContainerDstVT));
    return convertFromScalableVector(DAG, VT, Val);
  }

  // Source elements are wider than the result elements: convert at the source
  // element width and truncate the fixed length result.
  EVT CvtVT = ContainerSrcVT.changeTypeToInteger();
  SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);
  // Safe to use a larger than specified result since an fp_to_int where the
  // result doesn't fit into the destination is undefined.
  Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
  Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
  Val = convertFromScalableVector(DAG, SrcVT.changeTypeToInteger(), Val);
  return DAG.getNode(ISD::TRUNCATE, DL, VT, Val);
 }
 SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@ -1005,6 +1005,8 @@ private:
                                             SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-converts.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-converts.ll
@ -1,70 +0,0 @@
 ; RUN: llc -aarch64-sve-vector-bits-min=128  -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE
 ; RUN: llc -aarch64-sve-vector-bits-min=256  -asm-verbose=0 < %s | FileCheck %s
 ; RUN: llc -aarch64-sve-vector-bits-min=384  -asm-verbose=0 < %s | FileCheck %s
 ; RUN: llc -aarch64-sve-vector-bits-min=512  -asm-verbose=0 < %s | FileCheck %s
 ; RUN: llc -aarch64-sve-vector-bits-min=640  -asm-verbose=0 < %s | FileCheck %s
 ; RUN: llc -aarch64-sve-vector-bits-min=768  -asm-verbose=0 < %s | FileCheck %s
 ; RUN: llc -aarch64-sve-vector-bits-min=896  -asm-verbose=0 < %s | FileCheck %s
 ; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s
 ; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s
 ; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s
 ; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s
 ; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s
 ; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s
 ; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s
 ; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s
 ; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s
 target triple = "aarch64-unknown-linux-gnu"
 ; Don't use SVE when its registers are no bigger than NEON.
 ; FileCheck matches text outside a double-brace block literally, so the digit
 ; class must be written as a regex block to reject any numbered z register.
 ; NO_SVE-NOT: z{{[0-9]}}
 ; NOTE: uitofp operations bigger than NEON are expanded. These tests just
 ; ensure we've correctly set the operation action for fixed length vector types
 ; that require SVE. They'll be updated to protect their expected code generation
 ; when lowering is implemented.
 ;
 ; vector uint_to_fp i8 -> f32
 ; AArch64 doesn't have a direct vector->f32 conversion instruction for
 ; elements smaller than i32, so make sure inputs are promoted to i32 first.
 ;
 define void @uitofp_v4i8_v4f32(<4 x i8>* %in, <4 x float>* %out) #0 {
 ; CHECK-LABEL: uitofp_v4i8_v4f32:
 ; CHECK-COUNT-1: ucvt
 ; Load <4 x i8> from %in, convert unsigned to <4 x float>, store to %out.
  %src = load <4 x i8>, <4 x i8>* %in
  %res = uitofp <4 x i8> %src to <4 x float>
  store <4 x float> %res, <4 x float>* %out
  ret void
 }
 define void @uitofp_v8i8_v8f32(<8 x i8>* %in, <8 x float>* %out) #0 {
 ; CHECK-LABEL: uitofp_v8i8_v8f32:
 ; CHECK-COUNT-8: ucvt
 ; Load <8 x i8> from %in, convert unsigned to <8 x float>, store to %out.
  %src = load <8 x i8>, <8 x i8>* %in
  %res = uitofp <8 x i8> %src to <8 x float>
  store <8 x float> %res, <8 x float>* %out
  ret void
 }
 define void @uitofp_v16i8_v16f32(<16 x i8>* %in, <16 x float>* %out) #0 {
 ; CHECK-LABEL: uitofp_v16i8_v16f32:
 ; CHECK-COUNT-16: ucvt
 ; Load <16 x i8> from %in, convert unsigned to <16 x float>, store to %out.
  %src = load <16 x i8>, <16 x i8>* %in
  %res = uitofp <16 x i8> %src to <16 x float>
  store <16 x float> %res, <16 x float>* %out
  ret void
 }
 define void @uitofp_v32i8_v32f32(<32 x i8>* %in, <32 x float>* %out) #0 {
 ; CHECK-LABEL: uitofp_v32i8_v32f32:
 ; CHECK-COUNT-32: ucvt
 ; Load <32 x i8> from %in, convert unsigned to <32 x float>, store to %out.
  %src = load <32 x i8>, <32 x i8>* %in
  %res = uitofp <32 x i8> %src to <32 x float>
  store <32 x float> %res, <32 x float>* %out
  ret void
 }
 attributes #0 = { nounwind "target-features"="+sve" }
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll