[AArch64][SVE] Add fixed length codegen for FP_TO_{S,U}INT/{S,U}INT_TO_FP

Depends on D102607

Differential Revision: https://reviews.llvm.org/D102777
This commit is contained in:
Bradley Smith 2021-05-18 13:49:27 +01:00
parent c2c2be44ed
commit f3c577ed38
5 changed files with 3621 additions and 70 deletions

View File

@ -1482,6 +1482,8 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FP_EXTEND, VT, Custom);
setOperationAction(ISD::FP_ROUND, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISD::FRINT, VT, Custom);
setOperationAction(ISD::FROUND, VT, Custom);
setOperationAction(ISD::FROUNDEVEN, VT, Custom);
@ -1501,6 +1503,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::SMAX, VT, Custom);
setOperationAction(ISD::SMIN, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
@ -1510,6 +1513,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setOperationAction(ISD::SUB, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::UDIV, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::UMAX, VT, Custom);
setOperationAction(ISD::UMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
@ -3260,6 +3264,9 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
return LowerToPredicatedOp(Op, DAG, Opcode);
}
if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
return LowerFixedLengthFPToIntToSVE(Op, DAG);
unsigned NumElts = InVT.getVectorNumElements();
// f16 conversions are promoted to f32 when full fp16 is not supported.
@ -3384,6 +3391,9 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
return LowerToPredicatedOp(Op, DAG, Opcode);
}
if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
return LowerFixedLengthIntToFPToSVE(Op, DAG);
uint64_t VTSize = VT.getFixedSizeInBits();
uint64_t InVTSize = InVT.getFixedSizeInBits();
if (VTSize < InVTSize) {
@ -17994,6 +18004,95 @@ AArch64TargetLowering::LowerFixedLengthFPRoundToSVE(SDValue Op,
return DAG.getNode(ISD::BITCAST, DL, VT, Val);
}
SDValue
AArch64TargetLowering::LowerFixedLengthIntToFPToSVE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP;
unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
: AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
EVT SrcVT = Val.getValueType();
EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
if (ContainerSrcVT.getVectorElementType().getSizeInBits() <=
ContainerDstVT.getVectorElementType().getSizeInBits()) {
SDValue Pg = getPredicateForVector(DAG, DL, VT);
Val = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
VT.changeTypeToInteger(), Val);
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = getSVESafeBitCast(ContainerDstVT.changeTypeToInteger(), Val, DAG);
// Safe to use a larger than specified operand since we just unpacked the
// data, hence the upper bits are zero.
Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
DAG.getUNDEF(ContainerDstVT));
return convertFromScalableVector(DAG, VT, Val);
} else {
EVT CvtVT = ContainerSrcVT.changeVectorElementType(
ContainerDstVT.getVectorElementType());
SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
Val = getSVESafeBitCast(ContainerSrcVT, Val, DAG);
Val = convertFromScalableVector(DAG, SrcVT, Val);
Val = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Val);
return DAG.getNode(ISD::BITCAST, DL, VT, Val);
}
}
SDValue
AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT;
unsigned Opcode = IsSigned ? AArch64ISD::FCVTZS_MERGE_PASSTHRU
: AArch64ISD::FCVTZU_MERGE_PASSTHRU;
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
EVT SrcVT = Val.getValueType();
EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
if (ContainerSrcVT.getVectorElementType().getSizeInBits() <=
ContainerDstVT.getVectorElementType().getSizeInBits()) {
EVT CvtVT = ContainerDstVT.changeVectorElementType(
ContainerSrcVT.getVectorElementType());
SDValue Pg = getPredicateForVector(DAG, DL, VT);
Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Val);
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = getSVESafeBitCast(CvtVT, Val, DAG);
Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
DAG.getUNDEF(ContainerDstVT));
return convertFromScalableVector(DAG, VT, Val);
} else {
EVT CvtVT = ContainerSrcVT.changeTypeToInteger();
SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);
// Safe to use a larger than specified result since an fp_to_int where the
// result doesn't fit into the destination is undefined.
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
Val = convertFromScalableVector(DAG, SrcVT.changeTypeToInteger(), Val);
return DAG.getNode(ISD::TRUNCATE, DL, VT, Val);
}
}
SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);

View File

@ -1005,6 +1005,8 @@ private:
SelectionDAG &DAG) const;
SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const override;

View File

@ -1,70 +0,0 @@
; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE
; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s
; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s
; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s
; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s
; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s
; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s
; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s
; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s
; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s
; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s
; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s
; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s
; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s
; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s
; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"
; Don't use SVE when its registers are no bigger than NEON.
; NO_SVE-NOT: z{0-9}
; NOTE: fptrunc operations bigger than NEON are expanded. These tests just
; ensure we've correctly set the operation action for fixed length vector types
; that require SVE. They'll be updated to protect their expected code generation
; when lowering it implemented.
;
; vector uint_to_fp i8 -> f32
; AArch64 doesn't have a direct vector->f32 conversion instructions for
; elements smaller than i32, so make sure inputs are promoted to i32 first.
;
define void @uitofp_v4i8_v4f32(<4 x i8>* %in, <4 x float>* %out) #0 {
; CHECK-LABEL: uitofp_v4i8_v4f32:
; CHECK-COUNT-1: ucvt
%vec = load <4 x i8>, <4 x i8>* %in
%conv = uitofp <4 x i8> %vec to <4 x float>
store <4 x float> %conv, <4 x float>* %out
ret void
}
define void @uitofp_v8i8_v8f32(<8 x i8>* %in, <8 x float>* %out) #0 {
; CHECK-LABEL: uitofp_v8i8_v8f32:
; CHECK-COUNT-8: ucvt
%vec = load <8 x i8>, <8 x i8>* %in
%conv = uitofp <8 x i8> %vec to <8 x float>
store <8 x float> %conv, <8 x float>* %out
ret void
}
define void @uitofp_v16i8_v16f32(<16 x i8>* %in, <16 x float>* %out) #0 {
; CHECK-LABEL: uitofp_v16i8_v16f32:
; CHECK-COUNT-16: ucvt
%vec = load <16 x i8>, <16 x i8>* %in
%conv = uitofp <16 x i8> %vec to <16 x float>
store <16 x float> %conv, <16 x float>* %out
ret void
}
define void @uitofp_v32i8_v32f32(<32 x i8>* %in, <32 x float>* %out) #0 {
; CHECK-LABEL: uitofp_v32i8_v32f32:
; CHECK-COUNT-32: ucvt
%vec = load <32 x i8>, <32 x i8>* %in
%conv = uitofp <32 x i8> %vec to <32 x float>
store <32 x float> %conv, <32 x float>* %out
ret void
}
attributes #0 = { nounwind "target-features"="+sve" }

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff