forked from OSchip/llvm-project
[AArch64][SVE] Add fixed length codegen for FP_TO_{S,U}INT/{S,U}INT_TO_FP
Depends on D102607 Differential Revision: https://reviews.llvm.org/D102777
This commit is contained in:
parent
c2c2be44ed
commit
f3c577ed38
|
@ -1482,6 +1482,8 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
|
||||||
setOperationAction(ISD::FNEG, VT, Custom);
|
setOperationAction(ISD::FNEG, VT, Custom);
|
||||||
setOperationAction(ISD::FP_EXTEND, VT, Custom);
|
setOperationAction(ISD::FP_EXTEND, VT, Custom);
|
||||||
setOperationAction(ISD::FP_ROUND, VT, Custom);
|
setOperationAction(ISD::FP_ROUND, VT, Custom);
|
||||||
|
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
|
||||||
|
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
|
||||||
setOperationAction(ISD::FRINT, VT, Custom);
|
setOperationAction(ISD::FRINT, VT, Custom);
|
||||||
setOperationAction(ISD::FROUND, VT, Custom);
|
setOperationAction(ISD::FROUND, VT, Custom);
|
||||||
setOperationAction(ISD::FROUNDEVEN, VT, Custom);
|
setOperationAction(ISD::FROUNDEVEN, VT, Custom);
|
||||||
|
@ -1501,6 +1503,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
|
||||||
setOperationAction(ISD::SHL, VT, Custom);
|
setOperationAction(ISD::SHL, VT, Custom);
|
||||||
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
|
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
|
||||||
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
|
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
|
||||||
|
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
|
||||||
setOperationAction(ISD::SMAX, VT, Custom);
|
setOperationAction(ISD::SMAX, VT, Custom);
|
||||||
setOperationAction(ISD::SMIN, VT, Custom);
|
setOperationAction(ISD::SMIN, VT, Custom);
|
||||||
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
|
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
|
||||||
|
@ -1510,6 +1513,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
|
||||||
setOperationAction(ISD::SUB, VT, Custom);
|
setOperationAction(ISD::SUB, VT, Custom);
|
||||||
setOperationAction(ISD::TRUNCATE, VT, Custom);
|
setOperationAction(ISD::TRUNCATE, VT, Custom);
|
||||||
setOperationAction(ISD::UDIV, VT, Custom);
|
setOperationAction(ISD::UDIV, VT, Custom);
|
||||||
|
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
|
||||||
setOperationAction(ISD::UMAX, VT, Custom);
|
setOperationAction(ISD::UMAX, VT, Custom);
|
||||||
setOperationAction(ISD::UMIN, VT, Custom);
|
setOperationAction(ISD::UMIN, VT, Custom);
|
||||||
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
|
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
|
||||||
|
@ -3260,6 +3264,9 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
|
||||||
return LowerToPredicatedOp(Op, DAG, Opcode);
|
return LowerToPredicatedOp(Op, DAG, Opcode);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
|
||||||
|
return LowerFixedLengthFPToIntToSVE(Op, DAG);
|
||||||
|
|
||||||
unsigned NumElts = InVT.getVectorNumElements();
|
unsigned NumElts = InVT.getVectorNumElements();
|
||||||
|
|
||||||
// f16 conversions are promoted to f32 when full fp16 is not supported.
|
// f16 conversions are promoted to f32 when full fp16 is not supported.
|
||||||
|
@ -3384,6 +3391,9 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
|
||||||
return LowerToPredicatedOp(Op, DAG, Opcode);
|
return LowerToPredicatedOp(Op, DAG, Opcode);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
|
||||||
|
return LowerFixedLengthIntToFPToSVE(Op, DAG);
|
||||||
|
|
||||||
uint64_t VTSize = VT.getFixedSizeInBits();
|
uint64_t VTSize = VT.getFixedSizeInBits();
|
||||||
uint64_t InVTSize = InVT.getFixedSizeInBits();
|
uint64_t InVTSize = InVT.getFixedSizeInBits();
|
||||||
if (VTSize < InVTSize) {
|
if (VTSize < InVTSize) {
|
||||||
|
@ -17994,6 +18004,95 @@ AArch64TargetLowering::LowerFixedLengthFPRoundToSVE(SDValue Op,
|
||||||
return DAG.getNode(ISD::BITCAST, DL, VT, Val);
|
return DAG.getNode(ISD::BITCAST, DL, VT, Val);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Lower a fixed length vector SINT_TO_FP/UINT_TO_FP node to the matching
// predicated {S,U}INT_TO_FP_MERGE_PASSTHRU node operating on the SVE container
// types of the source and result.
SDValue
AArch64TargetLowering::LowerFixedLengthIntToFPToSVE(SDValue Op,
                                                    SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");

  // Pick the signed or unsigned flavour of both the conversion node and the
  // integer extension that is used when the result elements are wider.
  unsigned CvtOpc = AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
  unsigned ExtOpc = ISD::ZERO_EXTEND;
  if (Op.getOpcode() == ISD::SINT_TO_FP) {
    CvtOpc = AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU;
    ExtOpc = ISD::SIGN_EXTEND;
  }

  SDLoc DL(Op);
  SDValue Src = Op.getOperand(0);
  EVT SrcVT = Src.getValueType();
  EVT DstContainerVT = getContainerForFixedLengthVector(DAG, VT);
  EVT SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
  EVT SrcEltVT = SrcContainerVT.getVectorElementType();
  EVT DstEltVT = DstContainerVT.getVectorElementType();

  if (SrcEltVT.getSizeInBits() <= DstEltVT.getSizeInBits()) {
    // Source elements are no wider than the result elements: extend the
    // integers to the result's element width before converting.
    SDValue Pg = getPredicateForVector(DAG, DL, VT);

    SDValue Extended = DAG.getNode(ExtOpc, DL, VT.changeTypeToInteger(), Src);

    SDValue Scalable = convertToScalableVector(DAG, SrcContainerVT, Extended);
    SDValue IntInput =
        getSVESafeBitCast(DstContainerVT.changeTypeToInteger(), Scalable, DAG);
    // Safe to use a larger than specified operand since we just unpacked the
    // data, hence the upper bits are zero.
    SDValue Converted = DAG.getNode(CvtOpc, DL, DstContainerVT, Pg, IntInput,
                                    DAG.getUNDEF(DstContainerVT));
    return convertFromScalableVector(DAG, VT, Converted);
  }

  // Source elements are wider than the result elements: convert using the
  // source container's layout with the result's element type, then narrow the
  // value back in the fixed length domain.
  EVT CvtVT = SrcContainerVT.changeVectorElementType(DstEltVT);
  SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);

  SDValue Scalable = convertToScalableVector(DAG, SrcContainerVT, Src);
  SDValue Converted =
      DAG.getNode(CvtOpc, DL, CvtVT, Pg, Scalable, DAG.getUNDEF(CvtVT));
  SDValue AsSrcInt = getSVESafeBitCast(SrcContainerVT, Converted, DAG);
  SDValue Fixed = convertFromScalableVector(DAG, SrcVT, AsSrcInt);

  SDValue Narrowed =
      DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Fixed);
  return DAG.getNode(ISD::BITCAST, DL, VT, Narrowed);
}
|
||||||
|
|
||||||
|
// Lower a fixed length vector FP_TO_SINT/FP_TO_UINT node to the matching
// predicated FCVTZS/FCVTZU_MERGE_PASSTHRU node operating on the SVE container
// types of the source and result.
SDValue
AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(SDValue Op,
                                                    SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");

  // Signed and unsigned conversions only differ in the node that is emitted.
  unsigned CvtOpc = AArch64ISD::FCVTZU_MERGE_PASSTHRU;
  if (Op.getOpcode() == ISD::FP_TO_SINT)
    CvtOpc = AArch64ISD::FCVTZS_MERGE_PASSTHRU;

  SDLoc DL(Op);
  SDValue Src = Op.getOperand(0);
  EVT SrcVT = Src.getValueType();
  EVT DstContainerVT = getContainerForFixedLengthVector(DAG, VT);
  EVT SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
  EVT SrcEltVT = SrcContainerVT.getVectorElementType();
  EVT DstEltVT = DstContainerVT.getVectorElementType();

  if (SrcEltVT.getSizeInBits() <= DstEltVT.getSizeInBits()) {
    // Source elements are no wider than the result elements: widen the raw
    // source bits to the result's element width, then convert.
    EVT CvtVT = DstContainerVT.changeVectorElementType(SrcEltVT);
    SDValue Pg = getPredicateForVector(DAG, DL, VT);

    SDValue SrcBits =
        DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Src);
    SDValue Widened = DAG.getNode(ISD::ANY_EXTEND, DL, VT, SrcBits);

    SDValue Scalable = convertToScalableVector(DAG, SrcContainerVT, Widened);
    SDValue FPInput = getSVESafeBitCast(CvtVT, Scalable, DAG);
    SDValue Converted = DAG.getNode(CvtOpc, DL, DstContainerVT, Pg, FPInput,
                                    DAG.getUNDEF(DstContainerVT));
    return convertFromScalableVector(DAG, VT, Converted);
  }

  // Source elements are wider than the result elements: convert at the source
  // element width, then truncate to the requested result type.
  EVT CvtVT = SrcContainerVT.changeTypeToInteger();
  SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);

  // Safe to use a larger than specified result since an fp_to_int where the
  // result doesn't fit into the destination is undefined.
  SDValue Scalable = convertToScalableVector(DAG, SrcContainerVT, Src);
  SDValue Converted =
      DAG.getNode(CvtOpc, DL, CvtVT, Pg, Scalable, DAG.getUNDEF(CvtVT));
  SDValue Fixed =
      convertFromScalableVector(DAG, SrcVT.changeTypeToInteger(), Converted);

  return DAG.getNode(ISD::TRUNCATE, DL, VT, Fixed);
}
|
||||||
|
|
||||||
SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
|
SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
|
||||||
SelectionDAG &DAG) const {
|
SelectionDAG &DAG) const {
|
||||||
SDLoc DL(Op);
|
SDLoc DL(Op);
|
||||||
|
|
|
@ -1005,6 +1005,8 @@ private:
|
||||||
SelectionDAG &DAG) const;
|
SelectionDAG &DAG) const;
|
||||||
SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
|
SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
|
||||||
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
|
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
|
||||||
SmallVectorImpl<SDNode *> &Created) const override;
|
SmallVectorImpl<SDNode *> &Created) const override;
|
||||||
|
|
|
@ -1,70 +0,0 @@
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s
|
|
||||||
; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s
|
|
||||||
|
|
||||||
target triple = "aarch64-unknown-linux-gnu"
|
|
||||||
|
|
||||||
; Don't use SVE when its registers are no bigger than NEON.
|
|
||||||
; NO_SVE-NOT: z{0-9}
|
|
||||||
|
|
||||||
; NOTE: fptrunc operations bigger than NEON are expanded. These tests just
|
|
||||||
; ensure we've correctly set the operation action for fixed length vector types
|
|
||||||
; that require SVE. They'll be updated to protect their expected code generation
|
|
||||||
; when lowering is implemented.
|
|
||||||
|
|
||||||
;
|
|
||||||
; vector uint_to_fp i8 -> f32
|
|
||||||
; AArch64 doesn't have direct vector->f32 conversion instructions for
|
|
||||||
; elements smaller than i32, so make sure inputs are promoted to i32 first.
|
|
||||||
;
|
|
||||||
|
|
||||||
define void @uitofp_v4i8_v4f32(<4 x i8>* %in, <4 x float>* %out) #0 {
|
|
||||||
; CHECK-LABEL: uitofp_v4i8_v4f32:
|
|
||||||
; CHECK-COUNT-1: ucvt
|
|
||||||
%vec = load <4 x i8>, <4 x i8>* %in
|
|
||||||
%conv = uitofp <4 x i8> %vec to <4 x float>
|
|
||||||
store <4 x float> %conv, <4 x float>* %out
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define void @uitofp_v8i8_v8f32(<8 x i8>* %in, <8 x float>* %out) #0 {
|
|
||||||
; CHECK-LABEL: uitofp_v8i8_v8f32:
|
|
||||||
; CHECK-COUNT-8: ucvt
|
|
||||||
%vec = load <8 x i8>, <8 x i8>* %in
|
|
||||||
%conv = uitofp <8 x i8> %vec to <8 x float>
|
|
||||||
store <8 x float> %conv, <8 x float>* %out
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define void @uitofp_v16i8_v16f32(<16 x i8>* %in, <16 x float>* %out) #0 {
|
|
||||||
; CHECK-LABEL: uitofp_v16i8_v16f32:
|
|
||||||
; CHECK-COUNT-16: ucvt
|
|
||||||
%vec = load <16 x i8>, <16 x i8>* %in
|
|
||||||
%conv = uitofp <16 x i8> %vec to <16 x float>
|
|
||||||
store <16 x float> %conv, <16 x float>* %out
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
define void @uitofp_v32i8_v32f32(<32 x i8>* %in, <32 x float>* %out) #0 {
|
|
||||||
; CHECK-LABEL: uitofp_v32i8_v32f32:
|
|
||||||
; CHECK-COUNT-32: ucvt
|
|
||||||
%vec = load <32 x i8>, <32 x i8>* %in
|
|
||||||
%conv = uitofp <32 x i8> %vec to <32 x float>
|
|
||||||
store <32 x float> %conv, <32 x float>* %out
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
attributes #0 = { nounwind "target-features"="+sve" }
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue