forked from OSchip/llvm-project
[AArch64] Optimise bitreverse lowering in ISel
Differential Revision: https://reviews.llvm.org/D103105
This commit is contained in:
parent
b25546a4b4
commit
e971099a9b
|
@ -1025,6 +1025,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
|||
setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
|
||||
setOperationAction(ISD::BITREVERSE, MVT::v8i8, Legal);
|
||||
setOperationAction(ISD::BITREVERSE, MVT::v16i8, Legal);
|
||||
setOperationAction(ISD::BITREVERSE, MVT::v2i32, Custom);
|
||||
setOperationAction(ISD::BITREVERSE, MVT::v4i32, Custom);
|
||||
setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
|
||||
setOperationAction(ISD::BITREVERSE, MVT::v2i64, Custom);
|
||||
|
||||
// AArch64 doesn't have MUL.2d:
|
||||
setOperationAction(ISD::MUL, MVT::v2i64, Expand);
|
||||
|
@ -4723,8 +4727,7 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
|
|||
case ISD::ABS:
|
||||
return LowerABS(Op, DAG);
|
||||
case ISD::BITREVERSE:
|
||||
return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU,
|
||||
/*OverrideNEON=*/true);
|
||||
return LowerBitreverse(Op, DAG);
|
||||
case ISD::BSWAP:
|
||||
return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
|
||||
case ISD::CTLZ:
|
||||
|
@ -6898,6 +6901,56 @@ SDValue AArch64TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
|
|||
return DAG.getNode(ISD::CTLZ, DL, VT, RBIT);
|
||||
}
|
||||
|
||||
// Custom lowering for ISD::BITREVERSE.
//
// Scalable vectors, and fixed-length vectors for which
// useSVEForFixedLengthVectorVT() (with NEON overridden) returns true, are
// forwarded to the predicated BITREVERSE_MERGE_PASSTHRU node.
//
// The remaining handled types (v2i32, v4i32, v1i64, v2i64) are lowered as
// three nodes: reversing the bits of a wide element is the same as reversing
// the order of its bytes and then reversing the bits inside every byte. So we
// emit a REV32/REV64 producing a byte vector of the same total width, a
// BITREVERSE on that byte vector, and finally an NVCAST back to the original
// type. Any other type reaching this point is a hard error.
SDValue AArch64TargetLowering::LowerBitreverse(SDValue Op,
                                               SelectionDAG &DAG) const {
  const EVT ResultVT = Op.getValueType();

  const bool UseSVE =
      ResultVT.isScalableVector() ||
      useSVEForFixedLengthVectorVT(ResultVT, /*OverrideNEON=*/true);
  if (UseSVE)
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU,
                               /*OverrideNEON=*/true);

  // Select the byte-swap opcode (by element width) and the byte vector type
  // (by total vector width) for the type being lowered.
  unsigned ByteSwapOpc;
  MVT ByteVT;
  switch (ResultVT.getSimpleVT().SimpleTy) {
  case MVT::v2i32:
    ByteSwapOpc = AArch64ISD::REV32;
    ByteVT = MVT::v8i8;
    break;
  case MVT::v4i32:
    ByteSwapOpc = AArch64ISD::REV32;
    ByteVT = MVT::v16i8;
    break;
  case MVT::v1i64:
    ByteSwapOpc = AArch64ISD::REV64;
    ByteVT = MVT::v8i8;
    break;
  case MVT::v2i64:
    ByteSwapOpc = AArch64ISD::REV64;
    ByteVT = MVT::v16i8;
    break;
  default:
    llvm_unreachable("Invalid type for bitreverse!");
  }

  SDLoc Loc(Op);

  // Step 1: reverse the byte order within each original element.
  SDValue ByteSwapped =
      DAG.getNode(ByteSwapOpc, Loc, ByteVT, Op.getOperand(0));
  // Step 2: reverse the bits within each byte.
  SDValue BitReversed =
      DAG.getNode(ISD::BITREVERSE, Loc, ByteVT, ByteSwapped);
  // Step 3: reinterpret the byte vector as the original vector type.
  return DAG.getNode(AArch64ISD::NVCAST, Loc, ResultVT, BitReversed);
}
|
||||
|
||||
SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
||||
if (Op.getValueType().isVector())
|
||||
|
|
|
@ -956,6 +956,7 @@ private:
|
|||
SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
|
|
@ -8,13 +8,8 @@ declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>) readnone
|
|||
define <2 x i16> @f(<2 x i16> %a) {
|
||||
; CHECK-LABEL: f:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NEXT: fmov w8, s0
|
||||
; CHECK-NEXT: rbit w8, w8
|
||||
; CHECK-NEXT: mov w9, v0.s[1]
|
||||
; CHECK-NEXT: fmov s0, w8
|
||||
; CHECK-NEXT: rbit w8, w9
|
||||
; CHECK-NEXT: mov v0.s[1], w8
|
||||
; CHECK-NEXT: rev32 v0.8b, v0.8b
|
||||
; CHECK-NEXT: rbit v0.8b, v0.8b
|
||||
; CHECK-NEXT: ushr v0.2s, v0.2s, #16
|
||||
; CHECK-NEXT: ret
|
||||
%b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
|
||||
|
@ -118,14 +113,8 @@ declare <2 x i32> @llvm.bitreverse.v2i32(<2 x i32>) readnone
|
|||
define <2 x i32> @g_vec_2x32(<2 x i32> %a) {
|
||||
; CHECK-LABEL: g_vec_2x32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NEXT: fmov w8, s0
|
||||
; CHECK-NEXT: rbit w8, w8
|
||||
; CHECK-NEXT: mov w9, v0.s[1]
|
||||
; CHECK-NEXT: fmov s0, w8
|
||||
; CHECK-NEXT: rbit w8, w9
|
||||
; CHECK-NEXT: mov v0.s[1], w8
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
||||
; CHECK-NEXT: rev32 v0.8b, v0.8b
|
||||
; CHECK-NEXT: rbit v0.8b, v0.8b
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%b = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %a)
|
||||
|
@ -137,18 +126,8 @@ declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) readnone
|
|||
define <4 x i32> @g_vec_4x32(<4 x i32> %a) {
|
||||
; CHECK-LABEL: g_vec_4x32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fmov w10, s0
|
||||
; CHECK-NEXT: mov w8, v0.s[1]
|
||||
; CHECK-NEXT: rbit w10, w10
|
||||
; CHECK-NEXT: mov w9, v0.s[2]
|
||||
; CHECK-NEXT: mov w11, v0.s[3]
|
||||
; CHECK-NEXT: fmov s0, w10
|
||||
; CHECK-NEXT: rbit w8, w8
|
||||
; CHECK-NEXT: rbit w9, w9
|
||||
; CHECK-NEXT: mov v0.s[1], w8
|
||||
; CHECK-NEXT: mov v0.s[2], w9
|
||||
; CHECK-NEXT: rbit w8, w11
|
||||
; CHECK-NEXT: mov v0.s[3], w8
|
||||
; CHECK-NEXT: rev32 v0.16b, v0.16b
|
||||
; CHECK-NEXT: rbit v0.16b, v0.16b
|
||||
; CHECK-NEXT: ret
|
||||
%b = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a)
|
||||
ret <4 x i32> %b
|
||||
|
@ -159,10 +138,8 @@ declare <1 x i64> @llvm.bitreverse.v1i64(<1 x i64>) readnone
|
|||
define <1 x i64> @g_vec_1x64(<1 x i64> %a) {
|
||||
; CHECK-LABEL: g_vec_1x64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NEXT: fmov x8, d0
|
||||
; CHECK-NEXT: rbit x8, x8
|
||||
; CHECK-NEXT: fmov d0, x8
|
||||
; CHECK-NEXT: rev64 v0.8b, v0.8b
|
||||
; CHECK-NEXT: rbit v0.8b, v0.8b
|
||||
; CHECK-NEXT: ret
|
||||
%b = call <1 x i64> @llvm.bitreverse.v1i64(<1 x i64> %a)
|
||||
ret <1 x i64> %b
|
||||
|
@ -173,12 +150,8 @@ declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>) readnone
|
|||
define <2 x i64> @g_vec_2x64(<2 x i64> %a) {
|
||||
; CHECK-LABEL: g_vec_2x64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fmov x8, d0
|
||||
; CHECK-NEXT: rbit x8, x8
|
||||
; CHECK-NEXT: mov x9, v0.d[1]
|
||||
; CHECK-NEXT: fmov d0, x8
|
||||
; CHECK-NEXT: rbit x8, x9
|
||||
; CHECK-NEXT: mov v0.d[1], x8
|
||||
; CHECK-NEXT: rev64 v0.16b, v0.16b
|
||||
; CHECK-NEXT: rbit v0.16b, v0.16b
|
||||
; CHECK-NEXT: ret
|
||||
%b = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a)
|
||||
ret <2 x i64> %b
|
||||
|
|
|
@ -55,14 +55,8 @@ declare <2 x i32> @llvm.aarch64.neon.rbit.v2i32(<2 x i32>) nounwind readnone
|
|||
define <2 x i32> @rbit_2x32(<2 x i32> %A) {
|
||||
; CHECK-LABEL: rbit_2x32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NEXT: fmov w8, s0
|
||||
; CHECK-NEXT: rbit w8, w8
|
||||
; CHECK-NEXT: mov w9, v0.s[1]
|
||||
; CHECK-NEXT: fmov s0, w8
|
||||
; CHECK-NEXT: rbit w8, w9
|
||||
; CHECK-NEXT: mov v0.s[1], w8
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
||||
; CHECK-NEXT: rev32 v0.8b, v0.8b
|
||||
; CHECK-NEXT: rbit v0.8b, v0.8b
|
||||
; CHECK-NEXT: ret
|
||||
%tmp3 = call <2 x i32> @llvm.aarch64.neon.rbit.v2i32(<2 x i32> %A)
|
||||
ret <2 x i32> %tmp3
|
||||
|
@ -73,18 +67,8 @@ declare <4 x i32> @llvm.aarch64.neon.rbit.v4i32(<4 x i32>) nounwind readnone
|
|||
define <4 x i32> @rbit_4x32(<4 x i32> %A) {
|
||||
; CHECK-LABEL: rbit_4x32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fmov w10, s0
|
||||
; CHECK-NEXT: mov w8, v0.s[1]
|
||||
; CHECK-NEXT: rbit w10, w10
|
||||
; CHECK-NEXT: mov w9, v0.s[2]
|
||||
; CHECK-NEXT: mov w11, v0.s[3]
|
||||
; CHECK-NEXT: fmov s0, w10
|
||||
; CHECK-NEXT: rbit w8, w8
|
||||
; CHECK-NEXT: rbit w9, w9
|
||||
; CHECK-NEXT: mov v0.s[1], w8
|
||||
; CHECK-NEXT: mov v0.s[2], w9
|
||||
; CHECK-NEXT: rbit w8, w11
|
||||
; CHECK-NEXT: mov v0.s[3], w8
|
||||
; CHECK-NEXT: rev32 v0.16b, v0.16b
|
||||
; CHECK-NEXT: rbit v0.16b, v0.16b
|
||||
; CHECK-NEXT: ret
|
||||
%tmp3 = call <4 x i32> @llvm.aarch64.neon.rbit.v4i32(<4 x i32> %A)
|
||||
ret <4 x i32> %tmp3
|
||||
|
@ -95,10 +79,8 @@ declare <1 x i64> @llvm.aarch64.neon.rbit.v1i64(<1 x i64>) readnone
|
|||
define <1 x i64> @rbit_1x64(<1 x i64> %A) {
|
||||
; CHECK-LABEL: rbit_1x64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-NEXT: fmov x8, d0
|
||||
; CHECK-NEXT: rbit x8, x8
|
||||
; CHECK-NEXT: fmov d0, x8
|
||||
; CHECK-NEXT: rev64 v0.8b, v0.8b
|
||||
; CHECK-NEXT: rbit v0.8b, v0.8b
|
||||
; CHECK-NEXT: ret
|
||||
%tmp3 = call <1 x i64> @llvm.aarch64.neon.rbit.v1i64(<1 x i64> %A)
|
||||
ret <1 x i64> %tmp3
|
||||
|
@ -109,12 +91,8 @@ declare <2 x i64> @llvm.aarch64.neon.rbit.v2i64(<2 x i64>) readnone
|
|||
define <2 x i64> @rbit_2x64(<2 x i64> %A) {
|
||||
; CHECK-LABEL: rbit_2x64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: fmov x8, d0
|
||||
; CHECK-NEXT: rbit x8, x8
|
||||
; CHECK-NEXT: mov x9, v0.d[1]
|
||||
; CHECK-NEXT: fmov d0, x8
|
||||
; CHECK-NEXT: rbit x8, x9
|
||||
; CHECK-NEXT: mov v0.d[1], x8
|
||||
; CHECK-NEXT: rev64 v0.16b, v0.16b
|
||||
; CHECK-NEXT: rbit v0.16b, v0.16b
|
||||
; CHECK-NEXT: ret
|
||||
%tmp3 = call <2 x i64> @llvm.aarch64.neon.rbit.v2i64(<2 x i64> %A)
|
||||
ret <2 x i64> %tmp3
|
||||
|
|
Loading…
Reference in New Issue