forked from OSchip/llvm-project
[RISCV][WebAssembly][TargetLowering] Allow expandCTLZ/expandCTTZ to rely on CTPOP expansion for vectors.
Our fallback expansion for CTLZ/CTTZ relies on CTPOP. If CTPOP isn't legal or custom for a vector type, we would scalarize the CTLZ/CTTZ. This is different from CTPOP itself, which would use a vector expansion. This patch teaches expandCTLZ/CTTZ to rely on the vector CTPOP expansion instead of scalarizing. To do this, I had to add additional checks to make sure the operations used by the CTPOP expansion are all supported. Some of the operations were already needed for the CTLZ/CTTZ expansion. This is a huge improvement for RISCV, which doesn't have a scalar ctlz or cttz in the base ISA. For WebAssembly, I've added Custom lowering to keep the scalarizing behavior. I've also extended the scalarizing to CTPOP. Differential Revision: https://reviews.llvm.org/D111919
This commit is contained in:
parent
60e19f6752
commit
fe1f0de003
|
@ -6980,6 +6980,17 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
// Only expand vector types if we have the appropriate vector bit operations.
|
||||
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
|
||||
assert(VT.isVector() && "Expected vector type");
|
||||
unsigned Len = VT.getScalarSizeInBits();
|
||||
return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
|
||||
TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
|
||||
TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
|
||||
(Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
|
||||
TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
|
||||
}
|
||||
|
||||
bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
|
||||
SelectionDAG &DAG) const {
|
||||
SDLoc dl(Node);
|
||||
|
@ -6994,11 +7005,7 @@ bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
|
|||
return false;
|
||||
|
||||
// Only expand vector types if we have the appropriate vector bit operations.
|
||||
if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) ||
|
||||
!isOperationLegalOrCustom(ISD::SUB, VT) ||
|
||||
!isOperationLegalOrCustom(ISD::SRL, VT) ||
|
||||
(Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) ||
|
||||
!isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
|
||||
if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
|
||||
return false;
|
||||
|
||||
// This is the "best" algorithm from
|
||||
|
@ -7068,8 +7075,10 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
|
|||
}
|
||||
|
||||
// Only expand vector types if we have the appropriate vector bit operations.
|
||||
// This includes the operations needed to expand CTPOP if it isn't supported.
|
||||
if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
|
||||
!isOperationLegalOrCustom(ISD::CTPOP, VT) ||
|
||||
(!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
|
||||
!canExpandVectorCTPOP(*this, VT)) ||
|
||||
!isOperationLegalOrCustom(ISD::SRL, VT) ||
|
||||
!isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
|
||||
return false;
|
||||
|
@ -7120,9 +7129,11 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
|
|||
}
|
||||
|
||||
// Only expand vector types if we have the appropriate vector bit operations.
|
||||
// This includes the operations needed to expand CTPOP if it isn't supported.
|
||||
if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
|
||||
(!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
|
||||
!isOperationLegalOrCustom(ISD::CTLZ, VT)) ||
|
||||
!isOperationLegalOrCustom(ISD::CTLZ, VT) &&
|
||||
!canExpandVectorCTPOP(*this, VT)) ||
|
||||
!isOperationLegalOrCustom(ISD::SUB, VT) ||
|
||||
!isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
|
||||
!isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
|
||||
|
|
|
@ -215,8 +215,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
|
|||
setOperationAction(ISD::SELECT_CC, T, Expand);
|
||||
|
||||
// Expand integer operations supported for scalars but not SIMD
|
||||
for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV,
|
||||
ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR})
|
||||
for (auto Op :
|
||||
{ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR})
|
||||
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
|
||||
setOperationAction(Op, T, Expand);
|
||||
|
||||
|
@ -225,8 +225,15 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
|
|||
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
|
||||
setOperationAction(Op, T, Legal);
|
||||
|
||||
// And we have popcnt for i8x16
|
||||
// And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
|
||||
setOperationAction(ISD::CTPOP, MVT::v16i8, Legal);
|
||||
setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
|
||||
setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
|
||||
|
||||
// Custom lower bit counting operations for other types to scalarize them.
|
||||
for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
|
||||
for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
|
||||
setOperationAction(Op, T, Custom);
|
||||
|
||||
// Expand float operations supported for scalars but not SIMD
|
||||
for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
|
||||
|
@ -1405,6 +1412,10 @@ SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
|
|||
return LowerLoad(Op, DAG);
|
||||
case ISD::STORE:
|
||||
return LowerStore(Op, DAG);
|
||||
case ISD::CTPOP:
|
||||
case ISD::CTLZ:
|
||||
case ISD::CTTZ:
|
||||
return DAG.UnrollVectorOp(Op.getNode());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -120,8 +120,7 @@ define <8 x i16> @cttz_v8i16_undef(<8 x i16> %x) {
|
|||
}
|
||||
|
||||
; CHECK-LABEL: ctpop_v8i16:
|
||||
; Note: expansion does not use i32.popcnt
|
||||
; CHECK: v128.and
|
||||
; CHECK: i32.popcnt
|
||||
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
|
||||
define <8 x i16> @ctpop_v8i16(<8 x i16> %x) {
|
||||
%v = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %x)
|
||||
|
@ -209,8 +208,7 @@ define <4 x i32> @cttz_v4i32_undef(<4 x i32> %x) {
|
|||
}
|
||||
|
||||
; CHECK-LABEL: ctpop_v4i32:
|
||||
; Note: expansion does not use i32.popcnt
|
||||
; CHECK: v128.and
|
||||
; CHECK: i32.popcnt
|
||||
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
|
||||
define <4 x i32> @ctpop_v4i32(<4 x i32> %x) {
|
||||
%v = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %x)
|
||||
|
@ -298,8 +296,7 @@ define <2 x i64> @cttz_v2i64_undef(<2 x i64> %x) {
|
|||
}
|
||||
|
||||
; CHECK-LABEL: ctpop_v2i64:
|
||||
; Note: expansion does not use i64.popcnt
|
||||
; CHECK: v128.and
|
||||
; CHECK: i64.popcnt
|
||||
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
|
||||
define <2 x i64> @ctpop_v2i64(<2 x i64> %x) {
|
||||
%v = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %x)
|
||||
|
|
Loading…
Reference in New Issue