forked from OSchip/llvm-project
[CostModel] Add generic expansion funnel shift cost support
Add support for the expansion of funnel shifts/rotates to getIntrinsicInstrCost. This also required us to move the X86 fshl/fshr costs to the same place as the rotates to avoid expansion and get correct scalarization vs vectorization costs. llvm-svn: 346854
This commit is contained in:
parent
7cdb22b1ef
commit
cdb170794b
|
@ -1071,6 +1071,46 @@ public:
|
|||
case Intrinsic::experimental_vector_reduce_umax:
|
||||
case Intrinsic::experimental_vector_reduce_umin:
|
||||
return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
|
||||
case Intrinsic::fshl:
|
||||
case Intrinsic::fshr: {
|
||||
Value *X = Args[0];
|
||||
Value *Y = Args[1];
|
||||
Value *Z = Args[2];
|
||||
TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
|
||||
TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
|
||||
TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
|
||||
TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
|
||||
TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
|
||||
OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
|
||||
: TTI::OP_None;
|
||||
// fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
|
||||
// fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
|
||||
auto *ConcreteTTI = static_cast<T *>(this);
|
||||
unsigned Cost = 0;
|
||||
Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy);
|
||||
Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy);
|
||||
Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
|
||||
OpKindX, OpKindZ, OpPropsX);
|
||||
Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
|
||||
OpKindY, OpKindZ, OpPropsY);
|
||||
// Non-constant shift amounts require a modulo.
|
||||
if (OpKindZ != TTI::OK_UniformConstantValue &&
|
||||
OpKindZ != TTI::OK_NonUniformConstantValue)
|
||||
Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
|
||||
OpKindZ, OpKindBW, OpPropsZ,
|
||||
OpPropsBW);
|
||||
// For non-rotates (X != Y) we must add shift-by-zero handling costs.
|
||||
if (X != Y) {
|
||||
Type *CondTy = Type::getInt1Ty(RetTy->getContext());
|
||||
if (RetVF > 1)
|
||||
CondTy = VectorType::get(CondTy, RetVF);
|
||||
Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
|
||||
CondTy, nullptr);
|
||||
Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
|
||||
CondTy, nullptr);
|
||||
}
|
||||
return Cost;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1857,16 +1857,12 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
|
|||
{ ISD::FSQRT, MVT::v4f32, 56 }, // Pentium III from http://www.agner.org/
|
||||
};
|
||||
static const CostTblEntry X64CostTbl[] = { // 64-bit targets
|
||||
{ ISD::BITREVERSE, MVT::i64, 14 },
|
||||
{ X86ISD::SHLD, MVT::i64, 4 }
|
||||
{ ISD::BITREVERSE, MVT::i64, 14 }
|
||||
};
|
||||
static const CostTblEntry X86CostTbl[] = { // 32 or 64-bit targets
|
||||
{ ISD::BITREVERSE, MVT::i32, 14 },
|
||||
{ ISD::BITREVERSE, MVT::i16, 14 },
|
||||
{ ISD::BITREVERSE, MVT::i8, 11 },
|
||||
{ X86ISD::SHLD, MVT::i32, 4 },
|
||||
{ X86ISD::SHLD, MVT::i16, 4 },
|
||||
{ X86ISD::SHLD, MVT::i8, 4 }
|
||||
{ ISD::BITREVERSE, MVT::i8, 11 }
|
||||
};
|
||||
|
||||
unsigned ISD = ISD::DELETED_NODE;
|
||||
|
@ -1888,11 +1884,6 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
|
|||
case Intrinsic::cttz:
|
||||
ISD = ISD::CTTZ;
|
||||
break;
|
||||
case Intrinsic::fshl:
|
||||
case Intrinsic::fshr:
|
||||
// SHRD has same costs so don't duplicate.
|
||||
ISD = X86ISD::SHLD;
|
||||
break;
|
||||
case Intrinsic::sqrt:
|
||||
ISD = ISD::FSQRT;
|
||||
break;
|
||||
|
@ -1999,7 +1990,8 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
|
|||
};
|
||||
static const CostTblEntry X64CostTbl[] = { // 64-bit targets
|
||||
{ ISD::ROTL, MVT::i64, 1 },
|
||||
{ ISD::ROTR, MVT::i64, 1 }
|
||||
{ ISD::ROTR, MVT::i64, 1 },
|
||||
{ X86ISD::SHLD, MVT::i64, 4 }
|
||||
};
|
||||
static const CostTblEntry X86CostTbl[] = { // 32 or 64-bit targets
|
||||
{ ISD::ROTL, MVT::i32, 1 },
|
||||
|
@ -2007,7 +1999,10 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
|
|||
{ ISD::ROTL, MVT::i8, 1 },
|
||||
{ ISD::ROTR, MVT::i32, 1 },
|
||||
{ ISD::ROTR, MVT::i16, 1 },
|
||||
{ ISD::ROTR, MVT::i8, 1 }
|
||||
{ ISD::ROTR, MVT::i8, 1 },
|
||||
{ X86ISD::SHLD, MVT::i32, 4 },
|
||||
{ X86ISD::SHLD, MVT::i16, 4 },
|
||||
{ X86ISD::SHLD, MVT::i8, 4 }
|
||||
};
|
||||
|
||||
unsigned ISD = ISD::DELETED_NODE;
|
||||
|
@ -2015,10 +2010,13 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
|
|||
default:
|
||||
break;
|
||||
case Intrinsic::fshl:
|
||||
ISD = X86ISD::SHLD;
|
||||
if (Args[0] == Args[1])
|
||||
ISD = ISD::ROTL;
|
||||
break;
|
||||
case Intrinsic::fshr:
|
||||
// SHRD has same costs so don't duplicate.
|
||||
ISD = X86ISD::SHLD;
|
||||
if (Args[0] == Args[1])
|
||||
ISD = ISD::ROTR;
|
||||
break;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue