[CostModel] Add generic expansion funnel shift cost support

Add support for the expansion of funnel shifts/rotates to getIntrinsicInstrCost.

This also required moving the X86 fshl/fshr costs to the same place as the rotate costs, so that they avoid the generic expansion and get correct scalarization vs. vectorization costs.

llvm-svn: 346854
This commit is contained in:
Simon Pilgrim 2018-11-14 12:24:50 +00:00
parent 7cdb22b1ef
commit cdb170794b
4 changed files with 2981 additions and 1287 deletions

View File

@ -1071,6 +1071,46 @@ public:
case Intrinsic::experimental_vector_reduce_umax:
case Intrinsic::experimental_vector_reduce_umin:
return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
// Generic cost of a funnel shift (fshl/fshr): the sum of the costs of the
// individual instructions of the expansion shown in the formulas below.
case Intrinsic::fshl:
case Intrinsic::fshr: {
// Per the expansion formulas below, X and Y are the two values being shifted
// and Z is the shift amount.
Value *X = Args[0];
Value *Y = Args[1];
Value *Z = Args[2];
TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
// BW (the scalar bit width of the result type) is modelled as a uniform
// constant operand, flagged as a power of two when it is one.
TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
: TTI::OP_None;
// fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
// fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
// Cast to the concrete type T so the cost queries below go to its
// implementations.
auto *ConcreteTTI = static_cast<T *>(this);
unsigned Cost = 0;
// Always-present part of the expansion: one OR, one SUB, one SHL, one LSHR.
Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy);
Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy);
Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
OpKindX, OpKindZ, OpPropsX);
Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
OpKindY, OpKindZ, OpPropsY);
// Non-constant shift amounts require a modulo; the UREM is only costed when
// Z is neither a uniform nor a non-uniform constant.
if (OpKindZ != TTI::OK_UniformConstantValue &&
OpKindZ != TTI::OK_NonUniformConstantValue)
Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
OpKindZ, OpKindBW, OpPropsZ,
OpPropsBW);
// For non-rotates (X != Y) we must add shift-by-zero handling costs:
// one compare plus one select.
if (X != Y) {
// Condition type is i1, widened to a vector of RetVF x i1 when RetVF > 1.
// NOTE(review): RetVF is defined outside this hunk - presumably the vector
// element count of RetTy; confirm against the enclosing function.
Type *CondTy = Type::getInt1Ty(RetTy->getContext());
if (RetVF > 1)
CondTy = VectorType::get(CondTy, RetVF);
Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
CondTy, nullptr);
Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
CondTy, nullptr);
}
return Cost;
}
}
}

View File

@ -1857,16 +1857,12 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::FSQRT, MVT::v4f32, 56 }, // Pentium III from http://www.agner.org/
};
static const CostTblEntry X64CostTbl[] = { // 64-bit targets
{ ISD::BITREVERSE, MVT::i64, 14 },
{ X86ISD::SHLD, MVT::i64, 4 }
{ ISD::BITREVERSE, MVT::i64, 14 }
};
static const CostTblEntry X86CostTbl[] = { // 32 or 64-bit targets
{ ISD::BITREVERSE, MVT::i32, 14 },
{ ISD::BITREVERSE, MVT::i16, 14 },
{ ISD::BITREVERSE, MVT::i8, 11 },
{ X86ISD::SHLD, MVT::i32, 4 },
{ X86ISD::SHLD, MVT::i16, 4 },
{ X86ISD::SHLD, MVT::i8, 4 }
{ ISD::BITREVERSE, MVT::i8, 11 }
};
unsigned ISD = ISD::DELETED_NODE;
@ -1888,11 +1884,6 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
case Intrinsic::cttz:
ISD = ISD::CTTZ;
break;
case Intrinsic::fshl:
case Intrinsic::fshr:
// SHRD has same costs so don't duplicate.
ISD = X86ISD::SHLD;
break;
case Intrinsic::sqrt:
ISD = ISD::FSQRT;
break;
@ -1999,7 +1990,8 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
};
static const CostTblEntry X64CostTbl[] = { // 64-bit targets
{ ISD::ROTL, MVT::i64, 1 },
{ ISD::ROTR, MVT::i64, 1 }
{ ISD::ROTR, MVT::i64, 1 },
{ X86ISD::SHLD, MVT::i64, 4 }
};
static const CostTblEntry X86CostTbl[] = { // 32 or 64-bit targets
{ ISD::ROTL, MVT::i32, 1 },
@ -2007,7 +1999,10 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::ROTL, MVT::i8, 1 },
{ ISD::ROTR, MVT::i32, 1 },
{ ISD::ROTR, MVT::i16, 1 },
{ ISD::ROTR, MVT::i8, 1 }
{ ISD::ROTR, MVT::i8, 1 },
{ X86ISD::SHLD, MVT::i32, 4 },
{ X86ISD::SHLD, MVT::i16, 4 },
{ X86ISD::SHLD, MVT::i8, 4 }
};
unsigned ISD = ISD::DELETED_NODE;
@ -2015,10 +2010,13 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
default:
break;
case Intrinsic::fshl:
ISD = X86ISD::SHLD;
if (Args[0] == Args[1])
ISD = ISD::ROTL;
break;
case Intrinsic::fshr:
// SHRD has same costs so don't duplicate.
ISD = X86ISD::SHLD;
if (Args[0] == Args[1])
ISD = ISD::ROTR;
break;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff