[TTI][X86] getArithmeticInstrCost - move opcode canonicalization before all target-specific costs. NFCI.

The GLM/SLM special cases still get tested first, but after the MUL/DIV/REM pattern detection - this will be necessary when we make the SLM vXi32 MUL canonicalization generic to improve PMULLW/PMULHW/PMADDWD cost support etc.
This commit is contained in:
Simon Pilgrim 2021-08-30 12:24:59 +01:00
parent 7c25a32840
commit af2920ec6f
1 changed files with 44 additions and 48 deletions

View File

@ -206,6 +206,50 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
if ((ISD == ISD::SDIV || ISD == ISD::SREM || ISD == ISD::UDIV ||
ISD == ISD::UREM) &&
(Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
if (ISD == ISD::SDIV || ISD == ISD::SREM) {
// On X86, vector signed division by constants power-of-two are
// normally expanded to the sequence SRA + SRL + ADD + SRA.
// The OperandValue properties may not be the same as that of the previous
// operation; conservatively assume OP_None.
InstructionCost Cost =
2 * getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, Op1Info,
Op2Info, TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
Cost += getArithmeticInstrCost(Instruction::LShr, Ty, CostKind, Op1Info,
Op2Info, TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
Cost += getArithmeticInstrCost(Instruction::Add, Ty, CostKind, Op1Info,
Op2Info, TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
if (ISD == ISD::SREM) {
// For SREM: (X % C) is the equivalent of (X - (X/C)*C)
Cost += getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, Op1Info,
Op2Info);
Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind, Op1Info,
Op2Info);
}
return Cost;
}
// Vector unsigned division/remainder will be simplified to shifts/masks.
if (ISD == ISD::UDIV)
return getArithmeticInstrCost(Instruction::LShr, Ty, CostKind, Op1Info,
Op2Info, TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
else // UREM
return getArithmeticInstrCost(Instruction::And, Ty, CostKind, Op1Info,
Op2Info, TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
}
static const CostTblEntry GLMCostTable[] = {
{ ISD::FDIV, MVT::f32, 18 }, // divss
{ ISD::FDIV, MVT::v4f32, 35 }, // divps
@ -268,54 +312,6 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
}
}
if ((ISD == ISD::SDIV || ISD == ISD::SREM || ISD == ISD::UDIV ||
ISD == ISD::UREM) &&
(Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
if (ISD == ISD::SDIV || ISD == ISD::SREM) {
// On X86, vector signed division by constants power-of-two are
// normally expanded to the sequence SRA + SRL + ADD + SRA.
// The OperandValue properties may not be the same as that of the previous
// operation; conservatively assume OP_None.
InstructionCost Cost =
2 * getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, Op1Info,
Op2Info, TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
Cost += getArithmeticInstrCost(Instruction::LShr, Ty, CostKind, Op1Info,
Op2Info,
TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
Cost += getArithmeticInstrCost(Instruction::Add, Ty, CostKind, Op1Info,
Op2Info,
TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
if (ISD == ISD::SREM) {
// For SREM: (X % C) is the equivalent of (X - (X/C)*C)
Cost += getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, Op1Info,
Op2Info);
Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind, Op1Info,
Op2Info);
}
return Cost;
}
// Vector unsigned division/remainder will be simplified to shifts/masks.
if (ISD == ISD::UDIV)
return getArithmeticInstrCost(Instruction::LShr, Ty, CostKind,
Op1Info, Op2Info,
TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
else // UREM
return getArithmeticInstrCost(Instruction::And, Ty, CostKind,
Op1Info, Op2Info,
TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
}
static const CostTblEntry AVX512BWUniformConstCostTable[] = {
{ ISD::SHL, MVT::v64i8, 2 }, // psllw + pand.
{ ISD::SRL, MVT::v64i8, 2 }, // psrlw + pand.