[CostModel][X86] Add ICMP Predicate specific costs

This is a first step towards PR40376: the patch lets getCmpSelInstrCost use the CmpInst predicate of the (optional) Instruction argument to identify which kind of integer comparison is being performed and adjust the cost accordingly.

Differential Revision: https://reviews.llvm.org/D57013

llvm-svn: 351810
This commit is contained in:
Simon Pilgrim 2019-01-22 12:29:38 +00:00
parent 72ef4510b6
commit ee900efb30
2 changed files with 1085 additions and 1044 deletions

View File

@ -1650,6 +1650,47 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
int ISD = TLI->InstructionOpcodeToISD(Opcode); int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode"); assert(ISD && "Invalid opcode");
unsigned ExtraCost = 0;
if (I && (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)) {
// Some vector comparison predicates cost extra instructions.
if (MTy.isVector() &&
!((ST->hasXOP() && (!ST->hasAVX2() || MTy.is128BitVector())) ||
(ST->hasAVX512() && 32 <= MTy.getScalarSizeInBits()) ||
ST->hasBWI())) {
switch (cast<CmpInst>(I)->getPredicate()) {
case CmpInst::Predicate::ICMP_NE:
// xor(cmpeq(x,y),-1)
ExtraCost = 1;
break;
case CmpInst::Predicate::ICMP_SGE:
case CmpInst::Predicate::ICMP_SLE:
// xor(cmpgt(x,y),-1)
ExtraCost = 1;
break;
case CmpInst::Predicate::ICMP_ULT:
case CmpInst::Predicate::ICMP_UGT:
// cmpgt(xor(x,signbit),xor(y,signbit))
// xor(cmpeq(pmaxu(x,y),x),-1)
ExtraCost = 2;
break;
case CmpInst::Predicate::ICMP_ULE:
case CmpInst::Predicate::ICMP_UGE:
if ((ST->hasSSE41() && MTy.getScalarSizeInBits() == 32) ||
(ST->hasSSE2() && MTy.getScalarSizeInBits() < 32)) {
// cmpeq(psubus(x,y),0)
// cmpeq(pminu(x,y),x)
ExtraCost = 1;
} else {
// xor(cmpgt(xor(x,signbit),xor(y,signbit)),-1)
ExtraCost = 3;
}
break;
default:
break;
}
}
}
static const CostTblEntry AVX512BWCostTbl[] = { static const CostTblEntry AVX512BWCostTbl[] = {
{ ISD::SETCC, MVT::v32i16, 1 }, { ISD::SETCC, MVT::v32i16, 1 },
{ ISD::SETCC, MVT::v64i8, 1 }, { ISD::SETCC, MVT::v64i8, 1 },
@ -1738,35 +1779,35 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
if (ST->hasBWI()) if (ST->hasBWI())
if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy)) if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
return LT.first * Entry->Cost; return LT.first * (ExtraCost + Entry->Cost);
if (ST->hasAVX512()) if (ST->hasAVX512())
if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy)) if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
return LT.first * Entry->Cost; return LT.first * (ExtraCost + Entry->Cost);
if (ST->hasAVX2()) if (ST->hasAVX2())
if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy)) if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy))
return LT.first * Entry->Cost; return LT.first * (ExtraCost + Entry->Cost);
if (ST->hasAVX()) if (ST->hasAVX())
if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy)) if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
return LT.first * Entry->Cost; return LT.first * (ExtraCost + Entry->Cost);
if (ST->hasSSE42()) if (ST->hasSSE42())
if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy)) if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy))
return LT.first * Entry->Cost; return LT.first * (ExtraCost + Entry->Cost);
if (ST->hasSSE41()) if (ST->hasSSE41())
if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy)) if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy))
return LT.first * Entry->Cost; return LT.first * (ExtraCost + Entry->Cost);
if (ST->hasSSE2()) if (ST->hasSSE2())
if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy)) if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
return LT.first * Entry->Cost; return LT.first * (ExtraCost + Entry->Cost);
if (ST->hasSSE1()) if (ST->hasSSE1())
if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy)) if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
return LT.first * Entry->Cost; return LT.first * (ExtraCost + Entry->Cost);
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
} }

File diff suppressed because it is too large Load Diff