forked from OSchip/llvm-project
[X86][SSE] Improve cost model for i64 vector comparisons on pre-SSE42 targets
As discussed on PR24888, until SSE42 we don't have access to PCMPGTQ for v2i64 comparisons, but the cost models don't reflect this, resulting in over-optimistic vectorization. This patch adds SSE2 'base level' costs that match what a typical target is capable of and only reduces the v2i64 costs at SSE42. Technically SSE41 provides a PCMPEQQ v2i64 equality test, but as getCmpSelInstrCost doesn't give us a way to discriminate between comparison test types, we can't easily make use of this; otherwise we could split the cost of integer equality and greater-than tests to give better costings of each. Differential Revision: http://reviews.llvm.org/D20057 llvm-svn: 268972
This commit is contained in:
parent
e402e56492
commit
eec3a95f95
|
@ -857,13 +857,17 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
|
|||
int ISD = TLI->InstructionOpcodeToISD(Opcode);
|
||||
assert(ISD && "Invalid opcode");
|
||||
|
||||
static const CostTblEntry SSE2CostTbl[] = {
|
||||
{ ISD::SETCC, MVT::v2i64, 8 },
|
||||
{ ISD::SETCC, MVT::v4i32, 1 },
|
||||
{ ISD::SETCC, MVT::v8i16, 1 },
|
||||
{ ISD::SETCC, MVT::v16i8, 1 },
|
||||
};
|
||||
|
||||
static const CostTblEntry SSE42CostTbl[] = {
|
||||
{ ISD::SETCC, MVT::v2f64, 1 },
|
||||
{ ISD::SETCC, MVT::v4f32, 1 },
|
||||
{ ISD::SETCC, MVT::v2i64, 1 },
|
||||
{ ISD::SETCC, MVT::v4i32, 1 },
|
||||
{ ISD::SETCC, MVT::v8i16, 1 },
|
||||
{ ISD::SETCC, MVT::v16i8, 1 },
|
||||
};
|
||||
|
||||
static const CostTblEntry AVX1CostTbl[] = {
|
||||
|
@ -906,6 +910,10 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
|
|||
if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
if (ST->hasSSE2())
|
||||
if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
|
||||
}
|
||||
|
||||
|
|
|
@ -87,18 +87,18 @@ define i32 @cmp(i32 %arg) {
|
|||
;AVX: cost of 1 {{.*}} icmp
|
||||
%H = icmp eq <4 x i32> undef, undef
|
||||
|
||||
;SSE2: cost of 1 {{.*}} icmp
|
||||
;SSE3: cost of 1 {{.*}} icmp
|
||||
;SSSE3: cost of 1 {{.*}} icmp
|
||||
;SSE41: cost of 1 {{.*}} icmp
|
||||
;SSE2: cost of 8 {{.*}} icmp
|
||||
;SSE3: cost of 8 {{.*}} icmp
|
||||
;SSSE3: cost of 8 {{.*}} icmp
|
||||
;SSE41: cost of 8 {{.*}} icmp
|
||||
;SSE42: cost of 1 {{.*}} icmp
|
||||
;AVX: cost of 1 {{.*}} icmp
|
||||
%I = icmp eq <2 x i64> undef, undef
|
||||
|
||||
;SSE2: cost of 2 {{.*}} icmp
|
||||
;SSE3: cost of 2 {{.*}} icmp
|
||||
;SSSE3: cost of 2 {{.*}} icmp
|
||||
;SSE41: cost of 2 {{.*}} icmp
|
||||
;SSE2: cost of 16 {{.*}} icmp
|
||||
;SSE3: cost of 16 {{.*}} icmp
|
||||
;SSSE3: cost of 16 {{.*}} icmp
|
||||
;SSE41: cost of 16 {{.*}} icmp
|
||||
;SSE42: cost of 2 {{.*}} icmp
|
||||
;AVX1: cost of 4 {{.*}} icmp
|
||||
;AVX2: cost of 1 {{.*}} icmp
|
||||
|
|
Loading…
Reference in New Issue