forked from OSchip/llvm-project
[SystemZ::TTI] Accurate costs for i1->double vector conversions
This factors out a new method getBoolVecToIntConversionCost() containing the code for vector sext/zext of i1, in order to reuse it for i1 to double vector conversions. Review: Ulrich Weigand https://reviews.llvm.org/D53923 llvm-svn: 345817
This commit is contained in:
parent
1bb9aea56b
commit
f15a53bc81
|
@ -635,6 +635,25 @@ static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) {
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
// Get the cost of converting a boolean (i1) vector to a vector with the same
|
||||
// width and element size as Dst, plus the cost of zero extending if needed.
// Opcode: the cast opcode; ZExt and UIToFP add the zero-extension cost.
// Dst:    destination type, which must be a vector type (asserted below).
// I:      optional cast instruction; when non-null it is passed to
//         getCmpOpsType() to look up the type of the compared operands.
|
||||
unsigned SystemZTTIImpl::
|
||||
getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
|
||||
const Instruction *I) {
|
||||
assert (Dst->isVectorTy());
|
||||
unsigned VF = Dst->getVectorNumElements();
|
||||
unsigned Cost = 0;
|
||||
// If we know the widths of the compared operands, get any cost of
|
||||
// converting the bitmask to match Dst. Otherwise assume same widths (Cost stays 0).
|
||||
Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I, VF) : nullptr);
|
||||
if (CmpOpTy != nullptr)
|
||||
Cost = getVectorBitmaskConversionCost(CmpOpTy, Dst);
|
||||
if (Opcode == Instruction::ZExt || Opcode == Instruction::UIToFP)
|
||||
// One 'vn' per dst vector with an immediate mask.
|
||||
Cost += getNumVectorRegs(Dst);
|
||||
return Cost;
|
||||
}
|
||||
|
||||
int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
|
||||
const Instruction *I) {
|
||||
unsigned DstScalarBits = Dst->getScalarSizeInBits();
|
||||
|
@ -666,19 +685,8 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
|
|||
|
||||
return (NumUnpacks * NumDstVectors) + NumSrcVectorOps;
|
||||
}
|
||||
else if (SrcScalarBits == 1) {
|
||||
// This should be extension of a compare i1 result.
|
||||
// If we know what the widths of the compared operands, get the
|
||||
// cost of converting it to Dst. Otherwise assume same widths.
|
||||
unsigned Cost = 0;
|
||||
Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I, VF) : nullptr);
|
||||
if (CmpOpTy != nullptr)
|
||||
Cost = getVectorBitmaskConversionCost(CmpOpTy, Dst);
|
||||
if (Opcode == Instruction::ZExt)
|
||||
// One 'vn' per dst vector with an immediate mask.
|
||||
Cost += NumDstVectors;
|
||||
return Cost;
|
||||
}
|
||||
else if (SrcScalarBits == 1)
|
||||
return getBoolVecToIntConversionCost(Opcode, Dst, I);
|
||||
}
|
||||
|
||||
if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP ||
|
||||
|
@ -687,8 +695,13 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
|
|||
// (seems to miss on differentiating on scalar/vector types).
|
||||
|
||||
// Only 64-bit vector conversions are natively supported.
|
||||
if (SrcScalarBits == 64 && DstScalarBits == 64)
|
||||
return NumDstVectors;
|
||||
if (DstScalarBits == 64) {
|
||||
if (SrcScalarBits == 64)
|
||||
return NumDstVectors;
|
||||
|
||||
if (SrcScalarBits == 1)
|
||||
return getBoolVecToIntConversionCost(Opcode, Dst, I) + NumDstVectors;
|
||||
}
|
||||
|
||||
// Return the cost of multiple scalar invocation plus the cost of
|
||||
// inserting and extracting the values. Base implementation does not
|
||||
|
|
|
@ -80,6 +80,8 @@ public:
|
|||
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
|
||||
unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
|
||||
unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy);
|
||||
unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
|
||||
const Instruction *I);
|
||||
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
|
||||
const Instruction *I = nullptr);
|
||||
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
|
||||
;
|
||||
; Costs for conversion of i1 vectors to vectors of double.
|
||||
|
||||
define <2 x double> @fun0(<2 x i8> %val1, <2 x i8> %val2) {
|
||||
%cmp = icmp eq <2 x i8> %val1, %val2
|
||||
%v = uitofp <2 x i1> %cmp to <2 x double>
|
||||
ret <2 x double> %v
|
||||
|
||||
; CHECK: fun0
|
||||
; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i8> %val1, %val2
|
||||
; CHECK: cost of 5 for instruction: %v = uitofp <2 x i1> %cmp to <2 x double>
|
||||
}
|
||||
|
||||
define <2 x double> @fun1(<2 x i8> %val1, <2 x i8> %val2) {
|
||||
%cmp = icmp eq <2 x i8> %val1, %val2
|
||||
%v = sitofp <2 x i1> %cmp to <2 x double>
|
||||
ret <2 x double> %v
|
||||
|
||||
; CHECK: fun1
|
||||
; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i8> %val1, %val2
|
||||
; CHECK: cost of 4 for instruction: %v = sitofp <2 x i1> %cmp to <2 x double>
|
||||
}
|
||||
|
||||
define <2 x double> @fun2(<2 x i64> %val1, <2 x i64> %val2) {
|
||||
%cmp = icmp eq <2 x i64> %val1, %val2
|
||||
%v = uitofp <2 x i1> %cmp to <2 x double>
|
||||
ret <2 x double> %v
|
||||
|
||||
; CHECK: fun2
|
||||
; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i64> %val1, %val2
|
||||
; CHECK: cost of 2 for instruction: %v = uitofp <2 x i1> %cmp to <2 x double>
|
||||
}
|
||||
|
||||
define <2 x double> @fun3(<2 x i64> %val1, <2 x i64> %val2) {
|
||||
%cmp = icmp eq <2 x i64> %val1, %val2
|
||||
%v = sitofp <2 x i1> %cmp to <2 x double>
|
||||
ret <2 x double> %v
|
||||
|
||||
; CHECK: fun3
|
||||
; CHECK: cost of 1 for instruction: %cmp = icmp eq <2 x i64> %val1, %val2
|
||||
; CHECK: cost of 1 for instruction: %v = sitofp <2 x i1> %cmp to <2 x double>
|
||||
}
|
Loading…
Reference in New Issue