forked from OSchip/llvm-project
[X86] Add costs for SSE zext/sext to v4i64 to TTI
The costs are somewhat hand-wavy, but should be much closer to the truth than what we get from BasicTTI.
Differential Revision: http://reviews.llvm.org/D21156
llvm-svn: 272406
This commit is contained in:
parent
df5843a532
commit
9a0542a792
|
@ -412,6 +412,9 @@ public:
|
|||
// If we are converting vectors and the operation is illegal, or
|
||||
// if the vectors are legalized to different types, estimate the
|
||||
// scalarization costs.
|
||||
// TODO: This is probably a big overestimate. For splits, we should have
|
||||
// something like getTypeLegalizationCost() + 2 * getCastInstrCost().
|
||||
// The same applies to getCmpSelInstrCost() and getArithmeticInstrCost()
|
||||
unsigned Num = Dst->getVectorNumElements();
|
||||
unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
|
||||
Opcode, Dst->getScalarType(), Src->getScalarType());
|
||||
|
|
|
@ -709,6 +709,13 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
|
|||
};
|
||||
|
||||
static const TypeConversionCostTblEntry SSE41ConversionTbl[] = {
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 2 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 2 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 2 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 2 },
|
||||
|
||||
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 4 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 4 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
|
||||
|
@ -759,6 +766,13 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
|
|||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
|
||||
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 3 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 5 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 10 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 8 },
|
||||
|
||||
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 6 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 8 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 3 },
|
||||
|
|
|
@ -1,6 +1,85 @@
|
|||
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+sse2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
|
||||
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+sse4.1 -cost-model -analyze < %s | FileCheck --check-prefix=SSE41 %s
|
||||
|
||||
; Cost-model test for zero-extending <4 x i8> to <4 x i64>.
; Loads a <4 x i8> from %a, zero-extends it and stores the <4 x i64> result.
; The check lines expect a reported zext cost of 4 under the SSE2 prefix and
; 2 under the SSE41 prefix.
; NOTE(review): the store to undef presumably only gives the extend a use - confirm.
define void @zext_v4i8_to_v4i64(<4 x i8>* %a) {
|
||||
; SSE2: zext_v4i8_to_v4i64
|
||||
; SSE2: cost of 4 {{.*}} zext
|
||||
;
|
||||
; SSE41: zext_v4i8_to_v4i64
|
||||
; SSE41: cost of 2 {{.*}} zext
|
||||
;
|
||||
%1 = load <4 x i8>, <4 x i8>* %a
|
||||
%2 = zext <4 x i8> %1 to <4 x i64>
|
||||
store <4 x i64> %2, <4 x i64>* undef, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Cost-model test for sign-extending <4 x i8> to <4 x i64>.
; Loads a <4 x i8> from %a, sign-extends it and stores the <4 x i64> result.
; The check lines expect a reported sext cost of 8 under the SSE2 prefix and
; 2 under the SSE41 prefix.
; NOTE(review): the store to undef presumably only gives the extend a use - confirm.
define void @sext_v4i8_to_v4i64(<4 x i8>* %a) {
|
||||
; SSE2: sext_v4i8_to_v4i64
|
||||
; SSE2: cost of 8 {{.*}} sext
|
||||
;
|
||||
; SSE41: sext_v4i8_to_v4i64
|
||||
; SSE41: cost of 2 {{.*}} sext
|
||||
;
|
||||
%1 = load <4 x i8>, <4 x i8>* %a
|
||||
%2 = sext <4 x i8> %1 to <4 x i64>
|
||||
store <4 x i64> %2, <4 x i64>* undef, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Cost-model test for zero-extending <4 x i16> to <4 x i64>.
; Loads a <4 x i16> from %a, zero-extends it and stores the <4 x i64> result.
; The check lines expect a reported zext cost of 3 under the SSE2 prefix and
; 2 under the SSE41 prefix.
; NOTE(review): the store to undef presumably only gives the extend a use - confirm.
define void @zext_v4i16_to_v4i64(<4 x i16>* %a) {
|
||||
; SSE2: zext_v4i16_to_v4i64
|
||||
; SSE2: cost of 3 {{.*}} zext
|
||||
;
|
||||
; SSE41: zext_v4i16_to_v4i64
|
||||
; SSE41: cost of 2 {{.*}} zext
|
||||
;
|
||||
%1 = load <4 x i16>, <4 x i16>* %a
|
||||
%2 = zext <4 x i16> %1 to <4 x i64>
|
||||
store <4 x i64> %2, <4 x i64>* undef, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Cost-model test for sign-extending <4 x i16> to <4 x i64>.
; Loads a <4 x i16> from %a, sign-extends it and stores the <4 x i64> result.
; The check lines expect a reported sext cost of 10 under the SSE2 prefix and
; 2 under the SSE41 prefix.
; NOTE(review): the store to undef presumably only gives the extend a use - confirm.
define void @sext_v4i16_to_v4i64(<4 x i16>* %a) {
|
||||
; SSE2: sext_v4i16_to_v4i64
|
||||
; SSE2: cost of 10 {{.*}} sext
|
||||
;
|
||||
; SSE41: sext_v4i16_to_v4i64
|
||||
; SSE41: cost of 2 {{.*}} sext
|
||||
;
|
||||
%1 = load <4 x i16>, <4 x i16>* %a
|
||||
%2 = sext <4 x i16> %1 to <4 x i64>
|
||||
store <4 x i64> %2, <4 x i64>* undef, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
; Cost-model test for zero-extending <4 x i32> to <4 x i64>.
; Loads a <4 x i32> from %a, zero-extends it and stores the <4 x i64> result.
; The check lines expect a reported zext cost of 3 under the SSE2 prefix and
; 2 under the SSE41 prefix.
; NOTE(review): the store to undef presumably only gives the extend a use - confirm.
define void @zext_v4i32_to_v4i64(<4 x i32>* %a) {
|
||||
; SSE2: zext_v4i32_to_v4i64
|
||||
; SSE2: cost of 3 {{.*}} zext
|
||||
;
|
||||
; SSE41: zext_v4i32_to_v4i64
|
||||
; SSE41: cost of 2 {{.*}} zext
|
||||
;
|
||||
%1 = load <4 x i32>, <4 x i32>* %a
|
||||
%2 = zext <4 x i32> %1 to <4 x i64>
|
||||
store <4 x i64> %2, <4 x i64>* undef, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Cost-model test for sign-extending <4 x i32> to <4 x i64>.
; Loads a <4 x i32> from %a, sign-extends it and stores the <4 x i64> result.
; The check lines expect a reported sext cost of 5 under the SSE2 prefix and
; 2 under the SSE41 prefix.
; NOTE(review): the store to undef presumably only gives the extend a use - confirm.
define void @sext_v4i32_to_v4i64(<4 x i32>* %a) {
|
||||
; SSE2: sext_v4i32_to_v4i64
|
||||
; SSE2: cost of 5 {{.*}} sext
|
||||
;
|
||||
; SSE41: sext_v4i32_to_v4i64
|
||||
; SSE41: cost of 2 {{.*}} sext
|
||||
;
|
||||
%1 = load <4 x i32>, <4 x i32>* %a
|
||||
%2 = sext <4 x i32> %1 to <4 x i64>
|
||||
store <4 x i64> %2, <4 x i64>* undef, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @zext_v16i16_to_v16i32(<16 x i16>* %a) {
|
||||
; SSE2: zext_v16i16_to_v16i32
|
||||
; SSE2: cost of 6 {{.*}} zext
|
||||
|
|
Loading…
Reference in New Issue