forked from OSchip/llvm-project
[CostModel][AArch64] Increase cost of vector insert element and add missing cast costs
Summary: Increase the estimated costs for insert/extract element operations on AArch64. This is motivated by results from benchmarking interleaved accesses. Add missing costs for zext/sext/trunc instructions and some integer-to-floating-point conversions. These costs were previously calculated by scalarizing these operations and were affected by the cost increase of the insert/extract element operations. Reviewers: rengolin Subscribers: mcrosier, aemerson, rengolin, llvm-commits Differential Revision: http://reviews.llvm.org/D11939 llvm-svn: 245226
This commit is contained in:
parent
d5ac26937c
commit
b322aa6f53
|
@ -187,6 +187,28 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
|
|||
return BaseT::getCastInstrCost(Opcode, Dst, Src);
|
||||
|
||||
static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = {
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
|
||||
{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
|
||||
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
|
||||
|
||||
// The number of shll instructions for the extension.
|
||||
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
|
||||
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
|
||||
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
|
||||
|
||||
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
|
||||
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
|
||||
|
||||
// LowerVectorINT_TO_FP:
|
||||
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
|
||||
|
@ -209,6 +231,16 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
|
|||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
|
||||
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
|
||||
|
||||
// Complex: to v8f32
|
||||
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
|
||||
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
|
||||
|
||||
// Complex: to v16f32
|
||||
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
|
||||
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
|
||||
|
||||
// Complex: to v2f64
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
|
||||
|
@ -280,7 +312,7 @@ int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
|
|||
}
|
||||
|
||||
// All other insert/extracts cost this much.
|
||||
return 2;
|
||||
return 3;
|
||||
}
|
||||
|
||||
int AArch64TTIImpl::getArithmeticInstrCost(
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: opt -S -slp-vectorizer %s | FileCheck %s
|
||||
; RUN: opt -S -slp-vectorizer %s -slp-threshold=-10 | FileCheck %s
|
||||
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "aarch64--linux-gnu"
|
||||
|
||||
|
|
Loading…
Reference in New Issue