[CostModel][AArch64] Increase cost of vector insert element and add missing cast costs

Summary:
Increase the estimated costs for insert/extract element operations on
AArch64. This is motivated by results from benchmarking interleaved
accesses.

Add missing costs for zext/sext/trunc instructions and some integer to
floating point conversions. These costs were previously calculated
by scalarizing these operation and were affected by the cost increase of
the insert/extract element operations.

Reviewers: rengolin

Subscribers: mcrosier, aemerson, rengolin, llvm-commits

Differential Revision: http://reviews.llvm.org/D11939

llvm-svn: 245226
This commit is contained in:
Silviu Baranga 2015-08-17 16:05:09 +00:00
parent d5ac26937c
commit b322aa6f53
2 changed files with 34 additions and 2 deletions

View File

@ -187,6 +187,28 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
return BaseT::getCastInstrCost(Opcode, Dst, Src);
static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = {
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
{ ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
{ ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
// The number of shll instructions for the extension.
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
// LowerVectorINT_TO_FP:
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
@ -209,6 +231,16 @@ int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
// Complex: to v8f32
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
// Complex: to v16f32
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
// Complex: to v2f64
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
@ -280,7 +312,7 @@ int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
}
// All other insert/extracts cost this much.
return 2;
return 3;
}
int AArch64TTIImpl::getArithmeticInstrCost(

View File

@ -1,4 +1,4 @@
; RUN: opt -S -slp-vectorizer %s | FileCheck %s
; RUN: opt -S -slp-vectorizer %s -slp-threshold=-10 | FileCheck %s
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"