2015-03-31 20:52:27 +08:00
|
|
|
//===-- SystemZTargetTransformInfo.cpp - SystemZ-specific TTI -------------===//
|
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This file implements a TargetTransformInfo analysis pass specific to the
|
|
|
|
// SystemZ target machine. It uses the target's detailed information to provide
|
|
|
|
// more precise answers to certain TTI queries, while letting the target
|
|
|
|
// independent and default TTI implementations handle the rest.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "SystemZTargetTransformInfo.h"
|
|
|
|
#include "llvm/Analysis/TargetTransformInfo.h"
|
|
|
|
#include "llvm/CodeGen/BasicTTIImpl.h"
|
|
|
|
#include "llvm/IR/IntrinsicInst.h"
|
|
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
#include "llvm/Target/CostTable.h"
|
|
|
|
#include "llvm/Target/TargetLowering.h"
|
|
|
|
using namespace llvm;
|
|
|
|
|
|
|
|
#define DEBUG_TYPE "systemztti"
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// SystemZ cost model.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2015-08-06 02:08:10 +08:00
|
|
|
int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
|
2015-03-31 20:52:27 +08:00
|
|
|
assert(Ty->isIntegerTy());
|
|
|
|
|
|
|
|
unsigned BitSize = Ty->getPrimitiveSizeInBits();
|
|
|
|
// There is no cost model for constants with a bit size of 0. Return TCC_Free
|
|
|
|
// here, so that constant hoisting will ignore this constant.
|
|
|
|
if (BitSize == 0)
|
|
|
|
return TTI::TCC_Free;
|
|
|
|
// No cost model for operations on integers larger than 64 bit implemented yet.
|
|
|
|
if (BitSize > 64)
|
|
|
|
return TTI::TCC_Free;
|
|
|
|
|
|
|
|
if (Imm == 0)
|
|
|
|
return TTI::TCC_Free;
|
|
|
|
|
|
|
|
if (Imm.getBitWidth() <= 64) {
|
|
|
|
// Constants loaded via lgfi.
|
|
|
|
if (isInt<32>(Imm.getSExtValue()))
|
|
|
|
return TTI::TCC_Basic;
|
|
|
|
// Constants loaded via llilf.
|
|
|
|
if (isUInt<32>(Imm.getZExtValue()))
|
|
|
|
return TTI::TCC_Basic;
|
|
|
|
// Constants loaded via llihf:
|
|
|
|
if ((Imm.getZExtValue() & 0xffffffff) == 0)
|
|
|
|
return TTI::TCC_Basic;
|
|
|
|
|
|
|
|
return 2 * TTI::TCC_Basic;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 4 * TTI::TCC_Basic;
|
|
|
|
}
|
|
|
|
|
2015-08-06 02:08:10 +08:00
|
|
|
int SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
|
|
|
|
const APInt &Imm, Type *Ty) {
|
2015-03-31 20:52:27 +08:00
|
|
|
assert(Ty->isIntegerTy());
|
|
|
|
|
|
|
|
unsigned BitSize = Ty->getPrimitiveSizeInBits();
|
|
|
|
// There is no cost model for constants with a bit size of 0. Return TCC_Free
|
|
|
|
// here, so that constant hoisting will ignore this constant.
|
|
|
|
if (BitSize == 0)
|
|
|
|
return TTI::TCC_Free;
|
|
|
|
// No cost model for operations on integers larger than 64 bit implemented yet.
|
|
|
|
if (BitSize > 64)
|
|
|
|
return TTI::TCC_Free;
|
|
|
|
|
|
|
|
switch (Opcode) {
|
|
|
|
default:
|
|
|
|
return TTI::TCC_Free;
|
|
|
|
case Instruction::GetElementPtr:
|
|
|
|
// Always hoist the base address of a GetElementPtr. This prevents the
|
|
|
|
// creation of new constants for every base constant that gets constant
|
|
|
|
// folded with the offset.
|
|
|
|
if (Idx == 0)
|
|
|
|
return 2 * TTI::TCC_Basic;
|
|
|
|
return TTI::TCC_Free;
|
|
|
|
case Instruction::Store:
|
|
|
|
if (Idx == 0 && Imm.getBitWidth() <= 64) {
|
|
|
|
// Any 8-bit immediate store can by implemented via mvi.
|
|
|
|
if (BitSize == 8)
|
|
|
|
return TTI::TCC_Free;
|
|
|
|
// 16-bit immediate values can be stored via mvhhi/mvhi/mvghi.
|
|
|
|
if (isInt<16>(Imm.getSExtValue()))
|
|
|
|
return TTI::TCC_Free;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case Instruction::ICmp:
|
|
|
|
if (Idx == 1 && Imm.getBitWidth() <= 64) {
|
|
|
|
// Comparisons against signed 32-bit immediates implemented via cgfi.
|
|
|
|
if (isInt<32>(Imm.getSExtValue()))
|
|
|
|
return TTI::TCC_Free;
|
|
|
|
// Comparisons against unsigned 32-bit immediates implemented via clgfi.
|
|
|
|
if (isUInt<32>(Imm.getZExtValue()))
|
|
|
|
return TTI::TCC_Free;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case Instruction::Add:
|
|
|
|
case Instruction::Sub:
|
|
|
|
if (Idx == 1 && Imm.getBitWidth() <= 64) {
|
|
|
|
// We use algfi/slgfi to add/subtract 32-bit unsigned immediates.
|
|
|
|
if (isUInt<32>(Imm.getZExtValue()))
|
|
|
|
return TTI::TCC_Free;
|
|
|
|
// Or their negation, by swapping addition vs. subtraction.
|
|
|
|
if (isUInt<32>(-Imm.getSExtValue()))
|
|
|
|
return TTI::TCC_Free;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case Instruction::Mul:
|
|
|
|
if (Idx == 1 && Imm.getBitWidth() <= 64) {
|
|
|
|
// We use msgfi to multiply by 32-bit signed immediates.
|
|
|
|
if (isInt<32>(Imm.getSExtValue()))
|
|
|
|
return TTI::TCC_Free;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case Instruction::Or:
|
|
|
|
case Instruction::Xor:
|
|
|
|
if (Idx == 1 && Imm.getBitWidth() <= 64) {
|
|
|
|
// Masks supported by oilf/xilf.
|
|
|
|
if (isUInt<32>(Imm.getZExtValue()))
|
|
|
|
return TTI::TCC_Free;
|
|
|
|
// Masks supported by oihf/xihf.
|
|
|
|
if ((Imm.getZExtValue() & 0xffffffff) == 0)
|
|
|
|
return TTI::TCC_Free;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case Instruction::And:
|
|
|
|
if (Idx == 1 && Imm.getBitWidth() <= 64) {
|
|
|
|
// Any 32-bit AND operation can by implemented via nilf.
|
|
|
|
if (BitSize <= 32)
|
|
|
|
return TTI::TCC_Free;
|
|
|
|
// 64-bit masks supported by nilf.
|
|
|
|
if (isUInt<32>(~Imm.getZExtValue()))
|
|
|
|
return TTI::TCC_Free;
|
|
|
|
// 64-bit masks supported by nilh.
|
|
|
|
if ((Imm.getZExtValue() & 0xffffffff) == 0xffffffff)
|
|
|
|
return TTI::TCC_Free;
|
|
|
|
// Some 64-bit AND operations can be implemented via risbg.
|
|
|
|
const SystemZInstrInfo *TII = ST->getInstrInfo();
|
|
|
|
unsigned Start, End;
|
|
|
|
if (TII->isRxSBGMask(Imm.getZExtValue(), BitSize, Start, End))
|
|
|
|
return TTI::TCC_Free;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case Instruction::Shl:
|
|
|
|
case Instruction::LShr:
|
|
|
|
case Instruction::AShr:
|
|
|
|
// Always return TCC_Free for the shift value of a shift instruction.
|
|
|
|
if (Idx == 1)
|
|
|
|
return TTI::TCC_Free;
|
|
|
|
break;
|
|
|
|
case Instruction::UDiv:
|
|
|
|
case Instruction::SDiv:
|
|
|
|
case Instruction::URem:
|
|
|
|
case Instruction::SRem:
|
|
|
|
case Instruction::Trunc:
|
|
|
|
case Instruction::ZExt:
|
|
|
|
case Instruction::SExt:
|
|
|
|
case Instruction::IntToPtr:
|
|
|
|
case Instruction::PtrToInt:
|
|
|
|
case Instruction::BitCast:
|
|
|
|
case Instruction::PHI:
|
|
|
|
case Instruction::Call:
|
|
|
|
case Instruction::Select:
|
|
|
|
case Instruction::Ret:
|
|
|
|
case Instruction::Load:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return SystemZTTIImpl::getIntImmCost(Imm, Ty);
|
|
|
|
}
|
|
|
|
|
2015-08-06 02:08:10 +08:00
|
|
|
// Return the cost of the integer immediate Imm appearing as operand Idx of
// a call to intrinsic IID.  Immediates the expanded code can encode
// directly are TCC_Free.
int SystemZTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
                                  const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // There is no cost model for constants with a bit size of 0. Return
  // TCC_Free here, so that constant hoisting will ignore this constant.
  if (BitSize == 0)
    return TTI::TCC_Free;
  // No cost model for operations on integers larger than 64 bit implemented yet.
  if (BitSize > 64)
    return TTI::TCC_Free;

  switch (IID) {
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
    // These get expanded to include a normal addition/subtraction, so the
    // same immediate forms apply (algfi/slgfi, or the swapped operation).
    if (Idx != 1 || Imm.getBitWidth() > 64)
      break;
    if (isUInt<32>(Imm.getZExtValue()) || isUInt<32>(-Imm.getSExtValue()))
      return TTI::TCC_Free;
    break;
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    // These get expanded to include a normal multiplication (msgfi).
    if (Idx == 1 && Imm.getBitWidth() <= 64 &&
        isInt<32>(Imm.getSExtValue()))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_stackmap:
    // The first two operands are special; any 64-bit immediate is fine.
    if (Idx < 2)
      return TTI::TCC_Free;
    if (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint_i64:
    // The first four operands are special; any 64-bit immediate is fine.
    if (Idx < 4)
      return TTI::TCC_Free;
    if (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))
      return TTI::TCC_Free;
    break;
  default:
    return TTI::TCC_Free;
  }
  // Fall back to the plain materialization cost of the constant.
  return SystemZTTIImpl::getIntImmCost(Imm, Ty);
}
|
2015-03-31 20:56:33 +08:00
|
|
|
|
|
|
|
// Report whether population count is fast in hardware for a value of the
// given bit width.
TargetTransformInfo::PopcntSupportKind
SystemZTTIImpl::getPopcntSupport(unsigned TyWidth) {
  assert(isPowerOf2_32(TyWidth) && "Type width must be power of 2");
  // The hardware population-count instruction handles widths up to 64 bits.
  bool FastPopcnt = ST->hasPopulationCount() && TyWidth <= 64;
  return FastPopcnt ? TTI::PSK_FastHardware : TTI::PSK_Software;
}
|
|
|
|
|
2016-09-28 17:41:38 +08:00
|
|
|
// Tune loop-unrolling preferences for SystemZ: calls inhibit runtime
// unrolling, and the number of stores in the loop bounds the unroll count.
void SystemZTTIImpl::getUnrollingPreferences(Loop *L,
                                             TTI::UnrollingPreferences &UP) {
  // Scan the loop for calls and count the stores it performs.
  bool LoopHasCall = false;
  unsigned StoreCount = 0;
  for (auto &BB : L->blocks())
    for (auto &I : *BB) {
      if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
        ImmutableCallSite CS(&I);
        const Function *Callee = CS.getCalledFunction();
        if (!Callee) {
          // Indirect call.
          LoopHasCall = true;
          continue;
        }
        if (isLoweredToCall(Callee))
          LoopHasCall = true;
        // memcpy/memset are lowered to stores, so count them as one.
        Intrinsic::ID ID = Callee->getIntrinsicID();
        if (ID == Intrinsic::memcpy || ID == Intrinsic::memset)
          StoreCount++;
        continue;
      }
      if (!isa<StoreInst>(&I))
        continue;
      StoreCount++;
      Type *MemAccessTy = I.getOperand(0)->getType();
      // 128 bit fp/int stores get split, so count them twice.
      if ((MemAccessTy->isIntegerTy() || MemAccessTy->isFloatingPointTy()) &&
          getDataLayout().getTypeSizeInBits(MemAccessTy) == 128)
        StoreCount++;
    }

  // The z13 processor will run out of store tags if too many stores
  // are fed into it too quickly. Therefore make sure there are not
  // too many stores in the resulting unrolled loop.
  unsigned const MaxUnroll = StoreCount ? (12 / StoreCount) : UINT_MAX;

  if (LoopHasCall) {
    // Only allow full unrolling if loop has any calls.
    UP.FullUnrollMaxCount = MaxUnroll;
    UP.MaxCount = 1;
    return;
  }

  UP.MaxCount = MaxUnroll;
  if (UP.MaxCount <= 1)
    return;

  // Allow partial and runtime trip count unrolling.
  UP.Partial = UP.Runtime = true;

  UP.PartialThreshold = 75;
  UP.DefaultUnrollRuntimeCount = 4;

  // Allow expensive instructions in the pre-header of the loop.
  UP.AllowExpensiveTripCount = true;

  UP.Force = true;
}
|
|
|
|
|
[SystemZ] Add CodeGen support for integer vector types
This the first of a series of patches to add CodeGen support exploiting
the instructions of the z13 vector facility. This patch adds support
for the native integer vector types (v16i8, v8i16, v4i32, v2i64).
When the vector facility is present, we default to the new vector ABI.
This is characterized by two major differences:
- Vector types are passed/returned in vector registers
(except for unnamed arguments of a variable-argument list function).
- Vector types are at most 8-byte aligned.
The reason for the choice of 8-byte vector alignment is that the hardware
is able to efficiently load vectors at 8-byte alignment, and the ABI only
guarantees 8-byte alignment of the stack pointer, so requiring any higher
alignment for vectors would require dynamic stack re-alignment code.
However, for compatibility with old code that may use vector types, when
*not* using the vector facility, the old alignment rules (vector types
are naturally aligned) remain in use.
These alignment rules are not only implemented at the C language level
(implemented in clang), but also at the LLVM IR level. This is done
by selecting a different DataLayout string depending on whether the
vector ABI is in effect or not.
Based on a patch by Richard Sandiford.
llvm-svn: 236521
2015-05-06 03:25:42 +08:00
|
|
|
unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) {
|
|
|
|
if (!Vector)
|
|
|
|
// Discount the stack pointer. Also leave out %r0, since it can't
|
|
|
|
// be used in an address.
|
|
|
|
return 14;
|
|
|
|
if (ST->hasVector())
|
|
|
|
return 32;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) {
|
|
|
|
if (!Vector)
|
|
|
|
return 64;
|
|
|
|
if (ST->hasVector())
|
|
|
|
return 128;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|