llvm-project/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp

316 lines
10 KiB
C++

//===-- SystemZTargetTransformInfo.cpp - SystemZ-specific TTI -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a TargetTransformInfo analysis pass specific to the
// SystemZ target machine. It uses the target's detailed information to provide
// more precise answers to certain TTI queries, while letting the target
// independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//
#include "SystemZTargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;
#define DEBUG_TYPE "systemztti"
//===----------------------------------------------------------------------===//
//
// SystemZ cost model.
//
//===----------------------------------------------------------------------===//
int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
// There is no cost model for constants with a bit size of 0. Return TCC_Free
// here, so that constant hoisting will ignore this constant.
if (BitSize == 0)
return TTI::TCC_Free;
// No cost model for operations on integers larger than 64 bit implemented yet.
if (BitSize > 64)
return TTI::TCC_Free;
if (Imm == 0)
return TTI::TCC_Free;
if (Imm.getBitWidth() <= 64) {
// Constants loaded via lgfi.
if (isInt<32>(Imm.getSExtValue()))
return TTI::TCC_Basic;
// Constants loaded via llilf.
if (isUInt<32>(Imm.getZExtValue()))
return TTI::TCC_Basic;
// Constants loaded via llihf:
if ((Imm.getZExtValue() & 0xffffffff) == 0)
return TTI::TCC_Basic;
return 2 * TTI::TCC_Basic;
}
return 4 * TTI::TCC_Basic;
}
int SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
// There is no cost model for constants with a bit size of 0. Return TCC_Free
// here, so that constant hoisting will ignore this constant.
if (BitSize == 0)
return TTI::TCC_Free;
// No cost model for operations on integers larger than 64 bit implemented yet.
if (BitSize > 64)
return TTI::TCC_Free;
switch (Opcode) {
default:
return TTI::TCC_Free;
case Instruction::GetElementPtr:
// Always hoist the base address of a GetElementPtr. This prevents the
// creation of new constants for every base constant that gets constant
// folded with the offset.
if (Idx == 0)
return 2 * TTI::TCC_Basic;
return TTI::TCC_Free;
case Instruction::Store:
if (Idx == 0 && Imm.getBitWidth() <= 64) {
// Any 8-bit immediate store can by implemented via mvi.
if (BitSize == 8)
return TTI::TCC_Free;
// 16-bit immediate values can be stored via mvhhi/mvhi/mvghi.
if (isInt<16>(Imm.getSExtValue()))
return TTI::TCC_Free;
}
break;
case Instruction::ICmp:
if (Idx == 1 && Imm.getBitWidth() <= 64) {
// Comparisons against signed 32-bit immediates implemented via cgfi.
if (isInt<32>(Imm.getSExtValue()))
return TTI::TCC_Free;
// Comparisons against unsigned 32-bit immediates implemented via clgfi.
if (isUInt<32>(Imm.getZExtValue()))
return TTI::TCC_Free;
}
break;
case Instruction::Add:
case Instruction::Sub:
if (Idx == 1 && Imm.getBitWidth() <= 64) {
// We use algfi/slgfi to add/subtract 32-bit unsigned immediates.
if (isUInt<32>(Imm.getZExtValue()))
return TTI::TCC_Free;
// Or their negation, by swapping addition vs. subtraction.
if (isUInt<32>(-Imm.getSExtValue()))
return TTI::TCC_Free;
}
break;
case Instruction::Mul:
if (Idx == 1 && Imm.getBitWidth() <= 64) {
// We use msgfi to multiply by 32-bit signed immediates.
if (isInt<32>(Imm.getSExtValue()))
return TTI::TCC_Free;
}
break;
case Instruction::Or:
case Instruction::Xor:
if (Idx == 1 && Imm.getBitWidth() <= 64) {
// Masks supported by oilf/xilf.
if (isUInt<32>(Imm.getZExtValue()))
return TTI::TCC_Free;
// Masks supported by oihf/xihf.
if ((Imm.getZExtValue() & 0xffffffff) == 0)
return TTI::TCC_Free;
}
break;
case Instruction::And:
if (Idx == 1 && Imm.getBitWidth() <= 64) {
// Any 32-bit AND operation can by implemented via nilf.
if (BitSize <= 32)
return TTI::TCC_Free;
// 64-bit masks supported by nilf.
if (isUInt<32>(~Imm.getZExtValue()))
return TTI::TCC_Free;
// 64-bit masks supported by nilh.
if ((Imm.getZExtValue() & 0xffffffff) == 0xffffffff)
return TTI::TCC_Free;
// Some 64-bit AND operations can be implemented via risbg.
const SystemZInstrInfo *TII = ST->getInstrInfo();
unsigned Start, End;
if (TII->isRxSBGMask(Imm.getZExtValue(), BitSize, Start, End))
return TTI::TCC_Free;
}
break;
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
// Always return TCC_Free for the shift value of a shift instruction.
if (Idx == 1)
return TTI::TCC_Free;
break;
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::URem:
case Instruction::SRem:
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::IntToPtr:
case Instruction::PtrToInt:
case Instruction::BitCast:
case Instruction::PHI:
case Instruction::Call:
case Instruction::Select:
case Instruction::Ret:
case Instruction::Load:
break;
}
return SystemZTTIImpl::getIntImmCost(Imm, Ty);
}
int SystemZTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
// There is no cost model for constants with a bit size of 0. Return TCC_Free
// here, so that constant hoisting will ignore this constant.
if (BitSize == 0)
return TTI::TCC_Free;
// No cost model for operations on integers larger than 64 bit implemented yet.
if (BitSize > 64)
return TTI::TCC_Free;
switch (IID) {
default:
return TTI::TCC_Free;
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow:
case Intrinsic::ssub_with_overflow:
case Intrinsic::usub_with_overflow:
// These get expanded to include a normal addition/subtraction.
if (Idx == 1 && Imm.getBitWidth() <= 64) {
if (isUInt<32>(Imm.getZExtValue()))
return TTI::TCC_Free;
if (isUInt<32>(-Imm.getSExtValue()))
return TTI::TCC_Free;
}
break;
case Intrinsic::smul_with_overflow:
case Intrinsic::umul_with_overflow:
// These get expanded to include a normal multiplication.
if (Idx == 1 && Imm.getBitWidth() <= 64) {
if (isInt<32>(Imm.getSExtValue()))
return TTI::TCC_Free;
}
break;
case Intrinsic::experimental_stackmap:
if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
return TTI::TCC_Free;
break;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
return TTI::TCC_Free;
break;
}
return SystemZTTIImpl::getIntImmCost(Imm, Ty);
}
TargetTransformInfo::PopcntSupportKind
SystemZTTIImpl::getPopcntSupport(unsigned TyWidth) {
assert(isPowerOf2_32(TyWidth) && "Type width must be power of 2");
if (ST->hasPopulationCount() && TyWidth <= 64)
return TTI::PSK_FastHardware;
return TTI::PSK_Software;
}
void SystemZTTIImpl::getUnrollingPreferences(Loop *L,
TTI::UnrollingPreferences &UP) {
// Find out if L contains a call, what the machine instruction count
// estimate is, and how many stores there are.
bool HasCall = false;
unsigned NumStores = 0;
for (auto &BB : L->blocks())
for (auto &I : *BB) {
if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
ImmutableCallSite CS(&I);
if (const Function *F = CS.getCalledFunction()) {
if (isLoweredToCall(F))
HasCall = true;
if (F->getIntrinsicID() == Intrinsic::memcpy ||
F->getIntrinsicID() == Intrinsic::memset)
NumStores++;
} else { // indirect call.
HasCall = true;
}
}
if (isa<StoreInst>(&I)) {
NumStores++;
Type *MemAccessTy = I.getOperand(0)->getType();
if((MemAccessTy->isIntegerTy() || MemAccessTy->isFloatingPointTy()) &&
(getDataLayout().getTypeSizeInBits(MemAccessTy) == 128))
NumStores++; // 128 bit fp/int stores get split.
}
}
// The z13 processor will run out of store tags if too many stores
// are fed into it too quickly. Therefore make sure there are not
// too many stores in the resulting unrolled loop.
unsigned const Max = (NumStores ? (12 / NumStores) : UINT_MAX);
if (HasCall) {
// Only allow full unrolling if loop has any calls.
UP.FullUnrollMaxCount = Max;
UP.MaxCount = 1;
return;
}
UP.MaxCount = Max;
if (UP.MaxCount <= 1)
return;
// Allow partial and runtime trip count unrolling.
UP.Partial = UP.Runtime = true;
UP.PartialThreshold = 75;
UP.DefaultUnrollRuntimeCount = 4;
// Allow expensive instructions in the pre-header of the loop.
UP.AllowExpensiveTripCount = true;
UP.Force = true;
}
unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) {
if (!Vector)
// Discount the stack pointer. Also leave out %r0, since it can't
// be used in an address.
return 14;
if (ST->hasVector())
return 32;
return 0;
}
unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) {
if (!Vector)
return 64;
if (ST->hasVector())
return 128;
return 0;
}