forked from OSchip/llvm-project
Roll forward r243250
r243250 appeared to break clang/test/Analysis/dead-store.c on one of the build slaves, but I couldn't reproduce this failure locally. Probably a false positive as I saw this test was broken by r243246 or r243247 too but passed later without people fixing anything. llvm-svn: 243253
This commit is contained in:
parent
9d8af4de4c
commit
bfefff555e
|
@ -136,7 +136,8 @@ public:
|
|||
/// The contract for this function is the same as \c getOperationCost except
|
||||
/// that it supports an interface that provides extra information specific to
|
||||
/// the GEP operation.
|
||||
unsigned getGEPCost(const Value *Ptr, ArrayRef<const Value *> Operands) const;
|
||||
unsigned getGEPCost(Type *PointeeType, const Value *Ptr,
|
||||
ArrayRef<const Value *> Operands) const;
|
||||
|
||||
/// \brief Estimate the cost of a function call when lowered.
|
||||
///
|
||||
|
@ -543,7 +544,7 @@ public:
|
|||
virtual ~Concept() = 0;
|
||||
virtual const DataLayout &getDataLayout() const = 0;
|
||||
virtual unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
|
||||
virtual unsigned getGEPCost(const Value *Ptr,
|
||||
virtual unsigned getGEPCost(Type *PointeeType, const Value *Ptr,
|
||||
ArrayRef<const Value *> Operands) = 0;
|
||||
virtual unsigned getCallCost(FunctionType *FTy, int NumArgs) = 0;
|
||||
virtual unsigned getCallCost(const Function *F, int NumArgs) = 0;
|
||||
|
@ -643,9 +644,9 @@ public:
|
|||
unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
|
||||
return Impl.getOperationCost(Opcode, Ty, OpTy);
|
||||
}
|
||||
unsigned getGEPCost(const Value *Ptr,
|
||||
unsigned getGEPCost(Type *PointeeType, const Value *Ptr,
|
||||
ArrayRef<const Value *> Operands) override {
|
||||
return Impl.getGEPCost(Ptr, Operands);
|
||||
return Impl.getGEPCost(PointeeType, Ptr, Operands);
|
||||
}
|
||||
unsigned getCallCost(FunctionType *FTy, int NumArgs) override {
|
||||
return Impl.getCallCost(FTy, NumArgs);
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "llvm/IR/CallSite.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/GetElementPtrTypeIterator.h"
|
||||
#include "llvm/IR/Operator.h"
|
||||
#include "llvm/IR/Type.h"
|
||||
|
||||
|
@ -92,7 +93,8 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
unsigned getGEPCost(const Value *Ptr, ArrayRef<const Value *> Operands) {
|
||||
unsigned getGEPCost(Type *PointeeType, const Value *Ptr,
|
||||
ArrayRef<const Value *> Operands) {
|
||||
// In the basic model, we just assume that all-constant GEPs will be folded
|
||||
// into their uses via addressing modes.
|
||||
for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
|
||||
|
@ -378,6 +380,54 @@ public:
|
|||
return static_cast<T *>(this)->getCallCost(F, Arguments.size());
|
||||
}
|
||||
|
||||
using BaseT::getGEPCost;
|
||||
|
||||
unsigned getGEPCost(Type *PointeeType, const Value *Ptr,
|
||||
ArrayRef<const Value *> Operands) {
|
||||
const GlobalValue *BaseGV = nullptr;
|
||||
if (Ptr != nullptr) {
|
||||
// TODO: will remove this when pointers have an opaque type.
|
||||
assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
|
||||
PointeeType &&
|
||||
"explicit pointee type doesn't match operand's pointee type");
|
||||
BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
|
||||
}
|
||||
bool HasBaseReg = (BaseGV == nullptr);
|
||||
int64_t BaseOffset = 0;
|
||||
int64_t Scale = 0;
|
||||
|
||||
// Assumes the address space is 0 when Ptr is nullptr.
|
||||
unsigned AS =
|
||||
(Ptr == nullptr ? 0 : Ptr->getType()->getPointerAddressSpace());
|
||||
auto GTI = gep_type_begin(PointerType::get(PointeeType, AS), Operands);
|
||||
for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
|
||||
if (isa<SequentialType>(*GTI)) {
|
||||
int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType());
|
||||
if (const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I)) {
|
||||
BaseOffset += ConstIdx->getSExtValue() * ElementSize;
|
||||
} else {
|
||||
// Needs scale register.
|
||||
if (Scale != 0) {
|
||||
// No addressing mode takes two scale registers.
|
||||
return TTI::TCC_Basic;
|
||||
}
|
||||
Scale = ElementSize;
|
||||
}
|
||||
} else {
|
||||
StructType *STy = cast<StructType>(*GTI);
|
||||
uint64_t Field = cast<ConstantInt>(*I)->getZExtValue();
|
||||
BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
|
||||
}
|
||||
}
|
||||
|
||||
if (static_cast<T *>(this)->isLegalAddressingMode(
|
||||
PointerType::get(*GTI, AS), const_cast<GlobalValue *>(BaseGV),
|
||||
BaseOffset, HasBaseReg, Scale, AS)) {
|
||||
return TTI::TCC_Free;
|
||||
}
|
||||
return TTI::TCC_Basic;
|
||||
}
|
||||
|
||||
using BaseT::getIntrinsicCost;
|
||||
|
||||
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
|
||||
|
@ -397,9 +447,9 @@ public:
|
|||
return TTI::TCC_Free; // Model all PHI nodes as free.
|
||||
|
||||
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
|
||||
SmallVector<const Value *, 4> Indices(GEP->idx_begin(), GEP->idx_end());
|
||||
return static_cast<T *>(this)
|
||||
->getGEPCost(GEP->getPointerOperand(), Indices);
|
||||
SmallVector<Value *, 4> Indices(GEP->idx_begin(), GEP->idx_end());
|
||||
return static_cast<T *>(this)->getGEPCost(
|
||||
GEP->getSourceElementType(), GEP->getPointerOperand(), Indices);
|
||||
}
|
||||
|
||||
if (auto CS = ImmutableCallSite(U)) {
|
||||
|
|
|
@ -78,7 +78,7 @@ namespace llvm {
|
|||
// current type directly.
|
||||
Type *operator->() const { return operator*(); }
|
||||
|
||||
Value *getOperand() const { return *OpIt; }
|
||||
Value *getOperand() const { return const_cast<Value *>(&**OpIt); }
|
||||
|
||||
generic_gep_type_iterator& operator++() { // Preincrement
|
||||
if (CurTy.getInt()) {
|
||||
|
|
|
@ -383,10 +383,8 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
|
|||
return -1;
|
||||
|
||||
switch (I->getOpcode()) {
|
||||
case Instruction::GetElementPtr:{
|
||||
Type *ValTy = I->getOperand(0)->getType()->getPointerElementType();
|
||||
return TTI->getAddressComputationCost(ValTy);
|
||||
}
|
||||
case Instruction::GetElementPtr:
|
||||
return TTI->getUserCost(I);
|
||||
|
||||
case Instruction::Ret:
|
||||
case Instruction::PHI:
|
||||
|
|
|
@ -234,6 +234,7 @@ bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis,
|
|||
Basis.CandidateKind == C.CandidateKind);
|
||||
}
|
||||
|
||||
// TODO: use TTI->getGEPCost.
|
||||
static bool isGEPFoldable(GetElementPtrInst *GEP,
|
||||
const TargetTransformInfo *TTI,
|
||||
const DataLayout *DL) {
|
||||
|
|
|
@ -3,41 +3,85 @@
|
|||
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
|
||||
target triple = "thumbv7-apple-ios6.0.0"
|
||||
|
||||
define void @test_geps() {
|
||||
; Cost of scalar integer geps should be one. We can't always expect it to be
|
||||
; folded into the instruction addressing mode.
|
||||
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8, i8*
|
||||
define void @test_geps(i32 %i) {
|
||||
; GEPs with index 0 are essentially NOOPs.
|
||||
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
|
||||
%a0 = getelementptr inbounds i8, i8* undef, i32 0
|
||||
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16, i16*
|
||||
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
|
||||
%a1 = getelementptr inbounds i16, i16* undef, i32 0
|
||||
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
|
||||
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32*
|
||||
%a2 = getelementptr inbounds i32, i32* undef, i32 0
|
||||
|
||||
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
|
||||
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, i64*
|
||||
%a3 = getelementptr inbounds i64, i64* undef, i32 0
|
||||
|
||||
; Cost of scalar floating point geps should be one. We cannot fold the address
|
||||
; computation.
|
||||
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds float, float*
|
||||
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds float, float*
|
||||
%a4 = getelementptr inbounds float, float* undef, i32 0
|
||||
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds double, double*
|
||||
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds double, double*
|
||||
%a5 = getelementptr inbounds double, double* undef, i32 0
|
||||
|
||||
|
||||
; Cost of vector geps should be one. We cannot fold the address computation.
|
||||
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
|
||||
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
|
||||
%a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
|
||||
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
|
||||
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
|
||||
%a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
|
||||
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
|
||||
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
|
||||
%a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
|
||||
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
|
||||
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
|
||||
%a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
|
||||
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
|
||||
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
|
||||
%a11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
|
||||
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
|
||||
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
|
||||
%a12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0
|
||||
|
||||
; Cost of GEPs is one if we cannot fold the address computation.
|
||||
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
|
||||
%b0 = getelementptr inbounds i8, i8* undef, i32 1024
|
||||
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
|
||||
%b1 = getelementptr inbounds i16, i16* undef, i32 1024
|
||||
; Thumb-2 cannot fold offset >= 2^12 into address computation.
|
||||
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
|
||||
%b2 = getelementptr inbounds i32, i32* undef, i32 1024
|
||||
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
|
||||
%b3 = getelementptr inbounds i64, i64* undef, i32 1024
|
||||
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds float, float*
|
||||
%b4 = getelementptr inbounds float, float* undef, i32 1024
|
||||
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds double, double*
|
||||
%b5 = getelementptr inbounds double, double* undef, i32 1024
|
||||
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
|
||||
%b7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 1
|
||||
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
|
||||
%b8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 1
|
||||
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
|
||||
%b9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 1
|
||||
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
|
||||
%b10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 1
|
||||
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
|
||||
%b11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 1
|
||||
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
|
||||
%b12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 1
|
||||
|
||||
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
|
||||
%c0 = getelementptr inbounds i8, i8* undef, i32 %i
|
||||
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
|
||||
%c1 = getelementptr inbounds i16, i16* undef, i32 %i
|
||||
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32*
|
||||
%c2 = getelementptr inbounds i32, i32* undef, i32 %i
|
||||
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, i64*
|
||||
%c3 = getelementptr inbounds i64, i64* undef, i32 %i
|
||||
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds float, float*
|
||||
%c4 = getelementptr inbounds float, float* undef, i32 %i
|
||||
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds double, double*
|
||||
%c5 = getelementptr inbounds double, double* undef, i32 %i
|
||||
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
|
||||
%c7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 %i
|
||||
;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
|
||||
%c8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 %i
|
||||
; Thumb-2 cannot fold scales larger than 8 to address computation.
|
||||
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
|
||||
%c9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 %i
|
||||
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
|
||||
%c10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 %i
|
||||
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
|
||||
%c11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 %i
|
||||
;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
|
||||
%c12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 %i
|
||||
|
||||
ret void
|
||||
}
|
||||
|
|
|
@ -5,9 +5,27 @@
|
|||
|
||||
; -- No triple in this module --
|
||||
|
||||
;CHECK: cost of 1 {{.*}} add
|
||||
;CHECK: cost of 1 {{.*}} ret
|
||||
; CHECK-LABEL: function 'no_info'
|
||||
; CHECK: cost of 1 {{.*}} add
|
||||
; CHECK: cost of 1 {{.*}} ret
|
||||
define i32 @no_info(i32 %arg) {
|
||||
%e = add i32 %arg, %arg
|
||||
ret i32 %e
|
||||
}
|
||||
|
||||
define i8 @addressing_mode_reg_reg(i8* %a, i32 %b) {
|
||||
; CHECK-LABEL: function 'addressing_mode_reg_reg'
|
||||
%p = getelementptr i8, i8* %a, i32 %b ; NoTTI accepts reg+reg addressing.
|
||||
; CHECK: cost of 0 {{.*}} getelementptr
|
||||
%v = load i8, i8* %p
|
||||
ret i8 %v
|
||||
}
|
||||
|
||||
; CHECK-LABEL: function 'addressing_mode_scaled_reg'
|
||||
define i32 @addressing_mode_scaled_reg(i32* %a, i32 %b) {
|
||||
; NoTTI rejects reg+scale*reg addressing.
|
||||
%p = getelementptr i32, i32* %a, i32 %b
|
||||
; CHECK: cost of 1 {{.*}} getelementptr
|
||||
%v = load i32, i32* %p
|
||||
ret i32 %v
|
||||
}
|
||||
|
|
|
@ -86,17 +86,20 @@ for.body: ; preds = %entry, %for.body
|
|||
%reduction.026 = phi i16 [ %add14, %for.body ], [ 0, %entry ]
|
||||
%arrayidx = getelementptr inbounds i16, i16* %arr, i64 %indvars.iv
|
||||
%0 = load i16, i16* %arrayidx, align 2
|
||||
%add = add i16 %0, %reduction.026
|
||||
%mul = shl i16 %0, 1
|
||||
%add = add i16 %mul, %reduction.026
|
||||
%sext = mul i64 %indvars.iv, 12884901888
|
||||
%idxprom3 = ashr exact i64 %sext, 32
|
||||
%arrayidx4 = getelementptr inbounds i16, i16* %arr, i64 %idxprom3
|
||||
%1 = load i16, i16* %arrayidx4, align 2
|
||||
%add7 = add i16 %add, %1
|
||||
%mul2 = shl i16 %1, 1
|
||||
%add7 = add i16 %add, %mul2
|
||||
%sext28 = mul i64 %indvars.iv, 21474836480
|
||||
%idxprom10 = ashr exact i64 %sext28, 32
|
||||
%arrayidx11 = getelementptr inbounds i16, i16* %arr, i64 %idxprom10
|
||||
%2 = load i16, i16* %arrayidx11, align 2
|
||||
%add14 = add i16 %add7, %2
|
||||
%mul3 = shl i16 %2, 1
|
||||
%add14 = add i16 %add7, %mul3
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %n
|
||||
|
|
|
@ -60,7 +60,7 @@ for.body: ; preds = %for.body, %entry
|
|||
%arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
|
||||
store i32 %add, i32* %arrayidx2, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, 32
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, 64
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
|
@ -111,7 +111,7 @@ for.body: ; preds = %for.body, %entry
|
|||
%arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
|
||||
store i32 %add, i32* %arrayidx2, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, 32
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, 64
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
|
@ -162,7 +162,7 @@ for.body: ; preds = %for.body, %entry
|
|||
%arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
|
||||
store i32 %add, i32* %arrayidx2, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, 32
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, 64
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
|
|
Loading…
Reference in New Issue