forked from OSchip/llvm-project
Add an unfolded offset field to LSR's Formula record. This is used to
model constants which can be added to base registers via add-immediate instructions which don't require an additional register to materialize the immediate. llvm-svn: 130743
This commit is contained in:
parent
37286b2440
commit
6136e94897
|
@ -1583,6 +1583,14 @@ public:
|
|||
return true;
|
||||
}
|
||||
|
||||
/// isLegalAddImmediate - Return true if the specified immediate is legal
|
||||
/// add immediate, that is the target has add instructions which can add
|
||||
/// a register with the immediate without having to materialize the
|
||||
/// immediate into a register.
|
||||
virtual bool isLegalAddImmediate(int64_t Imm) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Div utility functions
|
||||
//
|
||||
|
|
|
@ -6984,6 +6984,14 @@ bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
|
|||
return Imm >= 0 && Imm <= 255;
|
||||
}
|
||||
|
||||
/// isLegalAddImmediate - Return true if the specified immediate is legal
|
||||
/// add immediate, that is the target has add instructions which can add
|
||||
/// a register with the immediate without having to materialize the
|
||||
/// immediate into a register.
|
||||
bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
|
||||
return ARM_AM::getSOImmVal(Imm) != -1;
|
||||
}
|
||||
|
||||
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
|
||||
bool isSEXTLoad, SDValue &Base,
|
||||
SDValue &Offset, bool &isInc,
|
||||
|
|
|
@ -264,6 +264,12 @@ namespace llvm {
|
|||
/// the immediate into a register.
|
||||
virtual bool isLegalICmpImmediate(int64_t Imm) const;
|
||||
|
||||
/// isLegalAddImmediate - Return true if the specified immediate is legal
|
||||
/// add immediate, that is the target has add instructions which can
|
||||
/// add a register and the immediate without having to materialize
|
||||
/// the immediate into a register.
|
||||
virtual bool isLegalAddImmediate(int64_t Imm) const;
|
||||
|
||||
/// getPreIndexedAddressParts - returns true by value, base pointer and
|
||||
/// offset pointer and addressing mode by reference if the node's address
|
||||
/// can be legally represented as pre-indexed load / store address.
|
||||
|
|
|
@ -209,7 +209,12 @@ struct Formula {
|
|||
/// when AM.Scale is not zero.
|
||||
const SCEV *ScaledReg;
|
||||
|
||||
Formula() : ScaledReg(0) {}
|
||||
/// UnfoldedOffset - An additional constant offset which added near the
|
||||
/// use. This requires a temporary register, but the offset itself can
|
||||
/// live in an add immediate field rather than a register.
|
||||
int64_t UnfoldedOffset;
|
||||
|
||||
Formula() : ScaledReg(0), UnfoldedOffset(0) {}
|
||||
|
||||
void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
|
||||
|
||||
|
@ -379,6 +384,10 @@ void Formula::print(raw_ostream &OS) const {
|
|||
OS << "<unknown>";
|
||||
OS << ')';
|
||||
}
|
||||
if (UnfoldedOffset != 0) {
|
||||
if (!First) OS << " + "; else First = false;
|
||||
OS << "imm(" << UnfoldedOffset << ')';
|
||||
}
|
||||
}
|
||||
|
||||
void Formula::dump() const {
|
||||
|
@ -771,8 +780,10 @@ void Cost::RateFormula(const Formula &F,
|
|||
RatePrimaryRegister(BaseReg, Regs, L, SE, DT);
|
||||
}
|
||||
|
||||
if (F.BaseRegs.size() > 1)
|
||||
NumBaseAdds += F.BaseRegs.size() - 1;
|
||||
// Determine how many (unfolded) adds we'll need inside the loop.
|
||||
size_t NumBaseParts = F.BaseRegs.size() + (F.UnfoldedOffset != 0);
|
||||
if (NumBaseParts > 1)
|
||||
NumBaseAdds += NumBaseParts - 1;
|
||||
|
||||
// Tally up the non-zero immediates.
|
||||
for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
|
||||
|
@ -1945,7 +1956,8 @@ LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
|
|||
if (F.BaseRegs == OrigF.BaseRegs &&
|
||||
F.ScaledReg == OrigF.ScaledReg &&
|
||||
F.AM.BaseGV == OrigF.AM.BaseGV &&
|
||||
F.AM.Scale == OrigF.AM.Scale) {
|
||||
F.AM.Scale == OrigF.AM.Scale &&
|
||||
F.UnfoldedOffset == OrigF.UnfoldedOffset) {
|
||||
if (F.AM.BaseOffs == 0)
|
||||
return &LU;
|
||||
// This is the formula where all the registers and symbols matched;
|
||||
|
@ -2313,8 +2325,29 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
|
|||
if (InnerSum->isZero())
|
||||
continue;
|
||||
Formula F = Base;
|
||||
F.BaseRegs[i] = InnerSum;
|
||||
F.BaseRegs.push_back(*J);
|
||||
|
||||
// Add the remaining pieces of the add back into the new formula.
|
||||
const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
|
||||
if (TLI && InnerSumSC &&
|
||||
SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
|
||||
TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
|
||||
InnerSumSC->getValue()->getZExtValue())) {
|
||||
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
|
||||
InnerSumSC->getValue()->getZExtValue();
|
||||
F.BaseRegs.erase(F.BaseRegs.begin() + i);
|
||||
} else
|
||||
F.BaseRegs[i] = InnerSum;
|
||||
|
||||
// Add J as its own register, or an unfolded immediate.
|
||||
const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
|
||||
if (TLI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
|
||||
TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
|
||||
SC->getValue()->getZExtValue()))
|
||||
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
|
||||
SC->getValue()->getZExtValue();
|
||||
else
|
||||
F.BaseRegs.push_back(*J);
|
||||
|
||||
if (InsertFormula(LU, LUIdx, F))
|
||||
// If that formula hadn't been seen before, recurse to find more like
|
||||
// it.
|
||||
|
@ -2482,6 +2515,13 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
|
|||
continue;
|
||||
}
|
||||
|
||||
// Check that multiplying with the unfolded offset doesn't overflow.
|
||||
if (F.UnfoldedOffset != 0) {
|
||||
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
|
||||
if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
|
||||
continue;
|
||||
}
|
||||
|
||||
// If we make it here and it's legal, add it.
|
||||
(void)InsertFormula(LU, LUIdx, F);
|
||||
next:;
|
||||
|
@ -2664,7 +2704,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
|
|||
// other orig regs.
|
||||
ImmMapTy::const_iterator OtherImms[] = {
|
||||
Imms.begin(), prior(Imms.end()),
|
||||
Imms.upper_bound((Imms.begin()->first + prior(Imms.end())->first) / 2)
|
||||
Imms.lower_bound((Imms.begin()->first + prior(Imms.end())->first) / 2)
|
||||
};
|
||||
for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
|
||||
ImmMapTy::const_iterator M = OtherImms[i];
|
||||
|
@ -2738,8 +2778,13 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
|
|||
Formula NewF = F;
|
||||
NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm;
|
||||
if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
|
||||
LU.Kind, LU.AccessTy, TLI))
|
||||
continue;
|
||||
LU.Kind, LU.AccessTy, TLI)) {
|
||||
if (!TLI ||
|
||||
!TLI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
|
||||
continue;
|
||||
NewF = F;
|
||||
NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
|
||||
}
|
||||
NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
|
||||
|
||||
// If the new formula has a constant in a register, and adding the
|
||||
|
@ -3488,6 +3533,14 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
|
|||
}
|
||||
}
|
||||
|
||||
// Expand the unfolded offset portion.
|
||||
int64_t UnfoldedOffset = F.UnfoldedOffset;
|
||||
if (UnfoldedOffset != 0) {
|
||||
// Just add the immediate values.
|
||||
Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy,
|
||||
UnfoldedOffset)));
|
||||
}
|
||||
|
||||
// Emit instructions summing all the operands.
|
||||
const SCEV *FullS = Ops.empty() ?
|
||||
SE.getConstant(IntTy, 0) :
|
||||
|
|
|
@ -0,0 +1,80 @@
|
|||
; RUN: llc < %s | FileCheck %s
|
||||
|
||||
; LSR shouldn't introduce more induction variables than needed, increasing
|
||||
; register pressure and therefore spilling. There is more room for improvement
|
||||
; here.
|
||||
|
||||
; CHECK: sub sp, #32
|
||||
|
||||
; CHECK: ldr r{{.*}}, [sp, #4]
|
||||
; CHECK-NEXT: ldr r{{.*}}, [sp, #16]
|
||||
; CHECK-NEXT: ldr r{{.*}}, [sp, #12]
|
||||
; CHECK-NEXT: adds
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
|
||||
target triple = "thumbv7-apple-macosx10.7.0"
|
||||
|
||||
%struct.partition_entry = type { i32, i32, i64, i64 }
|
||||
|
||||
define i32 @partition_overlap_check(%struct.partition_entry* nocapture %part, i32 %num_entries) nounwind readonly optsize ssp {
|
||||
entry:
|
||||
%cmp79 = icmp sgt i32 %num_entries, 0
|
||||
br i1 %cmp79, label %outer.loop, label %for.end72
|
||||
|
||||
outer.loop: ; preds = %for.inc69, %entry
|
||||
%overlap.081 = phi i32 [ %overlap.4, %for.inc69 ], [ 0, %entry ]
|
||||
%0 = phi i32 [ %inc71, %for.inc69 ], [ 0, %entry ]
|
||||
%offset = getelementptr %struct.partition_entry* %part, i32 %0, i32 2
|
||||
%len = getelementptr %struct.partition_entry* %part, i32 %0, i32 3
|
||||
%tmp5 = load i64* %offset, align 4, !tbaa !0
|
||||
%tmp15 = load i64* %len, align 4, !tbaa !0
|
||||
%add = add nsw i64 %tmp15, %tmp5
|
||||
br label %inner.loop
|
||||
|
||||
inner.loop: ; preds = %for.inc, %outer.loop
|
||||
%overlap.178 = phi i32 [ %overlap.081, %outer.loop ], [ %overlap.4, %for.inc ]
|
||||
%1 = phi i32 [ 0, %outer.loop ], [ %inc, %for.inc ]
|
||||
%cmp23 = icmp eq i32 %0, %1
|
||||
br i1 %cmp23, label %for.inc, label %if.end
|
||||
|
||||
if.end: ; preds = %inner.loop
|
||||
%len39 = getelementptr %struct.partition_entry* %part, i32 %1, i32 3
|
||||
%offset28 = getelementptr %struct.partition_entry* %part, i32 %1, i32 2
|
||||
%tmp29 = load i64* %offset28, align 4, !tbaa !0
|
||||
%tmp40 = load i64* %len39, align 4, !tbaa !0
|
||||
%add41 = add nsw i64 %tmp40, %tmp29
|
||||
%cmp44 = icmp sge i64 %tmp29, %tmp5
|
||||
%cmp47 = icmp slt i64 %tmp29, %add
|
||||
%or.cond = and i1 %cmp44, %cmp47
|
||||
%overlap.2 = select i1 %or.cond, i32 1, i32 %overlap.178
|
||||
%cmp52 = icmp sle i64 %add41, %add
|
||||
%cmp56 = icmp sgt i64 %add41, %tmp5
|
||||
%or.cond74 = and i1 %cmp52, %cmp56
|
||||
%overlap.3 = select i1 %or.cond74, i32 1, i32 %overlap.2
|
||||
%cmp61 = icmp sgt i64 %tmp29, %tmp5
|
||||
%cmp65 = icmp slt i64 %add41, %add
|
||||
%or.cond75 = or i1 %cmp61, %cmp65
|
||||
br i1 %or.cond75, label %for.inc, label %if.then66
|
||||
|
||||
if.then66: ; preds = %if.end
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %if.end, %if.then66, %inner.loop
|
||||
%overlap.4 = phi i32 [ %overlap.178, %inner.loop ], [ 1, %if.then66 ], [ %overlap.3, %if.end ]
|
||||
%inc = add nsw i32 %1, 1
|
||||
%exitcond = icmp eq i32 %inc, %num_entries
|
||||
br i1 %exitcond, label %for.inc69, label %inner.loop
|
||||
|
||||
for.inc69: ; preds = %for.inc
|
||||
%inc71 = add nsw i32 %0, 1
|
||||
%exitcond83 = icmp eq i32 %inc71, %num_entries
|
||||
br i1 %exitcond83, label %for.end72, label %outer.loop
|
||||
|
||||
for.end72: ; preds = %for.inc69, %entry
|
||||
%overlap.0.lcssa = phi i32 [ 0, %entry ], [ %overlap.4, %for.inc69 ]
|
||||
ret i32 %overlap.0.lcssa
|
||||
}
|
||||
|
||||
!0 = metadata !{metadata !"long long", metadata !1}
|
||||
!1 = metadata !{metadata !"omnipotent char", metadata !2}
|
||||
!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
|
Loading…
Reference in New Issue