[LoadCombine] Combine loads formed from GEPs with negative indexes

Change the underlying offset and comparisons to use int64_t instead of
uint64_t.

Patch by River Riddle!

Differential Revision: http://reviews.llvm.org/D21499

llvm-svn: 273105
commit 3119599475
parent 37ba54e5d7
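Why the signedness matters: a GEP with a negative constant index yields a negative byte offset, but the pass stored offsets as uint64_t, so such an offset wrapped to a huge unsigned value and the adjacency checks in aggregateLoads could never see the loads as contiguous. A minimal standalone sketch of the failure mode (plain C++, not the pass's actual code; the variable names are illustrative):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // A load through "getelementptr inbounds i32, i32* %i, i64 -1" sits
      // 4 bytes *before* the base pointer.
      int64_t RealOffset = -4;

      // Stored as uint64_t (the old PointerOffsetPair::Offset type) the
      // offset wraps to 0xFFFFFFFFFFFFFFFC...
      uint64_t WrappedOffset = static_cast<uint64_t>(RealOffset);

      // ...so "does this load start past the end of the previous one?"
      // compares absurd values and the loads are never merged.
      uint64_t PrevOffset = 0, PrevSize = 4;
      std::printf("unsigned: gap=%d\n",
                  WrappedOffset > PrevOffset + PrevSize);        // prints 1
      std::printf("signed:   gap=%d\n",
                  RealOffset > int64_t(PrevOffset + PrevSize));  // prints 0
    }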
--- a/llvm/lib/Transforms/Scalar/LoadCombine.cpp
+++ b/llvm/lib/Transforms/Scalar/LoadCombine.cpp
@@ -40,7 +40,7 @@ STATISTIC(NumLoadsCombined, "Number of loads combined");
 namespace {
 struct PointerOffsetPair {
   Value *Pointer;
-  uint64_t Offset;
+  int64_t Offset;
 };
 
 struct LoadPOPPair {
@@ -102,7 +102,7 @@ PointerOffsetPair LoadCombine::getPointerOffsetPair(LoadInst &LI) {
     unsigned BitWidth = DL.getPointerTypeSizeInBits(GEP->getType());
     APInt Offset(BitWidth, 0);
     if (GEP->accumulateConstantOffset(DL, Offset))
-      POP.Offset += Offset.getZExtValue();
+      POP.Offset += Offset.getSExtValue();
     else
       // Can't handle GEPs with variable indices.
       return POP;
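In getPointerOffsetPair the byte offset comes back as an APInt whose width is the pointer width. Reading it with getZExtValue() zero-extends the bit pattern into an unsigned value, so a negative offset on a sub-64-bit pointer becomes a large positive number; getSExtValue() sign-extends and keeps it negative (for 64-bit pointers the bits agree either way, but the signed result type is what the new int64_t Offset field calls for). A hedged illustration with plain integers instead of APInt, assuming a 32-bit pointer width for the example:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // accumulateConstantOffset for "gep i32, i32* %p, i32 -1" on a
      // 32-bit target yields the 32-bit pattern 0xFFFFFFFC, i.e. -4.
      uint32_t Raw = 0xFFFFFFFCu;

      // Zero-extension (what getZExtValue does): a giant forward offset.
      uint64_t ZExt = Raw;                        // 4294967292

      // Sign-extension (what getSExtValue does): the real byte offset.
      int64_t SExt = static_cast<int32_t>(Raw);   // -4

      std::printf("zext: %llu\n", (unsigned long long)ZExt);
      std::printf("sext: %lld\n", (long long)SExt);
    }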
@@ -138,28 +138,31 @@ bool LoadCombine::aggregateLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
   LoadInst *BaseLoad = nullptr;
   SmallVector<LoadPOPPair, 8> AggregateLoads;
   bool Combined = false;
-  uint64_t PrevOffset = -1ull;
+  bool ValidPrevOffset = false;
+  int64_t PrevOffset = 0;
   uint64_t PrevSize = 0;
   for (auto &L : Loads) {
-    if (PrevOffset == -1ull) {
+    if (ValidPrevOffset == false) {
       BaseLoad = L.Load;
       PrevOffset = L.POP.Offset;
       PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize(
           L.Load->getType());
       AggregateLoads.push_back(L);
+      ValidPrevOffset = true;
       continue;
     }
     if (L.Load->getAlignment() > BaseLoad->getAlignment())
       continue;
-    if (L.POP.Offset > PrevOffset + PrevSize) {
+    int64_t PrevEnd = PrevOffset + PrevSize;
+    if (L.POP.Offset > PrevEnd) {
       // No other load will be combinable
       if (combineLoads(AggregateLoads))
         Combined = true;
       AggregateLoads.clear();
-      PrevOffset = -1;
+      ValidPrevOffset = false;
       continue;
     }
-    if (L.POP.Offset != PrevOffset + PrevSize)
+    if (L.POP.Offset != PrevEnd)
       // This load is offset less than the size of the last load.
       // FIXME: We may want to handle this case.
       continue;

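The other casualty of going signed is the sentinel: the old code encoded "no previous load yet" as PrevOffset == -1ull, but with signed offsets -1 is a perfectly legal value (an i8 load one byte below the base pointer), so the sentinel could collide with real data. The new ValidPrevOffset flag tracks validity separately from the value. A tiny sketch of the collision (illustrative values only, not the pass's code):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // With signed offsets, -1 is a legal byte offset: an i8 load through
      // "getelementptr i8, i8* %p, i64 -1" sits one byte below %p.
      int64_t OffsetOfRealLoad = -1;

      // The old convention "PrevOffset == -1 means uninitialized" would
      // misread that load as "no previous load seen".
      std::printf("collides with sentinel: %d\n",
                  OffsetOfRealLoad == -1);  // prints 1

      // Hence the explicit flag: validity lives outside the offset's range.
      bool ValidPrevOffset = false;
      int64_t PrevOffset = 0;
      if (!ValidPrevOffset) { // first load seen
        PrevOffset = OffsetOfRealLoad;
        ValidPrevOffset = true;
      }
      std::printf("PrevOffset=%lld valid=%d\n", (long long)PrevOffset,
                  (int)ValidPrevOffset);
    }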
--- /dev/null
+++ b/llvm/test/Transforms/LoadCombine/load-combine-negativegep.ll
@@ -0,0 +1,19 @@
+; RUN: opt -basicaa -load-combine -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @Load_NegGep(i32* %i){
+  %1 = getelementptr inbounds i32, i32* %i, i64 -1
+  %2 = load i32, i32* %1, align 4
+  %3 = load i32, i32* %i, align 4
+  %4 = add nsw i32 %3, %2
+  ret i32 %4
+; CHECK-LABEL: @Load_NegGep(
+; CHECK: %[[load:.*]] = load i64
+; CHECK: %[[combine_extract_lo:.*]] = trunc i64 %[[load]] to i32
+; CHECK: %[[combine_extract_shift:.*]] = lshr i64 %[[load]], 32
+; CHECK: %[[combine_extract_hi:.*]] = trunc i64 %[[combine_extract_shift]] to i32
+; CHECK: %[[add:.*]] = add nsw i32 %[[combine_extract_hi]], %[[combine_extract_lo]]
+}
+
+