[CFLAA] Add more offset-sensitivity tracking.

This patch teaches FunctionInfo about offsets.

Like the last patch, this one doesn't introduce any visible
functionality change (the core algorithm knows nothing about offsets;
they're just plumbed through). Tests will come when we start acting
differently because of the offsets.

Patch by Jia Chen.

(N.B. I made a tiny change to Jia's patch to avoid warnings by GCC: I
put DenseMapInfo specializations in the `llvm` namespace. Only realized
that those appeared when compiling locally. :) )

Differential Revision: https://reviews.llvm.org/D22634

llvm-svn: 276486
This commit is contained in:
George Burgess IV 2016-07-22 22:30:48 +00:00
parent 716814aa8a
commit 4ec1753ff4
5 changed files with 162 additions and 43 deletions

View File

@ -91,7 +91,7 @@ instantiateExternalRelation(ExternalRelation ERelation, CallSite CS) {
auto To = instantiateInterfaceValue(ERelation.To, CS);
if (!To)
return None;
return InstantiatedRelation{*From, *To};
return InstantiatedRelation{*From, *To, ERelation.Offset};
}
Optional<InstantiatedAttr> instantiateExternalAttribute(ExternalAttribute EAttr,

View File

@ -134,20 +134,41 @@ inline bool operator>=(InterfaceValue LHS, InterfaceValue RHS) {
return !(LHS < RHS);
}
// We use UnknownOffset to represent pointer offsets that cannot be determined
// at compile time. Note that MemoryLocation::UnknownSize cannot be used here
// because we require a signed value.
static LLVM_CONSTEXPR int64_t UnknownOffset = INT64_MAX;
inline int64_t addOffset(int64_t LHS, int64_t RHS) {
if (LHS == UnknownOffset || RHS == UnknownOffset)
return UnknownOffset;
// FIXME: Do we need to guard against integer overflow here?
return LHS + RHS;
}
/// We use ExternalRelation to describe an externally visible aliasing relations
/// between parameters/return value of a function.
struct ExternalRelation {
InterfaceValue From, To;
int64_t Offset;
};
inline bool operator==(ExternalRelation LHS, ExternalRelation RHS) {
return LHS.From == RHS.From && LHS.To == RHS.To;
return LHS.From == RHS.From && LHS.To == RHS.To && LHS.Offset == RHS.Offset;
}
inline bool operator!=(ExternalRelation LHS, ExternalRelation RHS) {
return !(LHS == RHS);
}
inline bool operator<(ExternalRelation LHS, ExternalRelation RHS) {
return LHS.From < RHS.From || (LHS.From == RHS.From && LHS.To < RHS.To);
if (LHS.From < RHS.From)
return true;
if (LHS.From > RHS.From)
return false;
if (LHS.To < RHS.To)
return true;
if (LHS.To > RHS.To)
return false;
return LHS.Offset < RHS.Offset;
}
inline bool operator>(ExternalRelation LHS, ExternalRelation RHS) {
return RHS < LHS;
@ -206,6 +227,7 @@ inline bool operator>=(InstantiatedValue LHS, InstantiatedValue RHS) {
/// callsite
struct InstantiatedRelation {
InstantiatedValue From, To;
int64_t Offset;
};
Optional<InstantiatedRelation> instantiateExternalRelation(ExternalRelation,
CallSite);

View File

@ -116,6 +116,30 @@ LLVM_CONSTEXPR unsigned WriteOnlyStateMask =
(1U << static_cast<uint8_t>(MatchState::FlowToWriteOnly)) |
(1U << static_cast<uint8_t>(MatchState::FlowToMemAliasWriteOnly));
// A pair that consists of a value and an offset
struct OffsetValue {
const Value *Val;
int64_t Offset;
};
bool operator==(OffsetValue LHS, OffsetValue RHS) {
return LHS.Val == RHS.Val && LHS.Offset == RHS.Offset;
}
bool operator<(OffsetValue LHS, OffsetValue RHS) {
return std::less<const Value *>()(LHS.Val, RHS.Val) ||
(LHS.Val == RHS.Val && LHS.Offset < RHS.Offset);
}
// A pair that consists of an InstantiatedValue and an offset
struct OffsetInstantiatedValue {
InstantiatedValue IVal;
int64_t Offset;
};
bool operator==(OffsetInstantiatedValue LHS, OffsetInstantiatedValue RHS) {
return LHS.IVal == RHS.IVal && LHS.Offset == RHS.Offset;
}
// We use ReachabilitySet to keep track of value aliases (The nonterminal "V" in
// the paper) during the analysis.
class ReachabilitySet {
@ -227,12 +251,55 @@ struct ValueSummary {
};
}
namespace llvm {
// Specialize DenseMapInfo for OffsetValue.
template <> struct DenseMapInfo<OffsetValue> {
static OffsetValue getEmptyKey() {
return OffsetValue{DenseMapInfo<const Value *>::getEmptyKey(),
DenseMapInfo<int64_t>::getEmptyKey()};
}
static OffsetValue getTombstoneKey() {
return OffsetValue{DenseMapInfo<const Value *>::getTombstoneKey(),
DenseMapInfo<int64_t>::getEmptyKey()};
}
static unsigned getHashValue(const OffsetValue &OVal) {
return DenseMapInfo<std::pair<const Value *, int64_t>>::getHashValue(
std::make_pair(OVal.Val, OVal.Offset));
}
static bool isEqual(const OffsetValue &LHS, const OffsetValue &RHS) {
return LHS == RHS;
}
};
// Specialize DenseMapInfo for OffsetInstantiatedValue.
template <> struct DenseMapInfo<OffsetInstantiatedValue> {
static OffsetInstantiatedValue getEmptyKey() {
return OffsetInstantiatedValue{
DenseMapInfo<InstantiatedValue>::getEmptyKey(),
DenseMapInfo<int64_t>::getEmptyKey()};
}
static OffsetInstantiatedValue getTombstoneKey() {
return OffsetInstantiatedValue{
DenseMapInfo<InstantiatedValue>::getTombstoneKey(),
DenseMapInfo<int64_t>::getEmptyKey()};
}
static unsigned getHashValue(const OffsetInstantiatedValue &OVal) {
return DenseMapInfo<std::pair<InstantiatedValue, int64_t>>::getHashValue(
std::make_pair(OVal.IVal, OVal.Offset));
}
static bool isEqual(const OffsetInstantiatedValue &LHS,
const OffsetInstantiatedValue &RHS) {
return LHS == RHS;
}
};
}
class CFLAndersAAResult::FunctionInfo {
/// Map a value to other values that may alias it
/// Since the alias relation is symmetric, to save some space we assume values
/// are properly ordered: if a and b alias each other, and a < b, then b is in
/// AliasMap[a] but not vice versa.
DenseMap<const Value *, std::vector<const Value *>> AliasMap;
DenseMap<const Value *, std::vector<OffsetValue>> AliasMap;
/// Map a value to its corresponding AliasAttrs
DenseMap<const Value *, AliasAttrs> AttrMap;
@ -246,7 +313,7 @@ public:
FunctionInfo(const Function &, const SmallVectorImpl<Value *> &,
const ReachabilitySet &, AliasAttrMap);
bool mayAlias(const Value *LHS, const Value *RHS) const;
bool mayAlias(const Value *, uint64_t, const Value *, uint64_t) const;
const AliasSummary &getAliasSummary() const { return Summary; }
};
@ -281,12 +348,12 @@ static void populateAttrMap(DenseMap<const Value *, AliasAttrs> &AttrMap,
// AttrMap only cares about top-level values
if (IVal.DerefLevel == 0)
AttrMap[IVal.Val] = Mapping.second;
AttrMap[IVal.Val] |= Mapping.second;
}
}
static void
populateAliasMap(DenseMap<const Value *, std::vector<const Value *>> &AliasMap,
populateAliasMap(DenseMap<const Value *, std::vector<OffsetValue>> &AliasMap,
const ReachabilitySet &ReachSet) {
for (const auto &OuterMapping : ReachSet.value_mappings()) {
// AliasMap only cares about top-level values
@ -298,11 +365,11 @@ populateAliasMap(DenseMap<const Value *, std::vector<const Value *>> &AliasMap,
for (const auto &InnerMapping : OuterMapping.second) {
// Again, AliasMap only cares about top-level values
if (InnerMapping.first.DerefLevel == 0)
AliasList.push_back(InnerMapping.first.Val);
AliasList.push_back(OffsetValue{InnerMapping.first.Val, UnknownOffset});
}
// Sort AliasList for faster lookup
std::sort(AliasList.begin(), AliasList.end(), std::less<const Value *>());
std::sort(AliasList.begin(), AliasList.end());
}
}
@ -316,7 +383,7 @@ static void populateExternalRelations(
if (is_contained(RetVals, &Arg)) {
auto ArgVal = InterfaceValue{Arg.getArgNo() + 1, 0};
auto RetVal = InterfaceValue{0, 0};
ExtRelations.push_back(ExternalRelation{ArgVal, RetVal});
ExtRelations.push_back(ExternalRelation{ArgVal, RetVal, 0});
}
}
@ -344,7 +411,7 @@ static void populateExternalRelations(
continue;
if (hasReadOnlyState(InnerMapping.second))
ExtRelations.push_back(ExternalRelation{*Dst, *Src});
ExtRelations.push_back(ExternalRelation{*Dst, *Src, UnknownOffset});
// No need to check for WriteOnly state, since ReachSet is symmetric
} else {
// If Src is not a param/return, add it to ValueMap
@ -378,9 +445,9 @@ static void populateExternalRelations(
else
DstLevel += FromLevel - ToLevel;
ExtRelations.push_back(
ExternalRelation{InterfaceValue{SrcIndex, SrcLevel},
InterfaceValue{DstIndex, DstLevel}});
ExtRelations.push_back(ExternalRelation{
InterfaceValue{SrcIndex, SrcLevel},
InterfaceValue{DstIndex, DstLevel}, UnknownOffset});
}
}
}
@ -423,27 +490,69 @@ AliasAttrs CFLAndersAAResult::FunctionInfo::getAttrs(const Value *V) const {
}
bool CFLAndersAAResult::FunctionInfo::mayAlias(const Value *LHS,
const Value *RHS) const {
uint64_t LHSSize,
const Value *RHS,
uint64_t RHSSize) const {
assert(LHS && RHS);
// Check AliasAttrs first since it's cheaper
auto AttrsA = getAttrs(LHS);
auto AttrsB = getAttrs(RHS);
if (hasUnknownOrCallerAttr(AttrsA))
return AttrsB.any();
if (hasUnknownOrCallerAttr(AttrsB))
return AttrsA.any();
if (isGlobalOrArgAttr(AttrsA))
return isGlobalOrArgAttr(AttrsB);
if (isGlobalOrArgAttr(AttrsB))
return isGlobalOrArgAttr(AttrsA);
// At this point both LHS and RHS should point to locally allocated objects
auto Itr = AliasMap.find(LHS);
if (Itr != AliasMap.end()) {
if (std::binary_search(Itr->second.begin(), Itr->second.end(), RHS,
std::less<const Value *>()))
return true;
// Find out all (X, Offset) where X == RHS
auto Comparator = [](OffsetValue LHS, OffsetValue RHS) {
return std::less<const Value *>()(LHS.Val, RHS.Val);
};
#ifdef EXPENSIVE_CHECKS
assert(std::is_sorted(Itr->second.begin(), Itr->second.end(), Comparator));
#endif
auto RangePair = std::equal_range(Itr->second.begin(), Itr->second.end(),
OffsetValue{RHS, 0}, Comparator);
if (RangePair.first != RangePair.second) {
// Be conservative about UnknownSize
if (LHSSize == MemoryLocation::UnknownSize ||
RHSSize == MemoryLocation::UnknownSize)
return true;
for (const auto &OVal : make_range(RangePair)) {
// Be conservative about UnknownOffset
if (OVal.Offset == UnknownOffset)
return true;
// We know that LHS aliases (RHS + OVal.Offset) if the control flow
// reaches here. The may-alias query essentially becomes integer
// range-overlap queries over two ranges [OVal.Offset, OVal.Offset +
// LHSSize) and [0, RHSSize).
// Try to be conservative on super large offsets
if (LLVM_UNLIKELY(LHSSize > INT64_MAX || RHSSize > INT64_MAX))
return true;
auto LHSStart = OVal.Offset;
// FIXME: Do we need to guard against integer overflow?
auto LHSEnd = OVal.Offset + static_cast<int64_t>(LHSSize);
auto RHSStart = 0;
auto RHSEnd = static_cast<int64_t>(RHSSize);
if (LHSEnd > RHSStart && LHSStart < RHSEnd)
return true;
}
}
}
// Even if LHS and RHS are not reachable, they may still alias due to their
// AliasAttrs
auto AttrsA = getAttrs(LHS);
auto AttrsB = getAttrs(RHS);
if (AttrsA.none() || AttrsB.none())
return false;
if (hasUnknownOrCallerAttr(AttrsA) || hasUnknownOrCallerAttr(AttrsB))
return true;
if (isGlobalOrArgAttr(AttrsA) && isGlobalOrArgAttr(AttrsB))
return true;
return false;
}
@ -725,7 +834,7 @@ AliasResult CFLAndersAAResult::query(const MemoryLocation &LocA,
auto &FunInfo = ensureCached(*Fn);
// AliasMap lookup
if (FunInfo->mayAlias(ValA, ValB))
if (FunInfo->mayAlias(ValA, LocA.Size, ValB, LocB.Size))
return MayAlias;
return NoAlias;
}

View File

@ -24,18 +24,6 @@
namespace llvm {
namespace cflaa {
// We use UnknownOffset to represent pointer offsets that cannot be determined
// at compile time. Note that MemoryLocation::UnknownSize cannot be used here
// because we require a signed value.
enum : int64_t { UnknownOffset = INT64_MAX };
inline int64_t addOffset(int64_t LHS, int64_t RHS) {
if (LHS == UnknownOffset || RHS == UnknownOffset)
return UnknownOffset;
// FIXME: Do we need to guard against integer overflow here?
return LHS + RHS;
}
/// \brief The Program Expression Graph (PEG) of CFL analysis
/// CFLGraph is auxiliary data structure used by CFL-based alias analysis to
/// describe flow-insensitive pointer-related behaviors. Given an LLVM function,

View File

@ -153,7 +153,7 @@ CFLSteensAAResult::FunctionInfo::FunctionInfo(
if (Itr != InterfaceMap.end()) {
if (CurrValue != Itr->second)
Summary.RetParamRelations.push_back(
ExternalRelation{CurrValue, Itr->second});
ExternalRelation{CurrValue, Itr->second, UnknownOffset});
break;
}