forked from OSchip/llvm-project
[analyzer] Prefer wrapping SymbolicRegions by ElementRegions
It turns out that in certain cases `SymbolicRegions` are wrapped by `ElementRegions`; in others, they are not. This discrepancy can prevent the analyzer from recognizing that two regions actually refer to the same entity, which can then lead to the analyzer exploring paths that are actually unreachable.

Consider this example:

```lang=C++
struct Node { int* ptr; };
void with_structs(Node* n1) {
  Node c = *n1; // copy
  Node* n2 = &c;
  clang_analyzer_dump(*n1); // lazy...
  clang_analyzer_dump(*n2); // lazy...
  clang_analyzer_dump(n1->ptr); // rval(n1->ptr): reg_$2<int * SymRegion{reg_$0<struct Node * n1>}.ptr>
  clang_analyzer_dump(n2->ptr); // rval(n2->ptr): reg_$1<int * Element{SymRegion{reg_$0<struct Node * n1>},0 S64b,struct Node}.ptr>
  clang_analyzer_eval(n1->ptr != n2->ptr); // UNKNOWN, bad!
  (void)(*n1);
  (void)(*n2);
}
```

Copying `*n1` inserts a new binding into the store; but to do that, the analyzer must first create a `TypedValueRegion` which it can pass to the `LazyCompoundVal`. Since the memory region in question is a `SymbolicRegion` - which is untyped - it first needs to be wrapped into an `ElementRegion`, which basically implements this untyped -> typed conversion for the sake of passing it to the `LazyCompoundVal`. So, this is why we have `Element{SymRegion{.}, 0,struct Node}` for `n1`.

The problem appears when the analyzer evaluates a read from the expression `n1->ptr`. The same logic does not apply to `SymbolRegionValues`: they accept raw `SubRegions`, so the `SymbolicRegion` is not wrapped into an `ElementRegion` in that case. Later, when we arrive at the equality comparison, we cannot prove that the two values are equal.

For more details, check the corresponding thread on Discourse: https://discourse.llvm.org/t/are-symbolicregions-really-untyped/64406

---

In this patch, I'm eagerly wrapping each `SymbolicRegion` by an `ElementRegion`; basically canonicalizing to this form.
It seems reasonable to do so, since any object can be thought of as a single-element array of that object; so this should not make much of a difference. The tests also underpin this assumption: only a few were broken by this change, and it actually fixed a FIXME along the way. The second example, which does the same copy operation - but on the heap - will be fixed by the next patch.

Reviewed By: martong

Differential Revision: https://reviews.llvm.org/D132142
This commit is contained in:
parent
7ed68182d7
commit
f8643a9b31
|
@ -42,6 +42,18 @@ private:
|
|||
return false;
|
||||
}
|
||||
|
||||
bool isThisObject(const ElementRegion *R) {
|
||||
if (const auto *Idx = R->getIndex().getAsInteger()) {
|
||||
if (const auto *SR = R->getSuperRegion()->getAs<SymbolicRegion>()) {
|
||||
QualType Ty = SR->getPointeeStaticType();
|
||||
bool IsNotReinterpretCast = R->getValueType() == Ty;
|
||||
if (Idx->isZero() && IsNotReinterpretCast)
|
||||
return isThisObject(SR);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public:
|
||||
SValExplainer(ASTContext &Ctx) : ACtx(Ctx) {}
|
||||
|
||||
|
@ -144,7 +156,7 @@ public:
|
|||
// Add the relevant code once it does.
|
||||
|
||||
std::string VisitSymbolicRegion(const SymbolicRegion *R) {
|
||||
// Explain 'this' object here.
|
||||
// Explain 'this' object here - if it's not wrapped by an ElementRegion.
|
||||
// TODO: Explain CXXThisRegion itself, find a way to test it.
|
||||
if (isThisObject(R))
|
||||
return "'this' object";
|
||||
|
@ -174,6 +186,13 @@ public:
|
|||
std::string VisitElementRegion(const ElementRegion *R) {
|
||||
std::string Str;
|
||||
llvm::raw_string_ostream OS(Str);
|
||||
|
||||
// Explain 'this' object here.
|
||||
// They are represented by a SymRegion wrapped by an ElementRegion; so
|
||||
// match and handle it here.
|
||||
if (isThisObject(R))
|
||||
return "'this' object";
|
||||
|
||||
OS << "element of type '" << R->getElementType() << "' with index ";
|
||||
// For concrete index: omit type of the index integer.
|
||||
if (auto I = R->getIndex().getAs<nonloc::ConcreteInt>())
|
||||
|
|
|
@ -788,6 +788,18 @@ public:
|
|||
/// It might return null.
|
||||
SymbolRef getSymbol() const { return sym; }
|
||||
|
||||
/// Gets the type of the wrapped symbol.
|
||||
/// This type might not be accurate at all times - it's just our best guess.
|
||||
/// Consider these cases:
|
||||
/// void foo(void *data, char *str, base *obj) {...}
|
||||
/// The type of the pointee of `data` is of course not `void`, yet that's our
|
||||
/// best guess. `str` might point to any object and `obj` might point to some
|
||||
/// derived instance. `TypedRegions` other hand are representing the cases
|
||||
/// when we actually know their types.
|
||||
QualType getPointeeStaticType() const {
|
||||
return sym->getType()->getPointeeType();
|
||||
}
|
||||
|
||||
bool isBoundable() const override { return true; }
|
||||
|
||||
void Profile(llvm::FoldingSetNodeID& ID) const override;
|
||||
|
|
|
@ -354,8 +354,7 @@ void ExprInspectionChecker::analyzerDumpElementCount(const CallExpr *CE,
|
|||
if (const auto *TVR = MR->getAs<TypedValueRegion>()) {
|
||||
ElementTy = TVR->getValueType();
|
||||
} else {
|
||||
ElementTy =
|
||||
MR->castAs<SymbolicRegion>()->getSymbol()->getType()->getPointeeType();
|
||||
ElementTy = MR->castAs<SymbolicRegion>()->getPointeeStaticType();
|
||||
}
|
||||
|
||||
assert(!ElementTy->isPointerType());
|
||||
|
|
|
@ -285,8 +285,11 @@ NullabilityChecker::getTrackRegion(SVal Val, bool CheckSuperRegion) const {
|
|||
const MemRegion *Region = RegionSVal->getRegion();
|
||||
|
||||
if (CheckSuperRegion) {
|
||||
if (auto FieldReg = Region->getAs<FieldRegion>())
|
||||
if (const SubRegion *FieldReg = Region->getAs<FieldRegion>()) {
|
||||
if (const auto *ER = dyn_cast<ElementRegion>(FieldReg->getSuperRegion()))
|
||||
FieldReg = ER;
|
||||
return dyn_cast<SymbolicRegion>(FieldReg->getSuperRegion());
|
||||
}
|
||||
if (auto ElementReg = Region->getAs<ElementRegion>())
|
||||
return dyn_cast<SymbolicRegion>(ElementReg->getSuperRegion());
|
||||
}
|
||||
|
|
|
@ -2495,7 +2495,7 @@ public:
|
|||
// what is written inside the pointer.
|
||||
bool CanDereference = true;
|
||||
if (const auto *SR = L->getRegionAs<SymbolicRegion>()) {
|
||||
if (SR->getSymbol()->getType()->getPointeeType()->isVoidType())
|
||||
if (SR->getPointeeStaticType()->isVoidType())
|
||||
CanDereference = false;
|
||||
} else if (L->getRegionAs<AllocaRegion>())
|
||||
CanDereference = false;
|
||||
|
|
|
@ -3354,6 +3354,14 @@ void ExprEngine::VisitMemberExpr(const MemberExpr *M, ExplodedNode *Pred,
|
|||
SVal baseExprVal =
|
||||
MR ? loc::MemRegionVal(MR) : state->getSVal(BaseExpr, LCtx);
|
||||
|
||||
// FIXME: Copied from RegionStoreManager::bind()
|
||||
if (const auto *SR =
|
||||
dyn_cast_or_null<SymbolicRegion>(baseExprVal.getAsRegion())) {
|
||||
QualType T = SR->getPointeeStaticType();
|
||||
baseExprVal =
|
||||
loc::MemRegionVal(getStoreManager().GetElementZeroRegion(SR, T));
|
||||
}
|
||||
|
||||
const auto *field = cast<FieldDecl>(Member);
|
||||
SVal L = state->getLValue(field, baseExprVal);
|
||||
|
||||
|
|
|
@ -1485,7 +1485,7 @@ static RegionOffset calculateOffset(const MemRegion *R) {
|
|||
// If our base region is symbolic, we don't know what type it really is.
|
||||
// Pretend the type of the symbol is the true dynamic type.
|
||||
// (This will at least be self-consistent for the life of the symbol.)
|
||||
Ty = SR->getSymbol()->getType()->getPointeeType();
|
||||
Ty = SR->getPointeeStaticType();
|
||||
RootIsSymbolic = true;
|
||||
}
|
||||
|
||||
|
|
|
@ -1421,7 +1421,7 @@ SVal RegionStoreManager::getBinding(RegionBindingsConstRef B, Loc L, QualType T)
|
|||
if (const TypedRegion *TR = dyn_cast<TypedRegion>(MR))
|
||||
T = TR->getLocationType()->getPointeeType();
|
||||
else if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(MR))
|
||||
T = SR->getSymbol()->getType()->getPointeeType();
|
||||
T = SR->getPointeeStaticType();
|
||||
}
|
||||
assert(!T.isNull() && "Unable to auto-detect binding type!");
|
||||
assert(!T->isVoidType() && "Attempting to dereference a void pointer!");
|
||||
|
@ -2390,15 +2390,10 @@ RegionStoreManager::bind(RegionBindingsConstRef B, Loc L, SVal V) {
|
|||
return bindAggregate(B, TR, V);
|
||||
}
|
||||
|
||||
if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(R)) {
|
||||
// Binding directly to a symbolic region should be treated as binding
|
||||
// to element 0.
|
||||
QualType T = SR->getSymbol()->getType();
|
||||
if (T->isAnyPointerType() || T->isReferenceType())
|
||||
T = T->getPointeeType();
|
||||
|
||||
R = GetElementZeroRegion(SR, T);
|
||||
}
|
||||
// Binding directly to a symbolic region should be treated as binding
|
||||
// to element 0.
|
||||
if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(R))
|
||||
R = GetElementZeroRegion(SR, SR->getPointeeStaticType());
|
||||
|
||||
assert((!isa<CXXThisRegion>(R) || !B.lookup(R)) &&
|
||||
"'this' pointer is not an l-value and is not assignable");
|
||||
|
|
|
@ -218,9 +218,7 @@ namespace PODUninitialized {
|
|||
// Make sure that p4.x contains a symbol after copy.
|
||||
if (p4.x > 0)
|
||||
clang_analyzer_eval(p4.x > 0); // expected-warning{{TRUE}}
|
||||
// FIXME: Element region gets in the way, so these aren't the same symbols
|
||||
// as they should be.
|
||||
clang_analyzer_eval(pp.x == p4.x); // expected-warning{{UNKNOWN}}
|
||||
clang_analyzer_eval(pp.x == p4.x); // expected-warning{{TRUE}}
|
||||
|
||||
PODWrapper w;
|
||||
w.p.y = 1;
|
||||
|
|
|
@ -55,6 +55,6 @@ struct S {
|
|||
|
||||
void test_field_dumps(struct S s, struct S *p) {
|
||||
clang_analyzer_dump_pointer(&s.x); // expected-warning{{&s.x}}
|
||||
clang_analyzer_dump_pointer(&p->x); // expected-warning{{&SymRegion{reg_$1<struct S * p>}.x}}
|
||||
clang_analyzer_dump_pointer(&p->x); // expected-warning{{&Element{SymRegion{reg_$1<struct S * p>},0 S64b,struct S}.x}}
|
||||
clang_analyzer_dumpSvalType_pointer(&s.x); // expected-warning {{int *}}
|
||||
}
|
||||
|
|
|
@ -65,7 +65,7 @@ void field_ref(S a) {
|
|||
}
|
||||
|
||||
void field_ptr(S *a) {
|
||||
clang_analyzer_dump(&a->f); // expected-warning {{SymRegion{reg_$0<S * a>}.f}}
|
||||
clang_analyzer_dump(&a->f); // expected-warning {{Element{SymRegion{reg_$0<S * a>},0 S64b,struct S}.f}}
|
||||
clang_analyzer_dumpExtent(&a->f); // expected-warning {{4 S64b}}
|
||||
clang_analyzer_dumpElementCount(&a->f); // expected-warning {{1 S64b}}
|
||||
}
|
||||
|
|
|
@ -340,11 +340,11 @@ struct s {
|
|||
void struct_pointer_canon(struct s *ps) {
|
||||
struct s ss = *ps;
|
||||
clang_analyzer_dump((*ps).v);
|
||||
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int SymRegion{reg_${{[[:digit:]]+}}<struct s * ps>}.v>}}
|
||||
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int Element{SymRegion{reg_${{[[:digit:]]+}}<struct s * ps>},0 S64b,struct s}.v>}}
|
||||
clang_analyzer_dump(ps[0].v);
|
||||
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int SymRegion{reg_${{[[:digit:]]+}}<struct s * ps>}.v>}}
|
||||
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int Element{SymRegion{reg_${{[[:digit:]]+}}<struct s * ps>},0 S64b,struct s}.v>}}
|
||||
clang_analyzer_dump(ps->v);
|
||||
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int SymRegion{reg_${{[[:digit:]]+}}<struct s * ps>}.v>}}
|
||||
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int Element{SymRegion{reg_${{[[:digit:]]+}}<struct s * ps>},0 S64b,struct s}.v>}}
|
||||
clang_analyzer_eval((*ps).v == ps[0].v); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval((*ps).v == ps->v); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(ps[0].v == ps->v); // expected-warning{{TRUE}}
|
||||
|
@ -360,11 +360,11 @@ typedef struct s T2;
|
|||
void struct_pointer_canon_typedef(T1 *ps) {
|
||||
T2 ss = *ps;
|
||||
clang_analyzer_dump((*ps).v);
|
||||
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int SymRegion{reg_${{[[:digit:]]+}}<T1 * ps>}.v>}}
|
||||
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int Element{SymRegion{reg_${{[[:digit:]]+}}<T1 * ps>},0 S64b,struct s}.v>}}
|
||||
clang_analyzer_dump(ps[0].v);
|
||||
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int SymRegion{reg_${{[[:digit:]]+}}<T1 * ps>}.v>}}
|
||||
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int Element{SymRegion{reg_${{[[:digit:]]+}}<T1 * ps>},0 S64b,struct s}.v>}}
|
||||
clang_analyzer_dump(ps->v);
|
||||
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int SymRegion{reg_${{[[:digit:]]+}}<T1 * ps>}.v>}}
|
||||
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int Element{SymRegion{reg_${{[[:digit:]]+}}<T1 * ps>},0 S64b,struct s}.v>}}
|
||||
clang_analyzer_eval((*ps).v == ps[0].v); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval((*ps).v == ps->v); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(ps[0].v == ps->v); // expected-warning{{TRUE}}
|
||||
|
@ -378,11 +378,11 @@ void struct_pointer_canon_bidim_typedef(T1 **ps) {
|
|||
void struct_pointer_canon_const(const struct s *ps) {
|
||||
struct s ss = *ps;
|
||||
clang_analyzer_dump((*ps).v);
|
||||
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int SymRegion{reg_${{[[:digit:]]+}}<const struct s * ps>}.v>}}
|
||||
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int Element{SymRegion{reg_${{[[:digit:]]+}}<const struct s * ps>},0 S64b,struct s}.v>}}
|
||||
clang_analyzer_dump(ps[0].v);
|
||||
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int SymRegion{reg_${{[[:digit:]]+}}<const struct s * ps>}.v>}}
|
||||
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int Element{SymRegion{reg_${{[[:digit:]]+}}<const struct s * ps>},0 S64b,struct s}.v>}}
|
||||
clang_analyzer_dump(ps->v);
|
||||
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int SymRegion{reg_${{[[:digit:]]+}}<const struct s * ps>}.v>}}
|
||||
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int Element{SymRegion{reg_${{[[:digit:]]+}}<const struct s * ps>},0 S64b,struct s}.v>}}
|
||||
clang_analyzer_eval((*ps).v == ps[0].v); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval((*ps).v == ps->v); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(ps[0].v == ps->v); // expected-warning{{TRUE}}
|
||||
|
|
|
@ -134,10 +134,10 @@ struct parse_t {
|
|||
int parse(parse_t *p) {
|
||||
unsigned copy = p->bits2;
|
||||
clang_analyzer_dump(copy);
|
||||
// expected-warning@-1 {{reg_$1<unsigned int SymRegion{reg_$0<parse_t * p>}.bits2>}}
|
||||
// expected-warning@-1 {{reg_$1<unsigned int Element{SymRegion{reg_$0<parse_t * p>},0 S64b,struct Bug_55934::parse_t}.bits2>}}
|
||||
header *bits = (header *)&copy;
|
||||
clang_analyzer_dump(bits->b);
|
||||
// expected-warning@-1 {{derived_$2{reg_$1<unsigned int SymRegion{reg_$0<parse_t * p>}.bits2>,Element{copy,0 S64b,struct Bug_55934::header}.b}}}
|
||||
// expected-warning@-1 {{derived_$2{reg_$1<unsigned int Element{SymRegion{reg_$0<parse_t * p>},0 S64b,struct Bug_55934::parse_t}.bits2>,Element{copy,0 S64b,struct Bug_55934::header}.b}}}
|
||||
return bits->b; // no-warning
|
||||
}
|
||||
} // namespace Bug_55934
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -verify %s
|
||||
|
||||
template <typename T> void clang_analyzer_dump(T);
|
||||
void clang_analyzer_warnIfReached();
|
||||
|
||||
struct Node { int* ptr; };
|
||||
|
||||
// Copies the Node that `n1` points to into a local variable and then reads
// the `ptr` field through both the original pointer and a pointer to the
// copy. Both reads are required to dump as the same symbolic region (the
// field of element zero of `n1`'s symbolic region), and the branch that
// compares them for inequality must not be reported as reached.
void copy_on_stack(Node* n1) {
|
||||
Node tmp = *n1;
|
||||
Node* n2 = &tmp;
|
||||
clang_analyzer_dump(n1); // expected-warning-re {{&SymRegion{reg_${{[0-9]+}}<Node * n1>}}}
|
||||
clang_analyzer_dump(n2); // expected-warning {{&tmp}}
|
||||
|
||||
clang_analyzer_dump(n1->ptr); // expected-warning-re {{&SymRegion{reg_${{[0-9]+}}<int * Element{SymRegion{reg_${{[0-9]+}}<Node * n1>},0 S64b,struct Node}.ptr>}}}
|
||||
clang_analyzer_dump(n2->ptr); // expected-warning-re {{&SymRegion{reg_${{[0-9]+}}<int * Element{SymRegion{reg_${{[0-9]+}}<Node * n1>},0 S64b,struct Node}.ptr>}}}
|
||||
|
||||
// The two field values dump identically above, so no diagnostic is listed
// for the call inside this branch.
if (n1->ptr != n2->ptr)
|
||||
clang_analyzer_warnIfReached(); // unreachable
|
||||
// NOTE(review): these reads presumably keep both values referenced after
// the branch - TODO confirm.
(void)(n1->ptr);
|
||||
(void)(n2->ptr);
|
||||
}
|
||||
|
||||
// Same scenario as a stack copy, but the copy of `*n1` is created with
// `new`. The directives below record the current behavior: reading `ptr`
// through the heap copy dumps as Unknown and the inequality branch is
// reported as reachable. Both are marked FIXME.
void copy_on_heap(Node* n1) {
|
||||
Node* n2 = new Node(*n1);
|
||||
|
||||
clang_analyzer_dump(n1); // expected-warning-re {{&SymRegion{reg_${{[0-9]+}}<Node * n1>}}}
|
||||
clang_analyzer_dump(n2); // expected-warning-re {{&HeapSymRegion{conj_${{[0-9]+}}{Node *, LC{{[0-9]+}}, S{{[0-9]+}}, #{{[0-9]+}}}}}}
|
||||
|
||||
clang_analyzer_dump(n1->ptr); // expected-warning-re {{&SymRegion{reg_${{[0-9]+}}<int * Element{SymRegion{reg_${{[0-9]+}}<Node * n1>},0 S64b,struct Node}.ptr>}}}
|
||||
clang_analyzer_dump(n2->ptr); // expected-warning {{Unknown}} FIXME: This should be the same as above.
|
||||
|
||||
// The value read through `n2` is Unknown (see the dump above), and the
// call inside this branch is currently reported.
if (n1->ptr != n2->ptr)
|
||||
clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} FIXME: This should not be reachable.
|
||||
// NOTE(review): these reads presumably keep both values referenced after
// the branch - TODO confirm.
(void)(n1->ptr);
|
||||
(void)(n2->ptr);
|
||||
}
|
Loading…
Reference in New Issue