[analyzer] Prefer wrapping SymbolicRegions by ElementRegions

It turns out that in certain cases `SymbolRegions` are wrapped by
`ElementRegions`; in others, it's not. This discrepancy can cause the
analyzer not to recognize if the two regions are actually referring to
the same entity, which then can lead to unreachable paths discovered.

Consider this example:

```lang=C++
struct Node { int* ptr; };
void with_structs(Node* n1) {
  Node c = *n1; // copy
  Node* n2 = &c;
  clang_analyzer_dump(*n1); // lazy...
  clang_analyzer_dump(*n2); // lazy...
  clang_analyzer_dump(n1->ptr); // rval(n1->ptr): reg_$2<int * SymRegion{reg_$0<struct Node * n1>}.ptr>
  clang_analyzer_dump(n2->ptr); // rval(n2->ptr): reg_$1<int * Element{SymRegion{reg_$0<struct Node * n1>},0 S64b,struct Node}.ptr>
  clang_analyzer_eval(n1->ptr != n2->ptr); // UNKNOWN, bad!
  (void)(*n1);
  (void)(*n2);
}
```

The copy of `n1` will insert a new binding to the store; but for doing
that it actually must create a `TypedValueRegion` which it could pass to
the `LazyCompoundVal`. Since the memregion in question is a
`SymbolicRegion` - which is untyped, it needs to first wrap it into an
`ElementRegion` basically implementing this untyped -> typed conversion
for the sake of passing it to the `LazyCompoundVal`.
So, this is why we have `Element{SymRegion{.}, 0,struct Node}` for `n1`.

The problem appears if the analyzer evaluates a read from the expression
`n1->ptr`. The same logic won't apply for `SymbolRegionValues`, since
they accept raw `SubRegions`, hence the `SymbolicRegion` won't be
wrapped into an `ElementRegion` in that case.

Later when we arrive at the equality comparison, we cannot prove that
they are equal.

For more details check the corresponding thread on discourse:
https://discourse.llvm.org/t/are-symbolicregions-really-untyped/64406

---

In this patch, I'm eagerly wrapping each `SymbolicRegion` by an
`ElementRegion`; basically canonicalizing to this form.
It seems reasonable to do so since any object can be thought of as a single
array of that object; so this should not make much of a difference.

The tests also underpin this assumption, as only a few were broken by
this change; and actually fixed a FIXME along the way.

About the second example, which does the same copy operation - but on
the heap - it will be fixed by the next patch.

Reviewed By: martong

Differential Revision: https://reviews.llvm.org/D132142
This commit is contained in:
Balazs Benics 2022-09-13 08:58:46 +02:00
parent 7ed68182d7
commit f8643a9b31
14 changed files with 102 additions and 32 deletions

View File

@ -42,6 +42,18 @@ private:
return false;
}
bool isThisObject(const ElementRegion *R) {
if (const auto *Idx = R->getIndex().getAsInteger()) {
if (const auto *SR = R->getSuperRegion()->getAs<SymbolicRegion>()) {
QualType Ty = SR->getPointeeStaticType();
bool IsNotReinterpretCast = R->getValueType() == Ty;
if (Idx->isZero() && IsNotReinterpretCast)
return isThisObject(SR);
}
}
return false;
}
public:
SValExplainer(ASTContext &Ctx) : ACtx(Ctx) {}
@ -144,7 +156,7 @@ public:
// Add the relevant code once it does.
std::string VisitSymbolicRegion(const SymbolicRegion *R) {
// Explain 'this' object here.
// Explain 'this' object here - if it's not wrapped by an ElementRegion.
// TODO: Explain CXXThisRegion itself, find a way to test it.
if (isThisObject(R))
return "'this' object";
@ -174,6 +186,13 @@ public:
std::string VisitElementRegion(const ElementRegion *R) {
std::string Str;
llvm::raw_string_ostream OS(Str);
// Explain 'this' object here.
// They are represented by a SymRegion wrapped by an ElementRegion; so
// match and handle it here.
if (isThisObject(R))
return "'this' object";
OS << "element of type '" << R->getElementType() << "' with index ";
// For concrete index: omit type of the index integer.
if (auto I = R->getIndex().getAs<nonloc::ConcreteInt>())

View File

@ -788,6 +788,18 @@ public:
/// It might return null.
SymbolRef getSymbol() const { return sym; }
/// Gets the type of the wrapped symbol.
/// This type might not be accurate at all times - it's just our best guess.
/// Consider these cases:
/// void foo(void *data, char *str, base *obj) {...}
/// The type of the pointee of `data` is of course not `void`, yet that's our
/// best guess. `str` might point to any object and `obj` might point to some
/// derived instance. `TypedRegions` other hand are representing the cases
/// when we actually know their types.
QualType getPointeeStaticType() const {
return sym->getType()->getPointeeType();
}
bool isBoundable() const override { return true; }
void Profile(llvm::FoldingSetNodeID& ID) const override;

View File

@ -354,8 +354,7 @@ void ExprInspectionChecker::analyzerDumpElementCount(const CallExpr *CE,
if (const auto *TVR = MR->getAs<TypedValueRegion>()) {
ElementTy = TVR->getValueType();
} else {
ElementTy =
MR->castAs<SymbolicRegion>()->getSymbol()->getType()->getPointeeType();
ElementTy = MR->castAs<SymbolicRegion>()->getPointeeStaticType();
}
assert(!ElementTy->isPointerType());

View File

@ -285,8 +285,11 @@ NullabilityChecker::getTrackRegion(SVal Val, bool CheckSuperRegion) const {
const MemRegion *Region = RegionSVal->getRegion();
if (CheckSuperRegion) {
if (auto FieldReg = Region->getAs<FieldRegion>())
if (const SubRegion *FieldReg = Region->getAs<FieldRegion>()) {
if (const auto *ER = dyn_cast<ElementRegion>(FieldReg->getSuperRegion()))
FieldReg = ER;
return dyn_cast<SymbolicRegion>(FieldReg->getSuperRegion());
}
if (auto ElementReg = Region->getAs<ElementRegion>())
return dyn_cast<SymbolicRegion>(ElementReg->getSuperRegion());
}

View File

@ -2495,7 +2495,7 @@ public:
// what is written inside the pointer.
bool CanDereference = true;
if (const auto *SR = L->getRegionAs<SymbolicRegion>()) {
if (SR->getSymbol()->getType()->getPointeeType()->isVoidType())
if (SR->getPointeeStaticType()->isVoidType())
CanDereference = false;
} else if (L->getRegionAs<AllocaRegion>())
CanDereference = false;

View File

@ -3354,6 +3354,14 @@ void ExprEngine::VisitMemberExpr(const MemberExpr *M, ExplodedNode *Pred,
SVal baseExprVal =
MR ? loc::MemRegionVal(MR) : state->getSVal(BaseExpr, LCtx);
// FIXME: Copied from RegionStoreManager::bind()
if (const auto *SR =
dyn_cast_or_null<SymbolicRegion>(baseExprVal.getAsRegion())) {
QualType T = SR->getPointeeStaticType();
baseExprVal =
loc::MemRegionVal(getStoreManager().GetElementZeroRegion(SR, T));
}
const auto *field = cast<FieldDecl>(Member);
SVal L = state->getLValue(field, baseExprVal);

View File

@ -1485,7 +1485,7 @@ static RegionOffset calculateOffset(const MemRegion *R) {
// If our base region is symbolic, we don't know what type it really is.
// Pretend the type of the symbol is the true dynamic type.
// (This will at least be self-consistent for the life of the symbol.)
Ty = SR->getSymbol()->getType()->getPointeeType();
Ty = SR->getPointeeStaticType();
RootIsSymbolic = true;
}

View File

@ -1421,7 +1421,7 @@ SVal RegionStoreManager::getBinding(RegionBindingsConstRef B, Loc L, QualType T)
if (const TypedRegion *TR = dyn_cast<TypedRegion>(MR))
T = TR->getLocationType()->getPointeeType();
else if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(MR))
T = SR->getSymbol()->getType()->getPointeeType();
T = SR->getPointeeStaticType();
}
assert(!T.isNull() && "Unable to auto-detect binding type!");
assert(!T->isVoidType() && "Attempting to dereference a void pointer!");
@ -2390,15 +2390,10 @@ RegionStoreManager::bind(RegionBindingsConstRef B, Loc L, SVal V) {
return bindAggregate(B, TR, V);
}
if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(R)) {
// Binding directly to a symbolic region should be treated as binding
// to element 0.
QualType T = SR->getSymbol()->getType();
if (T->isAnyPointerType() || T->isReferenceType())
T = T->getPointeeType();
R = GetElementZeroRegion(SR, T);
}
// Binding directly to a symbolic region should be treated as binding
// to element 0.
if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(R))
R = GetElementZeroRegion(SR, SR->getPointeeStaticType());
assert((!isa<CXXThisRegion>(R) || !B.lookup(R)) &&
"'this' pointer is not an l-value and is not assignable");

View File

@ -218,9 +218,7 @@ namespace PODUninitialized {
// Make sure that p4.x contains a symbol after copy.
if (p4.x > 0)
clang_analyzer_eval(p4.x > 0); // expected-warning{{TRUE}}
// FIXME: Element region gets in the way, so these aren't the same symbols
// as they should be.
clang_analyzer_eval(pp.x == p4.x); // expected-warning{{UNKNOWN}}
clang_analyzer_eval(pp.x == p4.x); // expected-warning{{TRUE}}
PODWrapper w;
w.p.y = 1;

View File

@ -55,6 +55,6 @@ struct S {
void test_field_dumps(struct S s, struct S *p) {
clang_analyzer_dump_pointer(&s.x); // expected-warning{{&s.x}}
clang_analyzer_dump_pointer(&p->x); // expected-warning{{&SymRegion{reg_$1<struct S * p>}.x}}
clang_analyzer_dump_pointer(&p->x); // expected-warning{{&Element{SymRegion{reg_$1<struct S * p>},0 S64b,struct S}.x}}
clang_analyzer_dumpSvalType_pointer(&s.x); // expected-warning {{int *}}
}

View File

@ -65,7 +65,7 @@ void field_ref(S a) {
}
void field_ptr(S *a) {
clang_analyzer_dump(&a->f); // expected-warning {{SymRegion{reg_$0<S * a>}.f}}
clang_analyzer_dump(&a->f); // expected-warning {{Element{SymRegion{reg_$0<S * a>},0 S64b,struct S}.f}}
clang_analyzer_dumpExtent(&a->f); // expected-warning {{4 S64b}}
clang_analyzer_dumpElementCount(&a->f); // expected-warning {{1 S64b}}
}

View File

@ -340,11 +340,11 @@ struct s {
void struct_pointer_canon(struct s *ps) {
struct s ss = *ps;
clang_analyzer_dump((*ps).v);
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int SymRegion{reg_${{[[:digit:]]+}}<struct s * ps>}.v>}}
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int Element{SymRegion{reg_${{[[:digit:]]+}}<struct s * ps>},0 S64b,struct s}.v>}}
clang_analyzer_dump(ps[0].v);
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int SymRegion{reg_${{[[:digit:]]+}}<struct s * ps>}.v>}}
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int Element{SymRegion{reg_${{[[:digit:]]+}}<struct s * ps>},0 S64b,struct s}.v>}}
clang_analyzer_dump(ps->v);
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int SymRegion{reg_${{[[:digit:]]+}}<struct s * ps>}.v>}}
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int Element{SymRegion{reg_${{[[:digit:]]+}}<struct s * ps>},0 S64b,struct s}.v>}}
clang_analyzer_eval((*ps).v == ps[0].v); // expected-warning{{TRUE}}
clang_analyzer_eval((*ps).v == ps->v); // expected-warning{{TRUE}}
clang_analyzer_eval(ps[0].v == ps->v); // expected-warning{{TRUE}}
@ -360,11 +360,11 @@ typedef struct s T2;
void struct_pointer_canon_typedef(T1 *ps) {
T2 ss = *ps;
clang_analyzer_dump((*ps).v);
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int SymRegion{reg_${{[[:digit:]]+}}<T1 * ps>}.v>}}
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int Element{SymRegion{reg_${{[[:digit:]]+}}<T1 * ps>},0 S64b,struct s}.v>}}
clang_analyzer_dump(ps[0].v);
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int SymRegion{reg_${{[[:digit:]]+}}<T1 * ps>}.v>}}
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int Element{SymRegion{reg_${{[[:digit:]]+}}<T1 * ps>},0 S64b,struct s}.v>}}
clang_analyzer_dump(ps->v);
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int SymRegion{reg_${{[[:digit:]]+}}<T1 * ps>}.v>}}
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int Element{SymRegion{reg_${{[[:digit:]]+}}<T1 * ps>},0 S64b,struct s}.v>}}
clang_analyzer_eval((*ps).v == ps[0].v); // expected-warning{{TRUE}}
clang_analyzer_eval((*ps).v == ps->v); // expected-warning{{TRUE}}
clang_analyzer_eval(ps[0].v == ps->v); // expected-warning{{TRUE}}
@ -378,11 +378,11 @@ void struct_pointer_canon_bidim_typedef(T1 **ps) {
void struct_pointer_canon_const(const struct s *ps) {
struct s ss = *ps;
clang_analyzer_dump((*ps).v);
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int SymRegion{reg_${{[[:digit:]]+}}<const struct s * ps>}.v>}}
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int Element{SymRegion{reg_${{[[:digit:]]+}}<const struct s * ps>},0 S64b,struct s}.v>}}
clang_analyzer_dump(ps[0].v);
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int SymRegion{reg_${{[[:digit:]]+}}<const struct s * ps>}.v>}}
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int Element{SymRegion{reg_${{[[:digit:]]+}}<const struct s * ps>},0 S64b,struct s}.v>}}
clang_analyzer_dump(ps->v);
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int SymRegion{reg_${{[[:digit:]]+}}<const struct s * ps>}.v>}}
// expected-warning-re@-1{{reg_${{[[:digit:]]+}}<int Element{SymRegion{reg_${{[[:digit:]]+}}<const struct s * ps>},0 S64b,struct s}.v>}}
clang_analyzer_eval((*ps).v == ps[0].v); // expected-warning{{TRUE}}
clang_analyzer_eval((*ps).v == ps->v); // expected-warning{{TRUE}}
clang_analyzer_eval(ps[0].v == ps->v); // expected-warning{{TRUE}}

View File

@ -134,10 +134,10 @@ struct parse_t {
int parse(parse_t *p) {
unsigned copy = p->bits2;
clang_analyzer_dump(copy);
// expected-warning@-1 {{reg_$1<unsigned int SymRegion{reg_$0<parse_t * p>}.bits2>}}
// expected-warning@-1 {{reg_$1<unsigned int Element{SymRegion{reg_$0<parse_t * p>},0 S64b,struct Bug_55934::parse_t}.bits2>}}
header *bits = (header *)&copy;
clang_analyzer_dump(bits->b);
// expected-warning@-1 {{derived_$2{reg_$1<unsigned int SymRegion{reg_$0<parse_t * p>}.bits2>,Element{copy,0 S64b,struct Bug_55934::header}.b}}}
// expected-warning@-1 {{derived_$2{reg_$1<unsigned int Element{SymRegion{reg_$0<parse_t * p>},0 S64b,struct Bug_55934::parse_t}.bits2>,Element{copy,0 S64b,struct Bug_55934::header}.b}}}
return bits->b; // no-warning
}
} // namespace Bug_55934

View File

@ -0,0 +1,36 @@
// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -verify %s
template <typename T> void clang_analyzer_dump(T);
void clang_analyzer_warnIfReached();
struct Node { int* ptr; };
void copy_on_stack(Node* n1) {
Node tmp = *n1;
Node* n2 = &tmp;
clang_analyzer_dump(n1); // expected-warning-re {{&SymRegion{reg_${{[0-9]+}}<Node * n1>}}}
clang_analyzer_dump(n2); // expected-warning {{&tmp}}
clang_analyzer_dump(n1->ptr); // expected-warning-re {{&SymRegion{reg_${{[0-9]+}}<int * Element{SymRegion{reg_${{[0-9]+}}<Node * n1>},0 S64b,struct Node}.ptr>}}}
clang_analyzer_dump(n2->ptr); // expected-warning-re {{&SymRegion{reg_${{[0-9]+}}<int * Element{SymRegion{reg_${{[0-9]+}}<Node * n1>},0 S64b,struct Node}.ptr>}}}
if (n1->ptr != n2->ptr)
clang_analyzer_warnIfReached(); // unreachable
(void)(n1->ptr);
(void)(n2->ptr);
}
void copy_on_heap(Node* n1) {
Node* n2 = new Node(*n1);
clang_analyzer_dump(n1); // expected-warning-re {{&SymRegion{reg_${{[0-9]+}}<Node * n1>}}}
clang_analyzer_dump(n2); // expected-warning-re {{&HeapSymRegion{conj_${{[0-9]+}}{Node *, LC{{[0-9]+}}, S{{[0-9]+}}, #{{[0-9]+}}}}}}
clang_analyzer_dump(n1->ptr); // expected-warning-re {{&SymRegion{reg_${{[0-9]+}}<int * Element{SymRegion{reg_${{[0-9]+}}<Node * n1>},0 S64b,struct Node}.ptr>}}}
clang_analyzer_dump(n2->ptr); // expected-warning {{Unknown}} FIXME: This should be the same as above.
if (n1->ptr != n2->ptr)
clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} FIXME: This should not be reachable.
(void)(n1->ptr);
(void)(n2->ptr);
}