From f8643a9b31c4029942f67d4534c9139b45173504 Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Tue, 13 Sep 2022 08:58:46 +0200 Subject: [PATCH] [analyzer] Prefer wrapping SymbolicRegions by ElementRegions It turns out that in certain cases `SymbolRegions` are wrapped by `ElementRegions`; in others, it's not. This discrepancy can cause the analyzer not to recognize if the two regions are actually referring to the same entity, which then can lead to unreachable paths discovered. Consider this example: ```lang=C++ struct Node { int* ptr; }; void with_structs(Node* n1) { Node c = *n1; // copy Node* n2 = &c; clang_analyzer_dump(*n1); // lazy... clang_analyzer_dump(*n2); // lazy... clang_analyzer_dump(n1->ptr); // rval(n1->ptr): reg_$2}.ptr> clang_analyzer_dump(n2->ptr); // rval(n2->ptr): reg_$1},0 S64b,struct Node}.ptr> clang_analyzer_eval(n1->ptr != n2->ptr); // UNKNOWN, bad! (void)(*n1); (void)(*n2); } ``` The copy of `n1` will insert a new binding to the store; but for doing that it actually must create a `TypedValueRegion` which it could pass to the `LazyCompoundVal`. Since the memregion in question is a `SymbolicRegion` - which is untyped, it needs to first wrap it into an `ElementRegion` basically implementing this untyped -> typed conversion for the sake of passing it to the `LazyCompoundVal`. So, this is why we have `Element{SymRegion{.}, 0,struct Node}` for `n1`. The problem appears if the analyzer evaluates a read from the expression `n1->ptr`. The same logic won't apply for `SymbolRegionValues`, since they accept raw `SubRegions`, hence the `SymbolicRegion` won't be wrapped into an `ElementRegion` in that case. Later when we arrive at the equality comparison, we cannot prove that they are equal. For more details check the corresponding thread on discourse: https://discourse.llvm.org/t/are-symbolicregions-really-untyped/64406 --- In this patch, I'm eagerly wrapping each `SymbolicRegion` by an `ElementRegion`; basically canonicalizing to this form. It seems reasonable to do so since any object can be thought of as a single array of that object; so this should not make much of a difference. The tests also underpin this assumption, as only a few were broken by this change; and actually fixed a FIXME along the way. About the second example, which does the same copy operation - but on the heap - it will be fixed by the next patch. Reviewed By: martong Differential Revision: https://reviews.llvm.org/D132142 --- .../StaticAnalyzer/Checkers/SValExplainer.h | 21 ++++++++++- .../Core/PathSensitive/MemRegion.h | 12 +++++++ .../Checkers/ExprInspectionChecker.cpp | 3 +- .../Checkers/NullabilityChecker.cpp | 5 ++- .../Core/BugReporterVisitors.cpp | 2 +- clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 8 +++++ clang/lib/StaticAnalyzer/Core/MemRegion.cpp | 2 +- clang/lib/StaticAnalyzer/Core/RegionStore.cpp | 15 +++----- clang/test/Analysis/ctor.mm | 4 +-- clang/test/Analysis/expr-inspection.c | 2 +- clang/test/Analysis/memory-model.cpp | 2 +- clang/test/Analysis/ptr-arith.c | 18 +++++----- clang/test/Analysis/ptr-arith.cpp | 4 +-- clang/test/Analysis/trivial-copy-struct.cpp | 36 +++++++++++++++++++ 14 files changed, 102 insertions(+), 32 deletions(-) create mode 100644 clang/test/Analysis/trivial-copy-struct.cpp diff --git a/clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h b/clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h index 4c9c8239113e..3ae28c1dba3e 100644 --- a/clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h +++ b/clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h @@ -42,6 +42,18 @@ private: return false; } + bool isThisObject(const ElementRegion *R) { + if (const auto *Idx = R->getIndex().getAsInteger()) { + if (const auto *SR = R->getSuperRegion()->getAs()) { + QualType Ty = SR->getPointeeStaticType(); + bool IsNotReinterpretCast = R->getValueType() == Ty; + if (Idx->isZero() && IsNotReinterpretCast) + return isThisObject(SR); + } + } + return false; + } + public: SValExplainer(ASTContext &Ctx) : ACtx(Ctx) {} @@ -144,7 +156,7 @@ public: // Add the relevant code once it does. std::string VisitSymbolicRegion(const SymbolicRegion *R) { - // Explain 'this' object here. + // Explain 'this' object here - if it's not wrapped by an ElementRegion. // TODO: Explain CXXThisRegion itself, find a way to test it. if (isThisObject(R)) return "'this' object"; @@ -174,6 +186,13 @@ public: std::string VisitElementRegion(const ElementRegion *R) { std::string Str; llvm::raw_string_ostream OS(Str); + + // Explain 'this' object here. + // They are represented by a SymRegion wrapped by an ElementRegion; so + // match and handle it here. + if (isThisObject(R)) + return "'this' object"; + OS << "element of type '" << R->getElementType() << "' with index "; // For concrete index: omit type of the index integer. if (auto I = R->getIndex().getAs()) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h index 34d44f709883..aa21ef272f20 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h @@ -788,6 +788,18 @@ public: /// It might return null. SymbolRef getSymbol() const { return sym; } + /// Gets the type of the wrapped symbol. + /// This type might not be accurate at all times - it's just our best guess. + /// Consider these cases: + /// void foo(void *data, char *str, base *obj) {...} + /// The type of the pointee of `data` is of course not `void`, yet that's our + /// best guess. `str` might point to any object and `obj` might point to some + /// derived instance. `TypedRegions` other hand are representing the cases + /// when we actually know their types. + QualType getPointeeStaticType() const { + return sym->getType()->getPointeeType(); + } + bool isBoundable() const override { return true; } void Profile(llvm::FoldingSetNodeID& ID) const override; diff --git a/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp index ec1b0a70d7d3..195a5fa97743 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp @@ -354,8 +354,7 @@ void ExprInspectionChecker::analyzerDumpElementCount(const CallExpr *CE, if (const auto *TVR = MR->getAs()) { ElementTy = TVR->getValueType(); } else { - ElementTy = - MR->castAs()->getSymbol()->getType()->getPointeeType(); + ElementTy = MR->castAs()->getPointeeStaticType(); } assert(!ElementTy->isPointerType()); diff --git a/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp index 8b4004d059e8..a016eba29a0d 100644 --- a/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp @@ -285,8 +285,11 @@ NullabilityChecker::getTrackRegion(SVal Val, bool CheckSuperRegion) const { const MemRegion *Region = RegionSVal->getRegion(); if (CheckSuperRegion) { - if (auto FieldReg = Region->getAs()) + if (const SubRegion *FieldReg = Region->getAs()) { + if (const auto *ER = dyn_cast(FieldReg->getSuperRegion())) + FieldReg = ER; return dyn_cast(FieldReg->getSuperRegion()); + } if (auto ElementReg = Region->getAs()) return dyn_cast(ElementReg->getSuperRegion()); } diff --git a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp index 336376b78ce8..3796ad51694a 100644 --- a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp +++ b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp @@ -2495,7 +2495,7 @@ public: // what is written inside the pointer. bool CanDereference = true; if (const auto *SR = L->getRegionAs()) { - if (SR->getSymbol()->getType()->getPointeeType()->isVoidType()) + if (SR->getPointeeStaticType()->isVoidType()) CanDereference = false; } else if (L->getRegionAs()) CanDereference = false; diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 99aa4c506f4e..b1514a5473f5 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -3354,6 +3354,14 @@ void ExprEngine::VisitMemberExpr(const MemberExpr *M, ExplodedNode *Pred, SVal baseExprVal = MR ? loc::MemRegionVal(MR) : state->getSVal(BaseExpr, LCtx); + // FIXME: Copied from RegionStoreManager::bind() + if (const auto *SR = + dyn_cast_or_null(baseExprVal.getAsRegion())) { + QualType T = SR->getPointeeStaticType(); + baseExprVal = + loc::MemRegionVal(getStoreManager().GetElementZeroRegion(SR, T)); + } + const auto *field = cast(Member); SVal L = state->getLValue(field, baseExprVal); diff --git a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp index 482862a23030..d25e40dcefee 100644 --- a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp +++ b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp @@ -1485,7 +1485,7 @@ static RegionOffset calculateOffset(const MemRegion *R) { // If our base region is symbolic, we don't know what type it really is. // Pretend the type of the symbol is the true dynamic type. // (This will at least be self-consistent for the life of the symbol.) - Ty = SR->getSymbol()->getType()->getPointeeType(); + Ty = SR->getPointeeStaticType(); RootIsSymbolic = true; } diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp index 4670ae0fb248..58f3b1cebee6 100644 --- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -1421,7 +1421,7 @@ SVal RegionStoreManager::getBinding(RegionBindingsConstRef B, Loc L, QualType T) if (const TypedRegion *TR = dyn_cast(MR)) T = TR->getLocationType()->getPointeeType(); else if (const SymbolicRegion *SR = dyn_cast(MR)) - T = SR->getSymbol()->getType()->getPointeeType(); + T = SR->getPointeeStaticType(); } assert(!T.isNull() && "Unable to auto-detect binding type!"); assert(!T->isVoidType() && "Attempting to dereference a void pointer!"); @@ -2390,15 +2390,10 @@ RegionStoreManager::bind(RegionBindingsConstRef B, Loc L, SVal V) { return bindAggregate(B, TR, V); } - if (const SymbolicRegion *SR = dyn_cast(R)) { - // Binding directly to a symbolic region should be treated as binding - // to element 0. - QualType T = SR->getSymbol()->getType(); - if (T->isAnyPointerType() || T->isReferenceType()) - T = T->getPointeeType(); - - R = GetElementZeroRegion(SR, T); - } + // Binding directly to a symbolic region should be treated as binding + // to element 0. + if (const SymbolicRegion *SR = dyn_cast(R)) + R = GetElementZeroRegion(SR, SR->getPointeeStaticType()); assert((!isa(R) || !B.lookup(R)) && "'this' pointer is not an l-value and is not assignable"); diff --git a/clang/test/Analysis/ctor.mm b/clang/test/Analysis/ctor.mm index 012e0b1d55f5..fb385833df9c 100644 --- a/clang/test/Analysis/ctor.mm +++ b/clang/test/Analysis/ctor.mm @@ -218,9 +218,7 @@ namespace PODUninitialized { // Make sure that p4.x contains a symbol after copy. if (p4.x > 0) clang_analyzer_eval(p4.x > 0); // expected-warning{{TRUE}} - // FIXME: Element region gets in the way, so these aren't the same symbols - // as they should be. - clang_analyzer_eval(pp.x == p4.x); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(pp.x == p4.x); // expected-warning{{TRUE}} PODWrapper w; w.p.y = 1; diff --git a/clang/test/Analysis/expr-inspection.c b/clang/test/Analysis/expr-inspection.c index 57cd2fc165b3..47ac0c251c1f 100644 --- a/clang/test/Analysis/expr-inspection.c +++ b/clang/test/Analysis/expr-inspection.c @@ -55,6 +55,6 @@ struct S { void test_field_dumps(struct S s, struct S *p) { clang_analyzer_dump_pointer(&s.x); // expected-warning{{&s.x}} - clang_analyzer_dump_pointer(&p->x); // expected-warning{{&SymRegion{reg_$1}.x}} + clang_analyzer_dump_pointer(&p->x); // expected-warning{{&Element{SymRegion{reg_$1},0 S64b,struct S}.x}} clang_analyzer_dumpSvalType_pointer(&s.x); // expected-warning {{int *}} } diff --git a/clang/test/Analysis/memory-model.cpp b/clang/test/Analysis/memory-model.cpp index ee5d4d4d656e..4aa88f97477e 100644 --- a/clang/test/Analysis/memory-model.cpp +++ b/clang/test/Analysis/memory-model.cpp @@ -65,7 +65,7 @@ void field_ref(S a) { } void field_ptr(S *a) { - clang_analyzer_dump(&a->f); // expected-warning {{SymRegion{reg_$0}.f}} + clang_analyzer_dump(&a->f); // expected-warning {{Element{SymRegion{reg_$0},0 S64b,struct S}.f}} clang_analyzer_dumpExtent(&a->f); // expected-warning {{4 S64b}} clang_analyzer_dumpElementCount(&a->f); // expected-warning {{1 S64b}} } diff --git a/clang/test/Analysis/ptr-arith.c b/clang/test/Analysis/ptr-arith.c index 8b20d6726f7d..40c8188704e8 100644 --- a/clang/test/Analysis/ptr-arith.c +++ b/clang/test/Analysis/ptr-arith.c @@ -340,11 +340,11 @@ struct s { void struct_pointer_canon(struct s *ps) { struct s ss = *ps; clang_analyzer_dump((*ps).v); - // expected-warning-re@-1{{reg_${{[[:digit:]]+}}}.v>}} + // expected-warning-re@-1{{reg_${{[[:digit:]]+}}},0 S64b,struct s}.v>}} clang_analyzer_dump(ps[0].v); - // expected-warning-re@-1{{reg_${{[[:digit:]]+}}}.v>}} + // expected-warning-re@-1{{reg_${{[[:digit:]]+}}},0 S64b,struct s}.v>}} clang_analyzer_dump(ps->v); - // expected-warning-re@-1{{reg_${{[[:digit:]]+}}}.v>}} + // expected-warning-re@-1{{reg_${{[[:digit:]]+}}},0 S64b,struct s}.v>}} clang_analyzer_eval((*ps).v == ps[0].v); // expected-warning{{TRUE}} clang_analyzer_eval((*ps).v == ps->v); // expected-warning{{TRUE}} clang_analyzer_eval(ps[0].v == ps->v); // expected-warning{{TRUE}} @@ -360,11 +360,11 @@ typedef struct s T2; void struct_pointer_canon_typedef(T1 *ps) { T2 ss = *ps; clang_analyzer_dump((*ps).v); - // expected-warning-re@-1{{reg_${{[[:digit:]]+}}}.v>}} + // expected-warning-re@-1{{reg_${{[[:digit:]]+}}},0 S64b,struct s}.v>}} clang_analyzer_dump(ps[0].v); - // expected-warning-re@-1{{reg_${{[[:digit:]]+}}}.v>}} + // expected-warning-re@-1{{reg_${{[[:digit:]]+}}},0 S64b,struct s}.v>}} clang_analyzer_dump(ps->v); - // expected-warning-re@-1{{reg_${{[[:digit:]]+}}}.v>}} + // expected-warning-re@-1{{reg_${{[[:digit:]]+}}},0 S64b,struct s}.v>}} clang_analyzer_eval((*ps).v == ps[0].v); // expected-warning{{TRUE}} clang_analyzer_eval((*ps).v == ps->v); // expected-warning{{TRUE}} clang_analyzer_eval(ps[0].v == ps->v); // expected-warning{{TRUE}} @@ -378,11 +378,11 @@ void struct_pointer_canon_bidim_typedef(T1 **ps) { void struct_pointer_canon_const(const struct s *ps) { struct s ss = *ps; clang_analyzer_dump((*ps).v); - // expected-warning-re@-1{{reg_${{[[:digit:]]+}}}.v>}} + // expected-warning-re@-1{{reg_${{[[:digit:]]+}}},0 S64b,struct s}.v>}} clang_analyzer_dump(ps[0].v); - // expected-warning-re@-1{{reg_${{[[:digit:]]+}}}.v>}} + // expected-warning-re@-1{{reg_${{[[:digit:]]+}}},0 S64b,struct s}.v>}} clang_analyzer_dump(ps->v); - // expected-warning-re@-1{{reg_${{[[:digit:]]+}}}.v>}} + // expected-warning-re@-1{{reg_${{[[:digit:]]+}}},0 S64b,struct s}.v>}} clang_analyzer_eval((*ps).v == ps[0].v); // expected-warning{{TRUE}} clang_analyzer_eval((*ps).v == ps->v); // expected-warning{{TRUE}} clang_analyzer_eval(ps[0].v == ps->v); // expected-warning{{TRUE}} diff --git a/clang/test/Analysis/ptr-arith.cpp b/clang/test/Analysis/ptr-arith.cpp index fd0cc3d907fe..e44939768960 100644 --- a/clang/test/Analysis/ptr-arith.cpp +++ b/clang/test/Analysis/ptr-arith.cpp @@ -134,10 +134,10 @@ struct parse_t { int parse(parse_t *p) { unsigned copy = p->bits2; clang_analyzer_dump(copy); - // expected-warning@-1 {{reg_$1}.bits2>}} + // expected-warning@-1 {{reg_$1},0 S64b,struct Bug_55934::parse_t}.bits2>}} header *bits = (header *)© clang_analyzer_dump(bits->b); - // expected-warning@-1 {{derived_$2{reg_$1}.bits2>,Element{copy,0 S64b,struct Bug_55934::header}.b}}} + // expected-warning@-1 {{derived_$2{reg_$1},0 S64b,struct Bug_55934::parse_t}.bits2>,Element{copy,0 S64b,struct Bug_55934::header}.b}}} return bits->b; // no-warning } } // namespace Bug_55934 diff --git a/clang/test/Analysis/trivial-copy-struct.cpp b/clang/test/Analysis/trivial-copy-struct.cpp new file mode 100644 index 000000000000..85efe854c38e --- /dev/null +++ b/clang/test/Analysis/trivial-copy-struct.cpp @@ -0,0 +1,36 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -verify %s + +template void clang_analyzer_dump(T); +void clang_analyzer_warnIfReached(); + +struct Node { int* ptr; }; + +void copy_on_stack(Node* n1) { + Node tmp = *n1; + Node* n2 = &tmp; + clang_analyzer_dump(n1); // expected-warning-re {{&SymRegion{reg_${{[0-9]+}}}}} + clang_analyzer_dump(n2); // expected-warning {{&tmp}} + + clang_analyzer_dump(n1->ptr); // expected-warning-re {{&SymRegion{reg_${{[0-9]+}}},0 S64b,struct Node}.ptr>}}} + clang_analyzer_dump(n2->ptr); // expected-warning-re {{&SymRegion{reg_${{[0-9]+}}},0 S64b,struct Node}.ptr>}}} + + if (n1->ptr != n2->ptr) + clang_analyzer_warnIfReached(); // unreachable + (void)(n1->ptr); + (void)(n2->ptr); +} + +void copy_on_heap(Node* n1) { + Node* n2 = new Node(*n1); + + clang_analyzer_dump(n1); // expected-warning-re {{&SymRegion{reg_${{[0-9]+}}}}} + clang_analyzer_dump(n2); // expected-warning-re {{&HeapSymRegion{conj_${{[0-9]+}}{Node *, LC{{[0-9]+}}, S{{[0-9]+}}, #{{[0-9]+}}}}}} + + clang_analyzer_dump(n1->ptr); // expected-warning-re {{&SymRegion{reg_${{[0-9]+}}},0 S64b,struct Node}.ptr>}}} + clang_analyzer_dump(n2->ptr); // expected-warning {{Unknown}} FIXME: This should be the same as above. + + if (n1->ptr != n2->ptr) + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} FIXME: This should not be reachable. + (void)(n1->ptr); + (void)(n2->ptr); +}