From f02c5f3478318075d1a469203900e452ba651421 Mon Sep 17 00:00:00 2001 From: Gabor Marton Date: Fri, 26 Nov 2021 10:59:09 +0100 Subject: [PATCH] [Analyzer][solver] Do not remove the simplified symbol from the eq class Currently, during symbol simplification we remove the original member symbol from the equivalence class (`ClassMembers` trait). However, we keep the reverse link (`ClassMap` trait), in order to be able the query the related constraints even for the old member. This asymmetry can lead to a problem when we merge equivalence classes: ``` ClassA: [a, b] // ClassMembers trait, a->a, b->a // ClassMap trait, a is the representative symbol ``` Now lets delete `a`: ``` ClassA: [b] a->a, b->a ``` Let's merge the trivial class `c` into ClassA: ``` ClassA: [c, b] c->c, b->c, a->a ``` Now after the merge operation, `c` and `a` are actually in different equivalence classes, which is inconsistent. One solution to this problem is to simply avoid removing the original member and this is what this patch does. Other options I have considered: 1) Always merge the trivial class into the non-trivial class. This might work most of the time, however, will fail if we have to merge two non-trivial classes (in that case we no longer can track equivalences precisely). 2) In `removeMember`, update the reverse link as well. This would cease the inconsistency, but we'd loose precision since we could not query the constraints for the removed member. Differential Revision: https://reviews.llvm.org/D114619 --- .../Core/RangeConstraintManager.cpp | 54 +++++------------- .../expr-inspection-printState-eq-classes.c | 4 +- ...symbol-simplification-disequality-info.cpp | 57 ++++++++++--------- ...-simplification-fixpoint-one-iteration.cpp | 17 +++--- ...simplification-fixpoint-two-iterations.cpp | 25 ++++---- 5 files changed, 67 insertions(+), 90 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index 74403a160b8e..3f8d9e4298a0 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -601,10 +601,6 @@ public: LLVM_NODISCARD static inline Optional areEqual(ProgramStateRef State, SymbolRef First, SymbolRef Second); - /// Remove one member from the class. - LLVM_NODISCARD ProgramStateRef removeMember(ProgramStateRef State, - const SymbolRef Old); - /// Iterate over all symbols and try to simplify them. LLVM_NODISCARD static inline ProgramStateRef simplify(SValBuilder &SVB, RangeSet::Factory &F, @@ -2136,34 +2132,6 @@ inline Optional EquivalenceClass::areEqual(ProgramStateRef State, return llvm::None; } -LLVM_NODISCARD ProgramStateRef -EquivalenceClass::removeMember(ProgramStateRef State, const SymbolRef Old) { - - SymbolSet ClsMembers = getClassMembers(State); - assert(ClsMembers.contains(Old)); - - // We don't remove `Old`'s Sym->Class relation for two reasons: - // 1) This way constraints for the old symbol can still be found via it's - // equivalence class that it used to be the member of. - // 2) Performance and resource reasons. We can spare one removal and thus one - // additional tree in the forest of `ClassMap`. - - // Remove `Old`'s Class->Sym relation. - SymbolSet::Factory &F = getMembersFactory(State); - ClassMembersTy::Factory &EMFactory = State->get_context(); - ClsMembers = F.remove(ClsMembers, Old); - // Ensure another precondition of the removeMember function (we can check - // this only with isEmpty, thus we have to do the remove first). - assert(!ClsMembers.isEmpty() && - "Class should have had at least two members before member removal"); - // Overwrite the existing members assigned to this class. - ClassMembersTy ClassMembersMap = State->get(); - ClassMembersMap = EMFactory.add(ClassMembersMap, *this, ClsMembers); - State = State->set(ClassMembersMap); - - return State; -} - // Re-evaluate an SVal with top-level `State->assume` logic. LLVM_NODISCARD ProgramStateRef reAssume(ProgramStateRef State, const RangeSet *Constraint, @@ -2228,8 +2196,6 @@ EquivalenceClass::simplify(SValBuilder &SVB, RangeSet::Factory &F, continue; assert(find(State, MemberSym) == find(State, SimplifiedMemberSym)); - // Remove the old and more complex symbol. - State = find(State, MemberSym).removeMember(State, MemberSym); // Query the class constraint again b/c that may have changed during the // merge above. @@ -2241,19 +2207,25 @@ EquivalenceClass::simplify(SValBuilder &SVB, RangeSet::Factory &F, // About performance and complexity: Let us assume that in a State we // have N non-trivial equivalence classes and that all constraints and // disequality info is related to non-trivial classes. In the worst case, - // we can simplify only one symbol of one class in each iteration. The - // number of symbols in one class cannot grow b/c we replace the old - // symbol with the simplified one. Also, the number of the equivalence - // classes can decrease only, b/c the algorithm does a merge operation - // optionally. We need N iterations in this case to reach the fixpoint. - // Thus, the steps needed to be done in the worst case is proportional to - // N*N. + // we can simplify only one symbol of one class in each iteration. // + // The number of the equivalence classes can decrease only, because the + // algorithm does a merge operation optionally. + // ASSUMPTION G: Let us assume that the + // number of symbols in one class cannot grow because we replace the old + // symbol with the simplified one. + // If assumption G holds then we need N iterations in this case to reach + // the fixpoint. Thus, the steps needed to be done in the worst case is + // proportional to N*N. // This worst case scenario can be extended to that case when we have // trivial classes in the constraints and in the disequality map. This // case can be reduced to the case with a State where there are only // non-trivial classes. This is because a merge operation on two trivial // classes results in one non-trivial class. + // + // Empirical measurements show that if we relax assumption G (i.e. if we + // do not replace the complex symbol just add the simplified one) then + // the runtime and memory consumption does not grow noticeably. State = reAssume(State, ClassConstraint, SimplifiedMemberVal); if (!State) return nullptr; diff --git a/clang/test/Analysis/expr-inspection-printState-eq-classes.c b/clang/test/Analysis/expr-inspection-printState-eq-classes.c index 08a1c6cbd60b..c56fcd627b0a 100644 --- a/clang/test/Analysis/expr-inspection-printState-eq-classes.c +++ b/clang/test/Analysis/expr-inspection-printState-eq-classes.c @@ -16,6 +16,6 @@ void test_equivalence_classes(int a, int b, int c, int d) { } // CHECK: "equivalence_classes": [ -// CHECK-NEXT: [ "(reg_$0) != (reg_$2)" ], -// CHECK-NEXT: [ "reg_$0", "reg_$2", "reg_$3" ] +// CHECK-NEXT: [ "((reg_$0) + (reg_$1)) != (reg_$2)", "(reg_$0) != (reg_$2)" ], +// CHECK-NEXT: [ "(reg_$0) + (reg_$1)", "reg_$0", "reg_$2", "reg_$3" ] // CHECK-NEXT: ], diff --git a/clang/test/Analysis/symbol-simplification-disequality-info.cpp b/clang/test/Analysis/symbol-simplification-disequality-info.cpp index 69238b583eb8..45d8c05bcfc5 100644 --- a/clang/test/Analysis/symbol-simplification-disequality-info.cpp +++ b/clang/test/Analysis/symbol-simplification-disequality-info.cpp @@ -12,18 +12,18 @@ void test(int a, int b, int c, int d) { if (a + b + c == d) return; clang_analyzer_printState(); - // CHECK: "disequality_info": [ - // CHECK-NEXT: { - // CHECK-NEXT: "class": [ "((reg_$0) + (reg_$1)) + (reg_$2)" ], - // CHECK-NEXT: "disequal_to": [ - // CHECK-NEXT: [ "reg_$3" ]] - // CHECK-NEXT: }, - // CHECK-NEXT: { - // CHECK-NEXT: "class": [ "reg_$3" ], - // CHECK-NEXT: "disequal_to": [ - // CHECK-NEXT: [ "((reg_$0) + (reg_$1)) + (reg_$2)" ]] - // CHECK-NEXT: } - // CHECK-NEXT: ], + // CHECK: "disequality_info": [ + // CHECK-NEXT: { + // CHECK-NEXT: "class": [ "((reg_$0) + (reg_$1)) + (reg_$2)" ], + // CHECK-NEXT: "disequal_to": [ + // CHECK-NEXT: [ "reg_$3" ]] + // CHECK-NEXT: }, + // CHECK-NEXT: { + // CHECK-NEXT: "class": [ "reg_$3" ], + // CHECK-NEXT: "disequal_to": [ + // CHECK-NEXT: [ "((reg_$0) + (reg_$1)) + (reg_$2)" ]] + // CHECK-NEXT: } + // CHECK-NEXT: ], // Simplification starts here. @@ -32,32 +32,33 @@ void test(int a, int b, int c, int d) { clang_analyzer_printState(); // CHECK: "disequality_info": [ // CHECK-NEXT: { - // CHECK-NEXT: "class": [ "(reg_$0) + (reg_$2)" ], + // CHECK-NEXT: "class": [ "((reg_$0) + (reg_$1)) + (reg_$2)", "(reg_$0) + (reg_$2)" ], // CHECK-NEXT: "disequal_to": [ // CHECK-NEXT: [ "reg_$3" ]] // CHECK-NEXT: }, // CHECK-NEXT: { // CHECK-NEXT: "class": [ "reg_$3" ], // CHECK-NEXT: "disequal_to": [ - // CHECK-NEXT: [ "(reg_$0) + (reg_$2)" ]] - // CHECK-NEXT: } - // CHECK-NEXT: ], + // CHECK-NEXT: [ "((reg_$0) + (reg_$1)) + (reg_$2)", "(reg_$0) + (reg_$2)" ]] + // CHECK-NEXT: } + // CHECK-NEXT: ], if (c != 0) return; clang_analyzer_printState(); - // CHECK: "disequality_info": [ - // CHECK-NEXT: { - // CHECK-NEXT: "class": [ "reg_$0" ], - // CHECK-NEXT: "disequal_to": [ - // CHECK-NEXT: [ "reg_$3" ]] - // CHECK-NEXT: }, - // CHECK-NEXT: { - // CHECK-NEXT: "class": [ "reg_$3" ], - // CHECK-NEXT: "disequal_to": [ - // CHECK-NEXT: [ "reg_$0" ]] - // CHECK-NEXT: } - // CHECK-NEXT: ], + + // CHECK: "disequality_info": [ + // CHECK-NEXT: { + // CHECK-NEXT: "class": [ "((reg_$0) + (reg_$1)) + (reg_$2)", "(reg_$0) + (reg_$2)", "reg_$0" ], + // CHECK-NEXT: "disequal_to": [ + // CHECK-NEXT: [ "reg_$3" ]] + // CHECK-NEXT: }, + // CHECK-NEXT: { + // CHECK-NEXT: "class": [ "reg_$3" ], + // CHECK-NEXT: "disequal_to": [ + // CHECK-NEXT: [ "((reg_$0) + (reg_$1)) + (reg_$2)", "(reg_$0) + (reg_$2)", "reg_$0" ]] + // CHECK-NEXT: } + // CHECK-NEXT: ], // Keep the symbols and the constraints! alive. (void)(a * b * c * d); diff --git a/clang/test/Analysis/symbol-simplification-fixpoint-one-iteration.cpp b/clang/test/Analysis/symbol-simplification-fixpoint-one-iteration.cpp index 73922d420a8c..aec0da1b82f1 100644 --- a/clang/test/Analysis/symbol-simplification-fixpoint-one-iteration.cpp +++ b/clang/test/Analysis/symbol-simplification-fixpoint-one-iteration.cpp @@ -24,14 +24,15 @@ void test(int a, int b, int c) { if (b != 0) return; clang_analyzer_printState(); - // CHECK: "constraints": [ - // CHECK-NEXT: { "symbol": "(reg_$0) != (reg_$2)", "range": "{ [0, 0] }" }, - // CHECK-NEXT: { "symbol": "reg_$1", "range": "{ [0, 0] }" } - // CHECK-NEXT: ], - // CHECK-NEXT: "equivalence_classes": [ - // CHECK-NEXT: [ "(reg_$0) != (reg_$2)" ], - // CHECK-NEXT: [ "reg_$0", "reg_$2" ] - // CHECK-NEXT: ], + // CHECK: "constraints": [ + // CHECK-NEXT: { "symbol": "((reg_$0) + (reg_$1)) != (reg_$2)", "range": "{ [0, 0] }" }, + // CHECK-NEXT: { "symbol": "(reg_$0) != (reg_$2)", "range": "{ [0, 0] }" }, + // CHECK-NEXT: { "symbol": "reg_$1", "range": "{ [0, 0] }" } + // CHECK-NEXT: ], + // CHECK-NEXT: "equivalence_classes": [ + // CHECK-NEXT: [ "((reg_$0) + (reg_$1)) != (reg_$2)", "(reg_$0) != (reg_$2)" ], + // CHECK-NEXT: [ "(reg_$0) + (reg_$1)", "reg_$0", "reg_$2" ] + // CHECK-NEXT: ], // CHECK-NEXT: "disequality_info": null, // Keep the symbols and the constraints! alive. diff --git a/clang/test/Analysis/symbol-simplification-fixpoint-two-iterations.cpp b/clang/test/Analysis/symbol-simplification-fixpoint-two-iterations.cpp index 679ed3fda7a7..f1b057be3abd 100644 --- a/clang/test/Analysis/symbol-simplification-fixpoint-two-iterations.cpp +++ b/clang/test/Analysis/symbol-simplification-fixpoint-two-iterations.cpp @@ -27,17 +27,20 @@ void test(int a, int b, int c, int d) { if (b != 0) return; clang_analyzer_printState(); - // CHECK: "constraints": [ - // CHECK-NEXT: { "symbol": "(reg_$0) != (reg_$3)", "range": "{ [0, 0] }" }, - // CHECK-NEXT: { "symbol": "reg_$1", "range": "{ [0, 0] }" }, - // CHECK-NEXT: { "symbol": "reg_$2", "range": "{ [0, 0] }" } - // CHECK-NEXT: ], - // CHECK-NEXT: "equivalence_classes": [ - // CHECK-NEXT: [ "(reg_$0) != (reg_$3)" ], - // CHECK-NEXT: [ "reg_$0", "reg_$3" ], - // CHECK-NEXT: [ "reg_$2" ] - // CHECK-NEXT: ], - // CHECK-NEXT: "disequality_info": null, + // CHECK: "constraints": [ + // CHECK-NEXT: { "symbol": "(((reg_$0) + (reg_$1)) + (reg_$2)) != (reg_$3)", "range": "{ [0, 0] }" }, + // CHECK-NEXT: { "symbol": "((reg_$0) + (reg_$2)) != (reg_$3)", "range": "{ [0, 0] }" }, + // CHECK-NEXT: { "symbol": "(reg_$0) != (reg_$3)", "range": "{ [0, 0] }" }, + // CHECK-NEXT: { "symbol": "(reg_$2) + (reg_$1)", "range": "{ [0, 0] }" }, + // CHECK-NEXT: { "symbol": "reg_$1", "range": "{ [0, 0] }" }, + // CHECK-NEXT: { "symbol": "reg_$2", "range": "{ [0, 0] }" } + // CHECK-NEXT: ], + // CHECK-NEXT: "equivalence_classes": [ + // CHECK-NEXT: [ "(((reg_$0) + (reg_$1)) + (reg_$2)) != (reg_$3)", "((reg_$0) + (reg_$2)) != (reg_$3)", "(reg_$0) != (reg_$3)" ], + // CHECK-NEXT: [ "((reg_$0) + (reg_$1)) + (reg_$2)", "(reg_$0) + (reg_$2)", "reg_$0", "reg_$3" ], + // CHECK-NEXT: [ "(reg_$2) + (reg_$1)", "reg_$2" ] + // CHECK-NEXT: ], + // CHECK-NEXT: "disequality_info": null, // Keep the symbols and the constraints! alive. (void)(a * b * c * d);