[analyzer] Improve loads from reinterpret-cast fields

Consider this example:

```lang=C++
struct header {
  unsigned a : 1;
  unsigned b : 1;
};
struct parse_t {
  unsigned bits0 : 1;
  unsigned bits2 : 2; // <-- header
  unsigned bits4 : 4;
};
int parse(parse_t *p) {
  unsigned copy = p->bits2;
  clang_analyzer_dump(copy);
  // expected-warning@-1 {{reg_$1<unsigned int SymRegion{reg_$0<struct Bug_55934::parse_t * p>}.bits2>}}

  header *bits = (header *)&copy;
  clang_analyzer_dump(bits->b); // <--- Was UndefinedVal previously.
  // expected-warning@-1 {{derived_$2{reg_$1<unsigned int SymRegion{reg_$0<struct Bug_55934::parse_t * p>}.bits2>,Element{copy,0 S64b,struct Bug_55934::header}.b}}}
  return bits->b; // no-warning: it's not UndefinedVal
}
```

`bits->b` should have the same content as the second bit of `p->bits2`
(assuming that the bitfields are in spelling order).

---

The `Store` has the correct bindings. The problem is with the load of `bits->b`.
It will eventually reach `RegionStoreManager::getBindingForField()` with
`Element{copy,0 S64b,struct header}.b`, which is a `FieldRegion`.
It did not find any direct bindings, so the `getBindingForFieldOrElementCommon()`
gets called. That won't find any bindings, but it sees that the variable
is on the //stack//, thus it must be an uninitialized local variable;
thus it returns `UndefinedVal`.

Instead of doing this, it should have created a //derived symbol//
representing the slice of the region corresponding to the member.
So, if the value of `copy` is `reg1`, then the value of `bits->b` should
be `derived{reg1, elem{copy,0, header}.b}`.

Actually, the `getBindingForElement()` already does exactly this for
reinterpret-casts, so I decided to hoist that and reuse the logic.

Fixes #55934

Reviewed By: martong

Differential Revision: https://reviews.llvm.org/D128535
This commit is contained in:
Balazs Benics 2022-07-26 12:31:21 +02:00
parent 15ddc09ef9
commit a80418eec0
2 changed files with 71 additions and 20 deletions

View File

@ -1888,6 +1888,30 @@ SVal RegionStoreManager::getSValFromStringLiteral(const StringLiteral *SL,
return svalBuilder.makeIntVal(Code, ElemT);
}
static Optional<SVal> getDerivedSymbolForBinding(
RegionBindingsConstRef B, const TypedValueRegion *BaseRegion,
const TypedValueRegion *SubReg, const ASTContext &Ctx, SValBuilder &SVB) {
assert(BaseRegion);
QualType BaseTy = BaseRegion->getValueType();
QualType Ty = SubReg->getValueType();
if (BaseTy->isScalarType() && Ty->isScalarType()) {
if (Ctx.getTypeSizeInChars(BaseTy) >= Ctx.getTypeSizeInChars(Ty)) {
if (const Optional<SVal> &ParentValue = B.getDirectBinding(BaseRegion)) {
if (SymbolRef ParentValueAsSym = ParentValue->getAsSymbol())
return SVB.getDerivedRegionValueSymbolVal(ParentValueAsSym, SubReg);
if (ParentValue->isUndef())
return UndefinedVal();
// Other cases: give up. We are indexing into a larger object
// that has some value, but we don't know how to handle that yet.
return UnknownVal();
}
}
}
return None;
}
SVal RegionStoreManager::getBindingForElement(RegionBindingsConstRef B,
const ElementRegion* R) {
// Check if the region has a binding.
@ -1932,27 +1956,10 @@ SVal RegionStoreManager::getBindingForElement(RegionBindingsConstRef B,
if (!O.getRegion())
return UnknownVal();
if (const TypedValueRegion *baseR =
dyn_cast_or_null<TypedValueRegion>(O.getRegion())) {
QualType baseT = baseR->getValueType();
if (baseT->isScalarType()) {
QualType elemT = R->getElementType();
if (elemT->isScalarType()) {
if (Ctx.getTypeSizeInChars(baseT) >= Ctx.getTypeSizeInChars(elemT)) {
if (const Optional<SVal> &V = B.getDirectBinding(superR)) {
if (SymbolRef parentSym = V->getAsSymbol())
return svalBuilder.getDerivedRegionValueSymbolVal(parentSym, R);
if (const TypedValueRegion *baseR = dyn_cast<TypedValueRegion>(O.getRegion()))
if (auto V = getDerivedSymbolForBinding(B, baseR, R, Ctx, svalBuilder))
return *V;
if (V->isUnknownOrUndef())
return *V;
// Other cases: give up. We are indexing into a larger object
// that has some value, but we don't know how to handle that yet.
return UnknownVal();
}
}
}
}
}
return getBindingForFieldOrElementCommon(B, R, R->getElementType());
}
@ -1988,6 +1995,26 @@ SVal RegionStoreManager::getBindingForField(RegionBindingsConstRef B,
}
}
// Handle the case where we are accessing into a larger scalar object.
// For example, this handles:
// struct header {
// unsigned a : 1;
// unsigned b : 1;
// };
// struct parse_t {
// unsigned bits0 : 1;
// unsigned bits2 : 2; // <-- header
// unsigned bits4 : 4;
// };
// int parse(parse_t *p) {
// unsigned copy = p->bits2;
// header *bits = (header *)&copy;
// return bits->b; <-- here
// }
if (const auto *Base = dyn_cast<TypedValueRegion>(R->getBaseRegion()))
if (auto V = getDerivedSymbolForBinding(B, Base, R, Ctx, svalBuilder))
return *V;
return getBindingForFieldOrElementCommon(B, R, Ty);
}

View File

@ -1,4 +1,7 @@
// RUN: %clang_analyze_cc1 -Wno-unused-value -std=c++14 -analyzer-checker=core,debug.ExprInspection,alpha.core.PointerArithm -verify %s
template <typename T> void clang_analyzer_dump(T);
struct X {
int *p;
int zero;
@ -117,3 +120,24 @@ bool integerAsPtrSubtractionNoCrash(char *p, __UINTPTR_TYPE__ m) {
auto n = p - reinterpret_cast<char*>((__UINTPTR_TYPE__)1);
return n == m;
}
namespace Bug_55934 {
struct header {
unsigned a : 1;
unsigned b : 1;
};
struct parse_t {
unsigned bits0 : 1;
unsigned bits2 : 2; // <-- header
unsigned bits4 : 4;
};
int parse(parse_t *p) {
unsigned copy = p->bits2;
clang_analyzer_dump(copy);
// expected-warning@-1 {{reg_$1<unsigned int SymRegion{reg_$0<struct Bug_55934::parse_t * p>}.bits2>}}
header *bits = (header *)&copy;
clang_analyzer_dump(bits->b);
// expected-warning@-1 {{derived_$2{reg_$1<unsigned int SymRegion{reg_$0<struct Bug_55934::parse_t * p>}.bits2>,Element{copy,0 S64b,struct Bug_55934::header}.b}}}
return bits->b; // no-warning
}
} // namespace Bug_55934