forked from OSchip/llvm-project
[analyzer] Retrieve a character from StringLiteral as an initializer for constant arrays.
Summary: Assuming that values of constant arrays never change, we can retrieve the value at a specific position (index) right from the initializer, if present. Retrieve a character code by index from a StringLiteral which is an initializer of a constant array in global scope. This patch has a known issue: it permits access to characters past the end of the literal when the declaration in which the literal is used is a pointer, i.e. the literal undergoes an implicit cast of kind `array-to-pointer`. In that case the offset should be within the bounds of the literal's length. This should be distinguished from the case described in Standard C++20 [dcl.init.string] 9.4.2.3. Example: const char arr[42] = "123"; char c = arr[41]; // OK const char * const str = "123"; char c = str[41]; // NOK Differential Revision: https://reviews.llvm.org/D107339
This commit is contained in:
parent
9faed889cf
commit
1deccd05ba
|
@ -441,6 +441,8 @@ public:
|
|||
RegionBindingsConstRef B, const VarRegion *VR, const ElementRegion *R);
|
||||
Optional<SVal> getSValFromInitListExpr(const InitListExpr *ILE,
|
||||
uint64_t Offset, QualType ElemT);
|
||||
SVal getSValFromStringLiteral(const StringLiteral *SL, uint64_t Offset,
|
||||
QualType ElemT);
|
||||
|
||||
public: // Part of public interface to class.
|
||||
|
||||
|
@ -1701,10 +1703,16 @@ Optional<SVal> RegionStoreManager::getConstantValFromConstArrayInitializer(
|
|||
// From here `Offset` is in the bounds.
|
||||
|
||||
// Handle InitListExpr.
|
||||
// Example:
|
||||
// const char arr[] = { 1, 2, 3 };
|
||||
if (const auto *ILE = dyn_cast<InitListExpr>(Init))
|
||||
return getSValFromInitListExpr(ILE, Offset, R->getElementType());
|
||||
|
||||
// FIXME: Handle StringLiteral.
|
||||
// Handle StringLiteral.
|
||||
// Example:
|
||||
// const char arr[] = "abc";
|
||||
if (const auto *SL = dyn_cast<StringLiteral>(Init))
|
||||
return getSValFromStringLiteral(SL, Offset, R->getElementType());
|
||||
|
||||
// FIXME: Handle CompoundLiteralExpr.
|
||||
|
||||
|
@ -1716,6 +1724,15 @@ RegionStoreManager::getSValFromInitListExpr(const InitListExpr *ILE,
|
|||
uint64_t Offset, QualType ElemT) {
|
||||
assert(ILE && "InitListExpr should not be null");
|
||||
|
||||
// C++20 [dcl.init.string] 9.4.2.1:
|
||||
// An array of ordinary character type [...] can be initialized by [...]
|
||||
// an appropriately-typed string-literal enclosed in braces.
|
||||
// Example:
|
||||
// const char arr[] = { "abc" };
|
||||
if (ILE->isStringLiteralInit())
|
||||
if (const auto *SL = dyn_cast<StringLiteral>(ILE->getInit(0)))
|
||||
return getSValFromStringLiteral(SL, Offset, ElemT);
|
||||
|
||||
// C++20 [dcl.init] 9.4.17.5 (excerpt):
|
||||
// i-th array element is value-initialized for each k < i ≤ n,
|
||||
// where k is an expression-list size and n is an array extent.
|
||||
|
@ -1728,6 +1745,42 @@ RegionStoreManager::getSValFromInitListExpr(const InitListExpr *ILE,
|
|||
return svalBuilder.getConstantVal(E);
|
||||
}
|
||||
|
||||
/// Returns an SVal, if possible, for the specified position in a string
|
||||
/// literal.
|
||||
///
|
||||
/// \param SL The given string literal.
|
||||
/// \param Offset The unsigned offset. E.g. for the expression
|
||||
/// `char x = str[42];` an offset should be 42.
|
||||
/// E.g. for the string "abc" offset:
|
||||
/// - 1 returns SVal{b}, because it's the second position in the string.
|
||||
/// - 42 returns SVal{0}, because there's no explicit value at this
|
||||
/// position in the string.
|
||||
/// \param ElemT The type of the result SVal expression.
|
||||
///
|
||||
/// NOTE: We return `0` for every offset >= the literal length for array
|
||||
/// declarations, like:
|
||||
/// const char str[42] = "123"; // Literal length is 4.
|
||||
/// char c = str[41]; // Offset is 41.
|
||||
/// FIXME: Nevertheless, we can't do the same for pointer declarations, like:
|
||||
/// const char * const str = "123"; // Literal length is 4.
|
||||
/// char c = str[41]; // Offset is 41. Returns `0`, but Undef
|
||||
/// // expected.
|
||||
/// It should be properly handled before reaching this point.
|
||||
/// The main problem is that we can't distinguish between these declarations,
|
||||
/// because in case of array we can get the Decl from VarRegion, but in case
|
||||
/// of pointer the region is a StringRegion, which doesn't contain a Decl.
|
||||
/// Possible solution could be passing an array extent along with the offset.
|
||||
SVal RegionStoreManager::getSValFromStringLiteral(const StringLiteral *SL,
|
||||
uint64_t Offset,
|
||||
QualType ElemT) {
|
||||
assert(SL && "StringLiteral should not be null");
|
||||
// C++20 [dcl.init.string] 9.4.2.3:
|
||||
// If there are fewer initializers than there are array elements, each
|
||||
// element not explicitly initialized shall be zero-initialized [dcl.init].
|
||||
uint32_t Code = (Offset >= SL->getLength()) ? 0 : SL->getCodeUnit(Offset);
|
||||
return svalBuilder.makeIntVal(Code, ElemT);
|
||||
}
|
||||
|
||||
SVal RegionStoreManager::getBindingForElement(RegionBindingsConstRef B,
|
||||
const ElementRegion* R) {
|
||||
// Check if the region has a binding.
|
||||
|
@ -1739,26 +1792,17 @@ SVal RegionStoreManager::getBindingForElement(RegionBindingsConstRef B,
|
|||
// Check if the region is an element region of a string literal.
|
||||
if (const StringRegion *StrR = dyn_cast<StringRegion>(superR)) {
|
||||
// FIXME: Handle loads from strings where the literal is treated as
|
||||
// an integer, e.g., *((unsigned int*)"hello")
|
||||
// an integer, e.g., *((unsigned int*)"hello"). Such loads are UB according
|
||||
// to C++20 7.2.1.11 [basic.lval].
|
||||
QualType T = Ctx.getAsArrayType(StrR->getValueType())->getElementType();
|
||||
if (!Ctx.hasSameUnqualifiedType(T, R->getElementType()))
|
||||
return UnknownVal();
|
||||
|
||||
const StringLiteral *Str = StrR->getStringLiteral();
|
||||
SVal Idx = R->getIndex();
|
||||
if (Optional<nonloc::ConcreteInt> CI = Idx.getAs<nonloc::ConcreteInt>()) {
|
||||
int64_t i = CI->getValue().getSExtValue();
|
||||
// Abort on string underrun. This can be possible by arbitrary
|
||||
// clients of getBindingForElement().
|
||||
if (i < 0)
|
||||
if (const auto CI = R->getIndex().getAs<nonloc::ConcreteInt>()) {
|
||||
const llvm::APSInt &Idx = CI->getValue();
|
||||
if (Idx < 0)
|
||||
return UndefinedVal();
|
||||
int64_t length = Str->getLength();
|
||||
// Technically, only i == length is guaranteed to be null.
|
||||
// However, such overflows should be caught before reaching this point;
|
||||
// the only time such an access would be made is if a string literal was
|
||||
// used to initialize a larger array.
|
||||
char c = (i >= length) ? '\0' : Str->getCodeUnit(i);
|
||||
return svalBuilder.makeIntVal(c, T);
|
||||
const StringLiteral *SL = StrR->getStringLiteral();
|
||||
return getSValFromStringLiteral(SL, Idx.getZExtValue(), T);
|
||||
}
|
||||
} else if (const VarRegion *VR = dyn_cast<VarRegion>(superR)) {
|
||||
if (Optional<SVal> V = getConstantValFromConstArrayInitializer(B, VR, R))
|
||||
|
|
|
@ -146,3 +146,110 @@ struct S2 {
|
|||
void struct_arr_index1() {
|
||||
clang_analyzer_eval(S2::arr_no_init[2]); // expected-warning{{UNKNOWN}}
|
||||
}
|
||||
|
||||
char const glob_arr6[5] = "123";
|
||||
void glob_array_index5() {
|
||||
clang_analyzer_eval(glob_arr6[0] == '1'); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(glob_arr6[1] == '2'); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(glob_arr6[2] == '3'); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(glob_arr6[3] == '\0'); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(glob_arr6[4] == '\0'); // expected-warning{{TRUE}}
|
||||
}
|
||||
|
||||
void glob_ptr_index3() {
|
||||
char const *ptr = glob_arr6;
|
||||
clang_analyzer_eval(ptr[-42] == '\0'); // expected-warning{{UNDEFINED}}
|
||||
clang_analyzer_eval(ptr[0] == '1'); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(ptr[1] == '2'); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(ptr[2] == '3'); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(ptr[3] == '\0'); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(ptr[4] == '\0'); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(ptr[5] == '\0'); // expected-warning{{UNDEFINED}}
|
||||
clang_analyzer_eval(ptr[6] == '\0'); // expected-warning{{UNDEFINED}}
|
||||
}
|
||||
|
||||
void glob_invalid_index7() {
|
||||
int idx = -42;
|
||||
auto x = glob_arr6[idx]; // expected-warning{{garbage or undefined}}
|
||||
}
|
||||
|
||||
void glob_invalid_index8() {
|
||||
const char *ptr = glob_arr6;
|
||||
int idx = 42;
|
||||
auto x = ptr[idx]; // expected-warning{{garbage or undefined}}
|
||||
}
|
||||
|
||||
char const glob_arr7[5] = {"123"};
|
||||
void glob_array_index6() {
|
||||
clang_analyzer_eval(glob_arr7[0] == '1'); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(glob_arr7[1] == '2'); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(glob_arr7[2] == '3'); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(glob_arr7[3] == '\0'); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(glob_arr7[4] == '\0'); // expected-warning{{TRUE}}
|
||||
}
|
||||
|
||||
void glob_invalid_index9() {
|
||||
int idx = -42;
|
||||
auto x = glob_arr7[idx]; // expected-warning{{garbage or undefined}}
|
||||
}
|
||||
|
||||
void glob_invalid_index10() {
|
||||
const char *ptr = glob_arr7;
|
||||
int idx = 42;
|
||||
auto x = ptr[idx]; // expected-warning{{garbage or undefined}}
|
||||
}
|
||||
|
||||
char const *const glob_ptr8 = "123";
|
||||
void glob_ptr_index4() {
|
||||
clang_analyzer_eval(glob_ptr8[0] == '1'); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(glob_ptr8[1] == '2'); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(glob_ptr8[2] == '3'); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(glob_ptr8[3] == '\0'); // expected-warning{{TRUE}}
|
||||
// FIXME: Should be UNDEFINED.
|
||||
// We should take into account a declaration in which the literal is used.
|
||||
clang_analyzer_eval(glob_ptr8[4] == '\0'); // expected-warning{{TRUE}}
|
||||
}
|
||||
|
||||
void glob_invalid_index11() {
|
||||
int idx = -42;
|
||||
auto x = glob_ptr8[idx]; // expected-warning{{garbage or undefined}}
|
||||
}
|
||||
|
||||
void glob_invalid_index12() {
|
||||
int idx = 42;
|
||||
// FIXME: Should warn {{garbage or undefined}}
|
||||
// We should take into account a declaration in which the literal is used.
|
||||
auto x = glob_ptr8[idx]; // no-warning
|
||||
}
|
||||
|
||||
const char16_t *const glob_ptr9 = u"абв";
|
||||
void glob_ptr_index5() {
|
||||
clang_analyzer_eval(glob_ptr9[0] == u'а'); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(glob_ptr9[1] == u'б'); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(glob_ptr9[2] == u'в'); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(glob_ptr9[3] == '\0'); // expected-warning{{TRUE}}
|
||||
}
|
||||
|
||||
const char32_t *const glob_ptr10 = U"\U0001F607\U0001F608\U0001F609";
|
||||
void glob_ptr_index6() {
|
||||
clang_analyzer_eval(glob_ptr10[0] == U'\U0001F607'); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(glob_ptr10[1] == U'\U0001F608'); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(glob_ptr10[2] == U'\U0001F609'); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(glob_ptr10[3] == '\0'); // expected-warning{{TRUE}}
|
||||
}
|
||||
|
||||
const wchar_t *const glob_ptr11 = L"\123\u0041\xFF";
|
||||
void glob_ptr_index7() {
|
||||
clang_analyzer_eval(glob_ptr11[0] == L'\123'); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(glob_ptr11[1] == L'\u0041'); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(glob_ptr11[2] == L'\xFF'); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(glob_ptr11[3] == L'\0'); // expected-warning{{TRUE}}
|
||||
}
|
||||
|
||||
const char *const glob_ptr12 = u8"abc";
|
||||
void glob_ptr_index8() {
|
||||
clang_analyzer_eval(glob_ptr12[0] == 'a'); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(glob_ptr12[1] == 'b'); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(glob_ptr12[2] == 'c'); // expected-warning{{TRUE}}
|
||||
clang_analyzer_eval(glob_ptr12[3] == '\0'); // expected-warning{{TRUE}}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue