[clang] Introduce -fstrict-flex-arrays=<n> for stricter handling of flexible arrays

Some code [0] consider that trailing arrays are flexible, whatever their size.
Support for these legacy code has been introduced in
f8f6324983 but it prevents evaluation of
__builtin_object_size and __builtin_dynamic_object_size in some legit cases.

Introduce -fstrict-flex-arrays=<n> to have stricter conformance when it is
desirable.

n = 0: current behavior, any trailing array member is a flexible array. The default.
n = 1: any trailing array member of undefined, 0 or 1 size is a flexible array member
n = 2: any trailing array member of undefined or 0 size is a flexible array member

This takes into account two specificities of clang: array bounds as macro id
disqualify FAM, as well as non standard layout.

Similar patch for gcc discuss here: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101836

[0] https://docs.freebsd.org/en/books/developers-handbook/sockets/#sockets-essential-functions
This commit is contained in:
serge-sans-paille 2022-06-28 11:01:55 +02:00
parent 432cbd7827
commit f764dc99b3
17 changed files with 265 additions and 81 deletions

View File

@ -2647,6 +2647,12 @@ Enable unstable and experimental features
.. option:: -fuse-init-array, -fno-use-init-array
.. option:: -fstrict-flex-arrays=<arg>, -fno-strict-flex-arrays
Control which arrays are considered as flexible arrays members. <arg>
can be 1 (array of size 0, 1 and undefined are considered) or 2 (array of size 0
and undefined are considered).
.. option:: -fuse-ld=<arg>
.. option:: -fuse-line-directives, -fno-use-line-directives

View File

@ -68,6 +68,10 @@ Major New Features
Randomizing structure layout is a C-only feature.
- Clang now supports the ``-fstrict-flex-arrays=<arg>`` option to control which
array bounds lead to flexible array members. The option yields more accurate
``__builtin_object_size`` and ``__builtin_dynamic_object_size`` results in
most cases but may be overly conservative for some legacy code.
- Experimental support for HLSL has been added. The implementation is
incomplete and highly experimental. For more information about the ongoing
work to support HLSL see the `documentation

View File

@ -424,6 +424,7 @@ LANGOPT(PaddingOnUnsignedFixedPoint, 1, 0,
LANGOPT(RegisterStaticDestructors, 1, 1, "Register C++ static destructors")
LANGOPT(MatrixTypes, 1, 0, "Enable or disable the builtin matrix type")
LANGOPT(StrictFlexArrays, 2, 0, "Rely on strict definition of flexible arrays")
COMPATIBLE_VALUE_LANGOPT(MaxTokens, 32, 0, "Max number of tokens per TU or 0")

View File

@ -1140,6 +1140,12 @@ def fallow_unsupported : Flag<["-"], "fallow-unsupported">, Group<f_Group>;
def fapple_kext : Flag<["-"], "fapple-kext">, Group<f_Group>, Flags<[CC1Option]>,
HelpText<"Use Apple's kernel extensions ABI">,
MarshallingInfoFlag<LangOpts<"AppleKext">>;
def fstrict_flex_arrays_EQ : Joined<["-"], "fstrict-flex-arrays=">,Group<f_Group>,
MetaVarName<"<n>">, Values<"0,1,2">,
LangOpts<"StrictFlexArrays">,
Flags<[CC1Option]>,
HelpText<"Enable optimizations based on the strict definition of flexible arrays">,
MarshallingInfoInt<LangOpts<"StrictFlexArrays">>;
defm apple_pragma_pack : BoolFOption<"apple-pragma-pack",
LangOpts<"ApplePragmaPack">, DefaultFalse,
PosFlag<SetTrue, [CC1Option], "Enable Apple gcc-compatible #pragma pack handling">,

View File

@ -1364,6 +1364,7 @@ public:
~MemRegionManager();
ASTContext &getContext() { return Ctx; }
const ASTContext &getContext() const { return Ctx; }
llvm::BumpPtrAllocator &getAllocator() { return A; }

View File

@ -11592,9 +11592,15 @@ static bool isUserWritingOffTheEnd(const ASTContext &Ctx, const LValue &LVal) {
// conservative with the last element in structs (if it's an array), so our
// current behavior is more compatible than an explicit list approach would
// be.
int StrictFlexArraysLevel = Ctx.getLangOpts().StrictFlexArrays;
return LVal.InvalidBase &&
Designator.Entries.size() == Designator.MostDerivedPathLength &&
Designator.MostDerivedIsArrayElement &&
(Designator.isMostDerivedAnUnsizedArray() ||
Designator.getMostDerivedArraySize() == 0 ||
(Designator.getMostDerivedArraySize() == 1 &&
StrictFlexArraysLevel < 2) ||
StrictFlexArraysLevel == 0) &&
isDesignatorAtObjectEnd(Ctx, LVal);
}

View File

@ -877,7 +877,8 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc,
/// Determine whether this expression refers to a flexible array member in a
/// struct. We disable array bounds checks for such members.
static bool isFlexibleArrayMemberExpr(const Expr *E) {
static bool isFlexibleArrayMemberExpr(const Expr *E,
unsigned StrictFlexArraysLevel) {
// For compatibility with existing code, we treat arrays of length 0 or
// 1 as flexible array members.
// FIXME: This is inconsistent with the warning code in SemaChecking. Unify
@ -886,6 +887,11 @@ static bool isFlexibleArrayMemberExpr(const Expr *E) {
if (const auto *CAT = dyn_cast<ConstantArrayType>(AT)) {
// FIXME: Sema doesn't treat [1] as a flexible array member if the bound
// was produced by macro expansion.
if (StrictFlexArraysLevel >= 2 && CAT->getSize().ugt(0))
return false;
// FIXME: While the default -fstrict-flex-arrays=0 permits Size>1 trailing
// arrays to be treated as flexible-array-members, we still emit ubsan
// checks as if they are not.
if (CAT->getSize().ugt(1))
return false;
} else if (!isa<IncompleteArrayType>(AT))
@ -900,8 +906,10 @@ static bool isFlexibleArrayMemberExpr(const Expr *E) {
if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) {
// FIXME: Sema doesn't treat a T[1] union member as a flexible array
// member, only a T[0] or T[] member gets that treatment.
// Under StrictFlexArraysLevel, obey c99+ that disallows FAM in union, see
// C11 6.7.2.1 §18
if (FD->getParent()->isUnion())
return true;
return StrictFlexArraysLevel < 2;
RecordDecl::field_iterator FI(
DeclContext::decl_iterator(const_cast<FieldDecl *>(FD)));
return ++FI == FD->getParent()->field_end();
@ -954,8 +962,10 @@ llvm::Value *CodeGenFunction::LoadPassedObjectSize(const Expr *E,
/// If Base is known to point to the start of an array, return the length of
/// that array. Return 0 if the length cannot be determined.
static llvm::Value *getArrayIndexingBound(
CodeGenFunction &CGF, const Expr *Base, QualType &IndexedType) {
static llvm::Value *getArrayIndexingBound(CodeGenFunction &CGF,
const Expr *Base,
QualType &IndexedType,
unsigned StrictFlexArraysLevel) {
// For the vector indexing extension, the bound is the number of elements.
if (const VectorType *VT = Base->getType()->getAs<VectorType>()) {
IndexedType = Base->getType();
@ -966,7 +976,7 @@ static llvm::Value *getArrayIndexingBound(
if (const auto *CE = dyn_cast<CastExpr>(Base)) {
if (CE->getCastKind() == CK_ArrayToPointerDecay &&
!isFlexibleArrayMemberExpr(CE->getSubExpr())) {
!isFlexibleArrayMemberExpr(CE->getSubExpr(), StrictFlexArraysLevel)) {
IndexedType = CE->getSubExpr()->getType();
const ArrayType *AT = IndexedType->castAsArrayTypeUnsafe();
if (const auto *CAT = dyn_cast<ConstantArrayType>(AT))
@ -993,8 +1003,11 @@ void CodeGenFunction::EmitBoundsCheck(const Expr *E, const Expr *Base,
"should not be called unless adding bounds checks");
SanitizerScope SanScope(this);
const unsigned StrictFlexArraysLevel = getLangOpts().StrictFlexArrays;
QualType IndexedType;
llvm::Value *Bound = getArrayIndexingBound(*this, Base, IndexedType);
llvm::Value *Bound =
getArrayIndexingBound(*this, Base, IndexedType, StrictFlexArraysLevel);
if (!Bound)
return;

View File

@ -6265,6 +6265,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
Args.AddLastArg(CmdArgs, options::OPT_funroll_loops,
options::OPT_fno_unroll_loops);
Args.AddLastArg(CmdArgs, options::OPT_fstrict_flex_arrays_EQ);
Args.AddLastArg(CmdArgs, options::OPT_pthread);
if (Args.hasFlag(options::OPT_mspeculative_load_hardening,

View File

@ -15830,11 +15830,26 @@ void Sema::CheckCastAlign(Expr *Op, QualType T, SourceRange TRange) {
/// We avoid emitting out-of-bounds access warnings for such arrays as they are
/// commonly used to emulate flexible arrays in C89 code.
static bool IsTailPaddedMemberArray(Sema &S, const llvm::APInt &Size,
const NamedDecl *ND) {
if (Size != 1 || !ND) return false;
const NamedDecl *ND,
unsigned StrictFlexArraysLevel) {
if (!ND)
return false;
if (StrictFlexArraysLevel >= 2 && Size != 0)
return false;
if (StrictFlexArraysLevel == 1 && Size.ule(1))
return false;
// FIXME: While the default -fstrict-flex-arrays=0 permits Size>1 trailing
// arrays to be treated as flexible-array-members, we still emit diagnostics
// as if they are not. Pending further discussion...
if (StrictFlexArraysLevel == 0 && Size != 1)
return false;
const FieldDecl *FD = dyn_cast<FieldDecl>(ND);
if (!FD) return false;
if (!FD)
return false;
// Don't consider sizes resulting from macro expansions or template argument
// substitution to form C89 tail-padded arrays.
@ -15857,10 +15872,13 @@ static bool IsTailPaddedMemberArray(Sema &S, const llvm::APInt &Size,
}
const RecordDecl *RD = dyn_cast<RecordDecl>(FD->getDeclContext());
if (!RD) return false;
if (RD->isUnion()) return false;
if (!RD)
return false;
if (RD->isUnion())
return false;
if (const CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(RD)) {
if (!CRD->isStandardLayout()) return false;
if (!CRD->isStandardLayout())
return false;
}
// See if this is the last field decl in the record.
@ -15988,9 +16006,14 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr,
// example). In this case we have no information about whether the array
// access exceeds the array bounds. However we can still diagnose an array
// access which precedes the array bounds.
//
// FIXME: this check should be redundant with the IsUnboundedArray check
// above.
if (BaseType->isIncompleteType())
return;
// FIXME: this check should belong to the IsTailPaddedMemberArray call
// below.
llvm::APInt size = ArrayTy->getSize();
if (!size.isStrictlyPositive())
return;
@ -16023,10 +16046,9 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr,
if (AllowOnePastEnd ? index.ule(size) : index.ult(size))
return;
// Also don't warn for arrays of size 1 which are members of some
// structure. These are often used to approximate flexible arrays in C89
// code.
if (IsTailPaddedMemberArray(*this, size, ND))
// Also don't warn for Flexible Array Member emulation.
const unsigned StrictFlexArraysLevel = getLangOpts().StrictFlexArrays;
if (IsTailPaddedMemberArray(*this, size, ND, StrictFlexArraysLevel))
return;
// Suppress the warning if the subscript expression (as identified by the

View File

@ -794,7 +794,11 @@ DefinedOrUnknownSVal MemRegionManager::getStaticSize(const MemRegion *MR,
if (Size.isZero())
return true;
if (getContext().getLangOpts().StrictFlexArrays >= 2)
return false;
const AnalyzerOptions &Opts = SVB.getAnalyzerOptions();
// FIXME: this option is probably redundant with -fstrict-flex-arrays=1.
if (Opts.ShouldConsiderSingleElementArraysAsFlexibleArrayMembers &&
Size.isOne())
return true;

View File

@ -1,11 +1,14 @@
// REQUIRES: x86-registered-target
// RUN: %clang_cc1 -emit-llvm -triple x86_64 -fsanitize=array-bounds %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT-0
// RUN: %clang_cc1 -emit-llvm -triple x86_64 -fsanitize=array-bounds -x c++ %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT-0,CXX,CXX-STRICT-0
/// Before flexible array member was added to C99, many projects use a
/// one-element array as the last emember of a structure as an alternative.
/// E.g. https://github.com/python/cpython/issues/84301
/// Suppress such errors by default.
// RUN: %clang_cc1 -emit-llvm -triple x86_64 -fstrict-flex-arrays=0 -fsanitize=array-bounds %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT-0
// RUN: %clang_cc1 -emit-llvm -triple x86_64 -fstrict-flex-arrays=0 -fsanitize=array-bounds -x c++ %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT-0,CXX,CXX-STRICT-0
// RUN: %clang_cc1 -emit-llvm -triple x86_64 -fstrict-flex-arrays=1 -fsanitize=array-bounds %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT-1
// RUN: %clang_cc1 -emit-llvm -triple x86_64 -fstrict-flex-arrays=1 -fsanitize=array-bounds -x c++ %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT-1,CXX
// RUN: %clang_cc1 -emit-llvm -triple x86_64 -fstrict-flex-arrays=2 -fsanitize=array-bounds %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT-2
// RUN: %clang_cc1 -emit-llvm -triple x86_64 -fstrict-flex-arrays=2 -fsanitize=array-bounds -x c++ %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT-2,CXX
// Before flexible array member was added to C99, many projects use a
// one-element array as the last member of a structure as an alternative.
// E.g. https://github.com/python/cpython/issues/84301
// Suppress such errors with -fstrict-flex-arrays=0.
struct One {
int a[1];
};
@ -19,18 +22,24 @@ struct Three {
// CHECK-LABEL: define {{.*}} @{{.*}}test_one{{.*}}(
int test_one(struct One *p, int i) {
// CHECK-STRICT-0-NOT: @__ubsan
// CHECK-STRICT-1-NOT: @__ubsan
// CHECK-STRICT-2: call void @__ubsan_handle_out_of_bounds_abort(
return p->a[i] + (p->a)[i];
}
// CHECK-LABEL: define {{.*}} @{{.*}}test_two{{.*}}(
int test_two(struct Two *p, int i) {
// CHECK-STRICT-0: call void @__ubsan_handle_out_of_bounds_abort(
// CHECK-STRICT-1: call void @__ubsan_handle_out_of_bounds_abort(
// CHECK-STRICT-2: call void @__ubsan_handle_out_of_bounds_abort(
return p->a[i] + (p->a)[i];
}
// CHECK-LABEL: define {{.*}} @{{.*}}test_three{{.*}}(
int test_three(struct Three *p, int i) {
// CHECK-STRICT-0: call void @__ubsan_handle_out_of_bounds_abort(
// CHECK-STRICT-1: call void @__ubsan_handle_out_of_bounds_abort(
// CHECK-STRICT-2: call void @__ubsan_handle_out_of_bounds_abort(
return p->a[i] + (p->a)[i];
}

View File

@ -0,0 +1,18 @@
// REQUIRES: x86-registered-target
// RUN: %clang_cc1 -emit-llvm -triple x86_64 -fsanitize=array-bounds %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-STRICT-0
//
// Disable checks on FAM even though the class doesn't have standard layout.
struct C {
int head;
};
struct S : C {
int tail[1];
};
// CHECK-LABEL: define {{.*}} @_Z8test_oneP1Si(
int test_one(S *p, int i) {
// CHECK-STRICT-0-NOT: @__ubsan
return p->tail[i] + (p->tail)[i];
}

View File

@ -0,0 +1,98 @@
// RUN: %clang -fstrict-flex-arrays=2 -target x86_64-apple-darwin -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefixes=CHECK,CHECK-STRICT-2 %s
// RUN: %clang -fstrict-flex-arrays=1 -target x86_64-apple-darwin -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefixes=CHECK,CHECK-STRICT-1 %s
// RUN: %clang -fstrict-flex-arrays=0 -target x86_64-apple-darwin -S -emit-llvm %s -o - 2>&1 | FileCheck --check-prefixes=CHECK,CHECK-STRICT-0 %s
#define OBJECT_SIZE_BUILTIN __builtin_object_size
typedef struct {
float f;
double c[];
} foo_t;
typedef struct {
float f;
double c[0];
} foo0_t;
typedef struct {
float f;
double c[1];
} foo1_t;
typedef struct {
float f;
double c[2];
} foo2_t;
// CHECK-LABEL: @bar
unsigned bar(foo_t *f) {
// CHECK-STRICT-0: ret i32 %
// CHECK-STRICT-1: ret i32 %
// CHECK-STRICT-2: ret i32 %
return OBJECT_SIZE_BUILTIN(f->c, 1);
}
// CHECK-LABEL: @bar0
unsigned bar0(foo0_t *f) {
// CHECK-STRICT-0: ret i32 %
// CHECK-STRICT-1: ret i32 %
// CHECK-STRICT-2: ret i32 %
return OBJECT_SIZE_BUILTIN(f->c, 1);
}
// CHECK-LABEL: @bar1
unsigned bar1(foo1_t *f) {
// CHECK-STRICT-0: ret i32 %
// CHECK-STRICT-1: ret i32 %
// CHECK-STRICT-2: ret i32 8
return OBJECT_SIZE_BUILTIN(f->c, 1);
}
// CHECK-LABEL: @bar2
unsigned bar2(foo2_t *f) {
// CHECK-STRICT-0: ret i32 %
// CHECK-STRICT-1: ret i32 16
// CHECK-STRICT-2: ret i32 16
return OBJECT_SIZE_BUILTIN(f->c, 1);
}
// Also checks for non-trailing flex-array like members
typedef struct {
double c[0];
float f;
} foofoo0_t;
typedef struct {
double c[1];
float f;
} foofoo1_t;
typedef struct {
double c[2];
float f;
} foofoo2_t;
// CHECK-LABEL: @babar0
unsigned babar0(foofoo0_t *f) {
// CHECK-STRICT-0: ret i32 0
// CHECK-STRICT-1: ret i32 0
// CHECK-STRICT-2: ret i32 0
return OBJECT_SIZE_BUILTIN(f->c, 1);
}
// CHECK-LABEL: @babar1
unsigned babar1(foofoo1_t *f) {
// CHECK-STRICT-0: ret i32 8
// CHECK-STRICT-1: ret i32 8
// CHECK-STRICT-2: ret i32 8
return OBJECT_SIZE_BUILTIN(f->c, 1);
}
// CHECK-LABEL: @babar2
unsigned babar2(foofoo2_t *f) {
// CHECK-STRICT-0: ret i32 16
// CHECK-STRICT-1: ret i32 16
// CHECK-STRICT-2: ret i32 16
return OBJECT_SIZE_BUILTIN(f->c, 1);
}

View File

@ -14,46 +14,3 @@ char test_FlexibleArray1(FlexibleArray1 *FA1) {
return FA1->chars[1];
// CHECK: }
}
@interface FlexibleArray2 {
@public
char chars[0];
}
@end
@implementation FlexibleArray2 {
@public
char chars2[0];
}
@end
// CHECK-LABEL: test_FlexibleArray2_1
char test_FlexibleArray2_1(FlexibleArray2 *FA2) {
// CHECK: !nosanitize
return FA2->chars[1];
// CHECK: }
}
// CHECK-LABEL: test_FlexibleArray2_2
char test_FlexibleArray2_2(FlexibleArray2 *FA2) {
// CHECK-NOT: !nosanitize
return FA2->chars2[1];
// CHECK: }
}
@interface FlexibleArray3 {
@public
char chars[0];
}
@end
@implementation FlexibleArray3 {
@public
int i;
}
@end
// CHECK-LABEL: test_FlexibleArray3
char test_FlexibleArray3(FlexibleArray3 *FA3) {
// CHECK: !nosanitize
return FA3->chars[1];
// CHECK: }
}

View File

@ -1,4 +1,6 @@
// RUN: %clang_cc1 -verify -Warray-bounds-pointer-arithmetic %s
// RUN: %clang_cc1 -verify=expected -Warray-bounds-pointer-arithmetic %s
// RUN: %clang_cc1 -verify=expected -Warray-bounds-pointer-arithmetic %s -fstrict-flex-arrays=0
// RUN: %clang_cc1 -verify=expected,strict -Warray-bounds-pointer-arithmetic %s -fstrict-flex-arrays=2
// Test case from PR10615
struct ext2_super_block{
@ -14,7 +16,9 @@ void* broken (struct ext2_super_block *es,int a)
}
// Test case reduced from PR11594
struct S { int n; };
struct S {
int n;
};
void pr11594(struct S *s) {
int a[10];
int *p = a - s->n;
@ -26,26 +30,28 @@ void pr11594(struct S *s) {
struct RDar11387038 {};
typedef struct RDar11387038 RDar11387038Array[1];
struct RDar11387038_Table {
RDar11387038Array z;
RDar11387038Array z; // strict-note {{array 'z' declared here}}
};
typedef struct RDar11387038_Table * TPtr;
typedef struct RDar11387038_Table *TPtr;
typedef TPtr *TabHandle;
struct RDar11387038_B { TabHandle x; };
struct RDar11387038_B {
TabHandle x;
};
typedef struct RDar11387038_B RDar11387038_B;
void radar11387038(void) {
RDar11387038_B *pRDar11387038_B;
struct RDar11387038* y = &(*pRDar11387038_B->x)->z[4];
struct RDar11387038 *y = &(*pRDar11387038_B->x)->z[4]; // strict-warning {{array index 4 is past the end of the array (which contains 1 element)}}
}
void pr51682 (void) {
int arr [1];
void pr51682(void) {
int arr[1];
switch (0) {
case 0:
break;
case 1:
asm goto (""::"r"(arr[42] >> 1)::failed); // no-warning
break;
case 0:
break;
case 1:
asm goto("" ::"r"(arr[42] >> 1)::failed);
break;
}
failed:;
}

View File

@ -0,0 +1,28 @@
// RUN: %clang_cc1 -verify -fstrict-flex-arrays=2 %s
// We cannot know for sure the size of a flexible array.
struct t {
int f;
int a[];
};
void test(t *s2) {
s2->a[2] = 0; // no-warning
}
// Under -fstrict-flex-arrays `a` is not a flexible array.
struct t1 {
int f;
int a[1]; // expected-note {{array 'a' declared here}}
};
void test1(t1 *s2) {
s2->a[2] = 0; // expected-warning {{array index 2 is past the end of the array (which contains 1 element)}}
}
// Under -fstrict-flex-arrays `a` is a flexible array.
struct t2 {
int f;
int a[0];
};
void test1(t2 *s2) {
s2->a[2] = 0; // no-warning
}

View File

@ -208,12 +208,15 @@ namespace tailpad {
namespace metaprogramming {
#define ONE 1
struct foo { char c[ONE]; }; // expected-note {{declared here}}
struct foo {
char c[ONE]; // expected-note {{array 'c' declared here}}
};
template <int N> struct bar { char c[N]; }; // expected-note {{declared here}}
char test(foo *F, bar<1> *B) {
return F->c[3] + // expected-warning {{array index 3 is past the end of the array (which contains 1 element)}}
B->c[3]; // expected-warning {{array index 3 is past the end of the array (which contains 1 element)}}
B->c[3]; // expected-warning {{array index 3 is past the end of the array (which contains 1 element)}}
}
}