2007-10-17 14:26:11 +08:00
|
|
|
#ifndef __LINUX_COMPILER_H
|
|
|
|
#error "Please don't include <linux/compiler-gcc.h> directly, include <linux/compiler.h> instead."
|
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Common definitions for all gcc versions go here.
|
|
|
|
*/
|
2015-06-26 06:01:00 +08:00
|
|
|
/*
 * Compiler version folded into one integer (major * 10000 + minor * 100 +
 * patchlevel, e.g. gcc 4.9.2 -> 40902) so that version checks can be plain
 * numeric comparisons.
 */
#define GCC_VERSION (__GNUC__ * 10000		\
		     + __GNUC_MINOR__ * 100	\
		     + __GNUC_PATCHLEVEL__)
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/* Optimization barrier */
|
lib: make memzero_explicit more robust against dead store elimination
In commit 0b053c951829 ("lib: memzero_explicit: use barrier instead
of OPTIMIZER_HIDE_VAR"), we made memzero_explicit() more robust in
case LTO would decide to inline memzero_explicit() and eventually
find out it could be eliminated as dead store.
While using barrier() works well for the case of gcc, recent efforts
from LLVMLinux people suggest to use llvm as an alternative to gcc,
and there, Stephan found in a simple stand-alone user space example
that llvm could nevertheless optimize and thus eliminate the memset().
A similar issue has been observed in the referenced llvm bug report,
which is regarded as not-a-bug.
Based on some experiments, icc is a bit special on its own, while it
doesn't seem to eliminate the memset(), it could do so with an own
implementation, and then result in similar findings as with llvm.
The fix in this patch now works for all three compilers (also tested
with more aggressive optimization levels). Arguably, in the current
kernel tree it's more of a theoretical issue, but imho, it's better
to be pedantic about it.
It's clearly visible with gcc/llvm though, with the below code: if we
would have used barrier() only here, llvm would have omitted clearing,
not so with barrier_data() variant:
static inline void memzero_explicit(void *s, size_t count)
{
memset(s, 0, count);
barrier_data(s);
}
int main(void)
{
char buff[20];
memzero_explicit(buff, sizeof(buff));
return 0;
}
$ gcc -O2 test.c
$ gdb a.out
(gdb) disassemble main
Dump of assembler code for function main:
0x0000000000400400 <+0>: lea -0x28(%rsp),%rax
0x0000000000400405 <+5>: movq $0x0,-0x28(%rsp)
0x000000000040040e <+14>: movq $0x0,-0x20(%rsp)
0x0000000000400417 <+23>: movl $0x0,-0x18(%rsp)
0x000000000040041f <+31>: xor %eax,%eax
0x0000000000400421 <+33>: retq
End of assembler dump.
$ clang -O2 test.c
$ gdb a.out
(gdb) disassemble main
Dump of assembler code for function main:
0x00000000004004f0 <+0>: xorps %xmm0,%xmm0
0x00000000004004f3 <+3>: movaps %xmm0,-0x18(%rsp)
0x00000000004004f8 <+8>: movl $0x0,-0x8(%rsp)
0x0000000000400500 <+16>: lea -0x18(%rsp),%rax
0x0000000000400505 <+21>: xor %eax,%eax
0x0000000000400507 <+23>: retq
End of assembler dump.
As gcc, clang, but also icc defines __GNUC__, it's sufficient to define
this in compiler-gcc.h only to be picked up. For a fallback or otherwise
unsupported compiler, we define it as a barrier. Similarly, for ecc which
does not support gcc inline asm.
Reference: https://llvm.org/bugs/show_bug.cgi?id=15495
Reported-by: Stephan Mueller <smueller@chronox.de>
Tested-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Stephan Mueller <smueller@chronox.de>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: mancha security <mancha1@zoho.com>
Cc: Mark Charlebois <charlebm@gmail.com>
Cc: Behan Webster <behanw@converseincode.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2015-04-30 10:13:52 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/* The "volatile" is due to gcc bugs */
|
|
|
|
/* Empty asm statement whose only effect is its "memory" clobber. */
#define barrier() __asm__ __volatile__("" : : : "memory")
|
lib: make memzero_explicit more robust against dead store elimination
In commit 0b053c951829 ("lib: memzero_explicit: use barrier instead
of OPTIMIZER_HIDE_VAR"), we made memzero_explicit() more robust in
case LTO would decide to inline memzero_explicit() and eventually
find out it could be eliminated as dead store.
While using barrier() works well for the case of gcc, recent efforts
from LLVMLinux people suggest to use llvm as an alternative to gcc,
and there, Stephan found in a simple stand-alone user space example
that llvm could nevertheless optimize and thus eliminate the memset().
A similar issue has been observed in the referenced llvm bug report,
which is regarded as not-a-bug.
Based on some experiments, icc is a bit special on its own, while it
doesn't seem to eliminate the memset(), it could do so with an own
implementation, and then result in similar findings as with llvm.
The fix in this patch now works for all three compilers (also tested
with more aggressive optimization levels). Arguably, in the current
kernel tree it's more of a theoretical issue, but imho, it's better
to be pedantic about it.
It's clearly visible with gcc/llvm though, with the below code: if we
would have used barrier() only here, llvm would have omitted clearing,
not so with barrier_data() variant:
static inline void memzero_explicit(void *s, size_t count)
{
memset(s, 0, count);
barrier_data(s);
}
int main(void)
{
char buff[20];
memzero_explicit(buff, sizeof(buff));
return 0;
}
$ gcc -O2 test.c
$ gdb a.out
(gdb) disassemble main
Dump of assembler code for function main:
0x0000000000400400 <+0>: lea -0x28(%rsp),%rax
0x0000000000400405 <+5>: movq $0x0,-0x28(%rsp)
0x000000000040040e <+14>: movq $0x0,-0x20(%rsp)
0x0000000000400417 <+23>: movl $0x0,-0x18(%rsp)
0x000000000040041f <+31>: xor %eax,%eax
0x0000000000400421 <+33>: retq
End of assembler dump.
$ clang -O2 test.c
$ gdb a.out
(gdb) disassemble main
Dump of assembler code for function main:
0x00000000004004f0 <+0>: xorps %xmm0,%xmm0
0x00000000004004f3 <+3>: movaps %xmm0,-0x18(%rsp)
0x00000000004004f8 <+8>: movl $0x0,-0x8(%rsp)
0x0000000000400500 <+16>: lea -0x18(%rsp),%rax
0x0000000000400505 <+21>: xor %eax,%eax
0x0000000000400507 <+23>: retq
End of assembler dump.
As gcc, clang, but also icc defines __GNUC__, it's sufficient to define
this in compiler-gcc.h only to be picked up. For a fallback or otherwise
unsupported compiler, we define it as a barrier. Similarly, for ecc which
does not support gcc inline asm.
Reference: https://llvm.org/bugs/show_bug.cgi?id=15495
Reported-by: Stephan Mueller <smueller@chronox.de>
Tested-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Stephan Mueller <smueller@chronox.de>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: mancha security <mancha1@zoho.com>
Cc: Mark Charlebois <charlebm@gmail.com>
Cc: Behan Webster <behanw@converseincode.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2015-04-30 10:13:52 +08:00
|
|
|
/*
|
|
|
|
* This version is meant, e.g., to prevent dead store elimination on @ptr
|
|
|
|
* where gcc and llvm may behave differently when otherwise using
|
|
|
|
* normal barrier(): while gcc behavior gets along with a normal
|
|
|
|
* barrier(), llvm needs an explicit input variable to be assumed
|
|
|
|
* clobbered. The issue is as follows: while the inline asm might
|
|
|
|
* access any memory it wants, the compiler could have fit all of
|
|
|
|
* @ptr into memory registers instead, and since @ptr never escaped
|
|
|
|
* from that, it proved that the inline asm wasn't touching any of
|
|
|
|
* it. This version works well with both compilers, i.e. we're telling
|
|
|
|
* the compiler that the inline asm absolutely may see the contents
|
|
|
|
* of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495
|
|
|
|
*/
|
|
|
|
/* Like barrier(), but @ptr is additionally handed to the asm as an "r" input. */
#define barrier_data(ptr) __asm__ __volatile__("" : : "r" (ptr) : "memory")
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2006-01-10 15:21:20 +08:00
|
|
|
/*
|
2009-01-10 08:40:53 +08:00
|
|
|
* This macro obfuscates arithmetic on a variable address so that gcc
|
|
|
|
* shouldn't recognize the original var, and make assumptions about it.
|
|
|
|
*
|
|
|
|
* This is needed because the C standard makes it undefined to do
|
|
|
|
* pointer arithmetic on "objects" outside their boundaries and the
|
|
|
|
* gcc optimizers assume this is the case. In particular they
|
|
|
|
* assume such arithmetic does not wrap.
|
|
|
|
*
|
|
|
|
* A miscompilation has been observed because of this on PPC.
|
|
|
|
* To work around it we hide the relationship of the pointer and the object
|
|
|
|
* using this macro.
|
|
|
|
*
|
2006-01-10 15:21:20 +08:00
|
|
|
* Versions of the ppc64 compiler before 4.1 had a bug where use of
|
|
|
|
* RELOC_HIDE could trash r30. The bug can be worked around by changing
|
|
|
|
* the inline assembly constraint from =g to =r, in this particular
|
|
|
|
* case either is valid.
|
|
|
|
*/
|
2015-06-26 06:01:00 +08:00
|
|
|
/*
 * RELOC_HIDE(ptr, off) - yield @ptr advanced by @off bytes, with @ptr's type.
 *
 * @ptr is routed through an empty asm (output "=r" tied to the input by the
 * "0" constraint) into an unsigned long, and @off is added to that integer,
 * so the addition is not pointer arithmetic on the original object.
 *
 * __typeof__ is used rather than typeof so the macro also expands correctly
 * when the compiler runs in a strict ISO C mode.
 */
#define RELOC_HIDE(ptr, off)						\
({									\
	unsigned long __ptr;						\
	__asm__ ("" : "=r"(__ptr) : "0"(ptr));				\
	(__typeof__(ptr)) (__ptr + (off));				\
})
|
2006-01-08 17:04:09 +08:00
|
|
|
|
2013-11-26 08:00:41 +08:00
|
|
|
/* Make the optimizer believe the variable can be manipulated arbitrarily. */
|
2015-06-26 06:01:00 +08:00
|
|
|
/*
 * Pass @var through an empty asm as both input ("0") and output ("=r").
 * The value is unchanged at run time; @var must be a modifiable lvalue.
 */
#define OPTIMIZER_HIDE_VAR(var)						\
	__asm__ ("" : "=r" (var) : "0" (var))
|
2013-11-26 08:00:41 +08:00
|
|
|
|
2011-05-25 08:13:17 +08:00
|
|
|
/*
 * __must_be_array(a) - evaluates to 0 when @a is an array and breaks the
 * build otherwise (through BUILD_BUG_ON_ZERO, defined elsewhere).
 */
#ifdef __CHECKER__
/* With __CHECKER__ defined the check is replaced by a constant 0. */
#define __must_be_array(a)	0
#else
/* &a[0] degrades to a pointer: a different type from an array */
#define __must_be_array(a)	BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
#endif
|
2006-01-08 17:04:09 +08:00
|
|
|
|
2008-03-03 19:38:52 +08:00
|
|
|
/*
|
2008-04-30 06:15:31 +08:00
|
|
|
* Force always-inline if the user requests it so via the .config,
|
|
|
|
* or if gcc is too old:
|
2008-03-03 19:38:52 +08:00
|
|
|
*/
|
2015-06-26 06:01:00 +08:00
|
|
|
/*
 * Every spelling of the inline keyword is redefined to carry notrace.
 *
 * Rationale (ftrace: "Do not function trace inlined functions"): a function
 * that gcc really inlines gets no mcount prologue and is therefore not
 * traced, but with CONFIG_OPTIMIZE_INLINING gcc may decide not to inline a
 * function marked inline.  Whether such a function is traced would then
 * depend on the compiler version and options.  Marking all inline functions
 * notrace removes that ambiguity.
 */
#if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) ||		\
    !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4)
/* Inlining is forced: add always_inline on top of notrace. */
#define inline		inline		__attribute__((always_inline)) notrace
#define __inline__	__inline__	__attribute__((always_inline)) notrace
#define __inline	__inline	__attribute__((always_inline)) notrace
#else
/* A lot of inline functions can cause havoc with function tracing */
#define inline		inline		notrace
#define __inline__	__inline__	notrace
#define __inline	__inline	notrace
#endif
|
|
|
|
|
2015-06-26 06:01:00 +08:00
|
|
|
/* inline plus the always_inline attribute. */
#define __always_inline	inline __attribute__((always_inline))
/* Forbid inlining of the function. */
#define noinline	__attribute__((noinline))

/* Thin wrappers around the GCC attributes of the same name. */
#define __deprecated	__attribute__((deprecated))
#define __packed	__attribute__((packed))
#define __weak		__attribute__((weak))
/* @symbol is stringified, so pass the bare identifier, not a string. */
#define __alias(symbol)	__attribute__((alias(#symbol)))
|
2009-03-13 01:03:16 +08:00
|
|
|
|
|
|
|
/*
|
2015-06-26 06:01:00 +08:00
|
|
|
* it doesn't make sense on ARM (currently the only user of __naked)
|
|
|
|
* to trace naked functions because then mcount is called without
|
|
|
|
* stack and frame pointer being set up and there is no chance to
|
|
|
|
* restore the lr register to the value before mcount was called.
|
|
|
|
*
|
|
|
|
* The asm() bodies of naked functions often depend on standard calling
|
|
|
|
* conventions, therefore they must be noinline and noclone.
|
2010-06-30 06:05:25 +08:00
|
|
|
*
|
2015-06-26 06:01:00 +08:00
|
|
|
* GCC 4.[56] currently fail to enforce this, so we must do so ourselves.
|
|
|
|
* See GCC PR44290.
|
2009-03-13 01:03:16 +08:00
|
|
|
*/
|
2015-06-26 06:01:00 +08:00
|
|
|
/* The naked attribute combined with noinline, __noclone and notrace. */
#define __naked __attribute__((naked)) noinline __noclone notrace
|
2009-03-13 01:03:16 +08:00
|
|
|
|
2015-06-26 06:01:00 +08:00
|
|
|
/* Expands to GCC's noreturn function attribute. */
#define __noreturn __attribute__((noreturn))
|
2007-10-18 18:07:07 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* From the GCC manual:
|
|
|
|
*
|
|
|
|
* Many functions have no effects except the return value and their
|
|
|
|
* return value depends only on the parameters and/or global
|
|
|
|
* variables. Such a function can be subject to common subexpression
|
|
|
|
* elimination and loop optimization just as an arithmetic operator
|
|
|
|
* would be.
|
|
|
|
* [...]
|
|
|
|
*/
|
2015-06-26 06:01:00 +08:00
|
|
|
#define __pure			__attribute__((pure))
/* Request an alignment of @x bytes. */
#define __aligned(x)		__attribute__((aligned(x)))
/* @a: index of the format string argument, @b: index of the first argument to check. */
#define __printf(a, b)		__attribute__((format(printf, a, b)))
#define __scanf(a, b)		__attribute__((format(scanf, a, b)))
#define __attribute_const__	__attribute__((__const__))
/* Both spellings expand to the same "unused" attribute. */
#define __maybe_unused		__attribute__((unused))
#define __always_unused		__attribute__((unused))
|
2009-01-03 01:23:03 +08:00
|
|
|
|
2015-06-26 06:01:02 +08:00
|
|
|
/* gcc version specific checks */
|
|
|
|
|
|
|
|
#if GCC_VERSION < 30200
|
|
|
|
# error Sorry, your compiler is too old - please upgrade it.
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
 * Before gcc 3.3 __used expands to the "unused" attribute; from 3.3 on it
 * expands to the "used" attribute.
 */
#if GCC_VERSION < 30300
# define __used			__attribute__((__unused__))
#else
# define __used			__attribute__((__used__))
#endif
|
|
|
|
|
|
|
|
#ifdef CONFIG_GCOV_KERNEL
|
|
|
|
# if GCC_VERSION < 30400
|
|
|
|
# error "GCOV profiling support for gcc versions below 3.4 not included"
|
|
|
|
# endif /* GCC_VERSION < 30400 */
|
|
|
|
#endif /* CONFIG_GCOV_KERNEL */
|
|
|
|
|
|
|
|
/* Only defined from gcc 3.4 on; expands to the warn_unused_result attribute. */
#if GCC_VERSION >= 30400
#define __must_check		__attribute__((warn_unused_result))
#endif
|
|
|
|
|
|
|
|
#if GCC_VERSION >= 40000
|
|
|
|
|
|
|
|
/* GCC 4.1.[01] miscompiles __weak */
|
|
|
|
#ifdef __KERNEL__
|
|
|
|
# if GCC_VERSION >= 40100 && GCC_VERSION <= 40101
|
|
|
|
# error Your version of gcc miscompiles the __weak directive
|
|
|
|
# endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#define __used __attribute__((__used__))
|
|
|
|
/* Byte offset of member @b inside type @a, computed by the compiler builtin. */
#define __compiler_offsetof(a, b)					\
	__builtin_offsetof(a, b)
|
|
|
|
|
|
|
|
/*
 * Only defined for gcc 4.1 up to (but not including) 4.6; expands to
 * __builtin_object_size() with type 0.
 */
#if GCC_VERSION >= 40100 && GCC_VERSION < 40600
# define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
#endif
|
|
|
|
|
|
|
|
#if GCC_VERSION >= 40300
/* Mark functions as cold. gcc will assume any path leading to a call
 * to them will be unlikely.  This means a lot of manual unlikely()s
 * are unnecessary now for any paths leading to the usual suspects
 * like BUG(), printk(), panic() etc. [but let's keep them for now for
 * older compilers]
 *
 * Early snapshots of gcc 4.3 don't support this and we can't detect this
 * in the preprocessor, but we can live with this because they're unreleased.
 * Maketime probing would be overkill here.
 *
 * gcc also has a __attribute__((__hot__)) to move hot functions into
 * a special section, but I don't see any sense in this right now in
 * the kernel context
 */
#define __cold			__attribute__((__cold__))

/* Combine __UNIQUE_ID_, @prefix and __COUNTER__ via __PASTE (defined elsewhere). */
#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)

#ifndef __CHECKER__
/* Attach @message to a function through gcc's warning/error attributes. */
# define __compiletime_warning(message) __attribute__((warning(message)))
# define __compiletime_error(message) __attribute__((error(message)))
#endif /* __CHECKER__ */
#endif /* GCC_VERSION >= 40300 */
|
|
|
|
|
|
|
|
#if GCC_VERSION >= 40500
/*
 * Mark a position in code as unreachable.  This can be used to
 * suppress control flow warnings after asm blocks that transfer
 * control elsewhere.
 *
 * Early snapshots of gcc 4.5 don't support this and we can't detect
 * this in the preprocessor, but we can live with this because they're
 * unreleased.  Really, we need to have autoconf for the kernel.
 */
#define unreachable() __builtin_unreachable()

/* Mark a function definition as prohibited from being cloned. */
#define __noclone	__attribute__((__noclone__))

#endif /* GCC_VERSION >= 40500 */
|
|
|
|
|
|
|
|
#if GCC_VERSION >= 40600
/*
 * Tell the optimizer that something else uses this function or variable.
 */
#define __visible	__attribute__((externally_visible))
#endif /* GCC_VERSION >= 40600 */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* GCC 'asm goto' miscompiles certain code sequences:
|
|
|
|
*
|
|
|
|
* http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670
|
|
|
|
*
|
|
|
|
* Work it around via a compiler barrier quirk suggested by Jakub Jelinek.
|
|
|
|
*
|
|
|
|
* (asm goto is automatically volatile - the naming reflects this.)
|
|
|
|
*/
|
|
|
|
/*
 * Emits the asm goto followed by an empty asm statement; the do/while (0)
 * wrapper makes the pair usable as a single statement.
 */
#define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0)
|
|
|
|
|
|
|
|
#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP
/* The 32- and 64-bit flags are set from gcc 4.4 on. */
#if GCC_VERSION >= 40400
#define __HAVE_BUILTIN_BSWAP32__
#define __HAVE_BUILTIN_BSWAP64__
#endif
/* The 16-bit flag needs gcc 4.8, or gcc 4.6 when __powerpc__ is defined. */
#if GCC_VERSION >= 40800 || (defined(__powerpc__) && GCC_VERSION >= 40600)
#define __HAVE_BUILTIN_BSWAP16__
#endif
#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */
|
|
|
|
|
|
|
|
/* 4 for gcc >= 5.0, 3 for gcc >= 4.9.2; left undefined for older compilers. */
#if GCC_VERSION >= 50000
#define KASAN_ABI_VERSION 4
#elif GCC_VERSION >= 40902
#define KASAN_ABI_VERSION 3
#endif
|
|
|
|
|
|
|
|
#endif /* gcc version >= 40000 specific checks */
|
2010-06-30 06:05:25 +08:00
|
|
|
|
|
|
|
/* Fallback: expand __noclone to nothing when no definition was provided earlier. */
#if !defined(__noclone)
#define __noclone	/* not needed */
#endif
|
2011-03-23 07:33:55 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* A trick to suppress uninitialized variable warning without generating any
|
|
|
|
* code
|
|
|
|
*/
|
|
|
|
#define uninitialized_var(x) x = x
|