Solve 'Too many args to microtask' problem

This patch solves 'Too many args to microtask' problem which occurs
while executing lulesh2.0.3 benchmark on AArch64.

To solve this I had to write an AArch64 assembly version of the
__kmp_invoke_microtask() function, similar to x86 and x86_64
implementations.

Differential Revision: http://reviews.llvm.org/D19879

llvm-svn: 269399
This commit is contained in:
Paul Osmialowski 2016-05-13 08:26:42 +00:00
parent 12e7931d0b
commit 7e5e8684fb
2 changed files with 144 additions and 3 deletions

View File

@ -109,6 +109,32 @@ KMP_PREFIX_UNDERSCORE(\proc):
# endif // KMP_OS_DARWIN
#endif // KMP_ARCH_X86 || KMP_ARCH_x86_64
#if KMP_OS_LINUX && KMP_ARCH_AARCH64
// Helper macros used by the AArch64 Linux assembly routines in this file.
// KMP_PREFIX_UNDERSCORE(x) expands to x unchanged.
# define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Linux* OS symbols
// Format labels so that they don't override function names in gdb's backtraces
// KMP_LABEL(x) pastes the .L_ prefix onto x.
# define KMP_LABEL(x) .L_##x // local label hidden from backtraces
// ALIGN size: emits .align with the argument 1<<size.
// NOTE(review): GNU as documents .align on AArch64 as taking a power-of-two
// exponent rather than a byte count, so ALIGN 2 may request 16-byte instead
// of 4-byte alignment. Confirm before changing; .p2align would be unambiguous.
.macro ALIGN size
.align 1<<(\size)
.endm
// DEBUG_INFO proc: closes the CFI region opened by PROC, then emits the
// .type (function) and .size (current location minus proc) directives for proc.
.macro DEBUG_INFO proc
.cfi_endproc
// Not sure why we need .type and .size for the functions
ALIGN 2
.type \proc,@function
.size \proc,.-\proc
.endm
// PROC proc: aligns, exports proc as a global symbol, defines its label
// and opens a CFI region. Pair every PROC with a DEBUG_INFO for the same name.
.macro PROC proc
ALIGN 2
.globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
.cfi_startproc
.endm
#endif // KMP_OS_LINUX && KMP_ARCH_AARCH64
// -----------------------------------------------------------------------
// data
@ -1414,6 +1440,121 @@ KMP_LABEL(kmp_1_exit):
// -----------------------------------------------------------------------
#endif /* KMP_ARCH_X86_64 */
// '
#if KMP_OS_LINUX && KMP_ARCH_AARCH64
//------------------------------------------------------------------------
//
// typedef void (*microtask_t)( int *gtid, int *tid, ... );
//
// int
// __kmp_invoke_microtask( void (*pkfn) (int *gtid, int *tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[] ) {
//    (*pkfn)( & gtid, & tid, argv[0], ... );
//    return 1;
// }
//
// Calls pkfn with the addresses of stack copies of gtid and tid, followed by
// the argc pointer-sized values read from p_argv. The first six of those go
// in x2-x7, any remaining ones are copied to the outgoing stack area.
//
// parameters:
//    x0: pkfn
//    w1: gtid
//    w2: tid
//    w3: argc
//    x4: p_argv
//    x5: &exit_frame (only read when OMPT_SUPPORT is enabled)
//
// locals:
//    __gtid: gtid parm pushed on stack so can pass &gtid to pkfn
//    __tid:  tid parm pushed on stack so can pass &tid to pkfn
//
// reg temps:
//    x8:  used to hold pkfn address
//    w9:  used as temporary for number of pkfn parms
//    x10: used to traverse p_argv array
//    x11: write cursor into the outgoing stack argument area
//    x12: used as temporary for stack parameters
//    x19: used to preserve exit_frame_ptr, callee-save
//
// stack layout while pkfn runs (x29 is the frame base):
//    [x29 - __gtid]  copy of gtid (4 bytes)
//    [x29 - __tid]   copy of tid  (4 bytes)
//    [sp, ...]       argv[6], argv[7], ... in ascending order
//    sp = x29 - 16 * (1 + argc / 2); the amount is a multiple of 16, so the
//    alignment sp had when x29 was set is kept.
//
// return: w0 (always 1/TRUE)
//

__gtid = 4
__tid = 8

// -- Begin __kmp_invoke_microtask
// mark_begin;
	.text
	PROC __kmp_invoke_microtask

	// Prologue: save frame pointer and link register, plus x19/x20 when the
	// exit frame pointer has to survive the call to pkfn.
	stp	x29, x30, [sp, #-16]!
# if OMPT_SUPPORT
	stp	x19, x20, [sp, #-16]!
# endif
	mov	x29, sp

	// Reserve 16 * (1 + argc / 2) bytes: one 16-byte slot for the two locals
	// plus room for every argument that might be passed on the stack.
	orr	w9, wzr, #1
	add	w9, w9, w3, lsr #1
	// sp is only encodable in the extended-register form of add/sub, and a
	// W source register needs an explicit extend. Writing uxtw here instead
	// of lsl yields the same value and is accepted by stricter assemblers.
	sub	sp, sp, w9, uxtw #4
	mov	x11, sp			// x11 = first outgoing stack argument slot

	// Move the incoming arguments out of the registers that are about to be
	// reused for the outgoing call.
	mov	x8, x0			// x8 = pkfn
	str	w1, [x29, #-__gtid]
	str	w2, [x29, #-__tid]
	mov	w9, w3			// w9 = number of argv entries left
	mov	x10, x4			// x10 = p_argv cursor
# if OMPT_SUPPORT
	mov	x19, x5
	str	x29, [x19]		// publish this frame as the exit frame
# endif

	// First two pkfn arguments: &gtid and &tid.
	sub	x0, x29, #__gtid
	sub	x1, x29, #__tid

	// Up to six argv entries travel in x2-x7. Each step leaves as soon as
	// the remaining count reaches zero.
	cbz	w9, KMP_LABEL(kmp_1)
	ldr	x2, [x10]

	sub	w9, w9, #1
	cbz	w9, KMP_LABEL(kmp_1)
	ldr	x3, [x10, #8]!

	sub	w9, w9, #1
	cbz	w9, KMP_LABEL(kmp_1)
	ldr	x4, [x10, #8]!

	sub	w9, w9, #1
	cbz	w9, KMP_LABEL(kmp_1)
	ldr	x5, [x10, #8]!

	sub	w9, w9, #1
	cbz	w9, KMP_LABEL(kmp_1)
	ldr	x6, [x10, #8]!

	sub	w9, w9, #1
	cbz	w9, KMP_LABEL(kmp_1)
	ldr	x7, [x10, #8]!

	// Remaining argv entries are copied to the stack area, lowest address
	// first: x10 is pre-incremented on load, x11 post-incremented on store.
KMP_LABEL(kmp_0):
	sub	w9, w9, #1
	cbz	w9, KMP_LABEL(kmp_1)
	ldr	x12, [x10, #8]!
	str	x12, [x11], #8
	b	KMP_LABEL(kmp_0)

KMP_LABEL(kmp_1):
	blr	x8			// (*pkfn)(&gtid, &tid, argv[0], ...)
	orr	w0, wzr, #1		// return value is always 1
	mov	sp, x29			// drop locals and outgoing arguments
# if OMPT_SUPPORT
	str	xzr, [x19]		// clear the published exit frame
	ldp	x19, x20, [sp], #16
# endif
	ldp	x29, x30, [sp], #16
	ret

	DEBUG_INFO __kmp_invoke_microtask
// -- End __kmp_invoke_microtask
#endif /* KMP_OS_LINUX && KMP_ARCH_AARCH64 */
#if KMP_ARCH_ARM
.data
.comm .gomp_critical_user_,32,8

View File

@ -518,7 +518,7 @@ __kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 d )
return old_value;
}
# if KMP_ARCH_X86 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64
# if KMP_ARCH_X86 || KMP_ARCH_PPC64 || (KMP_OS_LINUX && KMP_ARCH_AARCH64)
kmp_int8
__kmp_test_then_add8( volatile kmp_int8 *p, kmp_int8 d )
{
@ -552,7 +552,7 @@ __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d )
}
return old_value;
}
# endif /* KMP_ARCH_X86 */
# endif /* KMP_ARCH_X86 || KMP_ARCH_PPC64 || (KMP_OS_LINUX && KMP_ARCH_AARCH64) */
kmp_int64
__kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 d )
@ -2574,7 +2574,7 @@ __kmp_get_load_balance( int max )
#endif // USE_LOAD_BALANCE
#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC)
#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || (KMP_OS_LINUX && KMP_ARCH_AARCH64))
// we really only need the case with 1 argument, because CLANG always build
// a struct of pointers to shared variables referenced in the outlined function