From 293da70b831f635be52ccf7fc79fb4c483623046 Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Thu, 7 Dec 2017 09:54:39 +0000 Subject: [PATCH] [ARM] ACLE parallel arithmetic and DSP style multiplications This is a follow-up to r302131, in which we forgot to add SemaChecking tests. Adding these tests revealed two problems, which have been fixed: - added the missing intrinsic __qdbl, - ssat16 and usat16 are now properly range checked. Differential Revision: https://reviews.llvm.org/D40888 llvm-svn: 320019 --- clang/include/clang/Basic/BuiltinsARM.def | 1 + clang/lib/Sema/SemaChecking.cpp | 21 ++- clang/test/Sema/builtins-arm.c | 184 ++++++++++++++++++++++ 3 files changed, 198 insertions(+), 8 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsARM.def b/clang/include/clang/Basic/BuiltinsARM.def index 4e277f8a5a6b..941d320d729f 100644 --- a/clang/include/clang/Basic/BuiltinsARM.def +++ b/clang/include/clang/Basic/BuiltinsARM.def @@ -36,6 +36,7 @@ BUILTIN(__builtin_arm_smulwt, "iii", "nc") // Saturating arithmetic BUILTIN(__builtin_arm_qadd, "iii", "nc") BUILTIN(__builtin_arm_qsub, "iii", "nc") +BUILTIN(__builtin_arm_qdbl, "ii", "nc") BUILTIN(__builtin_arm_ssat, "iiUi", "nc") BUILTIN(__builtin_arm_usat, "UiiUi", "nc") diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 6dec8d173013..d0017dabbf5f 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1554,21 +1554,26 @@ bool Sema::CheckARMBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { // For intrinsics which take an immediate value as part of the instruction, // range check them here. - unsigned i = 0, l = 0, u = 0; + // FIXME: VFP Intrinsics should error if VFP not present. 
switch (BuiltinID) { default: return false; - case ARM::BI__builtin_arm_ssat: i = 1; l = 1; u = 31; break; - case ARM::BI__builtin_arm_usat: i = 1; u = 31; break; + case ARM::BI__builtin_arm_ssat: + return SemaBuiltinConstantArgRange(TheCall, 1, 1, 32); + case ARM::BI__builtin_arm_usat: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); + case ARM::BI__builtin_arm_ssat16: + return SemaBuiltinConstantArgRange(TheCall, 1, 1, 16); + case ARM::BI__builtin_arm_usat16: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); case ARM::BI__builtin_arm_vcvtr_f: - case ARM::BI__builtin_arm_vcvtr_d: i = 1; u = 1; break; + case ARM::BI__builtin_arm_vcvtr_d: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1); case ARM::BI__builtin_arm_dmb: case ARM::BI__builtin_arm_dsb: case ARM::BI__builtin_arm_isb: - case ARM::BI__builtin_arm_dbg: l = 0; u = 15; break; + case ARM::BI__builtin_arm_dbg: + return SemaBuiltinConstantArgRange(TheCall, 0, 0, 15); } - - // FIXME: VFP Intrinsics should error if VFP not present. 
- return SemaBuiltinConstantArgRange(TheCall, i, l, u + l); } bool Sema::CheckAArch64BuiltinFunctionCall(unsigned BuiltinID, diff --git a/clang/test/Sema/builtins-arm.c b/clang/test/Sema/builtins-arm.c index 668b8284ffeb..373bbae31e7f 100644 --- a/clang/test/Sema/builtins-arm.c +++ b/clang/test/Sema/builtins-arm.c @@ -2,6 +2,8 @@ // RUN: %clang_cc1 -triple armv7 -target-abi apcs-gnu \ // RUN: -fsyntax-only -verify %s +#include <arm_acle.h> + void f(void *a, void *b) { __clear_cache(); // expected-error {{too few arguments to function call, expected 2, have 0}} // expected-note {{'__clear_cache' is a builtin with type 'void (void *, void *)}} __clear_cache(a); // expected-error {{too few arguments to function call, expected 2, have 1}} @@ -136,3 +138,185 @@ void test6(int a, int b, int c) { __builtin_arm_mrrc2(15, a, 0); // expected-error {{argument to '__builtin_arm_mrrc2' must be a constant integer}} __builtin_arm_mrrc2(15, 0, a); // expected-error {{argument to '__builtin_arm_mrrc2' must be a constant integer}} } + +void test_9_3_multiplications(int a, int b) { + int r; + r = __builtin_arm_smulbb(a, b); + r = __builtin_arm_smulbb(1, -9); + + r = __builtin_arm_smulbt(a, b); + r = __builtin_arm_smulbt(0, b); + + r = __builtin_arm_smultb(a, b); + r = __builtin_arm_smultb(5, b); + + r = __builtin_arm_smultt(a, b); + r = __builtin_arm_smultt(a, -1); + + r = __builtin_arm_smulwb(a, b); + r = __builtin_arm_smulwb(1, 2); + + r = __builtin_arm_smulwt(a, b); + r = __builtin_arm_smulwt(-1, -2); + r = __builtin_arm_smulwt(-1.0f, -2); +} + +void test_9_4_1_width_specified_saturation(int a, int b) { + unsigned u; + int s; + + s = __builtin_arm_ssat(8, 2); + s = __builtin_arm_ssat(a, 1); + s = __builtin_arm_ssat(a, 32); + s = __builtin_arm_ssat(a, 0); // expected-error {{argument should be a value from 1 to 32}} + s = __builtin_arm_ssat(a, 33); // expected-error {{argument should be a value from 1 to 32}} + s = __builtin_arm_ssat(a, b); // expected-error {{argument to '__builtin_arm_ssat' 
must be a constant integer}} + + u = __builtin_arm_usat(8, 2); + u = __builtin_arm_usat(a, 0); + u = __builtin_arm_usat(a, 31); + u = __builtin_arm_usat(a, 32); // expected-error {{argument should be a value from 0 to 31}} + u = __builtin_arm_usat(a, b); // expected-error {{argument to '__builtin_arm_usat' must be a constant integer}} +} + +void test_9_4_2_saturating_addition_subtraction(int a, int b) { + int s; + s = __builtin_arm_qadd(a, b); + s = __builtin_arm_qadd(-1, 0); + + s = __builtin_arm_qsub(a, b); + s = __builtin_arm_qsub(0, -1); + + s = __builtin_arm_qdbl(a); +} + +void test_9_4_3_accumulating_multiplications(int a, int b, int c) { + int s; + + s = __builtin_arm_smlabb(a, b, c); + s = __builtin_arm_smlabb(1, b, c); + s = __builtin_arm_smlabb(a, 2, c); + s = __builtin_arm_smlabb(a, b, -3); + + s = __builtin_arm_smlabt(a, b, c); + s = __builtin_arm_smlabt(1, b, c); + s = __builtin_arm_smlabt(a, 2, c); + s = __builtin_arm_smlabt(a, b, -3); + + s = __builtin_arm_smlatb(a, b, c); + s = __builtin_arm_smlatt(1, b, c); + s = __builtin_arm_smlawb(a, 2, c); + s = __builtin_arm_smlawt(a, b, -3); +} + +void test_9_5_4_parallel_16bit_saturation(int16x2_t a) { + unsigned u; + int s; + + s = __builtin_arm_ssat16(a, 1); + s = __builtin_arm_ssat16(a, 16); + s = __builtin_arm_ssat16(a, 0); // expected-error {{argument should be a value from 1 to 16}} + s = __builtin_arm_ssat16(a, 17); // expected-error {{argument should be a value from 1 to 16}} + + u = __builtin_arm_usat16(a, 0); + u = __builtin_arm_usat16(a, 15); + u = __builtin_arm_usat16(a, 16); // expected-error {{argument should be a value from 0 to 15}} +} + +void test_9_5_5_packing_and_unpacking(int16x2_t a, int8x4_t b, uint16x2_t c, uint8x4_t d) { + int16x2_t x; + uint16x2_t y; + + x = __builtin_arm_sxtab16(a, b); + x = __builtin_arm_sxtab16(1, -1); + x = __builtin_arm_sxtb16(b); + x = __builtin_arm_sxtb16(-b); + + y = __builtin_arm_uxtab16(c, d); + y = __builtin_arm_uxtab16(-1, -2); + y = 
__builtin_arm_uxtb16(d); + y = __builtin_arm_uxtb16(-1); +} + +uint8x4_t +test_9_5_6_parallel_selection(uint8x4_t a, uint8x4_t b) { + return __builtin_arm_sel(a, b); +} + +void test_9_5_7_parallel_8bit_addition_substraction(int8x4_t a, int8x4_t b, + uint8x4_t c, uint8x4_t d) { + int8x4_t s; + uint8x4_t u; + + s = __builtin_arm_qadd8(a, b); + s = __builtin_arm_qsub8(a, b); + s = __builtin_arm_sadd8(a, b); + s = __builtin_arm_shadd8(a, b); + s = __builtin_arm_shsub8(a, b); + s = __builtin_arm_ssub8(a, b); + + u = __builtin_arm_uadd8(c, d); + u = __builtin_arm_uhadd8(c, d); + u = __builtin_arm_uhsub8(c, d); + u = __builtin_arm_uqadd8(c, d); + u = __builtin_arm_uqsub8(c, d); + u = __builtin_arm_usub8(c, d); +} + +void test_9_5_8_absolute_differences(uint8x4_t a, uint8x4_t b, uint32_t c) { + uint32_t r; + + r = __builtin_arm_usad8(a, b); + r = __builtin_arm_usada8(a, b, c); +} + +void test_9_5_9_parallel_addition_and_subtraction(int16x2_t a, int16x2_t b, + uint16x2_t c, uint16x2_t d) { + int16x2_t x; + uint16x2_t y; + + x = __builtin_arm_qadd16(a, b); + x = __builtin_arm_qasx(a, b); + x = __builtin_arm_qsax(a, b); + x = __builtin_arm_qsub16(a, b); + x = __builtin_arm_sadd16(a, b); + x = __builtin_arm_sasx(a, b); + x = __builtin_arm_shadd16(a, b); + x = __builtin_arm_shasx(a, b); + x = __builtin_arm_shsax(a, b); + x = __builtin_arm_shsub16(a, b); + x = __builtin_arm_ssax(a, b); + x = __builtin_arm_ssub16(a, b); + + y = __builtin_arm_uadd16(c, d); + y = __builtin_arm_uasx(c, d); + y = __builtin_arm_uhadd16(c, d); + y = __builtin_arm_uhasx(c, d); + y = __builtin_arm_uhsax(c, d); + y = __builtin_arm_uhsub16(c, d); + y = __builtin_arm_uqadd16(c, d); + y = __builtin_arm_uqasx(c, d); + y = __builtin_arm_uqsax(c, d); + y = __builtin_arm_uqsub16(c, d); + y = __builtin_arm_usax(c, d); + y = __builtin_arm_usub16(c, d); +} + +void test_9_5_10_parallel_16bit_multiplication(int16x2_t a, int16x2_t b, + int32_t c, int64_t d) { + int32_t x; + int64_t y; + + x = __builtin_arm_smlad(a, b, 
c); + x = __builtin_arm_smladx(a, b, c); + y = __builtin_arm_smlald(a, b, d); + y = __builtin_arm_smlaldx(a, b, d); + x = __builtin_arm_smlsd(a, b, c); + x = __builtin_arm_smlsdx(a, b, c); + y = __builtin_arm_smlsld(a, b, d); + y = __builtin_arm_smlsldx(a, b, d); + x = __builtin_arm_smuad(a, b); + x = __builtin_arm_smuadx(a, b); + x = __builtin_arm_smusd(a, b); + x = __builtin_arm_smusdx(a, b); +}