[ARM] ACLE parallel arithmetic and DSP style multiplications

This is a follow-up to r302131, in which we forgot to add SemaChecking
tests. Adding these tests revealed two problems, both of which are fixed here:
- the missing intrinsic __qdbl has been added,
- ssat16 and usat16 are now properly range checked.

Differential Revision: https://reviews.llvm.org/D40888

llvm-svn: 320019
This commit is contained in:
Sjoerd Meijer 2017-12-07 09:54:39 +00:00
parent d590c85753
commit 293da70b83
3 changed files with 198 additions and 8 deletions

View File

@ -36,6 +36,7 @@ BUILTIN(__builtin_arm_smulwt, "iii", "nc")
// Saturating arithmetic // Saturating arithmetic
BUILTIN(__builtin_arm_qadd, "iii", "nc") BUILTIN(__builtin_arm_qadd, "iii", "nc")
BUILTIN(__builtin_arm_qsub, "iii", "nc") BUILTIN(__builtin_arm_qsub, "iii", "nc")
BUILTIN(__builtin_arm_qdbl, "ii", "nc")
BUILTIN(__builtin_arm_ssat, "iiUi", "nc") BUILTIN(__builtin_arm_ssat, "iiUi", "nc")
BUILTIN(__builtin_arm_usat, "UiiUi", "nc") BUILTIN(__builtin_arm_usat, "UiiUi", "nc")

View File

@ -1554,21 +1554,26 @@ bool Sema::CheckARMBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
// For intrinsics which take an immediate value as part of the instruction, // For intrinsics which take an immediate value as part of the instruction,
// range check them here. // range check them here.
unsigned i = 0, l = 0, u = 0; // FIXME: VFP Intrinsics should error if VFP not present.
switch (BuiltinID) { switch (BuiltinID) {
default: return false; default: return false;
case ARM::BI__builtin_arm_ssat: i = 1; l = 1; u = 31; break; case ARM::BI__builtin_arm_ssat:
case ARM::BI__builtin_arm_usat: i = 1; u = 31; break; return SemaBuiltinConstantArgRange(TheCall, 1, 1, 32);
case ARM::BI__builtin_arm_usat:
return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31);
case ARM::BI__builtin_arm_ssat16:
return SemaBuiltinConstantArgRange(TheCall, 1, 1, 16);
case ARM::BI__builtin_arm_usat16:
return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15);
case ARM::BI__builtin_arm_vcvtr_f: case ARM::BI__builtin_arm_vcvtr_f:
case ARM::BI__builtin_arm_vcvtr_d: i = 1; u = 1; break; case ARM::BI__builtin_arm_vcvtr_d:
return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1);
case ARM::BI__builtin_arm_dmb: case ARM::BI__builtin_arm_dmb:
case ARM::BI__builtin_arm_dsb: case ARM::BI__builtin_arm_dsb:
case ARM::BI__builtin_arm_isb: case ARM::BI__builtin_arm_isb:
case ARM::BI__builtin_arm_dbg: l = 0; u = 15; break; case ARM::BI__builtin_arm_dbg:
return SemaBuiltinConstantArgRange(TheCall, 0, 0, 15);
} }
// FIXME: VFP Intrinsics should error if VFP not present.
return SemaBuiltinConstantArgRange(TheCall, i, l, u + l);
} }
bool Sema::CheckAArch64BuiltinFunctionCall(unsigned BuiltinID, bool Sema::CheckAArch64BuiltinFunctionCall(unsigned BuiltinID,

View File

@ -2,6 +2,8 @@
// RUN: %clang_cc1 -triple armv7 -target-abi apcs-gnu \ // RUN: %clang_cc1 -triple armv7 -target-abi apcs-gnu \
// RUN: -fsyntax-only -verify %s // RUN: -fsyntax-only -verify %s
#include <arm_acle.h>
void f(void *a, void *b) { void f(void *a, void *b) {
__clear_cache(); // expected-error {{too few arguments to function call, expected 2, have 0}} // expected-note {{'__clear_cache' is a builtin with type 'void (void *, void *)}} __clear_cache(); // expected-error {{too few arguments to function call, expected 2, have 0}} // expected-note {{'__clear_cache' is a builtin with type 'void (void *, void *)}}
__clear_cache(a); // expected-error {{too few arguments to function call, expected 2, have 1}} __clear_cache(a); // expected-error {{too few arguments to function call, expected 2, have 1}}
@ -136,3 +138,185 @@ void test6(int a, int b, int c) {
__builtin_arm_mrrc2(15, a, 0); // expected-error {{argument to '__builtin_arm_mrrc2' must be a constant integer}} __builtin_arm_mrrc2(15, a, 0); // expected-error {{argument to '__builtin_arm_mrrc2' must be a constant integer}}
__builtin_arm_mrrc2(15, 0, a); // expected-error {{argument to '__builtin_arm_mrrc2' must be a constant integer}} __builtin_arm_mrrc2(15, 0, a); // expected-error {{argument to '__builtin_arm_mrrc2' must be a constant integer}}
} }
// Sema test for the DSP-style multiplication builtins
// (smulbb, smulbt, smultb, smultt, smulwb, smulwt).
// None of the calls below carries a diagnostic annotation, so every one of
// them has to be accepted without any diagnostic.
// NOTE(review): the "9_3" in the name presumably refers to a section number
// of the ACLE specification -- confirm.
void test_9_3_multiplications(int a, int b) {
int r;
// Each builtin is called once with two variable operands and once with at
// least one integer constant operand (negative constants included).
r = __builtin_arm_smulbb(a, b);
r = __builtin_arm_smulbb(1, -9);
r = __builtin_arm_smulbt(a, b);
r = __builtin_arm_smulbt(0, b);
r = __builtin_arm_smultb(a, b);
r = __builtin_arm_smultb(5, b);
r = __builtin_arm_smultt(a, b);
r = __builtin_arm_smultt(a, -1);
r = __builtin_arm_smulwb(a, b);
r = __builtin_arm_smulwb(1, 2);
r = __builtin_arm_smulwt(a, b);
r = __builtin_arm_smulwt(-1, -2);
// A float literal as first operand must also pass without a diagnostic;
// presumably it is implicitly converted to int -- TODO confirm.
r = __builtin_arm_smulwt(-1.0f, -2);
}
// Sema test for the width-specified saturation builtins __builtin_arm_ssat
// and __builtin_arm_usat. The second argument must be an integer constant
// inside a fixed range: 1 to 32 for ssat, 0 to 31 for usat (see the diagnostic
// texts on the annotated lines below).
void test_9_4_1_width_specified_saturation(int a, int b) {
unsigned u;
int s;
// ssat: a constant first operand is fine; 1 and 32 are the accepted extremes.
s = __builtin_arm_ssat(8, 2);
s = __builtin_arm_ssat(a, 1);
s = __builtin_arm_ssat(a, 32);
// ssat: one below and one above the range are rejected, and so is a
// non-constant second argument.
s = __builtin_arm_ssat(a, 0); // expected-error {{argument should be a value from 1 to 32}}
s = __builtin_arm_ssat(a, 33); // expected-error {{argument should be a value from 1 to 32}}
s = __builtin_arm_ssat(a, b); // expected-error {{argument to '__builtin_arm_ssat' must be a constant integer}}
// usat: 0 and 31 are the accepted extremes.
u = __builtin_arm_usat(8, 2);
u = __builtin_arm_usat(a, 0);
u = __builtin_arm_usat(a, 31);
// usat: one above the range is rejected, and so is a non-constant second
// argument.
u = __builtin_arm_usat(a, 32); // expected-error {{argument should be a value from 0 to 31}}
u = __builtin_arm_usat(a, b); // expected-error {{argument to '__builtin_arm_usat' must be a constant integer}}
}
// Sema test for the saturating add/subtract builtins qadd and qsub and for
// the single-operand qdbl builtin. No call is annotated with a diagnostic,
// so all of them must be accepted, with variable as well as constant operands.
void test_9_4_2_saturating_addition_subtraction(int a, int b) {
int s;
s = __builtin_arm_qadd(a, b);
s = __builtin_arm_qadd(-1, 0);
s = __builtin_arm_qsub(a, b);
s = __builtin_arm_qsub(0, -1);
// qdbl takes exactly one operand.
s = __builtin_arm_qdbl(a);
}
// Sema test for the three-operand accumulating multiplication builtins
// (smlabb, smlabt, smlatb, smlatt, smlawb, smlawt). No call is annotated with
// a diagnostic, so all of them must be accepted.
void test_9_4_3_accumulating_multiplications(int a, int b, int c) {
int s;
// smlabb and smlabt: all-variable operands, then a constant in each of the
// three operand positions in turn.
s = __builtin_arm_smlabb(a, b, c);
s = __builtin_arm_smlabb(1, b, c);
s = __builtin_arm_smlabb(a, 2, c);
s = __builtin_arm_smlabb(a, b, -3);
s = __builtin_arm_smlabt(a, b, c);
s = __builtin_arm_smlabt(1, b, c);
s = __builtin_arm_smlabt(a, 2, c);
s = __builtin_arm_smlabt(a, b, -3);
// The remaining builtins are each called once, reusing one of the operand
// patterns from above.
s = __builtin_arm_smlatb(a, b, c);
s = __builtin_arm_smlatt(1, b, c);
s = __builtin_arm_smlawb(a, 2, c);
s = __builtin_arm_smlawt(a, b, -3);
}
// Sema test for the parallel 16-bit saturation builtins ssat16 and usat16.
// The second argument is range checked: 1 to 16 for ssat16 and 0 to 15 for
// usat16 (see the diagnostic texts on the annotated lines below).
// NOTE(review): int16x2_t is presumably provided by <arm_acle.h> -- confirm.
void test_9_5_4_parallel_16bit_saturation(int16x2_t a) {
unsigned u;
int s;
// ssat16: both ends of the range are accepted, one below / one above are not.
s = __builtin_arm_ssat16(a, 1);
s = __builtin_arm_ssat16(a, 16);
s = __builtin_arm_ssat16(a, 0); // expected-error {{argument should be a value from 1 to 16}}
s = __builtin_arm_ssat16(a, 17); // expected-error {{argument should be a value from 1 to 16}}
// usat16: both ends of the range are accepted, one above is not.
u = __builtin_arm_usat16(a, 0);
u = __builtin_arm_usat16(a, 15);
u = __builtin_arm_usat16(a, 16); // expected-error {{argument should be a value from 0 to 15}}
}
// Sema test for the packing/unpacking builtins sxtab16, sxtb16, uxtab16 and
// uxtb16. No call is annotated with a diagnostic, so all of them must be
// accepted.
// NOTE(review): the int16x2_t / int8x4_t / uint16x2_t / uint8x4_t types are
// presumably provided by <arm_acle.h> -- confirm.
void test_9_5_5_packing_and_unpacking(int16x2_t a, int8x4_t b, uint16x2_t c, uint8x4_t d) {
int16x2_t x;
uint16x2_t y;
// Signed variants: typed operands, plain integer constants, and a negated
// operand are all passed.
x = __builtin_arm_sxtab16(a, b);
x = __builtin_arm_sxtab16(1, -1);
x = __builtin_arm_sxtb16(b);
x = __builtin_arm_sxtb16(-b);
// Unsigned variants: typed operands and negative integer constants.
y = __builtin_arm_uxtab16(c, d);
y = __builtin_arm_uxtab16(-1, -2);
y = __builtin_arm_uxtb16(d);
y = __builtin_arm_uxtb16(-1);
}
// Sema test for the sel builtin: a call with two uint8x4_t operands whose
// result is returned as uint8x4_t must be accepted without a diagnostic.
uint8x4_t
test_9_5_6_parallel_selection(uint8x4_t a, uint8x4_t b) {
return __builtin_arm_sel(a, b);
}
// Sema test for the parallel 8-bit addition and subtraction builtins. No call
// is annotated with a diagnostic, so all of them must be accepted.
void test_9_5_7_parallel_8bit_addition_substraction(int8x4_t a, int8x4_t b,
uint8x4_t c, uint8x4_t d) {
int8x4_t s;
uint8x4_t u;
// Builtins called with int8x4_t operands, result stored as int8x4_t.
s = __builtin_arm_qadd8(a, b);
s = __builtin_arm_qsub8(a, b);
s = __builtin_arm_sadd8(a, b);
s = __builtin_arm_shadd8(a, b);
s = __builtin_arm_shsub8(a, b);
s = __builtin_arm_ssub8(a, b);
// Builtins called with uint8x4_t operands, result stored as uint8x4_t.
u = __builtin_arm_uadd8(c, d);
u = __builtin_arm_uhadd8(c, d);
u = __builtin_arm_uhsub8(c, d);
u = __builtin_arm_uqadd8(c, d);
u = __builtin_arm_uqsub8(c, d);
u = __builtin_arm_usub8(c, d);
}
// Sema test for the usad8 (two operands) and usada8 (two operands plus a
// uint32_t third operand) builtins. Neither call is annotated with a
// diagnostic, so both must be accepted.
void test_9_5_8_absolute_differences(uint8x4_t a, uint8x4_t b, uint32_t c) {
uint32_t r;
r = __builtin_arm_usad8(a, b);
r = __builtin_arm_usada8(a, b, c);
}
// Sema test for the parallel 16-bit addition/subtraction builtins, including
// the *asx / *sax forms. No call is annotated with a diagnostic, so all of
// them must be accepted.
void test_9_5_9_parallel_addition_and_subtraction(int16x2_t a, int16x2_t b,
uint16x2_t c, uint16x2_t d) {
int16x2_t x;
uint16x2_t y;
// Builtins called with int16x2_t operands, result stored as int16x2_t.
x = __builtin_arm_qadd16(a, b);
x = __builtin_arm_qasx(a, b);
x = __builtin_arm_qsax(a, b);
x = __builtin_arm_qsub16(a, b);
x = __builtin_arm_sadd16(a, b);
x = __builtin_arm_sasx(a, b);
x = __builtin_arm_shadd16(a, b);
x = __builtin_arm_shasx(a, b);
x = __builtin_arm_shsax(a, b);
x = __builtin_arm_shsub16(a, b);
x = __builtin_arm_ssax(a, b);
x = __builtin_arm_ssub16(a, b);
// Builtins called with uint16x2_t operands, result stored as uint16x2_t.
y = __builtin_arm_uadd16(c, d);
y = __builtin_arm_uasx(c, d);
y = __builtin_arm_uhadd16(c, d);
y = __builtin_arm_uhasx(c, d);
y = __builtin_arm_uhsax(c, d);
y = __builtin_arm_uhsub16(c, d);
y = __builtin_arm_uqadd16(c, d);
y = __builtin_arm_uqasx(c, d);
y = __builtin_arm_uqsax(c, d);
y = __builtin_arm_uqsub16(c, d);
y = __builtin_arm_usax(c, d);
y = __builtin_arm_usub16(c, d);
}
// Sema test for the parallel 16-bit multiplication builtins. No call is
// annotated with a diagnostic, so all of them must be accepted.
void test_9_5_10_parallel_16bit_multiplication(int16x2_t a, int16x2_t b,
int32_t c, int64_t d) {
int32_t x;
int64_t y;
// The three-operand forms are called with either an int32_t third operand
// (result stored as int32_t) or an int64_t third operand (result stored as
// int64_t).
x = __builtin_arm_smlad(a, b, c);
x = __builtin_arm_smladx(a, b, c);
y = __builtin_arm_smlald(a, b, d);
y = __builtin_arm_smlaldx(a, b, d);
x = __builtin_arm_smlsd(a, b, c);
x = __builtin_arm_smlsdx(a, b, c);
y = __builtin_arm_smlsld(a, b, d);
y = __builtin_arm_smlsldx(a, b, d);
// Two-operand forms, result stored as int32_t.
x = __builtin_arm_smuad(a, b);
x = __builtin_arm_smuadx(a, b);
x = __builtin_arm_smusd(a, b);
x = __builtin_arm_smusdx(a, b);
}