[clang,ARM] Initial ACLE intrinsics for MVE.
This commit sets up the infrastructure for auto-generating <arm_mve.h>
and doing clang-side code generation for the builtins it relies on,
and demonstrates that it works by implementing a representative sample
of the ACLE intrinsics, more or less matching the ones introduced in
LLVM IR by D67158, D68699 and D68700.
Like NEON, that header file will provide a set of vector types like
uint16x8_t and C functions with names like vaddq_u32(). Unlike NEON,
the ACLE spec for <arm_mve.h> includes a polymorphism system, so that
you can write plain vaddq() and disambiguate by the vector types you
pass to it.
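For instance (a minimal sketch, assuming an MVE-enabled compile and the
generated <arm_mve.h>; the float overload additionally needs the MVE-FP
extension), the same polymorphic call resolves to a different suffixed
intrinsic depending on its argument types:

    #include <arm_mve.h>

    uint32x4_t add_u32(uint32x4_t a, uint32x4_t b) {
        return vaddq(a, b);   /* resolves to vaddq_u32 */
    }

    float32x4_t add_f32(float32x4_t a, float32x4_t b) {
        return vaddq(a, b);   /* resolves to vaddq_f32 */
    }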
Unlike the corresponding NEON code, I've arranged to make every user-
facing ACLE intrinsic into a clang builtin, and implement all the code
generation inside clang. So <arm_mve.h> itself contains nothing but
typedefs and function declarations, with the latter all using the new
`__attribute__((__clang_arm_mve_alias))` system to arrange that the
user-facing function names correspond to the right internal BuiltinIDs.
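(To sketch the shape of those declarations: the following is illustrative
only, not the literal generated header text, and the builtin name is
merely indicative.

    static __inline__
    __attribute__((__clang_arm_mve_alias(__builtin_arm_mve_vaddq_u32)))
    uint32x4_t vaddq_u32(uint32x4_t, uint32x4_t);

The polymorphic vaddq is declared the same way, with
__attribute__((overloadable)) added so that ordinary clang overload
resolution, described below, chooses between the suffixed variants.)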
So the new MveEmitter tablegen system specifies the full sequence of
IRBuilder operations that each user-facing ACLE intrinsic should
translate into. Where possible, the ACLE intrinsics map to standard IR
operations such as vector-typed `add` and `fadd`; where no standard
representation exists, I call down to the sample IR intrinsics
introduced in an earlier commit.
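For example (illustrative IR only, with invented value names), vaddq_u32
can lower to an ordinary vector add, whereas the carry-chaining vadcq has
no standard IR equivalent and calls down to the target intrinsic instead:

    %sum  = add <4 x i32> %a, %b
    %pair = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32> %a, <4 x i32> %b, i32 %carry_in)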
Doing it like this means that you get the polymorphism for free just
by using __attribute__((overloadable)): the clang overload resolution
decides which function declaration is the relevant one, and _then_ its
BuiltinID is looked up, so by the time we're doing code generation,
that's all been resolved by the standard system. It also means that
you get really nice error messages if the user passes the wrong
combination of types: clang will show the declarations from the header
file and explain why each one doesn't match.
(The obvious alternative approach would be to have wrapper functions
in <arm_mve.h> which pass their arguments to the underlying builtins.
But that doesn't work in the case where one of the arguments has to be
a constant integer: the wrapper function can't pass the constantness
through. So you'd have to do that case using a macro instead, and then
use C11 `_Generic` to handle the polymorphism. Then you have to add
horrible workarounds because `_Generic` requires even the untaken
branches to type-check successfully, and //then// if the user gets the
types wrong, the error message is totally unreadable!)
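For concreteness, here is a deliberately simplified sketch of that
rejected macro-plus-_Generic approach (illustrative only, not proposed
code). It already exhibits the problem described above: every branch of
the _Generic must type-check even when it is not selected, so a wrong
argument type leads straight to the workarounds and unreadable errors in
question.

    #define vaddq(a, b)                           \
        _Generic((a),                             \
                 uint32x4_t: vaddq_u32((a), (b)), \
                 int32x4_t:  vaddq_s32((a), (b)))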
Reviewers: dmgreen, miyuki, ostannard
Subscribers: mgorny, javed.absar, kristof.beyls, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D67161
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
#include <arm_mve.h>

// CHECK-LABEL: @test_vadciq_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 1
// CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 29
// CHECK-NEXT: [[TMP3:%.*]] = and i32 1, [[TMP2]]
// CHECK-NEXT: store i32 [[TMP3]], i32* [[CARRY_OUT:%.*]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP4]]
//
int32x4_t test_vadciq_s32(int32x4_t a, int32x4_t b, unsigned *carry_out)
{
#ifdef POLYMORPHIC
  return vadciq(a, b, carry_out);
#else /* POLYMORPHIC */
  return vadciq_s32(a, b, carry_out);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vadcq_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CARRY:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 29
// CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 1
// CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 29
// CHECK-NEXT: [[TMP5:%.*]] = and i32 1, [[TMP4]]
// CHECK-NEXT: store i32 [[TMP5]], i32* [[CARRY]], align 4
// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP6]]
//
uint32x4_t test_vadcq_u32(uint32x4_t a, uint32x4_t b, unsigned *carry)
{
#ifdef POLYMORPHIC
  return vadcq(a, b, carry);
#else /* POLYMORPHIC */
  return vadcq_u32(a, b, carry);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vadciq_m_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 1
// CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 29
// CHECK-NEXT: [[TMP5:%.*]] = and i32 1, [[TMP4]]
// CHECK-NEXT: store i32 [[TMP5]], i32* [[CARRY_OUT:%.*]], align 4
// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP6]]
//
uint32x4_t test_vadciq_m_u32(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, unsigned *carry_out, mve_pred16_t p)
{
#ifdef POLYMORPHIC
  return vadciq_m(inactive, a, b, carry_out, p);
#else /* POLYMORPHIC */
  return vadciq_m_u32(inactive, a, b, carry_out, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vadcq_m_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CARRY:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 29
// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[TMP1]], <4 x i1> [[TMP3]])
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP4]], 1
// CHECK-NEXT: [[TMP6:%.*]] = lshr i32 [[TMP5]], 29
// CHECK-NEXT: [[TMP7:%.*]] = and i32 1, [[TMP6]]
// CHECK-NEXT: store i32 [[TMP7]], i32* [[CARRY]], align 4
// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP4]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP8]]
//
int32x4_t test_vadcq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, unsigned *carry, mve_pred16_t p)
{
#ifdef POLYMORPHIC
  return vadcq_m(inactive, a, b, carry, p);
#else /* POLYMORPHIC */
  return vadcq_m_s32(inactive, a, b, carry, p);
#endif /* POLYMORPHIC */
}

[ARM,MVE] Add the `vsbciq` intrinsics.
Summary:
These are exactly parallel to the existing `vadciq` intrinsics, which
we implemented last year as part of the original MVE intrinsics
framework setup.
Just like VADC/VADCI, the MVE VSBC/VSBCI instructions deliver two
outputs, both of which the intrinsic exposes: a modified vector
register and a carry flag. So they have to be instruction-selected in
C++ rather than Tablegen. However, in this case, that's trivial: the
same C++ isel routine we already have for VADC works unchanged, and
all we have to do is to pass it a different instruction id.
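In C terms (a minimal usage sketch, mirroring the tests below), both
outputs are visible to the caller: the subtracted vector is returned and
the carry flag is written through the pointer argument.

    uint32x4_t sub_with_borrow(uint32x4_t a, uint32x4_t b, unsigned *carry_out)
    {
        /* returns the vector result; writes the borrow/carry bit to *carry_out */
        return vsbciq_u32(a, b, carry_out);
    }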
Reviewers: MarkMurrayARM, dmgreen, miyuki, ostannard
Reviewed By: miyuki
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D75444

// CHECK-LABEL: @test_vsbciq_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 1
// CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 29
// CHECK-NEXT: [[TMP3:%.*]] = and i32 1, [[TMP2]]
// CHECK-NEXT: store i32 [[TMP3]], i32* [[CARRY_OUT:%.*]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP4]]
//
int32x4_t test_vsbciq_s32(int32x4_t a, int32x4_t b, unsigned *carry_out) {
#ifdef POLYMORPHIC
  return vsbciq(a, b, carry_out);
#else /* POLYMORPHIC */
  return vsbciq_s32(a, b, carry_out);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vsbciq_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 1
// CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 29
// CHECK-NEXT: [[TMP3:%.*]] = and i32 1, [[TMP2]]
// CHECK-NEXT: store i32 [[TMP3]], i32* [[CARRY_OUT:%.*]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP4]]
//
uint32x4_t test_vsbciq_u32(uint32x4_t a, uint32x4_t b, unsigned *carry_out) {
#ifdef POLYMORPHIC
  return vsbciq(a, b, carry_out);
#else /* POLYMORPHIC */
  return vsbciq_u32(a, b, carry_out);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vsbcq_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CARRY:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 29
// CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 1
// CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 29
// CHECK-NEXT: [[TMP5:%.*]] = and i32 1, [[TMP4]]
// CHECK-NEXT: store i32 [[TMP5]], i32* [[CARRY]], align 4
// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP6]]
//
int32x4_t test_vsbcq_s32(int32x4_t a, int32x4_t b, unsigned *carry) {
#ifdef POLYMORPHIC
  return vsbcq(a, b, carry);
#else /* POLYMORPHIC */
  return vsbcq_s32(a, b, carry);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vsbcq_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CARRY:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 29
// CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 1
// CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 29
// CHECK-NEXT: [[TMP5:%.*]] = and i32 1, [[TMP4]]
// CHECK-NEXT: store i32 [[TMP5]], i32* [[CARRY]], align 4
// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP6]]
//
uint32x4_t test_vsbcq_u32(uint32x4_t a, uint32x4_t b, unsigned *carry) {
#ifdef POLYMORPHIC
  return vsbcq(a, b, carry);
#else /* POLYMORPHIC */
  return vsbcq_u32(a, b, carry);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vsbciq_m_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 1
// CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 29
// CHECK-NEXT: [[TMP5:%.*]] = and i32 1, [[TMP4]]
// CHECK-NEXT: store i32 [[TMP5]], i32* [[CARRY_OUT:%.*]], align 4
// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP6]]
//
int32x4_t test_vsbciq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, unsigned *carry_out, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vsbciq_m(inactive, a, b, carry_out, p);
#else /* POLYMORPHIC */
  return vsbciq_m_s32(inactive, a, b, carry_out, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vsbciq_m_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 1
// CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 29
// CHECK-NEXT: [[TMP5:%.*]] = and i32 1, [[TMP4]]
// CHECK-NEXT: store i32 [[TMP5]], i32* [[CARRY_OUT:%.*]], align 4
// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP6]]
//
uint32x4_t test_vsbciq_m_u32(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, unsigned *carry_out, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vsbciq_m(inactive, a, b, carry_out, p);
#else /* POLYMORPHIC */
  return vsbciq_m_u32(inactive, a, b, carry_out, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vsbcq_m_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CARRY:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 29
// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[TMP1]], <4 x i1> [[TMP3]])
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP4]], 1
// CHECK-NEXT: [[TMP6:%.*]] = lshr i32 [[TMP5]], 29
// CHECK-NEXT: [[TMP7:%.*]] = and i32 1, [[TMP6]]
// CHECK-NEXT: store i32 [[TMP7]], i32* [[CARRY]], align 4
// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP4]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP8]]
//
int32x4_t test_vsbcq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, unsigned *carry, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vsbcq_m(inactive, a, b, carry, p);
#else /* POLYMORPHIC */
  return vsbcq_m_s32(inactive, a, b, carry, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vsbcq_m_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CARRY:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 29
// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[TMP1]], <4 x i1> [[TMP3]])
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP4]], 1
// CHECK-NEXT: [[TMP6:%.*]] = lshr i32 [[TMP5]], 29
// CHECK-NEXT: [[TMP7:%.*]] = and i32 1, [[TMP6]]
// CHECK-NEXT: store i32 [[TMP7]], i32* [[CARRY]], align 4
// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP4]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP8]]
//
uint32x4_t test_vsbcq_m_u32(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, unsigned *carry, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vsbcq_m(inactive, a, b, carry, p);
#else /* POLYMORPHIC */
  return vsbcq_m_u32(inactive, a, b, carry, p);
#endif /* POLYMORPHIC */
}