2019-11-29 00:38:01 +08:00
|
|
|
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
|
2020-09-06 20:19:55 +08:00
|
|
|
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
|
|
|
|
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
|
2019-11-29 00:38:01 +08:00
|
|
|
|
|
|
|
#include <arm_mve.h>
|
|
|
|
|
|
|
|
// CHECK-LABEL: @test_vmaxq_s8(
|
|
|
|
// CHECK-NEXT: entry:
|
2020-09-06 20:19:55 +08:00
|
|
|
// CHECK-NEXT: [[TMP0:%.*]] = icmp sge <16 x i8> [[A:%.*]], [[B:%.*]]
|
|
|
|
// CHECK-NEXT: [[TMP1:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> [[A]], <16 x i8> [[B]]
|
2019-11-29 00:38:01 +08:00
|
|
|
// CHECK-NEXT: ret <16 x i8> [[TMP1]]
|
|
|
|
//
|
|
|
|
int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b)
|
|
|
|
{
|
|
|
|
#ifdef POLYMORPHIC
|
|
|
|
return vmaxq(a, b);
|
|
|
|
#else /* POLYMORPHIC */
|
|
|
|
return vmaxq_s8(a, b);
|
|
|
|
#endif /* POLYMORPHIC */
|
|
|
|
}
|
|
|
|
|
|
|
|
// CHECK-LABEL: @test_vmaxq_u16(
|
|
|
|
// CHECK-NEXT: entry:
|
2020-09-06 20:19:55 +08:00
|
|
|
// CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i16> [[A:%.*]], [[B:%.*]]
|
|
|
|
// CHECK-NEXT: [[TMP1:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[A]], <8 x i16> [[B]]
|
2019-11-29 00:38:01 +08:00
|
|
|
// CHECK-NEXT: ret <8 x i16> [[TMP1]]
|
|
|
|
//
|
|
|
|
uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b)
|
|
|
|
{
|
|
|
|
#ifdef POLYMORPHIC
|
|
|
|
return vmaxq(a, b);
|
|
|
|
#else /* POLYMORPHIC */
|
|
|
|
return vmaxq_u16(a, b);
|
|
|
|
#endif /* POLYMORPHIC */
|
|
|
|
}
|
|
|
|
|
|
|
|
// CHECK-LABEL: @test_vmaxq_s32(
|
|
|
|
// CHECK-NEXT: entry:
|
2020-09-06 20:19:55 +08:00
|
|
|
// CHECK-NEXT: [[TMP0:%.*]] = icmp sge <4 x i32> [[A:%.*]], [[B:%.*]]
|
|
|
|
// CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[B]]
|
2019-11-29 00:38:01 +08:00
|
|
|
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
|
|
|
|
//
|
|
|
|
int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b)
|
|
|
|
{
|
|
|
|
#ifdef POLYMORPHIC
|
|
|
|
return vmaxq(a, b);
|
|
|
|
#else /* POLYMORPHIC */
|
|
|
|
return vmaxq_s32(a, b);
|
|
|
|
#endif /* POLYMORPHIC */
|
|
|
|
}
|
|
|
|
|
|
|
|
// CHECK-LABEL: @test_vmaxq_m_u8(
|
|
|
|
// CHECK-NEXT: entry:
|
|
|
|
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
2020-09-06 20:19:55 +08:00
|
|
|
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
|
|
|
|
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.max.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
|
2019-11-29 00:38:01 +08:00
|
|
|
// CHECK-NEXT: ret <16 x i8> [[TMP2]]
|
|
|
|
//
|
|
|
|
uint8x16_t test_vmaxq_m_u8(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p)
|
|
|
|
{
|
|
|
|
#ifdef POLYMORPHIC
|
|
|
|
return vmaxq_m(inactive, a, b, p);
|
|
|
|
#else /* POLYMORPHIC */
|
|
|
|
return vmaxq_m_u8(inactive, a, b, p);
|
|
|
|
#endif /* POLYMORPHIC */
|
|
|
|
}
|
|
|
|
|
|
|
|
// CHECK-LABEL: @test_vmaxq_m_s16(
|
|
|
|
// CHECK-NEXT: entry:
|
|
|
|
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
2020-09-06 20:19:55 +08:00
|
|
|
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
|
|
|
|
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.max.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
|
2019-11-29 00:38:01 +08:00
|
|
|
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
|
|
|
|
//
|
|
|
|
int16x8_t test_vmaxq_m_s16(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p)
|
|
|
|
{
|
|
|
|
#ifdef POLYMORPHIC
|
|
|
|
return vmaxq_m(inactive, a, b, p);
|
|
|
|
#else /* POLYMORPHIC */
|
|
|
|
return vmaxq_m_s16(inactive, a, b, p);
|
|
|
|
#endif /* POLYMORPHIC */
|
|
|
|
}
|
|
|
|
|
|
|
|
// CHECK-LABEL: @test_vmaxq_m_u32(
|
|
|
|
// CHECK-NEXT: entry:
|
|
|
|
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
2020-09-06 20:19:55 +08:00
|
|
|
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
|
|
|
|
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
|
2019-11-29 00:38:01 +08:00
|
|
|
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
|
|
|
|
//
|
|
|
|
uint32x4_t test_vmaxq_m_u32(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p)
|
|
|
|
{
|
|
|
|
#ifdef POLYMORPHIC
|
|
|
|
return vmaxq_m(inactive, a, b, p);
|
|
|
|
#else /* POLYMORPHIC */
|
|
|
|
return vmaxq_m_u32(inactive, a, b, p);
|
|
|
|
#endif /* POLYMORPHIC */
|
|
|
|
}
|
[ARM][MVE][Intrinsics] Add *_x() variants of my *_m() intrinsics.
Summary:
Better use of multiclass is used, and this helped find some existing
bugs in the predicated VMULL* intrinsics, which are now fixed.
The refactored VMULL[TB]Q_(INT|POLY)_M() intrinsics were discovered
to have an argument ("inactive") with incorrect type, and this required
a fix that is included in this whole patch. The argument "inactive"
should have been the same width (per vector element) as the return
type of the intrinsic, but was not in the case where the return type
was double the element width of the input types.
To assist in testing the multiclassing , and to thwart further gremlins,
the unit tests are improved in scope.
The *.ll tests are all generated by a small bit of throw-away scripting
from the corresponding *.c tests, and as such the diffs are large and
nasty. Look at the file rather than the diff.
Reviewers: dmgreen, miyuki, ostannard, simon_tatham
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D71421
2019-12-12 01:53:12 +08:00
|
|
|
|
|
|
|
// CHECK-LABEL: @test_vmaxq_x_u8(
|
|
|
|
// CHECK-NEXT: entry:
|
|
|
|
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
2020-09-06 20:19:55 +08:00
|
|
|
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
|
|
|
|
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.max.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> undef)
|
[ARM][MVE][Intrinsics] Add *_x() variants of my *_m() intrinsics.
Summary:
Better use of multiclass is used, and this helped find some existing
bugs in the predicated VMULL* intrinsics, which are now fixed.
The refactored VMULL[TB]Q_(INT|POLY)_M() intrinsics were discovered
to have an argument ("inactive") with incorrect type, and this required
a fix that is included in this whole patch. The argument "inactive"
should have been the same width (per vector element) as the return
type of the intrinsic, but was not in the case where the return type
was double the element width of the input types.
To assist in testing the multiclassing , and to thwart further gremlins,
the unit tests are improved in scope.
The *.ll tests are all generated by a small bit of throw-away scripting
from the corresponding *.c tests, and as such the diffs are large and
nasty. Look at the file rather than the diff.
Reviewers: dmgreen, miyuki, ostannard, simon_tatham
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D71421
2019-12-12 01:53:12 +08:00
|
|
|
// CHECK-NEXT: ret <16 x i8> [[TMP2]]
|
|
|
|
//
|
[ARM,MVE] Fix many signedness errors in MVE intrinsics.
Summary:
Running an end-to-end test last week I noticed that a lot of the ACLE
intrinsics that operate differently on vectors of signed and unsigned
integers were ending up generating the signed version of the
instruction unconditionally. This is because the IR intrinsics had no
way to distinguish signed from unsigned: the LLVM type system just
calls them both `v8i16` (or whatever), so you need either separate
intrinsics for signed and unsigned, or a flag parameter that tells
ISel which one to choose.
This patch fixes all the problems of that kind that I've noticed, by
adding an i32 flag parameter to many of the IR intrinsics which is set
to 1 for unsigned (matching the existing practice in cases where we
got it right), and conditioning all the isel patterns on that flag. So
the fundamental change is in `IntrinsicsARM.td`, changing the
low-level IR intrinsics API; there are knock-on changes in
`arm_mve.td` (adjusting code gen for the ACLE intrinsics to use the
modified API) and in `ARMInstrMVE.td` (adjusting isel to expect the
new unsigned flags). The rest of this patch is boringly updating tests.
Reviewers: dmgreen, miyuki, MarkMurrayARM
Reviewed By: dmgreen
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D72270
2020-01-07 00:33:14 +08:00
|
|
|
uint8x16_t test_vmaxq_x_u8(uint8x16_t a, uint8x16_t b, mve_pred16_t p)
|
[ARM][MVE][Intrinsics] Add *_x() variants of my *_m() intrinsics.
Summary:
Better use of multiclass is used, and this helped find some existing
bugs in the predicated VMULL* intrinsics, which are now fixed.
The refactored VMULL[TB]Q_(INT|POLY)_M() intrinsics were discovered
to have an argument ("inactive") with incorrect type, and this required
a fix that is included in this whole patch. The argument "inactive"
should have been the same width (per vector element) as the return
type of the intrinsic, but was not in the case where the return type
was double the element width of the input types.
To assist in testing the multiclassing , and to thwart further gremlins,
the unit tests are improved in scope.
The *.ll tests are all generated by a small bit of throw-away scripting
from the corresponding *.c tests, and as such the diffs are large and
nasty. Look at the file rather than the diff.
Reviewers: dmgreen, miyuki, ostannard, simon_tatham
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D71421
2019-12-12 01:53:12 +08:00
|
|
|
{
|
|
|
|
#ifdef POLYMORPHIC
|
|
|
|
return vmaxq_x(a, b, p);
|
|
|
|
#else /* POLYMORPHIC */
|
[ARM,MVE] Fix many signedness errors in MVE intrinsics.
Summary:
Running an end-to-end test last week I noticed that a lot of the ACLE
intrinsics that operate differently on vectors of signed and unsigned
integers were ending up generating the signed version of the
instruction unconditionally. This is because the IR intrinsics had no
way to distinguish signed from unsigned: the LLVM type system just
calls them both `v8i16` (or whatever), so you need either separate
intrinsics for signed and unsigned, or a flag parameter that tells
ISel which one to choose.
This patch fixes all the problems of that kind that I've noticed, by
adding an i32 flag parameter to many of the IR intrinsics which is set
to 1 for unsigned (matching the existing practice in cases where we
got it right), and conditioning all the isel patterns on that flag. So
the fundamental change is in `IntrinsicsARM.td`, changing the
low-level IR intrinsics API; there are knock-on changes in
`arm_mve.td` (adjusting code gen for the ACLE intrinsics to use the
modified API) and in `ARMInstrMVE.td` (adjusting isel to expect the
new unsigned flags). The rest of this patch is boringly updating tests.
Reviewers: dmgreen, miyuki, MarkMurrayARM
Reviewed By: dmgreen
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D72270
2020-01-07 00:33:14 +08:00
|
|
|
return vmaxq_x_u8(a, b, p);
|
[ARM][MVE][Intrinsics] Add *_x() variants of my *_m() intrinsics.
Summary:
Better use of multiclass is used, and this helped find some existing
bugs in the predicated VMULL* intrinsics, which are now fixed.
The refactored VMULL[TB]Q_(INT|POLY)_M() intrinsics were discovered
to have an argument ("inactive") with incorrect type, and this required
a fix that is included in this whole patch. The argument "inactive"
should have been the same width (per vector element) as the return
type of the intrinsic, but was not in the case where the return type
was double the element width of the input types.
To assist in testing the multiclassing , and to thwart further gremlins,
the unit tests are improved in scope.
The *.ll tests are all generated by a small bit of throw-away scripting
from the corresponding *.c tests, and as such the diffs are large and
nasty. Look at the file rather than the diff.
Reviewers: dmgreen, miyuki, ostannard, simon_tatham
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D71421
2019-12-12 01:53:12 +08:00
|
|
|
#endif /* POLYMORPHIC */
|
|
|
|
}
|
|
|
|
|
|
|
|
// CHECK-LABEL: @test_vmaxq_x_u16(
|
|
|
|
// CHECK-NEXT: entry:
|
|
|
|
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
2020-09-06 20:19:55 +08:00
|
|
|
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
|
|
|
|
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.max.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> undef)
|
[ARM][MVE][Intrinsics] Add *_x() variants of my *_m() intrinsics.
Summary:
Better use of multiclass is used, and this helped find some existing
bugs in the predicated VMULL* intrinsics, which are now fixed.
The refactored VMULL[TB]Q_(INT|POLY)_M() intrinsics were discovered
to have an argument ("inactive") with incorrect type, and this required
a fix that is included in this whole patch. The argument "inactive"
should have been the same width (per vector element) as the return
type of the intrinsic, but was not in the case where the return type
was double the element width of the input types.
To assist in testing the multiclassing , and to thwart further gremlins,
the unit tests are improved in scope.
The *.ll tests are all generated by a small bit of throw-away scripting
from the corresponding *.c tests, and as such the diffs are large and
nasty. Look at the file rather than the diff.
Reviewers: dmgreen, miyuki, ostannard, simon_tatham
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D71421
2019-12-12 01:53:12 +08:00
|
|
|
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
|
|
|
|
//
|
|
|
|
uint16x8_t test_vmaxq_x_u16(uint16x8_t a, uint16x8_t b, mve_pred16_t p)
|
|
|
|
{
|
|
|
|
#ifdef POLYMORPHIC
|
|
|
|
return vmaxq_x(a, b, p);
|
|
|
|
#else /* POLYMORPHIC */
|
|
|
|
return vmaxq_x_u16(a, b, p);
|
|
|
|
#endif /* POLYMORPHIC */
|
|
|
|
}
|
|
|
|
|
|
|
|
// CHECK-LABEL: @test_vmaxq_x_s32(
|
|
|
|
// CHECK-NEXT: entry:
|
|
|
|
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
2020-09-06 20:19:55 +08:00
|
|
|
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
|
|
|
|
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> undef)
|
[ARM][MVE][Intrinsics] Add *_x() variants of my *_m() intrinsics.
Summary:
Better use of multiclass is used, and this helped find some existing
bugs in the predicated VMULL* intrinsics, which are now fixed.
The refactored VMULL[TB]Q_(INT|POLY)_M() intrinsics were discovered
to have an argument ("inactive") with incorrect type, and this required
a fix that is included in this whole patch. The argument "inactive"
should have been the same width (per vector element) as the return
type of the intrinsic, but was not in the case where the return type
was double the element width of the input types.
To assist in testing the multiclassing , and to thwart further gremlins,
the unit tests are improved in scope.
The *.ll tests are all generated by a small bit of throw-away scripting
from the corresponding *.c tests, and as such the diffs are large and
nasty. Look at the file rather than the diff.
Reviewers: dmgreen, miyuki, ostannard, simon_tatham
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D71421
2019-12-12 01:53:12 +08:00
|
|
|
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
|
|
|
|
//
|
|
|
|
int32x4_t test_vmaxq_x_s32(int32x4_t a, int32x4_t b, mve_pred16_t p)
|
|
|
|
{
|
|
|
|
#ifdef POLYMORPHIC
|
|
|
|
return vmaxq_x(a, b, p);
|
|
|
|
#else /* POLYMORPHIC */
|
[ARM,MVE] Fix many signedness errors in MVE intrinsics.
Summary:
Running an end-to-end test last week I noticed that a lot of the ACLE
intrinsics that operate differently on vectors of signed and unsigned
integers were ending up generating the signed version of the
instruction unconditionally. This is because the IR intrinsics had no
way to distinguish signed from unsigned: the LLVM type system just
calls them both `v8i16` (or whatever), so you need either separate
intrinsics for signed and unsigned, or a flag parameter that tells
ISel which one to choose.
This patch fixes all the problems of that kind that I've noticed, by
adding an i32 flag parameter to many of the IR intrinsics which is set
to 1 for unsigned (matching the existing practice in cases where we
got it right), and conditioning all the isel patterns on that flag. So
the fundamental change is in `IntrinsicsARM.td`, changing the
low-level IR intrinsics API; there are knock-on changes in
`arm_mve.td` (adjusting code gen for the ACLE intrinsics to use the
modified API) and in `ARMInstrMVE.td` (adjusting isel to expect the
new unsigned flags). The rest of this patch is boringly updating tests.
Reviewers: dmgreen, miyuki, MarkMurrayARM
Reviewed By: dmgreen
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D72270
2020-01-07 00:33:14 +08:00
|
|
|
return vmaxq_x_s32(a, b, p);
|
[ARM][MVE][Intrinsics] Add *_x() variants of my *_m() intrinsics.
Summary:
Better use of multiclass is used, and this helped find some existing
bugs in the predicated VMULL* intrinsics, which are now fixed.
The refactored VMULL[TB]Q_(INT|POLY)_M() intrinsics were discovered
to have an argument ("inactive") with incorrect type, and this required
a fix that is included in this whole patch. The argument "inactive"
should have been the same width (per vector element) as the return
type of the intrinsic, but was not in the case where the return type
was double the element width of the input types.
To assist in testing the multiclassing , and to thwart further gremlins,
the unit tests are improved in scope.
The *.ll tests are all generated by a small bit of throw-away scripting
from the corresponding *.c tests, and as such the diffs are large and
nasty. Look at the file rather than the diff.
Reviewers: dmgreen, miyuki, ostannard, simon_tatham
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D71421
2019-12-12 01:53:12 +08:00
|
|
|
#endif /* POLYMORPHIC */
|
|
|
|
}
|