2019-11-15 19:30:15 +08:00
|
|
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
|
|
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
|
|
|
|
|
|
|
|
define arm_aapcs_vfpcc <16 x i8> @test_vorrq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
|
|
|
|
; CHECK-LABEL: test_vorrq_u8:
|
|
|
|
; CHECK: @ %bb.0: @ %entry
|
|
|
|
; CHECK-NEXT: vorr q0, q1, q0
|
|
|
|
; CHECK-NEXT: bx lr
|
|
|
|
entry:
|
|
|
|
%0 = or <16 x i8> %b, %a
|
|
|
|
ret <16 x i8> %0
|
|
|
|
}
|
|
|
|
|
[ARM][MVE][Intrinsics] Add *_x() variants of my *_m() intrinsics.
Summary:
Better use of multiclass is used, and this helped find some existing
bugs in the predicated VMULL* intrinsics, which are now fixed.
The refactored VMULL[TB]Q_(INT|POLY)_M() intrinsics were discovered
to have an argument ("inactive") with incorrect type, and this required
a fix that is included in this whole patch. The argument "inactive"
should have been the same width (per vector element) as the return
type of the intrinsic, but was not in the case where the return type
was double the element width of the input types.
To assist in testing the multiclassing , and to thwart further gremlins,
the unit tests are improved in scope.
The *.ll tests are all generated by a small bit of throw-away scripting
from the corresponding *.c tests, and as such the diffs are large and
nasty. Look at the file rather than the diff.
Reviewers: dmgreen, miyuki, ostannard, simon_tatham
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D71421
2019-12-12 01:53:12 +08:00
|
|
|
define arm_aapcs_vfpcc <8 x i16> @test_vorrq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {
|
|
|
|
; CHECK-LABEL: test_vorrq_s16:
|
2019-11-15 19:30:15 +08:00
|
|
|
; CHECK: @ %bb.0: @ %entry
|
|
|
|
; CHECK-NEXT: vorr q0, q1, q0
|
|
|
|
; CHECK-NEXT: bx lr
|
|
|
|
entry:
|
[ARM][MVE][Intrinsics] Add *_x() variants of my *_m() intrinsics.
Summary:
Better use of multiclass is used, and this helped find some existing
bugs in the predicated VMULL* intrinsics, which are now fixed.
The refactored VMULL[TB]Q_(INT|POLY)_M() intrinsics were discovered
to have an argument ("inactive") with incorrect type, and this required
a fix that is included in this whole patch. The argument "inactive"
should have been the same width (per vector element) as the return
type of the intrinsic, but was not in the case where the return type
was double the element width of the input types.
To assist in testing the multiclassing , and to thwart further gremlins,
the unit tests are improved in scope.
The *.ll tests are all generated by a small bit of throw-away scripting
from the corresponding *.c tests, and as such the diffs are large and
nasty. Look at the file rather than the diff.
Reviewers: dmgreen, miyuki, ostannard, simon_tatham
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D71421
2019-12-12 01:53:12 +08:00
|
|
|
%0 = or <8 x i16> %b, %a
|
|
|
|
ret <8 x i16> %0
|
2019-11-15 19:30:15 +08:00
|
|
|
}
|
|
|
|
|
[ARM][MVE][Intrinsics] Add *_x() variants of my *_m() intrinsics.
Summary:
Better use of multiclass is used, and this helped find some existing
bugs in the predicated VMULL* intrinsics, which are now fixed.
The refactored VMULL[TB]Q_(INT|POLY)_M() intrinsics were discovered
to have an argument ("inactive") with incorrect type, and this required
a fix that is included in this whole patch. The argument "inactive"
should have been the same width (per vector element) as the return
type of the intrinsic, but was not in the case where the return type
was double the element width of the input types.
To assist in testing the multiclassing , and to thwart further gremlins,
the unit tests are improved in scope.
The *.ll tests are all generated by a small bit of throw-away scripting
from the corresponding *.c tests, and as such the diffs are large and
nasty. Look at the file rather than the diff.
Reviewers: dmgreen, miyuki, ostannard, simon_tatham
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D71421
2019-12-12 01:53:12 +08:00
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vorrq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 {
|
|
|
|
; CHECK-LABEL: test_vorrq_u32:
|
2019-11-15 19:30:15 +08:00
|
|
|
; CHECK: @ %bb.0: @ %entry
|
|
|
|
; CHECK-NEXT: vorr q0, q1, q0
|
|
|
|
; CHECK-NEXT: bx lr
|
|
|
|
entry:
|
[ARM][MVE][Intrinsics] Add *_x() variants of my *_m() intrinsics.
Summary:
Better use of multiclass is used, and this helped find some existing
bugs in the predicated VMULL* intrinsics, which are now fixed.
The refactored VMULL[TB]Q_(INT|POLY)_M() intrinsics were discovered
to have an argument ("inactive") with incorrect type, and this required
a fix that is included in this whole patch. The argument "inactive"
should have been the same width (per vector element) as the return
type of the intrinsic, but was not in the case where the return type
was double the element width of the input types.
To assist in testing the multiclassing , and to thwart further gremlins,
the unit tests are improved in scope.
The *.ll tests are all generated by a small bit of throw-away scripting
from the corresponding *.c tests, and as such the diffs are large and
nasty. Look at the file rather than the diff.
Reviewers: dmgreen, miyuki, ostannard, simon_tatham
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D71421
2019-12-12 01:53:12 +08:00
|
|
|
%0 = or <4 x i32> %b, %a
|
|
|
|
ret <4 x i32> %0
|
2019-11-15 19:30:15 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
define arm_aapcs_vfpcc <4 x float> @test_vorrq_f32(<4 x float> %a, <4 x float> %b) local_unnamed_addr #0 {
|
|
|
|
; CHECK-LABEL: test_vorrq_f32:
|
|
|
|
; CHECK: @ %bb.0: @ %entry
|
|
|
|
; CHECK-NEXT: vorr q0, q1, q0
|
|
|
|
; CHECK-NEXT: bx lr
|
|
|
|
entry:
|
|
|
|
%0 = bitcast <4 x float> %a to <4 x i32>
|
|
|
|
%1 = bitcast <4 x float> %b to <4 x i32>
|
|
|
|
%2 = or <4 x i32> %1, %0
|
|
|
|
%3 = bitcast <4 x i32> %2 to <4 x float>
|
|
|
|
ret <4 x float> %3
|
|
|
|
}
|
|
|
|
|
|
|
|
define arm_aapcs_vfpcc <16 x i8> @test_vorrq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 {
|
|
|
|
; CHECK-LABEL: test_vorrq_m_s8:
|
|
|
|
; CHECK: @ %bb.0: @ %entry
|
|
|
|
; CHECK-NEXT: vmsr p0, r0
|
|
|
|
; CHECK-NEXT: vpst
|
|
|
|
; CHECK-NEXT: vorrt q0, q1, q2
|
|
|
|
; CHECK-NEXT: bx lr
|
|
|
|
entry:
|
|
|
|
%0 = zext i16 %p to i32
|
|
|
|
%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
|
|
|
|
%2 = tail call <16 x i8> @llvm.arm.mve.orr.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive)
|
|
|
|
ret <16 x i8> %2
|
|
|
|
}
|
|
|
|
|
|
|
|
declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2
|
|
|
|
|
|
|
|
declare <16 x i8> @llvm.arm.mve.orr.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2
|
|
|
|
|
|
|
|
define arm_aapcs_vfpcc <8 x i16> @test_vorrq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 {
|
|
|
|
; CHECK-LABEL: test_vorrq_m_u16:
|
|
|
|
; CHECK: @ %bb.0: @ %entry
|
|
|
|
; CHECK-NEXT: vmsr p0, r0
|
|
|
|
; CHECK-NEXT: vpst
|
|
|
|
; CHECK-NEXT: vorrt q0, q1, q2
|
|
|
|
; CHECK-NEXT: bx lr
|
|
|
|
entry:
|
|
|
|
%0 = zext i16 %p to i32
|
|
|
|
%1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
|
|
|
%2 = tail call <8 x i16> @llvm.arm.mve.orr.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive)
|
|
|
|
ret <8 x i16> %2
|
|
|
|
}
|
|
|
|
|
|
|
|
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2
|
|
|
|
|
|
|
|
declare <8 x i16> @llvm.arm.mve.orr.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2
|
|
|
|
|
[ARM][MVE][Intrinsics] Add *_x() variants of my *_m() intrinsics.
Summary:
Better use of multiclass is used, and this helped find some existing
bugs in the predicated VMULL* intrinsics, which are now fixed.
The refactored VMULL[TB]Q_(INT|POLY)_M() intrinsics were discovered
to have an argument ("inactive") with incorrect type, and this required
a fix that is included in this whole patch. The argument "inactive"
should have been the same width (per vector element) as the return
type of the intrinsic, but was not in the case where the return type
was double the element width of the input types.
To assist in testing the multiclassing , and to thwart further gremlins,
the unit tests are improved in scope.
The *.ll tests are all generated by a small bit of throw-away scripting
from the corresponding *.c tests, and as such the diffs are large and
nasty. Look at the file rather than the diff.
Reviewers: dmgreen, miyuki, ostannard, simon_tatham
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D71421
2019-12-12 01:53:12 +08:00
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vorrq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #1 {
|
|
|
|
; CHECK-LABEL: test_vorrq_m_s32:
|
|
|
|
; CHECK: @ %bb.0: @ %entry
|
|
|
|
; CHECK-NEXT: vmsr p0, r0
|
|
|
|
; CHECK-NEXT: vpst
|
|
|
|
; CHECK-NEXT: vorrt q0, q1, q2
|
|
|
|
; CHECK-NEXT: bx lr
|
|
|
|
entry:
|
|
|
|
%0 = zext i16 %p to i32
|
|
|
|
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
|
|
%2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive)
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
|
|
|
|
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2
|
|
|
|
|
|
|
|
declare <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2
|
|
|
|
|
|
|
|
define arm_aapcs_vfpcc <8 x half> @test_vorrq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) local_unnamed_addr #1 {
|
|
|
|
; CHECK-LABEL: test_vorrq_m_f16:
|
2019-11-15 19:30:15 +08:00
|
|
|
; CHECK: @ %bb.0: @ %entry
|
|
|
|
; CHECK-NEXT: vmsr p0, r0
|
|
|
|
; CHECK-NEXT: vpst
|
|
|
|
; CHECK-NEXT: vorrt q0, q1, q2
|
|
|
|
; CHECK-NEXT: bx lr
|
[ARM][MVE][Intrinsics] Add *_x() variants of my *_m() intrinsics.
Summary:
Better use of multiclass is used, and this helped find some existing
bugs in the predicated VMULL* intrinsics, which are now fixed.
The refactored VMULL[TB]Q_(INT|POLY)_M() intrinsics were discovered
to have an argument ("inactive") with incorrect type, and this required
a fix that is included in this whole patch. The argument "inactive"
should have been the same width (per vector element) as the return
type of the intrinsic, but was not in the case where the return type
was double the element width of the input types.
To assist in testing the multiclassing , and to thwart further gremlins,
the unit tests are improved in scope.
The *.ll tests are all generated by a small bit of throw-away scripting
from the corresponding *.c tests, and as such the diffs are large and
nasty. Look at the file rather than the diff.
Reviewers: dmgreen, miyuki, ostannard, simon_tatham
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D71421
2019-12-12 01:53:12 +08:00
|
|
|
entry:
|
|
|
|
%0 = bitcast <8 x half> %a to <8 x i16>
|
|
|
|
%1 = bitcast <8 x half> %b to <8 x i16>
|
|
|
|
%2 = zext i16 %p to i32
|
|
|
|
%3 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)
|
|
|
|
%4 = bitcast <8 x half> %inactive to <8 x i16>
|
|
|
|
%5 = tail call <8 x i16> @llvm.arm.mve.orr.predicated.v8i16.v8i1(<8 x i16> %0, <8 x i16> %1, <8 x i1> %3, <8 x i16> %4)
|
|
|
|
%6 = bitcast <8 x i16> %5 to <8 x half>
|
|
|
|
ret <8 x half> %6
|
|
|
|
}
|
|
|
|
|
|
|
|
define arm_aapcs_vfpcc <16 x i8> @test_vorrq_x_u8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 {
|
|
|
|
; CHECK-LABEL: test_vorrq_x_u8:
|
|
|
|
; CHECK: @ %bb.0: @ %entry
|
|
|
|
; CHECK-NEXT: vmsr p0, r0
|
|
|
|
; CHECK-NEXT: vpst
|
|
|
|
; CHECK-NEXT: vorrt q0, q0, q1
|
|
|
|
; CHECK-NEXT: bx lr
|
|
|
|
entry:
|
|
|
|
%0 = zext i16 %p to i32
|
|
|
|
%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
|
|
|
|
%2 = tail call <16 x i8> @llvm.arm.mve.orr.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> undef)
|
|
|
|
ret <16 x i8> %2
|
|
|
|
}
|
|
|
|
|
|
|
|
define arm_aapcs_vfpcc <8 x i16> @test_vorrq_x_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 {
|
|
|
|
; CHECK-LABEL: test_vorrq_x_s16:
|
|
|
|
; CHECK: @ %bb.0: @ %entry
|
|
|
|
; CHECK-NEXT: vmsr p0, r0
|
|
|
|
; CHECK-NEXT: vpst
|
|
|
|
; CHECK-NEXT: vorrt q0, q0, q1
|
|
|
|
; CHECK-NEXT: bx lr
|
|
|
|
entry:
|
|
|
|
%0 = zext i16 %p to i32
|
|
|
|
%1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
|
|
|
%2 = tail call <8 x i16> @llvm.arm.mve.orr.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> undef)
|
|
|
|
ret <8 x i16> %2
|
|
|
|
}
|
|
|
|
|
|
|
|
define arm_aapcs_vfpcc <4 x i32> @test_vorrq_x_u32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #1 {
|
|
|
|
; CHECK-LABEL: test_vorrq_x_u32:
|
|
|
|
; CHECK: @ %bb.0: @ %entry
|
|
|
|
; CHECK-NEXT: vmsr p0, r0
|
|
|
|
; CHECK-NEXT: vpst
|
|
|
|
; CHECK-NEXT: vorrt q0, q0, q1
|
|
|
|
; CHECK-NEXT: bx lr
|
|
|
|
entry:
|
|
|
|
%0 = zext i16 %p to i32
|
|
|
|
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
|
|
|
%2 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> undef)
|
|
|
|
ret <4 x i32> %2
|
|
|
|
}
|
|
|
|
|
|
|
|
define arm_aapcs_vfpcc <4 x float> @test_vorrq_m_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #1 {
|
|
|
|
; CHECK-LABEL: test_vorrq_m_f32:
|
|
|
|
; CHECK: @ %bb.0: @ %entry
|
|
|
|
; CHECK-NEXT: vmsr p0, r0
|
|
|
|
; CHECK-NEXT: vpst
|
|
|
|
; CHECK-NEXT: vorrt q0, q0, q1
|
|
|
|
; CHECK-NEXT: bx lr
|
2019-11-15 19:30:15 +08:00
|
|
|
entry:
|
|
|
|
%0 = bitcast <4 x float> %a to <4 x i32>
|
|
|
|
%1 = bitcast <4 x float> %b to <4 x i32>
|
|
|
|
%2 = zext i16 %p to i32
|
|
|
|
%3 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2)
|
[ARM][MVE][Intrinsics] Add *_x() variants of my *_m() intrinsics.
Summary:
Better use of multiclass is used, and this helped find some existing
bugs in the predicated VMULL* intrinsics, which are now fixed.
The refactored VMULL[TB]Q_(INT|POLY)_M() intrinsics were discovered
to have an argument ("inactive") with incorrect type, and this required
a fix that is included in this whole patch. The argument "inactive"
should have been the same width (per vector element) as the return
type of the intrinsic, but was not in the case where the return type
was double the element width of the input types.
To assist in testing the multiclassing , and to thwart further gremlins,
the unit tests are improved in scope.
The *.ll tests are all generated by a small bit of throw-away scripting
from the corresponding *.c tests, and as such the diffs are large and
nasty. Look at the file rather than the diff.
Reviewers: dmgreen, miyuki, ostannard, simon_tatham
Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D71421
2019-12-12 01:53:12 +08:00
|
|
|
%4 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %0, <4 x i32> %1, <4 x i1> %3, <4 x i32> undef)
|
|
|
|
%5 = bitcast <4 x i32> %4 to <4 x float>
|
|
|
|
ret <4 x float> %5
|
2019-11-15 19:30:15 +08:00
|
|
|
}
|
|
|
|
|