forked from OSchip/llvm-project
[ARM] FP16: support vector zip and unzip
This addresses PR38404 by selecting the 16-bit VZIP/VUZP instructions for the v4f16 and v8f16 vector types.
Differential Revision: https://reviews.llvm.org/D50186
llvm-svn: 338835
This commit is contained in:
parent
2c4dcf0576
commit
d62c5ec2fe
|
@@ -3030,11 +3030,13 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
|
|||
switch (VT.getSimpleVT().SimpleTy) {
|
||||
default: return;
|
||||
case MVT::v8i8: Opc = ARM::VZIPd8; break;
|
||||
case MVT::v4f16:
|
||||
case MVT::v4i16: Opc = ARM::VZIPd16; break;
|
||||
case MVT::v2f32:
|
||||
// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
|
||||
case MVT::v2i32: Opc = ARM::VTRNd32; break;
|
||||
case MVT::v16i8: Opc = ARM::VZIPq8; break;
|
||||
case MVT::v8f16:
|
||||
case MVT::v8i16: Opc = ARM::VZIPq16; break;
|
||||
case MVT::v4f32:
|
||||
case MVT::v4i32: Opc = ARM::VZIPq32; break;
|
||||
|
@@ -3051,11 +3053,13 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
|
|||
switch (VT.getSimpleVT().SimpleTy) {
|
||||
default: return;
|
||||
case MVT::v8i8: Opc = ARM::VUZPd8; break;
|
||||
case MVT::v4f16:
|
||||
case MVT::v4i16: Opc = ARM::VUZPd16; break;
|
||||
case MVT::v2f32:
|
||||
// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
|
||||
case MVT::v2i32: Opc = ARM::VTRNd32; break;
|
||||
case MVT::v16i8: Opc = ARM::VUZPq8; break;
|
||||
case MVT::v8f16:
|
||||
case MVT::v8i16: Opc = ARM::VUZPq16; break;
|
||||
case MVT::v4f32:
|
||||
case MVT::v4i32: Opc = ARM::VUZPq32; break;
|
||||
|
|
|
@@ -1015,44 +1015,56 @@ entry:
|
|||
ret <8 x half> %3
|
||||
}
|
||||
|
||||
define dso_local %struct.float16x4x2_t @test_vzip_f16(<4 x half> %a, <4 x half> %b) {
|
||||
; CHECK-LABEL: test_vzip_f16:
|
||||
; CHECK: vzip.16 d0, d1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%vzip.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
|
||||
%vzip1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
|
||||
%.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vzip.i, 0, 0
|
||||
%.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vzip1.i, 0, 1
|
||||
ret %struct.float16x4x2_t %.fca.0.1.insert
|
||||
}
|
||||
|
||||
define dso_local %struct.float16x8x2_t @test_vzipq_f16(<8 x half> %a, <8 x half> %b) {
|
||||
; CHECK-LABEL: test_vzipq_f16:
|
||||
; CHECK: vzip.16 q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%vzip.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
|
||||
%vzip1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
|
||||
%.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vzip.i, 0, 0
|
||||
%.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vzip1.i, 0, 1
|
||||
ret %struct.float16x8x2_t %.fca.0.1.insert
|
||||
}
|
||||
|
||||
define dso_local %struct.float16x4x2_t @test_vuzp_f16(<4 x half> %a, <4 x half> %b) {
|
||||
; CHECK-LABEL: test_vuzp_f16:
|
||||
; CHECK: vuzp.16 d0, d1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%vuzp.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
%vuzp1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
|
||||
%.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vuzp.i, 0, 0
|
||||
%.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vuzp1.i, 0, 1
|
||||
ret %struct.float16x4x2_t %.fca.0.1.insert
|
||||
}
|
||||
|
||||
define dso_local %struct.float16x8x2_t @test_vuzpq_f16(<8 x half> %a, <8 x half> %b) {
|
||||
; CHECK-LABEL: test_vuzpq_f16:
|
||||
; CHECK: vuzp.16 q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%vuzp.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
|
||||
%vuzp1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
|
||||
%.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vuzp.i, 0, 0
|
||||
%.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vuzp1.i, 0, 1
|
||||
ret %struct.float16x8x2_t %.fca.0.1.insert
|
||||
}
|
||||
|
||||
; FIXME (PR38404)
|
||||
;
|
||||
;define dso_local %struct.float16x4x2_t @test_vzip_f16(<4 x half> %a, <4 x half> %b) {
|
||||
;entry:
|
||||
; %vzip.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
|
||||
; %vzip1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
|
||||
; %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vzip.i, 0, 0
|
||||
; %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vzip1.i, 0, 1
|
||||
; ret %struct.float16x4x2_t %.fca.0.1.insert
|
||||
;}
|
||||
;
|
||||
;define dso_local %struct.float16x8x2_t @test_vzipq_f16(<8 x half> %a, <8 x half> %b) {
|
||||
;entry:
|
||||
; %vzip.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
|
||||
; %vzip1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
|
||||
; %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vzip.i, 0, 0
|
||||
; %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vzip1.i, 0, 1
|
||||
; ret %struct.float16x8x2_t %.fca.0.1.insert
|
||||
;}
|
||||
;
|
||||
;define dso_local %struct.float16x4x2_t @test_vuzp_f16(<4 x half> %a, <4 x half> %b) {
|
||||
;entry:
|
||||
; %vuzp.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
; %vuzp1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
|
||||
; %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vuzp.i, 0, 0
|
||||
; %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vuzp1.i, 0, 1
|
||||
; ret %struct.float16x4x2_t %.fca.0.1.insert
|
||||
;}
|
||||
;
|
||||
;define dso_local %struct.float16x8x2_t @test_vuzpq_f16(<8 x half> %a, <8 x half> %b) {
|
||||
;entry:
|
||||
; %vuzp.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
|
||||
; %vuzp1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
|
||||
; %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vuzp.i, 0, 0
|
||||
; %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vuzp1.i, 0, 1
|
||||
; ret %struct.float16x8x2_t %.fca.0.1.insert
|
||||
;}
|
||||
;
|
||||
;define dso_local %struct.float16x4x2_t @test_vtrn_f16(<4 x half> %a, <4 x half> %b) {
|
||||
;entry:
|
||||
; %vtrn.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
|
||||
|
|
Loading…
Reference in New Issue