forked from OSchip/llvm-project
[ARM] FP16: vector VMUL variants
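This adds codegen support for the vmul_lane_f16 and vmul_n_f16 variants, and enables the previously commented-out tests for their D-register and Q-register (vmulq_lane_f16, vmulq_n_f16) forms.

Differential Revision: https://reviews.llvm.org/D50326
llvm-svn: 339232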
This commit is contained in:
parent
5477f11ba3
commit
920a453485
|
@ -4305,17 +4305,29 @@ def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
|
||||||
(v2f32 (EXTRACT_SUBREG QPR:$src2,
|
(v2f32 (EXTRACT_SUBREG QPR:$src2,
|
||||||
(DSubReg_i32_reg imm:$lane))),
|
(DSubReg_i32_reg imm:$lane))),
|
||||||
(SubReg_i32_lane imm:$lane)))>;
|
(SubReg_i32_lane imm:$lane)))>;
|
||||||
|
def : Pat<(v8f16 (fmul (v8f16 QPR:$src1),
|
||||||
|
(v8f16 (NEONvduplane (v8f16 QPR:$src2), imm:$lane)))),
|
||||||
|
(v8f16 (VMULslhq(v8f16 QPR:$src1),
|
||||||
|
(v4f16 (EXTRACT_SUBREG QPR:$src2,
|
||||||
|
(DSubReg_i16_reg imm:$lane))),
|
||||||
|
(SubReg_i16_lane imm:$lane)))>;
|
||||||
|
|
||||||
def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
|
def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
|
||||||
(VMULslfd DPR:$Rn,
|
(VMULslfd DPR:$Rn,
|
||||||
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
|
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
|
||||||
(i32 0))>;
|
(i32 0))>;
|
||||||
|
def : Pat<(v4f16 (fmul DPR:$Rn, (NEONvdup (f16 HPR:$Rm)))),
|
||||||
|
(VMULslhd DPR:$Rn,
|
||||||
|
(INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$Rm, ssub_0),
|
||||||
|
(i32 0))>;
|
||||||
def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
|
def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
|
||||||
(VMULslfq QPR:$Rn,
|
(VMULslfq QPR:$Rn,
|
||||||
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
|
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
|
||||||
(i32 0))>;
|
(i32 0))>;
|
||||||
|
def : Pat<(v8f16 (fmul QPR:$Rn, (NEONvdup (f16 HPR:$Rm)))),
|
||||||
|
(VMULslhq QPR:$Rn,
|
||||||
|
(INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$Rm, ssub_0),
|
||||||
|
(i32 0))>;
|
||||||
|
|
||||||
// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
|
// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
|
||||||
defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
|
defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
|
||||||
|
|
|
@ -979,43 +979,53 @@ entry:
|
||||||
ret <8 x half> %0
|
ret <8 x half> %0
|
||||||
}
|
}
|
||||||
|
|
||||||
; FIXME (PR38404)
|
define dso_local <4 x half> @test_vmul_lane_f16(<4 x half> %a, <4 x half> %b) {
|
||||||
;
|
; CHECK-LABEL: test_vmul_lane_f16:
|
||||||
;define dso_local <4 x half> @test_vmul_lane_f16(<4 x half> %a, <4 x half> %b) {
|
; CHECK: vmul.f16 d0, d0, d1[3]
|
||||||
;entry:
|
; CHECK-NEXT: bx lr
|
||||||
; %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
|
entry:
|
||||||
; %mul = fmul <4 x half> %shuffle, %a
|
%shuffle = shufflevector <4 x half> %b, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
|
||||||
; ret <4 x half> %mul
|
%mul = fmul <4 x half> %shuffle, %a
|
||||||
;}
|
ret <4 x half> %mul
|
||||||
|
}
|
||||||
|
|
||||||
;define dso_local <8 x half> @test_vmulq_lane_f16(<8 x half> %a, <4 x half> %b) {
|
define dso_local <8 x half> @test_vmulq_lane_f16(<8 x half> %a, <4 x half> %b) {
|
||||||
;entry:
|
; CHECK-LABEL: test_vmulq_lane_f16:
|
||||||
; %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
|
; CHECK: vmul.f16 q0, q0, d2[3]
|
||||||
; %mul = fmul <8 x half> %shuffle, %a
|
; CHECK-NEXT: bx lr
|
||||||
; ret <8 x half> %mul
|
entry:
|
||||||
;}
|
%shuffle = shufflevector <4 x half> %b, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
|
||||||
|
%mul = fmul <8 x half> %shuffle, %a
|
||||||
|
ret <8 x half> %mul
|
||||||
|
}
|
||||||
|
|
||||||
;define dso_local <4 x half> @test_vmul_n_f16(<4 x half> %a, float %b.coerce) {
|
define dso_local <4 x half> @test_vmul_n_f16(<4 x half> %a, float %b.coerce) {
|
||||||
;entry:
|
; CHECK-LABEL: test_vmul_n_f16:
|
||||||
; %0 = bitcast float %b.coerce to i32
|
; CHECK: vmul.f16 d0, d0, d1[0]
|
||||||
; %tmp.0.extract.trunc = trunc i32 %0 to i16
|
; CHECK-NEXT: bx lr
|
||||||
; %1 = bitcast i16 %tmp.0.extract.trunc to half
|
entry:
|
||||||
; %vecinit = insertelement <4 x half> undef, half %1, i32 0
|
%0 = bitcast float %b.coerce to i32
|
||||||
; %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
|
%tmp.0.extract.trunc = trunc i32 %0 to i16
|
||||||
; %mul = fmul <4 x half> %vecinit4, %a
|
%1 = bitcast i16 %tmp.0.extract.trunc to half
|
||||||
; ret <4 x half> %mul
|
%vecinit = insertelement <4 x half> undef, half %1, i32 0
|
||||||
;}
|
%vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
|
||||||
|
%mul = fmul <4 x half> %vecinit4, %a
|
||||||
|
ret <4 x half> %mul
|
||||||
|
}
|
||||||
|
|
||||||
;define dso_local <8 x half> @test_vmulq_n_f16(<8 x half> %a, float %b.coerce) {
|
define dso_local <8 x half> @test_vmulq_n_f16(<8 x half> %a, float %b.coerce) {
|
||||||
;entry:
|
; CHECK-LABEL: test_vmulq_n_f16:
|
||||||
; %0 = bitcast float %b.coerce to i32
|
; CHECK: vmul.f16 q0, q0, d2[0]
|
||||||
; %tmp.0.extract.trunc = trunc i32 %0 to i16
|
; CHECK-NEXT: bx lr
|
||||||
; %1 = bitcast i16 %tmp.0.extract.trunc to half
|
entry:
|
||||||
; %vecinit = insertelement <8 x half> undef, half %1, i32 0
|
%0 = bitcast float %b.coerce to i32
|
||||||
; %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
|
%tmp.0.extract.trunc = trunc i32 %0 to i16
|
||||||
; %mul = fmul <8 x half> %vecinit8, %a
|
%1 = bitcast i16 %tmp.0.extract.trunc to half
|
||||||
; ret <8 x half> %mul
|
%vecinit = insertelement <8 x half> undef, half %1, i32 0
|
||||||
;}
|
%vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
|
||||||
|
%mul = fmul <8 x half> %vecinit8, %a
|
||||||
|
ret <8 x half> %mul
|
||||||
|
}
|
||||||
|
|
||||||
define dso_local <4 x half> @test_vbsl_f16(<4 x i16> %a, <4 x half> %b, <4 x half> %c) {
|
define dso_local <4 x half> @test_vbsl_f16(<4 x i16> %a, <4 x half> %b, <4 x half> %c) {
|
||||||
; CHECK-LABEL: test_vbsl_f16:
|
; CHECK-LABEL: test_vbsl_f16:
|
||||||
|
|
Loading…
Reference in New Issue