forked from OSchip/llvm-project
[ARM] Various tests for MVE and FP16 codegen. NFC
This commit is contained in:
parent
f76c42416c
commit
a947be51bd
|
@ -47,3 +47,41 @@ entry:
|
|||
%tmp4.0.insert.ext = zext i16 %2 to i32
|
||||
ret i32 %tmp4.0.insert.ext
|
||||
}
|
||||
|
||||
; load_i16: loads an i16 through %hp, reinterprets the 16 bits as a half
; (bitcast, no value conversion), adds 1.0 and returns the half.
; Two sets of expectations follow, one per check prefix:
;  - VFPV4 prefix: integer ldrh, vmov into s2, widening vcvtb.f32.f16, then a
;    single-precision vadd.f32 whose result is moved back to r0.
;  - FP16 prefix: the value is loaded straight into an FP register with
;    vldr.16, added with vadd.f16 and written out with vstr.16.
; NOTE(review): in the FP16 expectations the source pointer is in r1 and the
; result is stored through r0; presumably the half return value is passed
; indirectly here - confirm against the calling-convention lowering.
define half @load_i16(i16 *%hp) {
|
||||
; CHECK-VFPV4-LABEL: load_i16:
|
||||
; CHECK-VFPV4: @ %bb.0: @ %entry
|
||||
; CHECK-VFPV4-NEXT: vmov.f32 s0, #1.000000e+00
|
||||
; CHECK-VFPV4-NEXT: ldrh r0, [r0]
|
||||
; CHECK-VFPV4-NEXT: vmov s2, r0
|
||||
; CHECK-VFPV4-NEXT: vcvtb.f32.f16 s2, s2
|
||||
; CHECK-VFPV4-NEXT: vadd.f32 s0, s2, s0
|
||||
; CHECK-VFPV4-NEXT: vmov r0, s0
|
||||
; CHECK-VFPV4-NEXT: bx lr
|
||||
;
|
||||
; CHECK-FP16-LABEL: load_i16:
|
||||
; CHECK-FP16: @ %bb.0: @ %entry
|
||||
; CHECK-FP16-NEXT: vldr.16 s2, [r1]
|
||||
; CHECK-FP16-NEXT: vmov.f16 s0, #1.000000e+00
|
||||
; CHECK-FP16-NEXT: vadd.f16 s0, s2, s0
|
||||
; CHECK-FP16-NEXT: vstr.16 s0, [r0]
|
||||
; CHECK-FP16-NEXT: bx lr
|
||||
entry:
|
||||
%h = load i16, i16 *%hp, align 2
|
||||
%hc = bitcast i16 %h to half
|
||||
%add = fadd half %hc, 1.0
|
||||
ret half %add
|
||||
}
|
||||
|
||||
; load_f16: the mirror image of load_i16. Loads a half through %hp,
; reinterprets its bits as an i16 (bitcast), adds integer 1 and returns it.
; The expected code stays entirely in the integer register file: a plain
; ldrh followed by adds, with no FP load or FP-to-core register move.
define i16 @load_f16(half *%hp) {
|
||||
; CHECK-LABEL: load_f16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: adds r0, #1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%h = load half, half *%hp, align 2
|
||||
%hc = bitcast half %h to i16
|
||||
%add = add i16 %hc, 1
|
||||
ret i16 %add
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -verify-machineinstrs -mattr=+mve %s -o - | FileCheck %s
|
||||
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -verify-machineinstrs -mattr=+mve.fp %s -o - | FileCheck %s
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @vaddqr_v4i32(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) {
|
||||
; CHECK-LABEL: vaddqr_v4i32:
|
||||
|
@ -72,3 +72,123 @@ entry:
|
|||
%c = add <16 x i8> %sp, %src
|
||||
ret <16 x i8> %c
|
||||
}
|
||||
|
||||
; vaddqr_v4f32: adds a splat of the scalar float %src2 to the vector %src.
; The splat is built in the float domain: insertelement into lane 0, then a
; shufflevector with an all-zero mask, which copies lane 0 into every lane.
; The splat is the second fadd operand. %a and %b are not referenced.
; Expected code moves the scalar to r0 and uses the vector-by-scalar form
; "vadd.f32 q0, q0, r0", i.e. no separate vdup is emitted.
define arm_aapcs_vfpcc <4 x float> @vaddqr_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
|
||||
; CHECK-LABEL: vaddqr_v4f32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vadd.f32 q0, q0, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%i = insertelement <4 x float> undef, float %src2, i32 0
|
||||
%sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
|
||||
%c = fadd <4 x float> %src, %sp
|
||||
ret <4 x float> %c
|
||||
}
|
||||
|
||||
; vaddqr_v8f16: half-precision variant of the splat-add test. The scalar is
; loaded from %src2p (it is passed by pointer rather than as a half argument),
; splatted in the half domain via insertelement + zero-mask shufflevector,
; and used as the second fadd operand. %a and %b are not referenced.
; Expected code loads the 16 bits with an integer ldrh and feeds r0 directly
; to the vector-by-scalar "vadd.f16 q0, q0, r0".
define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16(<8 x half> %src, half *%src2p, <8 x half> %a, <8 x half> %b) {
|
||||
; CHECK-LABEL: vaddqr_v8f16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: vadd.f16 q0, q0, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%src2 = load half, half *%src2p, align 2
|
||||
%i = insertelement <8 x half> undef, half %src2, i32 0
|
||||
%sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
|
||||
%c = fadd <8 x half> %src, %sp
|
||||
ret <8 x half> %c
|
||||
}
|
||||
|
||||
; vaddqr_v4f32_2: same float-domain splat-add as vaddqr_v4f32, but with the
; fadd operands commuted (the splat %sp is the first operand, %src the second).
; The expected code is unchanged by the commutation: vmov r0, s4 followed by
; the vector-by-scalar "vadd.f32 q0, q0, r0". %a and %b are not referenced.
define arm_aapcs_vfpcc <4 x float> @vaddqr_v4f32_2(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
|
||||
; CHECK-LABEL: vaddqr_v4f32_2:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vadd.f32 q0, q0, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%i = insertelement <4 x float> undef, float %src2, i32 0
|
||||
%sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
|
||||
%c = fadd <4 x float> %sp, %src
|
||||
ret <4 x float> %c
|
||||
}
|
||||
|
||||
; vaddqr_v8f16_2: same half-domain splat-add as vaddqr_v8f16, but with the
; fadd operands commuted (the splat %sp is the first operand, %src the second).
; The expected code is unchanged by the commutation: ldrh r0, [r0] followed
; by the vector-by-scalar "vadd.f16 q0, q0, r0". %a and %b are not referenced.
define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_2(<8 x half> %src, half *%src2p, <8 x half> %a, <8 x half> %b) {
|
||||
; CHECK-LABEL: vaddqr_v8f16_2:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: vadd.f16 q0, q0, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%src2 = load half, half *%src2p, align 2
|
||||
%i = insertelement <8 x half> undef, half %src2, i32 0
|
||||
%sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
|
||||
%c = fadd <8 x half> %sp, %src
|
||||
ret <8 x half> %c
|
||||
}
|
||||
|
||||
; vaddqr_v4f32_3: splat-add where the splat is built in the INTEGER domain.
; %src2 is bitcast to i32, splatted as <4 x i32> (insertelement + zero-mask
; shufflevector), and the vector is bitcast back to <4 x float> before the
; fadd. The splat is the second fadd operand. %a and %b are not referenced.
; Unlike vaddqr_v4f32, the expected code keeps a separate "vdup.32 q1, r0"
; and a register-register "vadd.f32 q0, q0, q1"; the splat is not folded into
; the vector-by-scalar add form.
define arm_aapcs_vfpcc <4 x float> @vaddqr_v4f32_3(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
|
||||
; CHECK-LABEL: vaddqr_v4f32_3:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vdup.32 q1, r0
|
||||
; CHECK-NEXT: vadd.f32 q0, q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%src2bc = bitcast float %src2 to i32
|
||||
%i = insertelement <4 x i32> undef, i32 %src2bc, i32 0
|
||||
%spbc = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%sp = bitcast <4 x i32> %spbc to <4 x float>
|
||||
%c = fadd <4 x float> %src, %sp
|
||||
ret <4 x float> %c
|
||||
}
|
||||
|
||||
; vaddqr_v8f16_3: half-precision splat-add where the splat is built in the
; INTEGER domain. The half loaded from %src2p is bitcast to i16, splatted as
; <8 x i16> (insertelement + zero-mask shufflevector), and the vector is
; bitcast back to <8 x half> before the fadd. The splat is the second fadd
; operand. %a and %b are not referenced.
; The expected code keeps a separate "vdup.16 q1, r0" and a
; register-register "vadd.f16 q0, q0, q1" rather than the vector-by-scalar
; form expected in vaddqr_v8f16.
define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_3(<8 x half> %src, half *%src2p, <8 x half> %a, <8 x half> %b) {
|
||||
; CHECK-LABEL: vaddqr_v8f16_3:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: vdup.16 q1, r0
|
||||
; CHECK-NEXT: vadd.f16 q0, q0, q1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%src2 = load half, half *%src2p, align 2
|
||||
%src2bc = bitcast half %src2 to i16
|
||||
%i = insertelement <8 x i16> undef, i16 %src2bc, i32 0
|
||||
%spbc = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
|
||||
%sp = bitcast <8 x i16> %spbc to <8 x half>
|
||||
%c = fadd <8 x half> %src, %sp
|
||||
ret <8 x half> %c
|
||||
}
|
||||
|
||||
; vaddqr_v4f32_4: integer-domain splat-add as in vaddqr_v4f32_3 (bitcast
; float -> i32, splat <4 x i32>, bitcast back to <4 x float>), but with the
; fadd operands commuted so the splat is the first operand.
; The expected code mirrors that order: "vdup.32 q1, r0" followed by
; "vadd.f32 q0, q1, q0". %a and %b are not referenced.
define arm_aapcs_vfpcc <4 x float> @vaddqr_v4f32_4(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) {
|
||||
; CHECK-LABEL: vaddqr_v4f32_4:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s4
|
||||
; CHECK-NEXT: vdup.32 q1, r0
|
||||
; CHECK-NEXT: vadd.f32 q0, q1, q0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%src2bc = bitcast float %src2 to i32
|
||||
%i = insertelement <4 x i32> undef, i32 %src2bc, i32 0
|
||||
%spbc = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%sp = bitcast <4 x i32> %spbc to <4 x float>
|
||||
%c = fadd <4 x float> %sp, %src
|
||||
ret <4 x float> %c
|
||||
}
|
||||
|
||||
; vaddqr_v8f16_4: integer-domain half splat-add as in vaddqr_v8f16_3 (bitcast
; half -> i16, splat <8 x i16>, bitcast back to <8 x half>), but with the
; fadd operands commuted so the splat is the first operand.
; The expected code mirrors that order: "vdup.16 q1, r0" followed by
; "vadd.f16 q0, q1, q0". %a and %b are not referenced.
define arm_aapcs_vfpcc <8 x half> @vaddqr_v8f16_4(<8 x half> %src, half *%src2p, <8 x half> %a, <8 x half> %b) {
|
||||
; CHECK-LABEL: vaddqr_v8f16_4:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: ldrh r0, [r0]
|
||||
; CHECK-NEXT: vdup.16 q1, r0
|
||||
; CHECK-NEXT: vadd.f16 q0, q1, q0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%src2 = load half, half *%src2p, align 2
|
||||
%src2bc = bitcast half %src2 to i16
|
||||
%i = insertelement <8 x i16> undef, i16 %src2bc, i32 0
|
||||
%spbc = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
|
||||
%sp = bitcast <8 x i16> %spbc to <8 x half>
|
||||
%c = fadd <8 x half> %sp, %src
|
||||
ret <8 x half> %c
|
||||
}
|
||||
|
|
|
@ -5192,3 +5192,138 @@ entry:
|
|||
%s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
|
||||
ret <8 x half> %s
|
||||
}
|
||||
|
||||
|
||||
|
||||
; vcmp_oeq_v8f16_bc: ordered-equal compare of %src against a splat, followed
; by a per-lane select between %a and %b. The splat is built in the INTEGER
; domain ("_bc" = bitcast): the half loaded from %src2p is bitcast to i16,
; splatted as <8 x i16> with insertelement + zero-mask shufflevector, and the
; vector is bitcast back to <8 x half> before the fcmp.
; Two sets of expectations follow, one per check prefix:
;  - MVE prefix: a lane-by-lane expansion. Each half lane is compared with a
;    scalar vcmp.f16 (vmovx.f16 extracts the odd lanes), selected with
;    vseleq.f16 and inserted into q3 with vmov.16 q3[n]; d8-d11 are saved and
;    restored around the sequence and q3 is finally copied to q0.
;  - MVEFP prefix: ldrh + vdup.16 q3, one vector "vcmp.f16 eq, q0, q3" and a
;    predicated "vpsel q0, q1, q2".
define arm_aapcs_vfpcc <8 x half> @vcmp_oeq_v8f16_bc(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
|
||||
; CHECK-MVE-LABEL: vcmp_oeq_v8f16_bc:
|
||||
; CHECK-MVE: @ %bb.0: @ %entry
|
||||
; CHECK-MVE-NEXT: .vsave {d8, d9, d10, d11}
|
||||
; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11}
|
||||
; CHECK-MVE-NEXT: ldrh r0, [r0]
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s12, s0
|
||||
; CHECK-MVE-NEXT: movs r2, #0
|
||||
; CHECK-MVE-NEXT: movs r1, #0
|
||||
; CHECK-MVE-NEXT: vdup.16 q4, r0
|
||||
; CHECK-MVE-NEXT: movs r0, #0
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s14, s16
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s22, s17
|
||||
; CHECK-MVE-NEXT: vcmp.f16 s12, s14
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s12, s4
|
||||
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-MVE-NEXT: it eq
|
||||
; CHECK-MVE-NEXT: moveq r0, #1
|
||||
; CHECK-MVE-NEXT: cmp r0, #0
|
||||
; CHECK-MVE-NEXT: vcmp.f16 s0, s16
|
||||
; CHECK-MVE-NEXT: cset r0, ne
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s14, s8
|
||||
; CHECK-MVE-NEXT: lsls r0, r0, #31
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s0, s3
|
||||
; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
|
||||
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-MVE-NEXT: it eq
|
||||
; CHECK-MVE-NEXT: moveq r2, #1
|
||||
; CHECK-MVE-NEXT: cmp r2, #0
|
||||
; CHECK-MVE-NEXT: cset r2, ne
|
||||
; CHECK-MVE-NEXT: vmov r0, s12
|
||||
; CHECK-MVE-NEXT: lsls r2, r2, #31
|
||||
; CHECK-MVE-NEXT: vcmp.f16 s1, s17
|
||||
; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
|
||||
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-MVE-NEXT: vmov r2, s12
|
||||
; CHECK-MVE-NEXT: vmov.16 q3[0], r2
|
||||
; CHECK-MVE-NEXT: vmov.16 q3[1], r0
|
||||
; CHECK-MVE-NEXT: mov.w r0, #0
|
||||
; CHECK-MVE-NEXT: it eq
|
||||
; CHECK-MVE-NEXT: moveq r0, #1
|
||||
; CHECK-MVE-NEXT: cmp r0, #0
|
||||
; CHECK-MVE-NEXT: cset r0, ne
|
||||
; CHECK-MVE-NEXT: lsls r0, r0, #31
|
||||
; CHECK-MVE-NEXT: vseleq.f16 s20, s9, s5
|
||||
; CHECK-MVE-NEXT: vmov r0, s20
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s20, s1
|
||||
; CHECK-MVE-NEXT: vcmp.f16 s20, s22
|
||||
; CHECK-MVE-NEXT: vmov.16 q3[2], r0
|
||||
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-MVE-NEXT: mov.w r0, #0
|
||||
; CHECK-MVE-NEXT: it eq
|
||||
; CHECK-MVE-NEXT: moveq r0, #1
|
||||
; CHECK-MVE-NEXT: cmp r0, #0
|
||||
; CHECK-MVE-NEXT: cset r0, ne
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s20, s5
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s22, s9
|
||||
; CHECK-MVE-NEXT: lsls r0, r0, #31
|
||||
; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20
|
||||
; CHECK-MVE-NEXT: vcmp.f16 s2, s18
|
||||
; CHECK-MVE-NEXT: vmov r0, s20
|
||||
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-MVE-NEXT: vmov.16 q3[3], r0
|
||||
; CHECK-MVE-NEXT: mov.w r0, #0
|
||||
; CHECK-MVE-NEXT: it eq
|
||||
; CHECK-MVE-NEXT: moveq r0, #1
|
||||
; CHECK-MVE-NEXT: cmp r0, #0
|
||||
; CHECK-MVE-NEXT: cset r0, ne
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s22, s18
|
||||
; CHECK-MVE-NEXT: lsls r0, r0, #31
|
||||
; CHECK-MVE-NEXT: vseleq.f16 s20, s10, s6
|
||||
; CHECK-MVE-NEXT: vmov r0, s20
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s20, s2
|
||||
; CHECK-MVE-NEXT: vcmp.f16 s20, s22
|
||||
; CHECK-MVE-NEXT: vmov.16 q3[4], r0
|
||||
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-MVE-NEXT: mov.w r0, #0
|
||||
; CHECK-MVE-NEXT: it eq
|
||||
; CHECK-MVE-NEXT: moveq r0, #1
|
||||
; CHECK-MVE-NEXT: cmp r0, #0
|
||||
; CHECK-MVE-NEXT: cset r0, ne
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s20, s6
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s22, s10
|
||||
; CHECK-MVE-NEXT: lsls r0, r0, #31
|
||||
; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20
|
||||
; CHECK-MVE-NEXT: vcmp.f16 s3, s19
|
||||
; CHECK-MVE-NEXT: vmov r0, s20
|
||||
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-MVE-NEXT: vmov.16 q3[5], r0
|
||||
; CHECK-MVE-NEXT: mov.w r0, #0
|
||||
; CHECK-MVE-NEXT: it eq
|
||||
; CHECK-MVE-NEXT: moveq r0, #1
|
||||
; CHECK-MVE-NEXT: cmp r0, #0
|
||||
; CHECK-MVE-NEXT: cset r0, ne
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s2, s19
|
||||
; CHECK-MVE-NEXT: vcmp.f16 s0, s2
|
||||
; CHECK-MVE-NEXT: lsls r0, r0, #31
|
||||
; CHECK-MVE-NEXT: vseleq.f16 s20, s11, s7
|
||||
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
|
||||
; CHECK-MVE-NEXT: it eq
|
||||
; CHECK-MVE-NEXT: moveq r1, #1
|
||||
; CHECK-MVE-NEXT: vmov r0, s20
|
||||
; CHECK-MVE-NEXT: cmp r1, #0
|
||||
; CHECK-MVE-NEXT: vmov.16 q3[6], r0
|
||||
; CHECK-MVE-NEXT: cset r0, ne
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s0, s7
|
||||
; CHECK-MVE-NEXT: vmovx.f16 s2, s11
|
||||
; CHECK-MVE-NEXT: lsls r0, r0, #31
|
||||
; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
|
||||
; CHECK-MVE-NEXT: vmov r0, s0
|
||||
; CHECK-MVE-NEXT: vmov.16 q3[7], r0
|
||||
; CHECK-MVE-NEXT: vmov q0, q3
|
||||
; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11}
|
||||
; CHECK-MVE-NEXT: bx lr
|
||||
;
|
||||
; CHECK-MVEFP-LABEL: vcmp_oeq_v8f16_bc:
|
||||
; CHECK-MVEFP: @ %bb.0: @ %entry
|
||||
; CHECK-MVEFP-NEXT: ldrh r0, [r0]
|
||||
; CHECK-MVEFP-NEXT: vdup.16 q3, r0
|
||||
; CHECK-MVEFP-NEXT: vcmp.f16 eq, q0, q3
|
||||
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
|
||||
; CHECK-MVEFP-NEXT: bx lr
|
||||
entry:
|
||||
%src2 = load half, half* %src2p
|
||||
%src2bc = bitcast half %src2 to i16
|
||||
%i = insertelement <8 x i16> undef, i16 %src2bc, i32 0
|
||||
%spbc = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
|
||||
%sp = bitcast <8 x i16> %spbc to <8 x half>
|
||||
%c = fcmp oeq <8 x half> %src, %sp
|
||||
%s = select <8 x i1> %c, <8 x half> %a, <8 x half> %b
|
||||
ret <8 x half> %s
|
||||
}
|
||||
|
|
|
@ -75,6 +75,36 @@ entry:
|
|||
ret <4 x float> %out
|
||||
}
|
||||
|
||||
; vdup_f32_1bc: returns a <4 x float> with every lane equal to %src, where
; the splat is performed in the integer domain: %src is bitcast to i32,
; splatted as <4 x i32> (insertelement + zero-mask shufflevector), and the
; result is bitcast back to <4 x float>.
; Expected code moves s0 to r0 and duplicates it with "vdup.32 q0, r0".
define arm_aapcs_vfpcc <4 x float> @vdup_f32_1bc(float %src) {
|
||||
; CHECK-LABEL: vdup_f32_1bc:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vdup.32 q0, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%srcbc = bitcast float %src to i32
|
||||
%0 = insertelement <4 x i32> undef, i32 %srcbc, i32 0
|
||||
%out = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%outbc = bitcast <4 x i32> %out to <4 x float>
|
||||
ret <4 x float> %outbc
|
||||
}
|
||||
|
||||
; vdup_f32_2bc: like vdup_f32_1bc, but the splatted value is computed rather
; than passed in: %src1 + %src2 is bitcast to i32, splatted as <4 x i32>
; (insertelement + zero-mask shufflevector) and bitcast back to <4 x float>.
; Expected code performs the scalar vadd.f32 in s0, moves the sum to r0 and
; duplicates it with "vdup.32 q0, r0".
define arm_aapcs_vfpcc <4 x float> @vdup_f32_2bc(float %src1, float %src2) {
|
||||
; CHECK-LABEL: vdup_f32_2bc:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vadd.f32 s0, s0, s1
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vdup.32 q0, r0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = fadd float %src1, %src2
|
||||
%bc = bitcast float %0 to i32
|
||||
%1 = insertelement <4 x i32> undef, i32 %bc, i32 0
|
||||
%out = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%outbc = bitcast <4 x i32> %out to <4 x float>
|
||||
ret <4 x float> %outbc
|
||||
}
|
||||
|
||||
; TODO: Calling convention needs fixing to pass half types directly to functions
|
||||
define arm_aapcs_vfpcc <8 x half> @vdup_f16(half* %src1, half* %src2) {
|
||||
; CHECK-LABEL: vdup_f16:
|
||||
|
@ -94,6 +124,30 @@ entry:
|
|||
ret <8 x half> %out
|
||||
}
|
||||
|
||||
; vdup_f16_bc: loads two halves through %src1 and %src2, adds them, and
; splats the sum across an <8 x half> via the integer domain: the sum is
; bitcast to i16, splatted as <8 x i16> (insertelement + zero-mask
; shufflevector) and bitcast back to <8 x half>.
; The expected code records how the half -> i16 bitcast is lowered here: the
; vadd.f16 result is spilled with "vstr.16 s0, [sp, #2]" into a 4-byte stack
; slot and reloaded with "ldrh.w r0, [sp, #2]" before "vdup.16 q0, r0".
define arm_aapcs_vfpcc <8 x half> @vdup_f16_bc(half* %src1, half* %src2) {
|
||||
; CHECK-LABEL: vdup_f16_bc:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .pad #4
|
||||
; CHECK-NEXT: sub sp, #4
|
||||
; CHECK-NEXT: vldr.16 s0, [r1]
|
||||
; CHECK-NEXT: vldr.16 s2, [r0]
|
||||
; CHECK-NEXT: vadd.f16 s0, s2, s0
|
||||
; CHECK-NEXT: vstr.16 s0, [sp, #2]
|
||||
; CHECK-NEXT: ldrh.w r0, [sp, #2]
|
||||
; CHECK-NEXT: vdup.16 q0, r0
|
||||
; CHECK-NEXT: add sp, #4
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = load half, half *%src1, align 2
|
||||
%1 = load half, half *%src2, align 2
|
||||
%2 = fadd half %0, %1
|
||||
%bc = bitcast half %2 to i16
|
||||
%3 = insertelement <8 x i16> undef, i16 %bc, i32 0
|
||||
%out = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer
|
||||
%outbc = bitcast <8 x i16> %out to <8 x half>
|
||||
ret <8 x half> %outbc
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <2 x double> @vdup_f64(double %src) {
|
||||
; CHECK-LABEL: vdup_f64:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
|
@ -185,3 +239,46 @@ entry:
|
|||
%out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 1, i32 1>
|
||||
ret <2 x double> %out
|
||||
}
|
||||
|
||||
|
||||
; vdup_f32_extract: splats %src through the integer domain (bitcast to i32,
; <4 x i32> splat via insertelement + zero-mask shufflevector, bitcast back
; to <4 x float>) and then extracts lane 2 of the result, which by
; construction holds the same bits as %src.
; The expected code does not simplify this away: it still performs
; "vdup.32 q0, r0" and then copies lane 2 out with "vmov.f32 s0, s2".
define arm_aapcs_vfpcc float @vdup_f32_extract(float %src) {
|
||||
; CHECK-LABEL: vdup_f32_extract:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmov r0, s0
|
||||
; CHECK-NEXT: vdup.32 q0, r0
|
||||
; CHECK-NEXT: vmov.f32 s0, s2
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%srcbc = bitcast float %src to i32
|
||||
%0 = insertelement <4 x i32> undef, i32 %srcbc, i32 0
|
||||
%out = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%outbc = bitcast <4 x i32> %out to <4 x float>
|
||||
%ext = extractelement <4 x float> %outbc, i32 2
|
||||
ret float %ext
|
||||
}
|
||||
|
||||
; vdup_f16_extract: loads two halves through %src1 and %src2, adds them,
; splats the sum through the integer domain (bitcast to i16, <8 x i16> splat
; via insertelement + zero-mask shufflevector, bitcast back to <8 x half>)
; and returns lane 2 of the splat.
; The expected code spills the vadd.f16 result with "vstr.16 s0, [sp, #2]",
; reloads it with "ldrh.w r1, [sp, #2]", performs "vdup.16 q0, r1" and then
; stores the extracted lane with "vstr.16 s1, [r0]".
; NOTE(review): the operands are read through r1/r2 and the result is written
; through r0; presumably the half return value is passed indirectly here -
; confirm against the calling-convention lowering.
define arm_aapcs_vfpcc half @vdup_f16_extract(half* %src1, half* %src2) {
|
||||
; CHECK-LABEL: vdup_f16_extract:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: .pad #4
|
||||
; CHECK-NEXT: sub sp, #4
|
||||
; CHECK-NEXT: vldr.16 s0, [r2]
|
||||
; CHECK-NEXT: vldr.16 s2, [r1]
|
||||
; CHECK-NEXT: vadd.f16 s0, s2, s0
|
||||
; CHECK-NEXT: vstr.16 s0, [sp, #2]
|
||||
; CHECK-NEXT: ldrh.w r1, [sp, #2]
|
||||
; CHECK-NEXT: vdup.16 q0, r1
|
||||
; CHECK-NEXT: vstr.16 s1, [r0]
|
||||
; CHECK-NEXT: add sp, #4
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = load half, half *%src1, align 2
|
||||
%1 = load half, half *%src2, align 2
|
||||
%2 = fadd half %0, %1
|
||||
%bc = bitcast half %2 to i16
|
||||
%3 = insertelement <8 x i16> undef, i16 %bc, i32 0
|
||||
%out = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer
|
||||
%outbc = bitcast <8 x i16> %out to <8 x half>
|
||||
%ext = extractelement <8 x half> %outbc, i32 2
|
||||
ret half %ext
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue