[ARM] Expand vector reduction intrinsics on soft float

Followup to D73135. If the target doesn't have hard float (default
for ARM), then we assert when trying to soften the result of vector
reduction intrinsics. This patch marks these for expansion as well.
(A bit odd to use vectors on a target without hard float ... but
that's where you end up if you expose target-independent vector types.)

Differential Revision: https://reviews.llvm.org/D73854
This commit is contained in:
Nikita Popov 2020-02-02 18:44:18 +01:00
parent 9eb74f609d
commit 1cc4f8d172
2 changed files with 71 additions and 1 deletions

View File

@ -174,7 +174,14 @@ public:
case Intrinsic::experimental_vector_reduce_v2_fadd:
case Intrinsic::experimental_vector_reduce_v2_fmul:
// We don't have legalization support for ordered FP reductions.
return !II->getFastMathFlags().allowReassoc();
if (!II->getFastMathFlags().allowReassoc())
return true;
LLVM_FALLTHROUGH;
case Intrinsic::experimental_vector_reduce_fmin:
case Intrinsic::experimental_vector_reduce_fmax:
// Can't legalize reductions with soft floats.
return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs();
default:
// Don't expand anything else, let legalization deal with it.

View File

@ -0,0 +1,63 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=-neon | FileCheck %s --check-prefix=CHECK
declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>)
declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double, <2 x double>)
declare fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128, <2 x fp128>)
define float @test_v4f32(<4 x float> %a) nounwind {
; CHECK-LABEL: test_v4f32:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEXT: push {r4, r5, r6, lr}
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: mov r1, r2
; CHECK-NEXT: mov r4, r3
; CHECK-NEXT: bl __aeabi_fadd
; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: bl __aeabi_fadd
; CHECK-NEXT: mov r1, r0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: bl __aeabi_fadd
; CHECK-NEXT: pop {r4, r5, r6, lr}
; CHECK-NEXT: mov pc, lr
%b = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.0, <4 x float> %a)
ret float %b
}
define double @test_v2f64(<2 x double> %a) nounwind {
; CHECK-LABEL: test_v2f64:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: bl __aeabi_dadd
; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: mov pc, lr
%b = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double zeroinitializer, <2 x double> %a)
ret double %b
}
define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
; CHECK-LABEL: test_v2f128:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: .pad #16
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: ldr r12, [sp, #36]
; CHECK-NEXT: str r12, [sp, #12]
; CHECK-NEXT: ldr r12, [sp, #32]
; CHECK-NEXT: str r12, [sp, #8]
; CHECK-NEXT: ldr r12, [sp, #28]
; CHECK-NEXT: str r12, [sp, #4]
; CHECK-NEXT: ldr r12, [sp, #24]
; CHECK-NEXT: str r12, [sp]
; CHECK-NEXT: bl __addtf3
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: mov pc, lr
%b = call fast fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
ret fp128 %b
}