[ARM][DAG] BF16 constant handling.

Much like f16 and f32 constants, bf16 constants should not be shrunk to a
smaller FP type.  The resulting code may not be optimal, but this allows us
to legalize bf16 constants on Arm without errors.
This commit is contained in:
David Green 2022-10-02 11:51:08 +01:00
parent 3f0ad8558a
commit 3651635eca
2 changed files with 138 additions and 1 deletions

View File

@ -310,7 +310,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
// We don't want to shrink SNaNs. Converting the SNaN back to its real type
// can cause it to be changed into a QNaN on some platforms (e.g. on SystemZ).
if (!APF.isSignaling()) {
while (SVT != MVT::f32 && SVT != MVT::f16) {
while (SVT != MVT::f32 && SVT != MVT::f16 && SVT != MVT::bf16) {
SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1);
if (ConstantFPSDNode::isValueValidForType(SVT, APF) &&
// Only do this if the target has a native EXTLOAD instruction from

View File

@ -0,0 +1,137 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=armv8.6a-none-none-eabi < %s | FileCheck %s --check-prefixes=CHECK
; RUN: llc -mtriple=armv8.6a-none-none-eabi -mattr=+bf16,+neon < %s | FileCheck %s --check-prefixes=CHECK
; RUN: llc -mtriple=armv8.6a-none-none-eabi -mattr=+bf16,+neon,+fullfp16 < %s | FileCheck %s --check-prefix=CHECK-FP16
; RUN: llc -mtriple=armv8.6a-none-none-eabi -mattr=+bf16,+neon -float-abi=hard < %s | FileCheck %s --check-prefix=CHECK-HARD
; Returns the bfloat produced by bitcasting the i16 constant 0.
; In every configuration the expected code builds the bit pattern in an
; integer register (mov r0, #0) and no constant-pool entry is expected; the
; hard-float run additionally moves the result into s0.
define bfloat @bitcast_zero() {
; CHECK-LABEL: bitcast_zero:
; CHECK: @ %bb.0:
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, sp, #4
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: strh r0, [sp, #2]
; CHECK-NEXT: ldrh r0, [sp, #2]
; CHECK-NEXT: add sp, sp, #4
; CHECK-NEXT: bx lr
;
; CHECK-FP16-LABEL: bitcast_zero:
; CHECK-FP16: @ %bb.0:
; CHECK-FP16-NEXT: mov r0, #0
; CHECK-FP16-NEXT: vmov.f16 s0, r0
; CHECK-FP16-NEXT: vmov r0, s0
; CHECK-FP16-NEXT: bx lr
;
; CHECK-HARD-LABEL: bitcast_zero:
; CHECK-HARD: @ %bb.0:
; CHECK-HARD-NEXT: .pad #4
; CHECK-HARD-NEXT: sub sp, sp, #4
; CHECK-HARD-NEXT: mov r0, #0
; CHECK-HARD-NEXT: strh r0, [sp, #2]
; CHECK-HARD-NEXT: ldrh r0, [sp, #2]
; CHECK-HARD-NEXT: vmov s0, r0
; CHECK-HARD-NEXT: add sp, sp, #4
; CHECK-HARD-NEXT: bx lr
%z = bitcast i16 0 to bfloat
ret bfloat %z
}
; Returns the bfloat literal 0xR0000 directly, with no bitcast involved.
; In every configuration the expected code loads the value from the
; constant-pool entry .LCPI1_0, which is emitted as `.short 0x0000`; the
; hard-float run then moves the loaded halfword into s0.
define bfloat @zero() {
; CHECK-LABEL: zero:
; CHECK: @ %bb.0:
; CHECK-NEXT: adr r0, .LCPI1_0
; CHECK-NEXT: ldrh r0, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI1_0:
; CHECK-NEXT: .short 0x0000 @ bfloat 0
;
; CHECK-FP16-LABEL: zero:
; CHECK-FP16: @ %bb.0:
; CHECK-FP16-NEXT: vldr.16 s0, .LCPI1_0
; CHECK-FP16-NEXT: vmov r0, s0
; CHECK-FP16-NEXT: bx lr
; CHECK-FP16-NEXT: .p2align 1
; CHECK-FP16-NEXT: @ %bb.1:
; CHECK-FP16-NEXT: .LCPI1_0:
; CHECK-FP16-NEXT: .short 0x0000 @ bfloat 0
;
; CHECK-HARD-LABEL: zero:
; CHECK-HARD: @ %bb.0:
; CHECK-HARD-NEXT: adr r0, .LCPI1_0
; CHECK-HARD-NEXT: ldrh r0, [r0]
; CHECK-HARD-NEXT: vmov s0, r0
; CHECK-HARD-NEXT: bx lr
; CHECK-HARD-NEXT: .p2align 1
; CHECK-HARD-NEXT: @ %bb.1:
; CHECK-HARD-NEXT: .LCPI1_0:
; CHECK-HARD-NEXT: .short 0x0000 @ bfloat 0
ret bfloat 0xR0000
}
; Returns the bfloat produced by bitcasting the i16 constant 10000, i.e. a
; non-zero bit pattern.
; In every configuration the expected code builds the pattern with
; `movw r0, #10000` and no constant-pool entry is expected; the hard-float
; run additionally moves the result into s0.
define bfloat @bitcast_tenk() {
; CHECK-LABEL: bitcast_tenk:
; CHECK: @ %bb.0:
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, sp, #4
; CHECK-NEXT: movw r0, #10000
; CHECK-NEXT: strh r0, [sp, #2]
; CHECK-NEXT: ldrh r0, [sp, #2]
; CHECK-NEXT: add sp, sp, #4
; CHECK-NEXT: bx lr
;
; CHECK-FP16-LABEL: bitcast_tenk:
; CHECK-FP16: @ %bb.0:
; CHECK-FP16-NEXT: movw r0, #10000
; CHECK-FP16-NEXT: vmov.f16 s0, r0
; CHECK-FP16-NEXT: vmov r0, s0
; CHECK-FP16-NEXT: bx lr
;
; CHECK-HARD-LABEL: bitcast_tenk:
; CHECK-HARD: @ %bb.0:
; CHECK-HARD-NEXT: .pad #4
; CHECK-HARD-NEXT: sub sp, sp, #4
; CHECK-HARD-NEXT: movw r0, #10000
; CHECK-HARD-NEXT: strh r0, [sp, #2]
; CHECK-HARD-NEXT: ldrh r0, [sp, #2]
; CHECK-HARD-NEXT: vmov s0, r0
; CHECK-HARD-NEXT: add sp, sp, #4
; CHECK-HARD-NEXT: bx lr
%z = bitcast i16 10000 to bfloat
ret bfloat %z
}
; Returns the bfloat literal 0xR8000, which the expected assembly annotates
; as "bfloat -0".
; In every configuration the expected code loads the value from the
; constant-pool entry .LCPI3_0, which is emitted as `.short 0x8000`; the
; hard-float run then moves the loaded halfword into s0.
define bfloat @minus0() {
; CHECK-LABEL: minus0:
; CHECK: @ %bb.0:
; CHECK-NEXT: adr r0, .LCPI3_0
; CHECK-NEXT: ldrh r0, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI3_0:
; CHECK-NEXT: .short 0x8000 @ bfloat -0
;
; CHECK-FP16-LABEL: minus0:
; CHECK-FP16: @ %bb.0:
; CHECK-FP16-NEXT: vldr.16 s0, .LCPI3_0
; CHECK-FP16-NEXT: vmov r0, s0
; CHECK-FP16-NEXT: bx lr
; CHECK-FP16-NEXT: .p2align 1
; CHECK-FP16-NEXT: @ %bb.1:
; CHECK-FP16-NEXT: .LCPI3_0:
; CHECK-FP16-NEXT: .short 0x8000 @ bfloat -0
;
; CHECK-HARD-LABEL: minus0:
; CHECK-HARD: @ %bb.0:
; CHECK-HARD-NEXT: adr r0, .LCPI3_0
; CHECK-HARD-NEXT: ldrh r0, [r0]
; CHECK-HARD-NEXT: vmov s0, r0
; CHECK-HARD-NEXT: bx lr
; CHECK-HARD-NEXT: .p2align 1
; CHECK-HARD-NEXT: @ %bb.1:
; CHECK-HARD-NEXT: .LCPI3_0:
; CHECK-HARD-NEXT: .short 0x8000 @ bfloat -0
ret bfloat 0xR8000
}