[CodeGen] Matching promoted type for 16-bit integer bitcasts from fp16 operand

Summary: When legalizing a bitcast operation from an fp16 operand to an i16 on a target that requires both input and output types to be promoted to 32 bits, an assertion can fail when building the new node due to a mismatch between the operation's result size and the type specified to the node. This patch fixes the issue by making sure the bit widths of the types match for the FP_TO_FP16 node, covering the difference with an extra ANY_EXTEND operation. Reviewers: ostannard, efriedma, pirama, jmolloy, plotfi Reviewed By: efriedma Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D82552
2020-06-25 15:01:02 +01:00 · 2020-06-25 15:01:02 +01:00 · fc39a9ca0e
parent e59e39b7c4
commit fc39a9ca0e
2 changed files with 54 additions and 1 deletions
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@ -4554,7 +4554,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
      // FIXME need to be more flexible about rounding mode.
      (void)V.convert(APFloat::IEEEhalf(),
                      APFloat::rmNearestTiesToEven, &Ignored);
-      return getConstant(V.bitcastToAPInt(), DL, VT);
+      return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
    }
    }
  }
--- a/llvm/test/CodeGen/ARM/arm-half-promote.ll
+++ b/llvm/test/CodeGen/ARM/arm-half-promote.ll
@ -0,0 +1,53 @@
+; RUN: llc < %s -mtriple=thumbv7s-apple-ios7.0.0 | FileCheck %s
+
+define arm_aapcs_vfpcc { <8 x half>, <8 x half> } @f1() {
+; CHECK-LABEL: _f1
+; CHECK: vpush   {d8}
+; CHECK-NEXT: vmov.f64        d8, #5.000000e-01
+; CHECK-NEXT: vmov.i32        d8, #0x0
+; CHECK-NEXT: vmov.i32        d0, #0x0
+; CHECK-NEXT: vmov.i32        d1, #0x0
+; CHECK-NEXT: vmov.i32        d2, #0x0
+; CHECK-NEXT: vmov.i32        d3, #0x0
+; CHECK-NEXT: vmov.i32        d4, #0x0
+; CHECK-NEXT: vmov.i32        d5, #0x0
+; CHECK-NEXT: vmov.i32        d6, #0x0
+; CHECK-NEXT: vmov.i32        d7, #0x0
+; CHECK-NEXT: vmov.f32        s1, s16
+; CHECK-NEXT: vmov.f32        s3, s16
+; CHECK-NEXT: vmov.f32        s5, s16
+; CHECK-NEXT: vmov.f32        s7, s16
+; CHECK-NEXT: vmov.f32        s9, s16
+; CHECK-NEXT: vmov.f32        s11, s16
+; CHECK-NEXT: vmov.f32        s13, s16
+; CHECK-NEXT: vmov.f32        s15, s16
+; CHECK-NEXT: vpop    {d8}
+; CHECK-NEXT: bx      lr
+  ret { <8 x half>, <8 x half> } zeroinitializer
+}
+
+define swiftcc { <8 x half>, <8 x half> } @f2() {
+; CHECK-LABEL: _f2
+; CHECK: vpush   {d8}
+; CHECK-NEXT: vmov.f64        d8, #5.000000e-01
+; CHECK-NEXT: vmov.i32        d8, #0x0
+; CHECK-NEXT: vmov.i32        d0, #0x0
+; CHECK-NEXT: vmov.i32        d1, #0x0
+; CHECK-NEXT: vmov.i32        d2, #0x0
+; CHECK-NEXT: vmov.i32        d3, #0x0
+; CHECK-NEXT: vmov.i32        d4, #0x0
+; CHECK-NEXT: vmov.i32        d5, #0x0
+; CHECK-NEXT: vmov.i32        d6, #0x0
+; CHECK-NEXT: vmov.i32        d7, #0x0
+; CHECK-NEXT: vmov.f32        s1, s16
+; CHECK-NEXT: vmov.f32        s3, s16
+; CHECK-NEXT: vmov.f32        s5, s16
+; CHECK-NEXT: vmov.f32        s7, s16
+; CHECK-NEXT: vmov.f32        s9, s16
+; CHECK-NEXT: vmov.f32        s11, s16
+; CHECK-NEXT: vmov.f32        s13, s16
+; CHECK-NEXT: vmov.f32        s15, s16
+; CHECK-NEXT: vpop    {d8}
+; CHECK-NEXT: bx      lr
+  ret { <8 x half>, <8 x half> } zeroinitializer
+}