[X86] Fix assertion failure when building a constant vector whose element type is half.

Fix an assertion failure when building a constant vector whose element type is half. Differential Revision: https://reviews.llvm.org/D108612
2021-08-24 13:58:08 +08:00 · 2021-08-24 13:58:08 +08:00 · b7795eb646
parent 292f013395
commit b7795eb646
2 changed files with 24 additions and 1 deletions
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@ -9006,7 +9006,9 @@ static Constant *getConstantVector(MVT VT, const APInt &SplatValue,
    APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i);
    Constant *Const;
    if (VT.isFloatingPoint()) {
-      if (ScalarSize == 32) {
+      if (ScalarSize == 16) {
+        Const = ConstantFP::get(C, APFloat(APFloat::IEEEhalf(), Val));
+      } else if (ScalarSize == 32) {
        Const = ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val));
      } else {
        assert(ScalarSize == 64 && "Unsupported floating point scalar size");
--- a/llvm/test/CodeGen/X86/build_fp16_constant_vector.ll
+++ b/llvm/test/CodeGen/X86/build_fp16_constant_vector.ll
@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck %s
+
+define dso_local <32 x half> @foo(<32 x half> %a, <32 x half> %b, <32 x half> %c) {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vbroadcastf64x4 {{.*#+}} zmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
+; CHECK-NEXT:    # zmm3 = mem[0,1,2,3,0,1,2,3]
+; CHECK-NEXT:    vfmadd213ph %zmm2, %zmm3, %zmm0
+; CHECK-NEXT:    vfmadd213ph %zmm2, %zmm3, %zmm1
+; CHECK-NEXT:    vaddph %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    ret{{[l|q]}}
+entry:
+  %0 = tail call fast <32 x half> @llvm.fma.v32f16(<32 x half> %a, <32 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00>, <32 x half> %c)
+  %1 = tail call fast <32 x half> @llvm.fma.v32f16(<32 x half> %b, <32 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00>, <32 x half> %c)
+  %2 = fadd <32 x half> %0, %1
+  ret <32 x half> %2
+}
+
+declare <32 x half> @llvm.fma.v32f16(<32 x half>, <32 x half>, <32 x half>)