[MVT] Add v16f16 and v32f16 vectors.

I might look at improving PR43065, which will require being able to mark 256-bit and 512-bit vectors of f16 as Legal.

Differential Revision: https://reviews.llvm.org/D66515

llvm-svn: 369565
2019-08-21 19:14:48 +00:00 · 2019-08-21 19:14:48 +00:00 · 3f59bfd5be
parent 8d5fbecf8b
commit 3f59bfd5be
4 changed files with 97 additions and 78 deletions
--- a/llvm/include/llvm/CodeGen/ValueTypes.td
+++ b/llvm/include/llvm/CodeGen/ValueTypes.td
@ -126,43 +126,45 @@ def v2f16    : ValueType<32 , 93>;    //    2 x f16 vector value
 def v3f16    : ValueType<48 , 94>;    //    3 x f16 vector value
 def v4f16    : ValueType<64 , 95>;    //    4 x f16 vector value
 def v8f16    : ValueType<128, 96>;    //    8 x f16 vector value
-def v1f32    : ValueType<32 , 97>;    //    1 x f32 vector value
-def v2f32    : ValueType<64 , 98>;    //    2 x f32 vector value
-def v3f32    : ValueType<96 , 99>;    //    3 x f32 vector value
-def v4f32    : ValueType<128, 100>;   //    4 x f32 vector value
-def v5f32    : ValueType<160, 101>;   //    5 x f32 vector value
-def v8f32    : ValueType<256, 102>;   //    8 x f32 vector value
-def v16f32   : ValueType<512,  103>;  //   16 x f32 vector value
-def v32f32   : ValueType<1024, 104>;  //   32 x f32 vector value
-def v64f32   : ValueType<2048, 105>;  //   64 x f32 vector value
-def v128f32  : ValueType<4096, 106>;  //  128 x f32 vector value
-def v256f32  : ValueType<8182, 107>;  //  256 x f32 vector value
-def v512f32  : ValueType<16384, 108>; //  512 x f32 vector value
-def v1024f32 : ValueType<32768, 109>; // 1024 x f32 vector value
-def v2048f32 : ValueType<65536, 110>; // 2048 x f32 vector value
-def v1f64    : ValueType<64, 111>;    //    1 x f64 vector value
-def v2f64    : ValueType<128, 112>;   //    2 x f64 vector value
-def v4f64    : ValueType<256, 113>;   //    4 x f64 vector value
-def v8f64    : ValueType<512, 114>;   //    8 x f64 vector value
+def v16f16   : ValueType<256, 97>;    //   16 x f16 vector value
+def v32f16   : ValueType<512, 98>;    //   32 x f16 vector value
+def v1f32    : ValueType<32 , 99>;    //    1 x f32 vector value
+def v2f32    : ValueType<64 , 100>;   //    2 x f32 vector value
+def v3f32    : ValueType<96 , 101>;   //    3 x f32 vector value
+def v4f32    : ValueType<128, 102>;   //    4 x f32 vector value
+def v5f32    : ValueType<160, 103>;   //    5 x f32 vector value
+def v8f32    : ValueType<256, 104>;   //    8 x f32 vector value
+def v16f32   : ValueType<512,  105>;  //   16 x f32 vector value
+def v32f32   : ValueType<1024, 106>;  //   32 x f32 vector value
+def v64f32   : ValueType<2048, 107>;  //   64 x f32 vector value
+def v128f32  : ValueType<4096, 108>;  //  128 x f32 vector value
+def v256f32  : ValueType<8192, 109>;  //  256 x f32 vector value
+def v512f32  : ValueType<16384, 110>; //  512 x f32 vector value
+def v1024f32 : ValueType<32768, 111>; // 1024 x f32 vector value
+def v2048f32 : ValueType<65536, 112>; // 2048 x f32 vector value
+def v1f64    : ValueType<64, 113>;    //    1 x f64 vector value
+def v2f64    : ValueType<128, 114>;   //    2 x f64 vector value
+def v4f64    : ValueType<256, 115>;   //    4 x f64 vector value
+def v8f64    : ValueType<512, 116>;   //    8 x f64 vector value

-def nxv2f16  : ValueType<32 , 115>; // n x  2 x f16 vector value
-def nxv4f16  : ValueType<64 , 116>; // n x  4 x f16 vector value
-def nxv8f16  : ValueType<128, 117>; // n x  8 x f16 vector value
-def nxv1f32  : ValueType<32 , 118>; // n x  1 x f32 vector value
-def nxv2f32  : ValueType<64 , 119>; // n x  2 x f32 vector value
-def nxv4f32  : ValueType<128, 120>; // n x  4 x f32 vector value
-def nxv8f32  : ValueType<256, 121>; // n x  8 x f32 vector value
-def nxv16f32 : ValueType<512, 122>; // n x 16 x f32 vector value
-def nxv1f64  : ValueType<64,  123>; // n x  1 x f64 vector value
-def nxv2f64  : ValueType<128, 124>; // n x  2 x f64 vector value
-def nxv4f64  : ValueType<256, 125>; // n x  4 x f64 vector value
-def nxv8f64  : ValueType<512, 126>; // n x  8 x f64 vector value
+def nxv2f16  : ValueType<32 , 117>; // n x  2 x f16 vector value
+def nxv4f16  : ValueType<64 , 118>; // n x  4 x f16 vector value
+def nxv8f16  : ValueType<128, 119>; // n x  8 x f16 vector value
+def nxv1f32  : ValueType<32 , 120>; // n x  1 x f32 vector value
+def nxv2f32  : ValueType<64 , 121>; // n x  2 x f32 vector value
+def nxv4f32  : ValueType<128, 122>; // n x  4 x f32 vector value
+def nxv8f32  : ValueType<256, 123>; // n x  8 x f32 vector value
+def nxv16f32 : ValueType<512, 124>; // n x 16 x f32 vector value
+def nxv1f64  : ValueType<64,  125>; // n x  1 x f64 vector value
+def nxv2f64  : ValueType<128, 126>; // n x  2 x f64 vector value
+def nxv4f64  : ValueType<256, 127>; // n x  4 x f64 vector value
+def nxv8f64  : ValueType<512, 128>; // n x  8 x f64 vector value

-def x86mmx : ValueType<64 , 127>;   // X86 MMX value
-def FlagVT : ValueType<0  , 128>;   // Pre-RA sched glue
-def isVoid : ValueType<0  , 129>;   // Produces no value
-def untyped: ValueType<8  , 130>;   // Produces an untyped value
-def exnref: ValueType<0, 131>;      // WebAssembly's exnref type
+def x86mmx : ValueType<64 , 129>;   // X86 MMX value
+def FlagVT : ValueType<0  , 130>;   // Pre-RA sched glue
+def isVoid : ValueType<0  , 131>;   // Produces no value
+def untyped: ValueType<8  , 132>;   // Produces an untyped value
+def exnref: ValueType<0, 133>;      // WebAssembly's exnref type
 def token  : ValueType<0  , 248>;   // TokenTy
 def MetadataVT: ValueType<0, 249>;  // Metadata

--- a/llvm/include/llvm/Support/MachineValueType.h
+++ b/llvm/include/llvm/Support/MachineValueType.h
@ -158,37 +158,39 @@ namespace llvm {
      v3f16          =  94,   //    3 x f16
      v4f16          =  95,   //    4 x f16
      v8f16          =  96,   //    8 x f16
-      v1f32          =  97,   //    1 x f32
-      v2f32          =  98,   //    2 x f32
-      v3f32          =  99,   //    3 x f32
-      v4f32          =  100,  //    4 x f32
-      v5f32          =  101,  //    5 x f32
-      v8f32          =  102,  //    8 x f32
-      v16f32         =  103,  //   16 x f32
-      v32f32         =  104,  //   32 x f32
-      v64f32         =  105,  //   64 x f32
-      v128f32        =  106,  //  128 x f32
-      v256f32        =  107,  //  256 x f32
-      v512f32        =  108,  //  512 x f32
-      v1024f32       =  109,  // 1024 x f32
-      v2048f32       =  110,  // 2048 x f32
-      v1f64          =  111,  //    1 x f64
-      v2f64          =  112,  //    2 x f64
-      v4f64          =  113,  //    4 x f64
-      v8f64          =  114,  //    8 x f64
+      v16f16         =  97,   //   16 x f16
+      v32f16         =  98,   //   32 x f16
+      v1f32          =  99,   //    1 x f32
+      v2f32          =  100,  //    2 x f32
+      v3f32          =  101,  //    3 x f32
+      v4f32          =  102,  //    4 x f32
+      v5f32          =  103,  //    5 x f32
+      v8f32          =  104,  //    8 x f32
+      v16f32         =  105,  //   16 x f32
+      v32f32         =  106,  //   32 x f32
+      v64f32         =  107,  //   64 x f32
+      v128f32        =  108,  //  128 x f32
+      v256f32        =  109,  //  256 x f32
+      v512f32        =  110,  //  512 x f32
+      v1024f32       =  111,  // 1024 x f32
+      v2048f32       =  112,  // 2048 x f32
+      v1f64          =  113,  //    1 x f64
+      v2f64          =  114,  //    2 x f64
+      v4f64          =  115,  //    4 x f64
+      v8f64          =  116,  //    8 x f64

-      nxv2f16        =  115,  // n x  2 x f16
-      nxv4f16        =  116,  // n x  4 x f16
-      nxv8f16        =  117,  // n x  8 x f16
-      nxv1f32        =  118,  // n x  1 x f32
-      nxv2f32        =  119,  // n x  2 x f32
-      nxv4f32        =  120,  // n x  4 x f32
-      nxv8f32        =  121,  // n x  8 x f32
-      nxv16f32       =  122,  // n x 16 x f32
-      nxv1f64        =  123,  // n x  1 x f64
-      nxv2f64        =  124,  // n x  2 x f64
-      nxv4f64        =  125,  // n x  4 x f64
-      nxv8f64        =  126,  // n x  8 x f64
+      nxv2f16        =  117,  // n x  2 x f16
+      nxv4f16        =  118,  // n x  4 x f16
+      nxv8f16        =  119,  // n x  8 x f16
+      nxv1f32        =  120,  // n x  1 x f32
+      nxv2f32        =  121,  // n x  2 x f32
+      nxv4f32        =  122,  // n x  4 x f32
+      nxv8f32        =  123,  // n x  8 x f32
+      nxv16f32       =  124,  // n x 16 x f32
+      nxv1f64        =  125,  // n x  1 x f64
+      nxv2f64        =  126,  // n x  2 x f64
+      nxv4f64        =  127,  // n x  4 x f64
+      nxv8f64        =  128,  // n x  8 x f64

      FIRST_FP_VECTOR_VALUETYPE = v2f16,
      LAST_FP_VECTOR_VALUETYPE = nxv8f64,
@ -199,20 +201,20 @@ namespace llvm {
      FIRST_VECTOR_VALUETYPE = v1i1,
      LAST_VECTOR_VALUETYPE  = nxv8f64,

-      x86mmx         =  127,   // This is an X86 MMX value
+      x86mmx         =  129,   // This is an X86 MMX value

-      Glue           =  128,   // This glues nodes together during pre-RA sched
+      Glue           =  130,   // This glues nodes together during pre-RA sched

-      isVoid         =  129,   // This has no value
+      isVoid         =  131,   // This has no value

-      Untyped        =  130,   // This value takes a register, but has
+      Untyped        =  132,   // This value takes a register, but has
                               // unspecified type.  The register class
                               // will be determined by the opcode.

-      exnref         =  131,   // WebAssembly's exnref type
+      exnref         =  133,   // WebAssembly's exnref type

      FIRST_VALUETYPE = 1,     // This is always the beginning of the list.
-      LAST_VALUETYPE =  132,   // This always remains at the end of the list.
+      LAST_VALUETYPE =  134,   // This always remains at the end of the list.

      // This is the current maximum for LAST_VALUETYPE.
      // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
@ -341,17 +343,18 @@ namespace llvm {

    /// Return true if this is a 256-bit vector type.
    bool is256BitVector() const {
-      return (SimpleTy == MVT::v8f32 || SimpleTy == MVT::v4f64  ||
-              SimpleTy == MVT::v32i8 || SimpleTy == MVT::v16i16 ||
-              SimpleTy == MVT::v8i32 || SimpleTy == MVT::v4i64);
+      return (SimpleTy == MVT::v16f16 || SimpleTy == MVT::v8f32 ||
+              SimpleTy == MVT::v4f64  || SimpleTy == MVT::v32i8 ||
+              SimpleTy == MVT::v16i16 || SimpleTy == MVT::v8i32 ||
+              SimpleTy == MVT::v4i64);
    }

    /// Return true if this is a 512-bit vector type.
    bool is512BitVector() const {
-      return (SimpleTy == MVT::v16f32 || SimpleTy == MVT::v8f64  ||
-              SimpleTy == MVT::v512i1 || SimpleTy == MVT::v64i8  ||
-              SimpleTy == MVT::v32i16 || SimpleTy == MVT::v16i32 ||
-              SimpleTy == MVT::v8i64);
+      return (SimpleTy == MVT::v32f16 || SimpleTy == MVT::v16f32 ||
+              SimpleTy == MVT::v8f64  || SimpleTy == MVT::v512i1 ||
+              SimpleTy == MVT::v64i8  || SimpleTy == MVT::v32i16 ||
+              SimpleTy == MVT::v16i32 || SimpleTy == MVT::v8i64);
    }

    /// Return true if this is a 1024-bit vector type.
@ -483,6 +486,8 @@ namespace llvm {
      case v3f16:
      case v4f16:
      case v8f16:
+      case v16f16:
+      case v32f16:
      case nxv2f16:
      case nxv4f16:
      case nxv8f16: return f16;
@ -546,6 +551,7 @@ namespace llvm {
      case v32i16:
      case v32i32:
      case v32i64:
+      case v32f16:
      case v32f32:
      case nxv32i1:
      case nxv32i8:
@ -557,6 +563,7 @@ namespace llvm {
      case v16i16:
      case v16i32:
      case v16i64:
+      case v16f16:
      case v16f32:
      case nxv16i1:
      case nxv16i8:
@ -741,6 +748,7 @@ namespace llvm {
      case v16i16:
      case v8i32:
      case v4i64:
+      case v16f16:
      case v8f32:
      case v4f64:
      case nxv32i8:
@ -754,6 +762,7 @@ namespace llvm {
      case v32i16:
      case v16i32:
      case v8i64:
+      case v32f16:
      case v16f32:
      case v8f64:
      case nxv32i16:
@ -931,6 +940,8 @@ namespace llvm {
        if (NumElements == 3)  return MVT::v3f16;
        if (NumElements == 4)  return MVT::v4f16;
        if (NumElements == 8)  return MVT::v8f16;
+        if (NumElements == 16) return MVT::v16f16;
+        if (NumElements == 32) return MVT::v32f16;
        break;
      case MVT::f32:
        if (NumElements == 1)    return MVT::v1f32;
--- a/llvm/lib/CodeGen/ValueTypes.cpp
+++ b/llvm/lib/CodeGen/ValueTypes.cpp
@ -330,6 +330,8 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
  case MVT::v3f16:   return VectorType::get(Type::getHalfTy(Context), 3);
  case MVT::v4f16:   return VectorType::get(Type::getHalfTy(Context), 4);
  case MVT::v8f16:   return VectorType::get(Type::getHalfTy(Context), 8);
+  case MVT::v16f16:  return VectorType::get(Type::getHalfTy(Context), 16);
+  case MVT::v32f16:  return VectorType::get(Type::getHalfTy(Context), 32);
  case MVT::v1f32:   return VectorType::get(Type::getFloatTy(Context), 1);
  case MVT::v2f32:   return VectorType::get(Type::getFloatTy(Context), 2);
  case MVT::v3f32:   return VectorType::get(Type::getFloatTy(Context), 3);
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@ -157,6 +157,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
  setLoadExtAction(ISD::EXTLOAD, MVT::v3f32, MVT::v3f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand);
+  setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Expand);
+  setLoadExtAction(ISD::EXTLOAD, MVT::v32f32, MVT::v32f16, Expand);

  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
@ -219,6 +221,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
  setTruncStoreAction(MVT::v3f32, MVT::v3f16, Expand);
  setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand);
  setTruncStoreAction(MVT::v8f32, MVT::v8f16, Expand);
+  setTruncStoreAction(MVT::v16f32, MVT::v16f16, Expand);
+  setTruncStoreAction(MVT::v32f32, MVT::v32f16, Expand);

  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);