[MVT] Add v16f16 and v32f16 vectors.

I might look at improving PR43065 which will require being
able to mark a 256 and 512 bit vector of f16 as Legal.

Differential Revision: https://reviews.llvm.org/D66515

llvm-svn: 369565
This commit is contained in:
Craig Topper 2019-08-21 19:14:48 +00:00
parent 8d5fbecf8b
commit 3f59bfd5be
4 changed files with 97 additions and 78 deletions

View File

@ -126,43 +126,45 @@ def v2f16 : ValueType<32 , 93>; // 2 x f16 vector value
def v3f16 : ValueType<48 , 94>; // 3 x f16 vector value
def v4f16 : ValueType<64 , 95>; // 4 x f16 vector value
def v8f16 : ValueType<128, 96>; // 8 x f16 vector value
def v1f32 : ValueType<32 , 97>; // 1 x f32 vector value
def v2f32 : ValueType<64 , 98>; // 2 x f32 vector value
def v3f32 : ValueType<96 , 99>; // 3 x f32 vector value
def v4f32 : ValueType<128, 100>; // 4 x f32 vector value
def v5f32 : ValueType<160, 101>; // 5 x f32 vector value
def v8f32 : ValueType<256, 102>; // 8 x f32 vector value
def v16f32 : ValueType<512, 103>; // 16 x f32 vector value
def v32f32 : ValueType<1024, 104>; // 32 x f32 vector value
def v64f32 : ValueType<2048, 105>; // 64 x f32 vector value
def v128f32 : ValueType<4096, 106>; // 128 x f32 vector value
def v256f32 : ValueType<8182, 107>; // 256 x f32 vector value
def v512f32 : ValueType<16384, 108>; // 512 x f32 vector value
def v1024f32 : ValueType<32768, 109>; // 1024 x f32 vector value
def v2048f32 : ValueType<65536, 110>; // 2048 x f32 vector value
def v1f64 : ValueType<64, 111>; // 1 x f64 vector value
def v2f64 : ValueType<128, 112>; // 2 x f64 vector value
def v4f64 : ValueType<256, 113>; // 4 x f64 vector value
def v8f64 : ValueType<512, 114>; // 8 x f64 vector value
def v16f16 : ValueType<256, 97>; // 16 x f16 vector value
def v32f16 : ValueType<512, 98>; // 32 x f16 vector value
def v1f32 : ValueType<32 , 99>; // 1 x f32 vector value
def v2f32 : ValueType<64 , 100>; // 2 x f32 vector value
def v3f32 : ValueType<96 , 101>; // 3 x f32 vector value
def v4f32 : ValueType<128, 102>; // 4 x f32 vector value
def v5f32 : ValueType<160, 103>; // 5 x f32 vector value
def v8f32 : ValueType<256, 104>; // 8 x f32 vector value
def v16f32 : ValueType<512, 105>; // 16 x f32 vector value
def v32f32 : ValueType<1024, 106>; // 32 x f32 vector value
def v64f32 : ValueType<2048, 107>; // 64 x f32 vector value
def v128f32 : ValueType<4096, 108>; // 128 x f32 vector value
def v256f32 : ValueType<8192, 109>; // 256 x f32 vector value (256 * 32 = 8192 bits)
def v512f32 : ValueType<16384, 110>; // 512 x f32 vector value
def v1024f32 : ValueType<32768, 111>; // 1024 x f32 vector value
def v2048f32 : ValueType<65536, 112>; // 2048 x f32 vector value
def v1f64 : ValueType<64, 113>; // 1 x f64 vector value
def v2f64 : ValueType<128, 114>; // 2 x f64 vector value
def v4f64 : ValueType<256, 115>; // 4 x f64 vector value
def v8f64 : ValueType<512, 116>; // 8 x f64 vector value
def nxv2f16 : ValueType<32 , 115>; // n x 2 x f16 vector value
def nxv4f16 : ValueType<64 , 116>; // n x 4 x f16 vector value
def nxv8f16 : ValueType<128, 117>; // n x 8 x f16 vector value
def nxv1f32 : ValueType<32 , 118>; // n x 1 x f32 vector value
def nxv2f32 : ValueType<64 , 119>; // n x 2 x f32 vector value
def nxv4f32 : ValueType<128, 120>; // n x 4 x f32 vector value
def nxv8f32 : ValueType<256, 121>; // n x 8 x f32 vector value
def nxv16f32 : ValueType<512, 122>; // n x 16 x f32 vector value
def nxv1f64 : ValueType<64, 123>; // n x 1 x f64 vector value
def nxv2f64 : ValueType<128, 124>; // n x 2 x f64 vector value
def nxv4f64 : ValueType<256, 125>; // n x 4 x f64 vector value
def nxv8f64 : ValueType<512, 126>; // n x 8 x f64 vector value
def nxv2f16 : ValueType<32 , 117>; // n x 2 x f16 vector value
def nxv4f16 : ValueType<64 , 118>; // n x 4 x f16 vector value
def nxv8f16 : ValueType<128, 119>; // n x 8 x f16 vector value
def nxv1f32 : ValueType<32 , 120>; // n x 1 x f32 vector value
def nxv2f32 : ValueType<64 , 121>; // n x 2 x f32 vector value
def nxv4f32 : ValueType<128, 122>; // n x 4 x f32 vector value
def nxv8f32 : ValueType<256, 123>; // n x 8 x f32 vector value
def nxv16f32 : ValueType<512, 124>; // n x 16 x f32 vector value
def nxv1f64 : ValueType<64, 125>; // n x 1 x f64 vector value
def nxv2f64 : ValueType<128, 126>; // n x 2 x f64 vector value
def nxv4f64 : ValueType<256, 127>; // n x 4 x f64 vector value
def nxv8f64 : ValueType<512, 128>; // n x 8 x f64 vector value
def x86mmx : ValueType<64 , 127>; // X86 MMX value
def FlagVT : ValueType<0 , 128>; // Pre-RA sched glue
def isVoid : ValueType<0 , 129>; // Produces no value
def untyped: ValueType<8 , 130>; // Produces an untyped value
def exnref: ValueType<0, 131>; // WebAssembly's exnref type
def x86mmx : ValueType<64 , 129>; // X86 MMX value
def FlagVT : ValueType<0 , 130>; // Pre-RA sched glue
def isVoid : ValueType<0 , 131>; // Produces no value
def untyped: ValueType<8 , 132>; // Produces an untyped value
def exnref: ValueType<0, 133>; // WebAssembly's exnref type
def token : ValueType<0 , 248>; // TokenTy
def MetadataVT: ValueType<0, 249>; // Metadata

View File

@ -158,37 +158,39 @@ namespace llvm {
v3f16 = 94, // 3 x f16
v4f16 = 95, // 4 x f16
v8f16 = 96, // 8 x f16
v1f32 = 97, // 1 x f32
v2f32 = 98, // 2 x f32
v3f32 = 99, // 3 x f32
v4f32 = 100, // 4 x f32
v5f32 = 101, // 5 x f32
v8f32 = 102, // 8 x f32
v16f32 = 103, // 16 x f32
v32f32 = 104, // 32 x f32
v64f32 = 105, // 64 x f32
v128f32 = 106, // 128 x f32
v256f32 = 107, // 256 x f32
v512f32 = 108, // 512 x f32
v1024f32 = 109, // 1024 x f32
v2048f32 = 110, // 2048 x f32
v1f64 = 111, // 1 x f64
v2f64 = 112, // 2 x f64
v4f64 = 113, // 4 x f64
v8f64 = 114, // 8 x f64
v16f16 = 97, // 16 x f16
v32f16 = 98, // 32 x f16
v1f32 = 99, // 1 x f32
v2f32 = 100, // 2 x f32
v3f32 = 101, // 3 x f32
v4f32 = 102, // 4 x f32
v5f32 = 103, // 5 x f32
v8f32 = 104, // 8 x f32
v16f32 = 105, // 16 x f32
v32f32 = 106, // 32 x f32
v64f32 = 107, // 64 x f32
v128f32 = 108, // 128 x f32
v256f32 = 109, // 256 x f32
v512f32 = 110, // 512 x f32
v1024f32 = 111, // 1024 x f32
v2048f32 = 112, // 2048 x f32
v1f64 = 113, // 1 x f64
v2f64 = 114, // 2 x f64
v4f64 = 115, // 4 x f64
v8f64 = 116, // 8 x f64
nxv2f16 = 115, // n x 2 x f16
nxv4f16 = 116, // n x 4 x f16
nxv8f16 = 117, // n x 8 x f16
nxv1f32 = 118, // n x 1 x f32
nxv2f32 = 119, // n x 2 x f32
nxv4f32 = 120, // n x 4 x f32
nxv8f32 = 121, // n x 8 x f32
nxv16f32 = 122, // n x 16 x f32
nxv1f64 = 123, // n x 1 x f64
nxv2f64 = 124, // n x 2 x f64
nxv4f64 = 125, // n x 4 x f64
nxv8f64 = 126, // n x 8 x f64
nxv2f16 = 117, // n x 2 x f16
nxv4f16 = 118, // n x 4 x f16
nxv8f16 = 119, // n x 8 x f16
nxv1f32 = 120, // n x 1 x f32
nxv2f32 = 121, // n x 2 x f32
nxv4f32 = 122, // n x 4 x f32
nxv8f32 = 123, // n x 8 x f32
nxv16f32 = 124, // n x 16 x f32
nxv1f64 = 125, // n x 1 x f64
nxv2f64 = 126, // n x 2 x f64
nxv4f64 = 127, // n x 4 x f64
nxv8f64 = 128, // n x 8 x f64
FIRST_FP_VECTOR_VALUETYPE = v2f16,
LAST_FP_VECTOR_VALUETYPE = nxv8f64,
@ -199,20 +201,20 @@ namespace llvm {
FIRST_VECTOR_VALUETYPE = v1i1,
LAST_VECTOR_VALUETYPE = nxv8f64,
x86mmx = 127, // This is an X86 MMX value
x86mmx = 129, // This is an X86 MMX value
Glue = 128, // This glues nodes together during pre-RA sched
Glue = 130, // This glues nodes together during pre-RA sched
isVoid = 129, // This has no value
isVoid = 131, // This has no value
Untyped = 130, // This value takes a register, but has
Untyped = 132, // This value takes a register, but has
// unspecified type. The register class
// will be determined by the opcode.
exnref = 131, // WebAssembly's exnref type
exnref = 133, // WebAssembly's exnref type
FIRST_VALUETYPE = 1, // This is always the beginning of the list.
LAST_VALUETYPE = 132, // This always remains at the end of the list.
LAST_VALUETYPE = 134, // This always remains at the end of the list.
// This is the current maximum for LAST_VALUETYPE.
// MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
@ -341,17 +343,18 @@ namespace llvm {
/// Return true if this is a 256-bit vector type.
bool is256BitVector() const {
return (SimpleTy == MVT::v8f32 || SimpleTy == MVT::v4f64 ||
SimpleTy == MVT::v32i8 || SimpleTy == MVT::v16i16 ||
SimpleTy == MVT::v8i32 || SimpleTy == MVT::v4i64);
return (SimpleTy == MVT::v16f16 || SimpleTy == MVT::v8f32 ||
SimpleTy == MVT::v4f64 || SimpleTy == MVT::v32i8 ||
SimpleTy == MVT::v16i16 || SimpleTy == MVT::v8i32 ||
SimpleTy == MVT::v4i64);
}
/// Return true if this is a 512-bit vector type.
bool is512BitVector() const {
return (SimpleTy == MVT::v16f32 || SimpleTy == MVT::v8f64 ||
SimpleTy == MVT::v512i1 || SimpleTy == MVT::v64i8 ||
SimpleTy == MVT::v32i16 || SimpleTy == MVT::v16i32 ||
SimpleTy == MVT::v8i64);
return (SimpleTy == MVT::v32f16 || SimpleTy == MVT::v16f32 ||
SimpleTy == MVT::v8f64 || SimpleTy == MVT::v512i1 ||
SimpleTy == MVT::v64i8 || SimpleTy == MVT::v32i16 ||
SimpleTy == MVT::v16i32 || SimpleTy == MVT::v8i64);
}
/// Return true if this is a 1024-bit vector type.
@ -483,6 +486,8 @@ namespace llvm {
case v3f16:
case v4f16:
case v8f16:
case v16f16:
case v32f16:
case nxv2f16:
case nxv4f16:
case nxv8f16: return f16;
@ -546,6 +551,7 @@ namespace llvm {
case v32i16:
case v32i32:
case v32i64:
case v32f16:
case v32f32:
case nxv32i1:
case nxv32i8:
@ -557,6 +563,7 @@ namespace llvm {
case v16i16:
case v16i32:
case v16i64:
case v16f16:
case v16f32:
case nxv16i1:
case nxv16i8:
@ -741,6 +748,7 @@ namespace llvm {
case v16i16:
case v8i32:
case v4i64:
case v16f16:
case v8f32:
case v4f64:
case nxv32i8:
@ -754,6 +762,7 @@ namespace llvm {
case v32i16:
case v16i32:
case v8i64:
case v32f16:
case v16f32:
case v8f64:
case nxv32i16:
@ -931,6 +940,8 @@ namespace llvm {
if (NumElements == 3) return MVT::v3f16;
if (NumElements == 4) return MVT::v4f16;
if (NumElements == 8) return MVT::v8f16;
if (NumElements == 16) return MVT::v16f16;
if (NumElements == 32) return MVT::v32f16;
break;
case MVT::f32:
if (NumElements == 1) return MVT::v1f32;

View File

@ -330,6 +330,8 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v3f16: return VectorType::get(Type::getHalfTy(Context), 3);
case MVT::v4f16: return VectorType::get(Type::getHalfTy(Context), 4);
case MVT::v8f16: return VectorType::get(Type::getHalfTy(Context), 8);
case MVT::v16f16: return VectorType::get(Type::getHalfTy(Context), 16);
case MVT::v32f16: return VectorType::get(Type::getHalfTy(Context), 32);
case MVT::v1f32: return VectorType::get(Type::getFloatTy(Context), 1);
case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2);
case MVT::v3f32: return VectorType::get(Type::getFloatTy(Context), 3);

View File

@ -157,6 +157,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, MVT::v3f32, MVT::v3f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v32f32, MVT::v32f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
@ -219,6 +221,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::v3f32, MVT::v3f16, Expand);
setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand);
setTruncStoreAction(MVT::v8f32, MVT::v8f16, Expand);
setTruncStoreAction(MVT::v16f32, MVT::v16f16, Expand);
setTruncStoreAction(MVT::v32f32, MVT::v32f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);