forked from OSchip/llvm-project
[MVT] Add v16f16 and v32f16 vectors.
I might look at improving PR43065, which will require being able to mark 256-bit and 512-bit vectors of f16 as Legal. Differential Revision: https://reviews.llvm.org/D66515 llvm-svn: 369565
This commit is contained in:
parent
8d5fbecf8b
commit
3f59bfd5be
|
@ -126,43 +126,45 @@ def v2f16 : ValueType<32 , 93>; // 2 x f16 vector value
|
|||
def v3f16 : ValueType<48 , 94>; // 3 x f16 vector value
|
||||
def v4f16 : ValueType<64 , 95>; // 4 x f16 vector value
|
||||
def v8f16 : ValueType<128, 96>; // 8 x f16 vector value
|
||||
def v1f32 : ValueType<32 , 97>; // 1 x f32 vector value
|
||||
def v2f32 : ValueType<64 , 98>; // 2 x f32 vector value
|
||||
def v3f32 : ValueType<96 , 99>; // 3 x f32 vector value
|
||||
def v4f32 : ValueType<128, 100>; // 4 x f32 vector value
|
||||
def v5f32 : ValueType<160, 101>; // 5 x f32 vector value
|
||||
def v8f32 : ValueType<256, 102>; // 8 x f32 vector value
|
||||
def v16f32 : ValueType<512, 103>; // 16 x f32 vector value
|
||||
def v32f32 : ValueType<1024, 104>; // 32 x f32 vector value
|
||||
def v64f32 : ValueType<2048, 105>; // 64 x f32 vector value
|
||||
def v128f32 : ValueType<4096, 106>; // 128 x f32 vector value
|
||||
def v256f32 : ValueType<8182, 107>; // 256 x f32 vector value
|
||||
def v512f32 : ValueType<16384, 108>; // 512 x f32 vector value
|
||||
def v1024f32 : ValueType<32768, 109>; // 1024 x f32 vector value
|
||||
def v2048f32 : ValueType<65536, 110>; // 2048 x f32 vector value
|
||||
def v1f64 : ValueType<64, 111>; // 1 x f64 vector value
|
||||
def v2f64 : ValueType<128, 112>; // 2 x f64 vector value
|
||||
def v4f64 : ValueType<256, 113>; // 4 x f64 vector value
|
||||
def v8f64 : ValueType<512, 114>; // 8 x f64 vector value
|
||||
def v16f16 : ValueType<256, 97>; // 16 x f16 vector value
|
||||
def v32f16 : ValueType<512, 98>; // 32 x f16 vector value
|
||||
def v1f32 : ValueType<32 , 99>; // 1 x f32 vector value
|
||||
def v2f32 : ValueType<64 , 100>; // 2 x f32 vector value
|
||||
def v3f32 : ValueType<96 , 101>; // 3 x f32 vector value
|
||||
def v4f32 : ValueType<128, 102>; // 4 x f32 vector value
|
||||
def v5f32 : ValueType<160, 103>; // 5 x f32 vector value
|
||||
def v8f32 : ValueType<256, 104>; // 8 x f32 vector value
|
||||
def v16f32 : ValueType<512, 105>; // 16 x f32 vector value
|
||||
def v32f32 : ValueType<1024, 106>; // 32 x f32 vector value
|
||||
def v64f32 : ValueType<2048, 107>; // 64 x f32 vector value
|
||||
def v128f32 : ValueType<4096, 108>; // 128 x f32 vector value
|
||||
def v256f32 : ValueType<8192, 109>; // 256 x f32 vector value (256 * 32 = 8192 bits)
|
||||
def v512f32 : ValueType<16384, 110>; // 512 x f32 vector value
|
||||
def v1024f32 : ValueType<32768, 111>; // 1024 x f32 vector value
|
||||
def v2048f32 : ValueType<65536, 112>; // 2048 x f32 vector value
|
||||
def v1f64 : ValueType<64, 113>; // 1 x f64 vector value
|
||||
def v2f64 : ValueType<128, 114>; // 2 x f64 vector value
|
||||
def v4f64 : ValueType<256, 115>; // 4 x f64 vector value
|
||||
def v8f64 : ValueType<512, 116>; // 8 x f64 vector value
|
||||
|
||||
def nxv2f16 : ValueType<32 , 115>; // n x 2 x f16 vector value
|
||||
def nxv4f16 : ValueType<64 , 116>; // n x 4 x f16 vector value
|
||||
def nxv8f16 : ValueType<128, 117>; // n x 8 x f16 vector value
|
||||
def nxv1f32 : ValueType<32 , 118>; // n x 1 x f32 vector value
|
||||
def nxv2f32 : ValueType<64 , 119>; // n x 2 x f32 vector value
|
||||
def nxv4f32 : ValueType<128, 120>; // n x 4 x f32 vector value
|
||||
def nxv8f32 : ValueType<256, 121>; // n x 8 x f32 vector value
|
||||
def nxv16f32 : ValueType<512, 122>; // n x 16 x f32 vector value
|
||||
def nxv1f64 : ValueType<64, 123>; // n x 1 x f64 vector value
|
||||
def nxv2f64 : ValueType<128, 124>; // n x 2 x f64 vector value
|
||||
def nxv4f64 : ValueType<256, 125>; // n x 4 x f64 vector value
|
||||
def nxv8f64 : ValueType<512, 126>; // n x 8 x f64 vector value
|
||||
def nxv2f16 : ValueType<32 , 117>; // n x 2 x f16 vector value
|
||||
def nxv4f16 : ValueType<64 , 118>; // n x 4 x f16 vector value
|
||||
def nxv8f16 : ValueType<128, 119>; // n x 8 x f16 vector value
|
||||
def nxv1f32 : ValueType<32 , 120>; // n x 1 x f32 vector value
|
||||
def nxv2f32 : ValueType<64 , 121>; // n x 2 x f32 vector value
|
||||
def nxv4f32 : ValueType<128, 122>; // n x 4 x f32 vector value
|
||||
def nxv8f32 : ValueType<256, 123>; // n x 8 x f32 vector value
|
||||
def nxv16f32 : ValueType<512, 124>; // n x 16 x f32 vector value
|
||||
def nxv1f64 : ValueType<64, 125>; // n x 1 x f64 vector value
|
||||
def nxv2f64 : ValueType<128, 126>; // n x 2 x f64 vector value
|
||||
def nxv4f64 : ValueType<256, 127>; // n x 4 x f64 vector value
|
||||
def nxv8f64 : ValueType<512, 128>; // n x 8 x f64 vector value
|
||||
|
||||
def x86mmx : ValueType<64 , 127>; // X86 MMX value
|
||||
def FlagVT : ValueType<0 , 128>; // Pre-RA sched glue
|
||||
def isVoid : ValueType<0 , 129>; // Produces no value
|
||||
def untyped: ValueType<8 , 130>; // Produces an untyped value
|
||||
def exnref: ValueType<0, 131>; // WebAssembly's exnref type
|
||||
def x86mmx : ValueType<64 , 129>; // X86 MMX value
|
||||
def FlagVT : ValueType<0 , 130>; // Pre-RA sched glue
|
||||
def isVoid : ValueType<0 , 131>; // Produces no value
|
||||
def untyped: ValueType<8 , 132>; // Produces an untyped value
|
||||
def exnref: ValueType<0, 133>; // WebAssembly's exnref type
|
||||
def token : ValueType<0 , 248>; // TokenTy
|
||||
def MetadataVT: ValueType<0, 249>; // Metadata
|
||||
|
||||
|
|
|
@ -158,37 +158,39 @@ namespace llvm {
|
|||
v3f16 = 94, // 3 x f16
|
||||
v4f16 = 95, // 4 x f16
|
||||
v8f16 = 96, // 8 x f16
|
||||
v1f32 = 97, // 1 x f32
|
||||
v2f32 = 98, // 2 x f32
|
||||
v3f32 = 99, // 3 x f32
|
||||
v4f32 = 100, // 4 x f32
|
||||
v5f32 = 101, // 5 x f32
|
||||
v8f32 = 102, // 8 x f32
|
||||
v16f32 = 103, // 16 x f32
|
||||
v32f32 = 104, // 32 x f32
|
||||
v64f32 = 105, // 64 x f32
|
||||
v128f32 = 106, // 128 x f32
|
||||
v256f32 = 107, // 256 x f32
|
||||
v512f32 = 108, // 512 x f32
|
||||
v1024f32 = 109, // 1024 x f32
|
||||
v2048f32 = 110, // 2048 x f32
|
||||
v1f64 = 111, // 1 x f64
|
||||
v2f64 = 112, // 2 x f64
|
||||
v4f64 = 113, // 4 x f64
|
||||
v8f64 = 114, // 8 x f64
|
||||
v16f16 = 97, // 16 x f16
|
||||
v32f16 = 98, // 32 x f16
|
||||
v1f32 = 99, // 1 x f32
|
||||
v2f32 = 100, // 2 x f32
|
||||
v3f32 = 101, // 3 x f32
|
||||
v4f32 = 102, // 4 x f32
|
||||
v5f32 = 103, // 5 x f32
|
||||
v8f32 = 104, // 8 x f32
|
||||
v16f32 = 105, // 16 x f32
|
||||
v32f32 = 106, // 32 x f32
|
||||
v64f32 = 107, // 64 x f32
|
||||
v128f32 = 108, // 128 x f32
|
||||
v256f32 = 109, // 256 x f32
|
||||
v512f32 = 110, // 512 x f32
|
||||
v1024f32 = 111, // 1024 x f32
|
||||
v2048f32 = 112, // 2048 x f32
|
||||
v1f64 = 113, // 1 x f64
|
||||
v2f64 = 114, // 2 x f64
|
||||
v4f64 = 115, // 4 x f64
|
||||
v8f64 = 116, // 8 x f64
|
||||
|
||||
nxv2f16 = 115, // n x 2 x f16
|
||||
nxv4f16 = 116, // n x 4 x f16
|
||||
nxv8f16 = 117, // n x 8 x f16
|
||||
nxv1f32 = 118, // n x 1 x f32
|
||||
nxv2f32 = 119, // n x 2 x f32
|
||||
nxv4f32 = 120, // n x 4 x f32
|
||||
nxv8f32 = 121, // n x 8 x f32
|
||||
nxv16f32 = 122, // n x 16 x f32
|
||||
nxv1f64 = 123, // n x 1 x f64
|
||||
nxv2f64 = 124, // n x 2 x f64
|
||||
nxv4f64 = 125, // n x 4 x f64
|
||||
nxv8f64 = 126, // n x 8 x f64
|
||||
nxv2f16 = 117, // n x 2 x f16
|
||||
nxv4f16 = 118, // n x 4 x f16
|
||||
nxv8f16 = 119, // n x 8 x f16
|
||||
nxv1f32 = 120, // n x 1 x f32
|
||||
nxv2f32 = 121, // n x 2 x f32
|
||||
nxv4f32 = 122, // n x 4 x f32
|
||||
nxv8f32 = 123, // n x 8 x f32
|
||||
nxv16f32 = 124, // n x 16 x f32
|
||||
nxv1f64 = 125, // n x 1 x f64
|
||||
nxv2f64 = 126, // n x 2 x f64
|
||||
nxv4f64 = 127, // n x 4 x f64
|
||||
nxv8f64 = 128, // n x 8 x f64
|
||||
|
||||
FIRST_FP_VECTOR_VALUETYPE = v2f16,
|
||||
LAST_FP_VECTOR_VALUETYPE = nxv8f64,
|
||||
|
@ -199,20 +201,20 @@ namespace llvm {
|
|||
FIRST_VECTOR_VALUETYPE = v1i1,
|
||||
LAST_VECTOR_VALUETYPE = nxv8f64,
|
||||
|
||||
x86mmx = 127, // This is an X86 MMX value
|
||||
x86mmx = 129, // This is an X86 MMX value
|
||||
|
||||
Glue = 128, // This glues nodes together during pre-RA sched
|
||||
Glue = 130, // This glues nodes together during pre-RA sched
|
||||
|
||||
isVoid = 129, // This has no value
|
||||
isVoid = 131, // This has no value
|
||||
|
||||
Untyped = 130, // This value takes a register, but has
|
||||
Untyped = 132, // This value takes a register, but has
|
||||
// unspecified type. The register class
|
||||
// will be determined by the opcode.
|
||||
|
||||
exnref = 131, // WebAssembly's exnref type
|
||||
exnref = 133, // WebAssembly's exnref type
|
||||
|
||||
FIRST_VALUETYPE = 1, // This is always the beginning of the list.
|
||||
LAST_VALUETYPE = 132, // This always remains at the end of the list.
|
||||
LAST_VALUETYPE = 134, // This always remains at the end of the list.
|
||||
|
||||
// This is the current maximum for LAST_VALUETYPE.
|
||||
// MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
|
||||
|
@ -341,17 +343,18 @@ namespace llvm {
|
|||
|
||||
/// Return true if this is a 256-bit vector type.
|
||||
bool is256BitVector() const {
|
||||
return (SimpleTy == MVT::v8f32 || SimpleTy == MVT::v4f64 ||
|
||||
SimpleTy == MVT::v32i8 || SimpleTy == MVT::v16i16 ||
|
||||
SimpleTy == MVT::v8i32 || SimpleTy == MVT::v4i64);
|
||||
return (SimpleTy == MVT::v16f16 || SimpleTy == MVT::v8f32 ||
|
||||
SimpleTy == MVT::v4f64 || SimpleTy == MVT::v32i8 ||
|
||||
SimpleTy == MVT::v16i16 || SimpleTy == MVT::v8i32 ||
|
||||
SimpleTy == MVT::v4i64);
|
||||
}
|
||||
|
||||
/// Return true if this is a 512-bit vector type.
|
||||
bool is512BitVector() const {
|
||||
return (SimpleTy == MVT::v16f32 || SimpleTy == MVT::v8f64 ||
|
||||
SimpleTy == MVT::v512i1 || SimpleTy == MVT::v64i8 ||
|
||||
SimpleTy == MVT::v32i16 || SimpleTy == MVT::v16i32 ||
|
||||
SimpleTy == MVT::v8i64);
|
||||
return (SimpleTy == MVT::v32f16 || SimpleTy == MVT::v16f32 ||
|
||||
SimpleTy == MVT::v8f64 || SimpleTy == MVT::v512i1 ||
|
||||
SimpleTy == MVT::v64i8 || SimpleTy == MVT::v32i16 ||
|
||||
SimpleTy == MVT::v16i32 || SimpleTy == MVT::v8i64);
|
||||
}
|
||||
|
||||
/// Return true if this is a 1024-bit vector type.
|
||||
|
@ -483,6 +486,8 @@ namespace llvm {
|
|||
case v3f16:
|
||||
case v4f16:
|
||||
case v8f16:
|
||||
case v16f16:
|
||||
case v32f16:
|
||||
case nxv2f16:
|
||||
case nxv4f16:
|
||||
case nxv8f16: return f16;
|
||||
|
@ -546,6 +551,7 @@ namespace llvm {
|
|||
case v32i16:
|
||||
case v32i32:
|
||||
case v32i64:
|
||||
case v32f16:
|
||||
case v32f32:
|
||||
case nxv32i1:
|
||||
case nxv32i8:
|
||||
|
@ -557,6 +563,7 @@ namespace llvm {
|
|||
case v16i16:
|
||||
case v16i32:
|
||||
case v16i64:
|
||||
case v16f16:
|
||||
case v16f32:
|
||||
case nxv16i1:
|
||||
case nxv16i8:
|
||||
|
@ -741,6 +748,7 @@ namespace llvm {
|
|||
case v16i16:
|
||||
case v8i32:
|
||||
case v4i64:
|
||||
case v16f16:
|
||||
case v8f32:
|
||||
case v4f64:
|
||||
case nxv32i8:
|
||||
|
@ -754,6 +762,7 @@ namespace llvm {
|
|||
case v32i16:
|
||||
case v16i32:
|
||||
case v8i64:
|
||||
case v32f16:
|
||||
case v16f32:
|
||||
case v8f64:
|
||||
case nxv32i16:
|
||||
|
@ -931,6 +940,8 @@ namespace llvm {
|
|||
if (NumElements == 3) return MVT::v3f16;
|
||||
if (NumElements == 4) return MVT::v4f16;
|
||||
if (NumElements == 8) return MVT::v8f16;
|
||||
if (NumElements == 16) return MVT::v16f16;
|
||||
if (NumElements == 32) return MVT::v32f16;
|
||||
break;
|
||||
case MVT::f32:
|
||||
if (NumElements == 1) return MVT::v1f32;
|
||||
|
|
|
@ -330,6 +330,8 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
|
|||
case MVT::v3f16: return VectorType::get(Type::getHalfTy(Context), 3);
|
||||
case MVT::v4f16: return VectorType::get(Type::getHalfTy(Context), 4);
|
||||
case MVT::v8f16: return VectorType::get(Type::getHalfTy(Context), 8);
|
||||
case MVT::v16f16: return VectorType::get(Type::getHalfTy(Context), 16);
|
||||
case MVT::v32f16: return VectorType::get(Type::getHalfTy(Context), 32);
|
||||
case MVT::v1f32: return VectorType::get(Type::getFloatTy(Context), 1);
|
||||
case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2);
|
||||
case MVT::v3f32: return VectorType::get(Type::getFloatTy(Context), 3);
|
||||
|
|
|
@ -157,6 +157,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
|
|||
setLoadExtAction(ISD::EXTLOAD, MVT::v3f32, MVT::v3f16, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v32f32, MVT::v32f16, Expand);
|
||||
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
|
||||
|
@ -219,6 +221,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
|
|||
setTruncStoreAction(MVT::v3f32, MVT::v3f16, Expand);
|
||||
setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand);
|
||||
setTruncStoreAction(MVT::v8f32, MVT::v8f16, Expand);
|
||||
setTruncStoreAction(MVT::v16f32, MVT::v16f16, Expand);
|
||||
setTruncStoreAction(MVT::v32f32, MVT::v32f16, Expand);
|
||||
|
||||
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
|
||||
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
|
||||
|
|
Loading…
Reference in New Issue