MVT: Add v3i16/v3f16 vectors

AMDGPU has some buffer intrinsics which theoretically could use
these types. Some of the generated tables include the 3- and 4-element
vector versions of these intrinsics rounded to 64 bits, which is
ambiguous. Add v3i16 and v3f16 to help the tables disambiguate them.

The assertion change is for the path that odd-sized vectors now take on
R600: v3i16 is widened to v4i16, which then needs to be promoted to v4i32.

llvm-svn: 369038
This commit is contained in:
Matt Arsenault 2019-08-15 18:58:25 +00:00
parent d202899431
commit 1f2b727298
12 changed files with 277 additions and 321 deletions

View File

@ -55,112 +55,114 @@ def v256i8 : ValueType<2048,32>; //256 x i8 vector value
def v1i16 : ValueType<16 , 33>; // 1 x i16 vector value
def v2i16 : ValueType<32 , 34>; // 2 x i16 vector value
def v4i16 : ValueType<64 , 35>; // 4 x i16 vector value
def v8i16 : ValueType<128, 36>; // 8 x i16 vector value
def v16i16 : ValueType<256, 37>; // 16 x i16 vector value
def v32i16 : ValueType<512, 38>; // 32 x i16 vector value
def v64i16 : ValueType<1024,39>; // 64 x i16 vector value
def v128i16: ValueType<2048,40>; //128 x i16 vector value
def v3i16 : ValueType<48 , 35>; // 3 x i16 vector value
def v4i16 : ValueType<64 , 36>; // 4 x i16 vector value
def v8i16 : ValueType<128, 37>; // 8 x i16 vector value
def v16i16 : ValueType<256, 38>; // 16 x i16 vector value
def v32i16 : ValueType<512, 39>; // 32 x i16 vector value
def v64i16 : ValueType<1024,40>; // 64 x i16 vector value
def v128i16: ValueType<2048,41>; //128 x i16 vector value
def v1i32 : ValueType<32 , 41>; // 1 x i32 vector value
def v2i32 : ValueType<64 , 42>; // 2 x i32 vector value
def v3i32 : ValueType<96 , 43>; // 3 x i32 vector value
def v4i32 : ValueType<128, 44>; // 4 x i32 vector value
def v5i32 : ValueType<160, 45>; // 5 x i32 vector value
def v8i32 : ValueType<256, 46>; // 8 x i32 vector value
def v16i32 : ValueType<512, 47>; // 16 x i32 vector value
def v32i32 : ValueType<1024,48>; // 32 x i32 vector value
def v64i32 : ValueType<2048,49>; // 64 x i32 vector value
def v128i32 : ValueType<4096,50>; // 128 x i32 vector value
def v256i32 : ValueType<8192,51>; // 256 x i32 vector value
def v512i32 : ValueType<16384,52>; // 512 x i32 vector value
def v1024i32 : ValueType<32768,53>; // 1024 x i32 vector value
def v2048i32 : ValueType<65536,54>; // 2048 x i32 vector value
def v1i32 : ValueType<32 , 42>; // 1 x i32 vector value
def v2i32 : ValueType<64 , 43>; // 2 x i32 vector value
def v3i32 : ValueType<96 , 44>; // 3 x i32 vector value
def v4i32 : ValueType<128, 45>; // 4 x i32 vector value
def v5i32 : ValueType<160, 46>; // 5 x i32 vector value
def v8i32 : ValueType<256, 47>; // 8 x i32 vector value
def v16i32 : ValueType<512, 48>; // 16 x i32 vector value
def v32i32 : ValueType<1024,49>; // 32 x i32 vector value
def v64i32 : ValueType<2048,50>; // 64 x i32 vector value
def v128i32 : ValueType<4096,51>; // 128 x i32 vector value
def v256i32 : ValueType<8192,52>; // 256 x i32 vector value
def v512i32 : ValueType<16384,53>; // 512 x i32 vector value
def v1024i32 : ValueType<32768,54>; // 1024 x i32 vector value
def v2048i32 : ValueType<65536,55>; // 2048 x i32 vector value
def v1i64 : ValueType<64 , 55>; // 1 x i64 vector value
def v2i64 : ValueType<128, 56>; // 2 x i64 vector value
def v4i64 : ValueType<256, 57>; // 4 x i64 vector value
def v8i64 : ValueType<512, 58>; // 8 x i64 vector value
def v16i64 : ValueType<1024,59>; // 16 x i64 vector value
def v32i64 : ValueType<2048,60>; // 32 x i64 vector value
def v1i64 : ValueType<64 , 56>; // 1 x i64 vector value
def v2i64 : ValueType<128, 57>; // 2 x i64 vector value
def v4i64 : ValueType<256, 58>; // 4 x i64 vector value
def v8i64 : ValueType<512, 59>; // 8 x i64 vector value
def v16i64 : ValueType<1024,60>; // 16 x i64 vector value
def v32i64 : ValueType<2048,61>; // 32 x i64 vector value
def v1i128 : ValueType<128, 61>; // 1 x i128 vector value
def v1i128 : ValueType<128, 62>; // 1 x i128 vector value
def nxv1i1 : ValueType<1, 62>; // n x 1 x i1 vector value
def nxv2i1 : ValueType<2, 63>; // n x 2 x i1 vector value
def nxv4i1 : ValueType<4, 64>; // n x 4 x i1 vector value
def nxv8i1 : ValueType<8, 65>; // n x 8 x i1 vector value
def nxv16i1 : ValueType<16, 66>; // n x 16 x i1 vector value
def nxv32i1 : ValueType<32, 67>; // n x 32 x i1 vector value
def nxv1i1 : ValueType<1, 63>; // n x 1 x i1 vector value
def nxv2i1 : ValueType<2, 64>; // n x 2 x i1 vector value
def nxv4i1 : ValueType<4, 65>; // n x 4 x i1 vector value
def nxv8i1 : ValueType<8, 66>; // n x 8 x i1 vector value
def nxv16i1 : ValueType<16, 67>; // n x 16 x i1 vector value
def nxv32i1 : ValueType<32, 68>; // n x 32 x i1 vector value
def nxv1i8 : ValueType<8, 68>; // n x 1 x i8 vector value
def nxv2i8 : ValueType<16, 69>; // n x 2 x i8 vector value
def nxv4i8 : ValueType<32, 70>; // n x 4 x i8 vector value
def nxv8i8 : ValueType<64, 71>; // n x 8 x i8 vector value
def nxv16i8 : ValueType<128, 72>; // n x 16 x i8 vector value
def nxv32i8 : ValueType<256, 73>; // n x 32 x i8 vector value
def nxv1i8 : ValueType<8, 69>; // n x 1 x i8 vector value
def nxv2i8 : ValueType<16, 70>; // n x 2 x i8 vector value
def nxv4i8 : ValueType<32, 71>; // n x 4 x i8 vector value
def nxv8i8 : ValueType<64, 72>; // n x 8 x i8 vector value
def nxv16i8 : ValueType<128, 73>; // n x 16 x i8 vector value
def nxv32i8 : ValueType<256, 74>; // n x 32 x i8 vector value
def nxv1i16 : ValueType<16, 74>; // n x 1 x i16 vector value
def nxv2i16 : ValueType<32, 75>; // n x 2 x i16 vector value
def nxv4i16 : ValueType<64, 76>; // n x 4 x i16 vector value
def nxv8i16 : ValueType<128, 77>; // n x 8 x i16 vector value
def nxv16i16: ValueType<256, 78>; // n x 16 x i16 vector value
def nxv32i16: ValueType<512, 79>; // n x 32 x i16 vector value
def nxv1i16 : ValueType<16, 75>; // n x 1 x i16 vector value
def nxv2i16 : ValueType<32, 76>; // n x 2 x i16 vector value
def nxv4i16 : ValueType<64, 77>; // n x 4 x i16 vector value
def nxv8i16 : ValueType<128, 78>; // n x 8 x i16 vector value
def nxv16i16: ValueType<256, 79>; // n x 16 x i16 vector value
def nxv32i16: ValueType<512, 80>; // n x 32 x i16 vector value
def nxv1i32 : ValueType<32, 80>; // n x 1 x i32 vector value
def nxv2i32 : ValueType<64, 81>; // n x 2 x i32 vector value
def nxv4i32 : ValueType<128, 82>; // n x 4 x i32 vector value
def nxv8i32 : ValueType<256, 83>; // n x 8 x i32 vector value
def nxv16i32: ValueType<512, 84>; // n x 16 x i32 vector value
def nxv32i32: ValueType<1024,85>; // n x 32 x i32 vector value
def nxv1i32 : ValueType<32, 81>; // n x 1 x i32 vector value
def nxv2i32 : ValueType<64, 82>; // n x 2 x i32 vector value
def nxv4i32 : ValueType<128, 83>; // n x 4 x i32 vector value
def nxv8i32 : ValueType<256, 84>; // n x 8 x i32 vector value
def nxv16i32: ValueType<512, 85>; // n x 16 x i32 vector value
def nxv32i32: ValueType<1024,86>; // n x 32 x i32 vector value
def nxv1i64 : ValueType<64, 86>; // n x 1 x i64 vector value
def nxv2i64 : ValueType<128, 87>; // n x 2 x i64 vector value
def nxv4i64 : ValueType<256, 88>; // n x 4 x i64 vector value
def nxv8i64 : ValueType<512, 89>; // n x 8 x i64 vector value
def nxv16i64: ValueType<1024,90>; // n x 16 x i64 vector value
def nxv32i64: ValueType<2048,91>; // n x 32 x i64 vector value
def nxv1i64 : ValueType<64, 87>; // n x 1 x i64 vector value
def nxv2i64 : ValueType<128, 88>; // n x 2 x i64 vector value
def nxv4i64 : ValueType<256, 89>; // n x 4 x i64 vector value
def nxv8i64 : ValueType<512, 90>; // n x 8 x i64 vector value
def nxv16i64: ValueType<1024,91>; // n x 16 x i64 vector value
def nxv32i64: ValueType<2048,92>; // n x 32 x i64 vector value
def v2f16 : ValueType<32 , 92>; // 2 x f16 vector value
def v4f16 : ValueType<64 , 93>; // 4 x f16 vector value
def v8f16 : ValueType<128, 94>; // 8 x f16 vector value
def v1f32 : ValueType<32 , 95>; // 1 x f32 vector value
def v2f32 : ValueType<64 , 96>; // 2 x f32 vector value
def v3f32 : ValueType<96 , 97>; // 3 x f32 vector value
def v4f32 : ValueType<128, 98>; // 4 x f32 vector value
def v5f32 : ValueType<160, 99>; // 5 x f32 vector value
def v8f32 : ValueType<256, 100>; // 8 x f32 vector value
def v16f32 : ValueType<512, 101>; // 16 x f32 vector value
def v32f32 : ValueType<1024, 102>; // 32 x f32 vector value
def v64f32 : ValueType<2048, 103>; // 64 x f32 vector value
def v128f32 : ValueType<4096, 104>; // 128 x f32 vector value
def v256f32 : ValueType<8192, 105>; // 256 x f32 vector value
def v512f32 : ValueType<16384, 106>; // 512 x f32 vector value
def v1024f32 : ValueType<32768, 107>; // 1024 x f32 vector value
def v2048f32 : ValueType<65536, 108>; // 2048 x f32 vector value
def v1f64 : ValueType<64, 109>; // 1 x f64 vector value
def v2f64 : ValueType<128, 110>; // 2 x f64 vector value
def v4f64 : ValueType<256, 111>; // 4 x f64 vector value
def v8f64 : ValueType<512, 112>; // 8 x f64 vector value
def v2f16 : ValueType<32 , 93>; // 2 x f16 vector value
def v3f16 : ValueType<48 , 94>; // 3 x f16 vector value
def v4f16 : ValueType<64 , 95>; // 4 x f16 vector value
def v8f16 : ValueType<128, 96>; // 8 x f16 vector value
def v1f32 : ValueType<32 , 97>; // 1 x f32 vector value
def v2f32 : ValueType<64 , 98>; // 2 x f32 vector value
def v3f32 : ValueType<96 , 99>; // 3 x f32 vector value
def v4f32 : ValueType<128, 100>; // 4 x f32 vector value
def v5f32 : ValueType<160, 101>; // 5 x f32 vector value
def v8f32 : ValueType<256, 102>; // 8 x f32 vector value
def v16f32 : ValueType<512, 103>; // 16 x f32 vector value
def v32f32 : ValueType<1024, 104>; // 32 x f32 vector value
def v64f32 : ValueType<2048, 105>; // 64 x f32 vector value
def v128f32 : ValueType<4096, 106>; // 128 x f32 vector value
def v256f32 : ValueType<8192, 107>; // 256 x f32 vector value
def v512f32 : ValueType<16384, 108>; // 512 x f32 vector value
def v1024f32 : ValueType<32768, 109>; // 1024 x f32 vector value
def v2048f32 : ValueType<65536, 110>; // 2048 x f32 vector value
def v1f64 : ValueType<64, 111>; // 1 x f64 vector value
def v2f64 : ValueType<128, 112>; // 2 x f64 vector value
def v4f64 : ValueType<256, 113>; // 4 x f64 vector value
def v8f64 : ValueType<512, 114>; // 8 x f64 vector value
def nxv2f16 : ValueType<32 , 113>; // n x 2 x f16 vector value
def nxv4f16 : ValueType<64 , 114>; // n x 4 x f16 vector value
def nxv8f16 : ValueType<128, 115>; // n x 8 x f16 vector value
def nxv1f32 : ValueType<32 , 116>; // n x 1 x f32 vector value
def nxv2f32 : ValueType<64 , 117>; // n x 2 x f32 vector value
def nxv4f32 : ValueType<128, 118>; // n x 4 x f32 vector value
def nxv8f32 : ValueType<256, 119>; // n x 8 x f32 vector value
def nxv16f32 : ValueType<512, 120>; // n x 16 x f32 vector value
def nxv1f64 : ValueType<64, 121>; // n x 1 x f64 vector value
def nxv2f64 : ValueType<128, 122>; // n x 2 x f64 vector value
def nxv4f64 : ValueType<256, 123>; // n x 4 x f64 vector value
def nxv8f64 : ValueType<512, 124>; // n x 8 x f64 vector value
def nxv2f16 : ValueType<32 , 115>; // n x 2 x f16 vector value
def nxv4f16 : ValueType<64 , 116>; // n x 4 x f16 vector value
def nxv8f16 : ValueType<128, 117>; // n x 8 x f16 vector value
def nxv1f32 : ValueType<32 , 118>; // n x 1 x f32 vector value
def nxv2f32 : ValueType<64 , 119>; // n x 2 x f32 vector value
def nxv4f32 : ValueType<128, 120>; // n x 4 x f32 vector value
def nxv8f32 : ValueType<256, 121>; // n x 8 x f32 vector value
def nxv16f32 : ValueType<512, 122>; // n x 16 x f32 vector value
def nxv1f64 : ValueType<64, 123>; // n x 1 x f64 vector value
def nxv2f64 : ValueType<128, 124>; // n x 2 x f64 vector value
def nxv4f64 : ValueType<256, 125>; // n x 4 x f64 vector value
def nxv8f64 : ValueType<512, 126>; // n x 8 x f64 vector value
def x86mmx : ValueType<64 , 125>; // X86 MMX value
def FlagVT : ValueType<0 , 126>; // Pre-RA sched glue
def isVoid : ValueType<0 , 127>; // Produces no value
def untyped: ValueType<8 , 128>; // Produces an untyped value
def exnref: ValueType<0, 129>; // WebAssembly's exnref type
def x86mmx : ValueType<64 , 127>; // X86 MMX value
def FlagVT : ValueType<0 , 128>; // Pre-RA sched glue
def isVoid : ValueType<0 , 129>; // Produces no value
def untyped: ValueType<8 , 130>; // Produces an untyped value
def exnref: ValueType<0, 131>; // WebAssembly's exnref type
def token : ValueType<0 , 248>; // TokenTy
def MetadataVT: ValueType<0, 249>; // Metadata

View File

@ -80,72 +80,73 @@ namespace llvm {
v1i16 = 33, // 1 x i16
v2i16 = 34, // 2 x i16
v4i16 = 35, // 4 x i16
v8i16 = 36, // 8 x i16
v16i16 = 37, // 16 x i16
v32i16 = 38, // 32 x i16
v64i16 = 39, // 64 x i16
v128i16 = 40, //128 x i16
v3i16 = 35, // 3 x i16
v4i16 = 36, // 4 x i16
v8i16 = 37, // 8 x i16
v16i16 = 38, // 16 x i16
v32i16 = 39, // 32 x i16
v64i16 = 40, // 64 x i16
v128i16 = 41, //128 x i16
v1i32 = 41, // 1 x i32
v2i32 = 42, // 2 x i32
v3i32 = 43, // 3 x i32
v4i32 = 44, // 4 x i32
v5i32 = 45, // 5 x i32
v8i32 = 46, // 8 x i32
v16i32 = 47, // 16 x i32
v32i32 = 48, // 32 x i32
v64i32 = 49, // 64 x i32
v128i32 = 50, // 128 x i32
v256i32 = 51, // 256 x i32
v512i32 = 52, // 512 x i32
v1024i32 = 53, // 1024 x i32
v2048i32 = 54, // 2048 x i32
v1i32 = 42, // 1 x i32
v2i32 = 43, // 2 x i32
v3i32 = 44, // 3 x i32
v4i32 = 45, // 4 x i32
v5i32 = 46, // 5 x i32
v8i32 = 47, // 8 x i32
v16i32 = 48, // 16 x i32
v32i32 = 49, // 32 x i32
v64i32 = 50, // 64 x i32
v128i32 = 51, // 128 x i32
v256i32 = 52, // 256 x i32
v512i32 = 53, // 512 x i32
v1024i32 = 54, // 1024 x i32
v2048i32 = 55, // 2048 x i32
v1i64 = 55, // 1 x i64
v2i64 = 56, // 2 x i64
v4i64 = 57, // 4 x i64
v8i64 = 58, // 8 x i64
v16i64 = 59, // 16 x i64
v32i64 = 60, // 32 x i64
v1i64 = 56, // 1 x i64
v2i64 = 57, // 2 x i64
v4i64 = 58, // 4 x i64
v8i64 = 59, // 8 x i64
v16i64 = 60, // 16 x i64
v32i64 = 61, // 32 x i64
v1i128 = 61, // 1 x i128
v1i128 = 62, // 1 x i128
// Scalable integer types
nxv1i1 = 62, // n x 1 x i1
nxv2i1 = 63, // n x 2 x i1
nxv4i1 = 64, // n x 4 x i1
nxv8i1 = 65, // n x 8 x i1
nxv16i1 = 66, // n x 16 x i1
nxv32i1 = 67, // n x 32 x i1
nxv1i1 = 63, // n x 1 x i1
nxv2i1 = 64, // n x 2 x i1
nxv4i1 = 65, // n x 4 x i1
nxv8i1 = 66, // n x 8 x i1
nxv16i1 = 67, // n x 16 x i1
nxv32i1 = 68, // n x 32 x i1
nxv1i8 = 68, // n x 1 x i8
nxv2i8 = 69, // n x 2 x i8
nxv4i8 = 70, // n x 4 x i8
nxv8i8 = 71, // n x 8 x i8
nxv16i8 = 72, // n x 16 x i8
nxv32i8 = 73, // n x 32 x i8
nxv1i8 = 69, // n x 1 x i8
nxv2i8 = 70, // n x 2 x i8
nxv4i8 = 71, // n x 4 x i8
nxv8i8 = 72, // n x 8 x i8
nxv16i8 = 73, // n x 16 x i8
nxv32i8 = 74, // n x 32 x i8
nxv1i16 = 74, // n x 1 x i16
nxv2i16 = 75, // n x 2 x i16
nxv4i16 = 76, // n x 4 x i16
nxv8i16 = 77, // n x 8 x i16
nxv16i16 = 78, // n x 16 x i16
nxv32i16 = 79, // n x 32 x i16
nxv1i16 = 75, // n x 1 x i16
nxv2i16 = 76, // n x 2 x i16
nxv4i16 = 77, // n x 4 x i16
nxv8i16 = 78, // n x 8 x i16
nxv16i16 = 79, // n x 16 x i16
nxv32i16 = 80, // n x 32 x i16
nxv1i32 = 80, // n x 1 x i32
nxv2i32 = 81, // n x 2 x i32
nxv4i32 = 82, // n x 4 x i32
nxv8i32 = 83, // n x 8 x i32
nxv16i32 = 84, // n x 16 x i32
nxv32i32 = 85, // n x 32 x i32
nxv1i32 = 81, // n x 1 x i32
nxv2i32 = 82, // n x 2 x i32
nxv4i32 = 83, // n x 4 x i32
nxv8i32 = 84, // n x 8 x i32
nxv16i32 = 85, // n x 16 x i32
nxv32i32 = 86, // n x 32 x i32
nxv1i64 = 86, // n x 1 x i64
nxv2i64 = 87, // n x 2 x i64
nxv4i64 = 88, // n x 4 x i64
nxv8i64 = 89, // n x 8 x i64
nxv16i64 = 90, // n x 16 x i64
nxv32i64 = 91, // n x 32 x i64
nxv1i64 = 87, // n x 1 x i64
nxv2i64 = 88, // n x 2 x i64
nxv4i64 = 89, // n x 4 x i64
nxv8i64 = 90, // n x 8 x i64
nxv16i64 = 91, // n x 16 x i64
nxv32i64 = 92, // n x 32 x i64
FIRST_INTEGER_VECTOR_VALUETYPE = v1i1,
LAST_INTEGER_VECTOR_VALUETYPE = nxv32i64,
@ -153,40 +154,41 @@ namespace llvm {
FIRST_INTEGER_SCALABLE_VALUETYPE = nxv1i1,
LAST_INTEGER_SCALABLE_VALUETYPE = nxv32i64,
v2f16 = 92, // 2 x f16
v4f16 = 93, // 4 x f16
v8f16 = 94, // 8 x f16
v1f32 = 95, // 1 x f32
v2f32 = 96, // 2 x f32
v3f32 = 97, // 3 x f32
v4f32 = 98, // 4 x f32
v5f32 = 99, // 5 x f32
v8f32 = 100, // 8 x f32
v16f32 = 101, // 16 x f32
v32f32 = 102, // 32 x f32
v64f32 = 103, // 64 x f32
v128f32 = 104, // 128 x f32
v256f32 = 105, // 256 x f32
v512f32 = 106, // 512 x f32
v1024f32 = 107, // 1024 x f32
v2048f32 = 108, // 2048 x f32
v1f64 = 109, // 1 x f64
v2f64 = 110, // 2 x f64
v4f64 = 111, // 4 x f64
v8f64 = 112, // 8 x f64
v2f16 = 93, // 2 x f16
v3f16 = 94, // 3 x f16
v4f16 = 95, // 4 x f16
v8f16 = 96, // 8 x f16
v1f32 = 97, // 1 x f32
v2f32 = 98, // 2 x f32
v3f32 = 99, // 3 x f32
v4f32 = 100, // 4 x f32
v5f32 = 101, // 5 x f32
v8f32 = 102, // 8 x f32
v16f32 = 103, // 16 x f32
v32f32 = 104, // 32 x f32
v64f32 = 105, // 64 x f32
v128f32 = 106, // 128 x f32
v256f32 = 107, // 256 x f32
v512f32 = 108, // 512 x f32
v1024f32 = 109, // 1024 x f32
v2048f32 = 110, // 2048 x f32
v1f64 = 111, // 1 x f64
v2f64 = 112, // 2 x f64
v4f64 = 113, // 4 x f64
v8f64 = 114, // 8 x f64
nxv2f16 = 113, // n x 2 x f16
nxv4f16 = 114, // n x 4 x f16
nxv8f16 = 115, // n x 8 x f16
nxv1f32 = 116, // n x 1 x f32
nxv2f32 = 117, // n x 2 x f32
nxv4f32 = 118, // n x 4 x f32
nxv8f32 = 119, // n x 8 x f32
nxv16f32 = 120, // n x 16 x f32
nxv1f64 = 121, // n x 1 x f64
nxv2f64 = 122, // n x 2 x f64
nxv4f64 = 123, // n x 4 x f64
nxv8f64 = 124, // n x 8 x f64
nxv2f16 = 115, // n x 2 x f16
nxv4f16 = 116, // n x 4 x f16
nxv8f16 = 117, // n x 8 x f16
nxv1f32 = 118, // n x 1 x f32
nxv2f32 = 119, // n x 2 x f32
nxv4f32 = 120, // n x 4 x f32
nxv8f32 = 121, // n x 8 x f32
nxv16f32 = 122, // n x 16 x f32
nxv1f64 = 123, // n x 1 x f64
nxv2f64 = 124, // n x 2 x f64
nxv4f64 = 125, // n x 4 x f64
nxv8f64 = 126, // n x 8 x f64
FIRST_FP_VECTOR_VALUETYPE = v2f16,
LAST_FP_VECTOR_VALUETYPE = nxv8f64,
@ -197,20 +199,20 @@ namespace llvm {
FIRST_VECTOR_VALUETYPE = v1i1,
LAST_VECTOR_VALUETYPE = nxv8f64,
x86mmx = 125, // This is an X86 MMX value
x86mmx = 127, // This is an X86 MMX value
Glue = 126, // This glues nodes together during pre-RA sched
Glue = 128, // This glues nodes together during pre-RA sched
isVoid = 127, // This has no value
isVoid = 129, // This has no value
Untyped = 128, // This value takes a register, but has
Untyped = 130, // This value takes a register, but has
// unspecified type. The register class
// will be determined by the opcode.
exnref = 129, // WebAssembly's exnref type
exnref = 131, // WebAssembly's exnref type
FIRST_VALUETYPE = 1, // This is always the beginning of the list.
LAST_VALUETYPE = 130, // This always remains at the end of the list.
LAST_VALUETYPE = 132, // This always remains at the end of the list.
// This is the current maximum for LAST_VALUETYPE.
// MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
@ -431,6 +433,7 @@ namespace llvm {
case nxv32i8: return i8;
case v1i16:
case v2i16:
case v3i16:
case v4i16:
case v8i16:
case v16i16:
@ -477,6 +480,7 @@ namespace llvm {
case nxv32i64: return i64;
case v1i128: return i128;
case v2f16:
case v3f16:
case v4f16:
case v8f16:
case nxv2f16:
@ -594,7 +598,9 @@ namespace llvm {
case nxv4f16:
case nxv4f32:
case nxv4f64: return 4;
case v3i16:
case v3i32:
case v3f16:
case v3f32: return 3;
case v2i1:
case v2i8:
@ -687,6 +693,8 @@ namespace llvm {
case nxv1i32:
case nxv2f16:
case nxv1f32: return 32;
case v3i16:
case v3f16: return 48;
case x86mmx:
case f64 :
case i64 :
@ -883,6 +891,7 @@ namespace llvm {
case MVT::i16:
if (NumElements == 1) return MVT::v1i16;
if (NumElements == 2) return MVT::v2i16;
if (NumElements == 3) return MVT::v3i16;
if (NumElements == 4) return MVT::v4i16;
if (NumElements == 8) return MVT::v8i16;
if (NumElements == 16) return MVT::v16i16;
@ -919,6 +928,7 @@ namespace llvm {
break;
case MVT::f16:
if (NumElements == 2) return MVT::v2f16;
if (NumElements == 3) return MVT::v3f16;
if (NumElements == 4) return MVT::v4f16;
if (NumElements == 8) return MVT::v8f16;
break;

View File

@ -824,7 +824,8 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT);
assert((LA == TypeLegal || LA == TypeSoftenFloat ||
ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger) &&
(NVT.isVector() ||
ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger)) &&
"Promote may not follow Expand or Promote");
if (LA == TypeSplitVector)

View File

@ -157,6 +157,7 @@ std::string EVT::getEVTString() const {
case MVT::v256i8: return "v256i8";
case MVT::v1i16: return "v1i16";
case MVT::v2i16: return "v2i16";
case MVT::v3i16: return "v3i16";
case MVT::v4i16: return "v4i16";
case MVT::v8i16: return "v8i16";
case MVT::v16i16: return "v16i16";
@ -187,6 +188,7 @@ std::string EVT::getEVTString() const {
case MVT::v1f32: return "v1f32";
case MVT::v2f32: return "v2f32";
case MVT::v2f16: return "v2f16";
case MVT::v3f16: return "v3f16";
case MVT::v4f16: return "v4f16";
case MVT::v8f16: return "v8f16";
case MVT::v3f32: return "v3f32";
@ -296,6 +298,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v256i8: return VectorType::get(Type::getInt8Ty(Context), 256);
case MVT::v1i16: return VectorType::get(Type::getInt16Ty(Context), 1);
case MVT::v2i16: return VectorType::get(Type::getInt16Ty(Context), 2);
case MVT::v3i16: return VectorType::get(Type::getInt16Ty(Context), 3);
case MVT::v4i16: return VectorType::get(Type::getInt16Ty(Context), 4);
case MVT::v8i16: return VectorType::get(Type::getInt16Ty(Context), 8);
case MVT::v16i16: return VectorType::get(Type::getInt16Ty(Context), 16);
@ -324,6 +327,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v32i64: return VectorType::get(Type::getInt64Ty(Context), 32);
case MVT::v1i128: return VectorType::get(Type::getInt128Ty(Context), 1);
case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2);
case MVT::v3f16: return VectorType::get(Type::getHalfTy(Context), 3);
case MVT::v4f16: return VectorType::get(Type::getHalfTy(Context), 4);
case MVT::v8f16: return VectorType::get(Type::getHalfTy(Context), 8);
case MVT::v1f32: return VectorType::get(Type::getFloatTy(Context), 1);

View File

@ -144,6 +144,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i16, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v3i16, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v3i16, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v3i16, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i16, Expand);
@ -151,6 +154,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v3f32, MVT::v3f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand);
@ -212,6 +216,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand);
setTruncStoreAction(MVT::v3f32, MVT::v3f16, Expand);
setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand);
setTruncStoreAction(MVT::v8f32, MVT::v8f16, Expand);

View File

@ -178,6 +178,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STORE, MVT::v32i32, Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
setTruncStoreAction(MVT::v3i32, MVT::v3i16, Expand);
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
@ -215,6 +216,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v3i16, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Custom);

View File

@ -8,58 +8,36 @@
define <3 x i16> @zext_i8(<3 x i8>) {
; SSE3-LABEL: zext_i8:
; SSE3: # %bb.0:
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; SSE3-NEXT: movd %eax, %xmm0
; SSE3-NEXT: pinsrw $1, %edx, %xmm0
; SSE3-NEXT: pinsrw $2, %ecx, %xmm0
; SSE3-NEXT: movd %xmm0, %eax
; SSE3-NEXT: # kill: def $ax killed $ax killed $eax
; SSE3-NEXT: # kill: def $dx killed $dx killed $edx
; SSE3-NEXT: # kill: def $cx killed $cx killed $ecx
; SSE3-NEXT: movd %edx, %xmm0
; SSE3-NEXT: pinsrw $1, %ecx, %xmm0
; SSE3-NEXT: pinsrw $2, %eax, %xmm0
; SSE3-NEXT: retl
;
; SSE41-LABEL: zext_i8:
; SSE41: # %bb.0:
; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
; SSE41-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm0
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: pextrw $1, %xmm0, %edx
; SSE41-NEXT: pextrw $2, %xmm0, %ecx
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: # kill: def $dx killed $dx killed $edx
; SSE41-NEXT: # kill: def $cx killed $cx killed $ecx
; SSE41-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0
; SSE41-NEXT: retl
;
; AVX-32-LABEL: zext_i8:
; AVX-32: # %bb.0:
; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpinsrb $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX-32-NEXT: vpinsrb $0, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: vpinsrb $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-32-NEXT: vmovd %xmm0, %eax
; AVX-32-NEXT: vpextrw $1, %xmm0, %edx
; AVX-32-NEXT: vpextrw $2, %xmm0, %ecx
; AVX-32-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-32-NEXT: # kill: def $dx killed $dx killed $edx
; AVX-32-NEXT: # kill: def $cx killed $cx killed $ecx
; AVX-32-NEXT: vpinsrb $4, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: retl
;
; AVX-64-LABEL: zext_i8:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vmovd %edi, %xmm0
; AVX-64-NEXT: vpinsrb $1, %esi, %xmm0, %xmm0
; AVX-64-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; AVX-64-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-64-NEXT: vmovd %xmm0, %eax
; AVX-64-NEXT: vpextrw $1, %xmm0, %edx
; AVX-64-NEXT: vpextrw $2, %xmm0, %ecx
; AVX-64-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-64-NEXT: # kill: def $dx killed $dx killed $edx
; AVX-64-NEXT: # kill: def $cx killed $cx killed $ecx
; AVX-64-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX-64-NEXT: vpinsrb $0, %edi, %xmm0, %xmm0
; AVX-64-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0
; AVX-64-NEXT: vpinsrb $4, %edx, %xmm0, %xmm0
; AVX-64-NEXT: retq
%2 = zext <3 x i8> %0 to <3 x i16>
ret <3 x i16> %2
@ -68,64 +46,42 @@ define <3 x i16> @zext_i8(<3 x i8>) {
define <3 x i16> @sext_i8(<3 x i8>) {
; SSE3-LABEL: sext_i8:
; SSE3: # %bb.0:
; SSE3-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE3-NEXT: shll $8, %eax
; SSE3-NEXT: movl {{[0-9]+}}(%esp), %ecx
; SSE3-NEXT: shll $8, %ecx
; SSE3-NEXT: movd %ecx, %xmm0
; SSE3-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; SSE3-NEXT: movd %eax, %xmm0
; SSE3-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; SSE3-NEXT: pinsrw $1, %eax, %xmm0
; SSE3-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE3-NEXT: shll $8, %eax
; SSE3-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; SSE3-NEXT: pinsrw $2, %eax, %xmm0
; SSE3-NEXT: psraw $8, %xmm0
; SSE3-NEXT: movd %xmm0, %eax
; SSE3-NEXT: pextrw $1, %xmm0, %edx
; SSE3-NEXT: pextrw $2, %xmm0, %ecx
; SSE3-NEXT: # kill: def $ax killed $ax killed $eax
; SSE3-NEXT: # kill: def $dx killed $dx killed $edx
; SSE3-NEXT: # kill: def $cx killed $cx killed $ecx
; SSE3-NEXT: retl
;
; SSE41-LABEL: sext_i8:
; SSE41: # %bb.0:
; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0
; SSE41-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm0
; SSE41-NEXT: pmovsxbw %xmm0, %xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: pextrw $1, %xmm0, %edx
; SSE41-NEXT: pextrw $2, %xmm0, %ecx
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: # kill: def $dx killed $dx killed $edx
; SSE41-NEXT: # kill: def $cx killed $cx killed $ecx
; SSE41-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; SSE41-NEXT: movd %eax, %xmm0
; SSE41-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; SSE41-NEXT: pinsrw $1, %eax, %xmm0
; SSE41-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; SSE41-NEXT: pinsrw $2, %eax, %xmm0
; SSE41-NEXT: retl
;
; AVX-32-LABEL: sext_i8:
; AVX-32: # %bb.0:
; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpinsrb $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: vpinsrb $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: vpmovsxbw %xmm0, %xmm0
; AVX-32-NEXT: vmovd %xmm0, %eax
; AVX-32-NEXT: vpextrw $1, %xmm0, %edx
; AVX-32-NEXT: vpextrw $2, %xmm0, %ecx
; AVX-32-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-32-NEXT: # kill: def $dx killed $dx killed $edx
; AVX-32-NEXT: # kill: def $cx killed $cx killed $ecx
; AVX-32-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; AVX-32-NEXT: vmovd %eax, %xmm0
; AVX-32-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; AVX-32-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; AVX-32-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; AVX-32-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
; AVX-32-NEXT: retl
;
; AVX-64-LABEL: sext_i8:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vmovd %edi, %xmm0
; AVX-64-NEXT: vpinsrb $1, %esi, %xmm0, %xmm0
; AVX-64-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; AVX-64-NEXT: vpmovsxbw %xmm0, %xmm0
; AVX-64-NEXT: vmovd %xmm0, %eax
; AVX-64-NEXT: vpextrw $1, %xmm0, %edx
; AVX-64-NEXT: vpextrw $2, %xmm0, %ecx
; AVX-64-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-64-NEXT: # kill: def $dx killed $dx killed $edx
; AVX-64-NEXT: # kill: def $cx killed $cx killed $ecx
; AVX-64-NEXT: movsbl %sil, %eax
; AVX-64-NEXT: movsbl %dil, %ecx
; AVX-64-NEXT: vmovd %ecx, %xmm0
; AVX-64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; AVX-64-NEXT: movsbl %dl, %eax
; AVX-64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
; AVX-64-NEXT: retq
%2 = sext <3 x i8> %0 to <3 x i16>
ret <3 x i16> %2

View File

@ -27,22 +27,13 @@ define <8 x i32> @a(<8 x i16> %a) nounwind {
define <3 x i32> @b(<3 x i16> %a) nounwind {
; CHECK-LIN-LABEL: b:
; CHECK-LIN: # %bb.0:
; CHECK-LIN-NEXT: pxor %xmm0, %xmm0
; CHECK-LIN-NEXT: pinsrw $1, %edi, %xmm0
; CHECK-LIN-NEXT: pinsrw $3, %esi, %xmm0
; CHECK-LIN-NEXT: pinsrw $5, %edx, %xmm0
; CHECK-LIN-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; CHECK-LIN-NEXT: psrad $16, %xmm0
; CHECK-LIN-NEXT: retq
;
; CHECK-WIN-LABEL: b:
; CHECK-WIN: # %bb.0:
; CHECK-WIN-NEXT: # kill: def $r8w killed $r8w def $r8d
; CHECK-WIN-NEXT: # kill: def $dx killed $dx def $edx
; CHECK-WIN-NEXT: # kill: def $cx killed $cx def $ecx
; CHECK-WIN-NEXT: pxor %xmm0, %xmm0
; CHECK-WIN-NEXT: pinsrw $1, %ecx, %xmm0
; CHECK-WIN-NEXT: pinsrw $3, %edx, %xmm0
; CHECK-WIN-NEXT: pinsrw $5, %r8d, %xmm0
; CHECK-WIN-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-WIN-NEXT: psrad $16, %xmm0
; CHECK-WIN-NEXT: retq
%c = sext <3 x i16> %a to <3 x i32>
@ -87,21 +78,15 @@ define <8 x i32> @d(<8 x i16> %a) nounwind {
define <3 x i32> @e(<3 x i16> %a) nounwind {
; CHECK-LIN-LABEL: e:
; CHECK-LIN: # %bb.0:
; CHECK-LIN-NEXT: pxor %xmm0, %xmm0
; CHECK-LIN-NEXT: pinsrw $0, %edi, %xmm0
; CHECK-LIN-NEXT: pinsrw $2, %esi, %xmm0
; CHECK-LIN-NEXT: pinsrw $4, %edx, %xmm0
; CHECK-LIN-NEXT: pxor %xmm1, %xmm1
; CHECK-LIN-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-LIN-NEXT: retq
;
; CHECK-WIN-LABEL: e:
; CHECK-WIN: # %bb.0:
; CHECK-WIN-NEXT: # kill: def $r8w killed $r8w def $r8d
; CHECK-WIN-NEXT: # kill: def $dx killed $dx def $edx
; CHECK-WIN-NEXT: # kill: def $cx killed $cx def $ecx
; CHECK-WIN-NEXT: pxor %xmm0, %xmm0
; CHECK-WIN-NEXT: pinsrw $0, %ecx, %xmm0
; CHECK-WIN-NEXT: pinsrw $2, %edx, %xmm0
; CHECK-WIN-NEXT: pinsrw $4, %r8d, %xmm0
; CHECK-WIN-NEXT: movdqa (%rcx), %xmm0
; CHECK-WIN-NEXT: pxor %xmm1, %xmm1
; CHECK-WIN-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-WIN-NEXT: retq
%c = zext <3 x i16> %a to <3 x i32>
ret <3 x i32> %c
@ -146,23 +131,14 @@ define <8 x i16> @g(<8 x i32> %a) nounwind {
define <3 x i16> @h(<3 x i32> %a) nounwind {
; CHECK-LIN-LABEL: h:
; CHECK-LIN: # %bb.0:
; CHECK-LIN-NEXT: movd %xmm0, %eax
; CHECK-LIN-NEXT: pextrw $2, %xmm0, %edx
; CHECK-LIN-NEXT: pextrw $4, %xmm0, %ecx
; CHECK-LIN-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-LIN-NEXT: # kill: def $dx killed $dx killed $edx
; CHECK-LIN-NEXT: # kill: def $cx killed $cx killed $ecx
; CHECK-LIN-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; CHECK-LIN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-LIN-NEXT: retq
;
; CHECK-WIN-LABEL: h:
; CHECK-WIN: # %bb.0:
; CHECK-WIN-NEXT: movdqa (%rcx), %xmm0
; CHECK-WIN-NEXT: movd %xmm0, %eax
; CHECK-WIN-NEXT: pextrw $2, %xmm0, %edx
; CHECK-WIN-NEXT: pextrw $4, %xmm0, %ecx
; CHECK-WIN-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-WIN-NEXT: # kill: def $dx killed $dx killed $edx
; CHECK-WIN-NEXT: # kill: def $cx killed $cx killed $ecx
; CHECK-WIN-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,2,2,3,4,5,6,7]
; CHECK-WIN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-WIN-NEXT: retq
%c = trunc <3 x i32> %a to <3 x i16>
ret <3 x i16> %c

View File

@ -151,23 +151,19 @@ define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: pinsrw $2, 4(%edx), %xmm0
; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: pinsrw $2, 4(%ecx), %xmm1
; X86-NEXT: paddw %xmm0, %xmm1
; X86-NEXT: pextrw $2, %xmm1, 4(%eax)
; X86-NEXT: movd %xmm1, (%eax)
; X86-NEXT: movdqa (%edx), %xmm0
; X86-NEXT: paddw (%ecx), %xmm0
; X86-NEXT: pextrw $2, %xmm0, 4(%eax)
; X86-NEXT: movd %xmm0, (%eax)
; X86-NEXT: retl $4
;
; X64-LABEL: add3i16:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT: paddw %xmm0, %xmm1
; X64-NEXT: pextrw $2, %xmm1, 4(%rdi)
; X64-NEXT: movd %xmm1, (%rdi)
; X64-NEXT: movdqa (%rsi), %xmm0
; X64-NEXT: paddw (%rdx), %xmm0
; X64-NEXT: pextrw $2, %xmm0, 4(%rdi)
; X64-NEXT: movd %xmm0, (%rdi)
; X64-NEXT: retq
%a = load %i16vec3, %i16vec3* %ap, align 16
%b = load %i16vec3, %i16vec3* %bp, align 16

View File

@ -25,7 +25,7 @@ class Intrinsic<string name, list<LLVMType> param_types = []> {
}
// isVoid needs to match the definition in ValueTypes.td
def isVoid : ValueType<0, 127>; // Produces no value
def isVoid : ValueType<0, 129>; // Produces no value
def llvm_vararg_ty : LLVMType<isVoid>; // this means vararg here
// CHECK: /* 0 */ 0, 29, 0,

View File

@ -98,6 +98,7 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::v256i8: return "MVT::v256i8";
case MVT::v1i16: return "MVT::v1i16";
case MVT::v2i16: return "MVT::v2i16";
case MVT::v3i16: return "MVT::v3i16";
case MVT::v4i16: return "MVT::v4i16";
case MVT::v8i16: return "MVT::v8i16";
case MVT::v16i16: return "MVT::v16i16";
@ -126,6 +127,7 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::v32i64: return "MVT::v32i64";
case MVT::v1i128: return "MVT::v1i128";
case MVT::v2f16: return "MVT::v2f16";
case MVT::v3f16: return "MVT::v3f16";
case MVT::v4f16: return "MVT::v4f16";
case MVT::v8f16: return "MVT::v8f16";
case MVT::v1f32: return "MVT::v1f32";

View File

@ -220,7 +220,8 @@ enum IIT_Info {
IIT_STRUCT7 = 39,
IIT_STRUCT8 = 40,
IIT_F128 = 41,
IIT_VEC_ELEMENT = 42
IIT_VEC_ELEMENT = 42,
IIT_V48 = 43
};
static void EncodeFixedValueType(MVT::SimpleValueType VT,
@ -347,6 +348,7 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
case 8: Sig.push_back(IIT_V8); break;
case 16: Sig.push_back(IIT_V16); break;
case 32: Sig.push_back(IIT_V32); break;
case 48: Sig.push_back(IIT_V48); break;
case 64: Sig.push_back(IIT_V64); break;
case 512: Sig.push_back(IIT_V512); break;
case 1024: Sig.push_back(IIT_V1024); break;