[X86] Fix duplicate entries in the Skylake Server scheduler model by changing Z128 to Z256

Each affected Z128 entry sits next to the 'Y' version of the same instruction, so I assume Z256 is the intended value.

llvm-svn: 320295
This commit is contained in:
Craig Topper 2017-12-10 09:14:45 +00:00
parent 90c9c15936
commit 253562eb81
2 changed files with 24 additions and 24 deletions

View File

@ -576,22 +576,22 @@ def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFDZri(b?)(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFDri")>;
def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFHWYri")>;
def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFHWZ128r(b?)i(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFHWZ128r(b?)i(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFHWZ256r(b?)i(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFHWZri(b?)(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFHWri")>;
def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFLWYri")>;
def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFLWZ128r(b?)i(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFLWZ128r(b?)i(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFLWZ256r(b?)i(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFLWZri(b?)(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFLWri")>;
def: InstRW<[SKXWriteResGroup3], (instregex "VPSLLDQYri")>;
def: InstRW<[SKXWriteResGroup3], (instregex "VPSLLDQZ128rr(b?)(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup3], (instregex "VPSLLDQZ128rr(b?)(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup3], (instregex "VPSLLDQZ256rr(b?)(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup3], (instregex "VPSLLDQZ512rr(b?)(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup3], (instregex "VPSLLDQri")>;
def: InstRW<[SKXWriteResGroup3], (instregex "VPSRLDQYri")>;
def: InstRW<[SKXWriteResGroup3], (instregex "VPSRLDQZ128rr(b?)(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup3], (instregex "VPSRLDQZ128rr(b?)(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup3], (instregex "VPSRLDQZ256rr(b?)(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup3], (instregex "VPSRLDQZ512rr(b?)(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup3], (instregex "VPSRLDQri")>;
def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHBWYrr")>;
@ -4452,14 +4452,14 @@ def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFDYmi")>;
def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFDZ256m(b?)i(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFDZm(b?)i(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFHWYmi")>;
def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFHWZ128mi(b?)(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFHWZ256mi(b?)(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFHWZmi(b?)(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFLWYmi")>;
def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFLWZ128mi(b?)(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFLWZ256mi(b?)(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFLWZmi(b?)(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup119], (instregex "VPSLLDQZ128rm(b?)(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup119], (instregex "VPSLLDQZ256rm(b?)(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup119], (instregex "VPSLLDQZ512rm(b?)(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup119], (instregex "VPSRLDQZ128rm(b?)(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup119], (instregex "VPSRLDQZ256rm(b?)(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup119], (instregex "VPSRLDQZ512rm(b?)(k?)(z?)")>;
def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKHBWYrm")>;
def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKHBWZ256rm(b?)(k?)(z?)")>;

View File

@ -7258,7 +7258,7 @@ define <16 x i16> @test_masked_16xi16_perm_high_mem_mask0(<16 x i16>* %vp, <16 x
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqw %ymm2, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [6:1.00]
; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i16>, <16 x i16>* %vp
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 12, i32 15>
@ -7279,7 +7279,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask0(<16 x i16>* %vp, <16
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [6:1.00]
; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i16>, <16 x i16>* %vp
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 12, i32 15>
@ -7300,7 +7300,7 @@ define <16 x i16> @test_masked_16xi16_perm_low_mem_mask1(<16 x i16>* %vp, <16 x
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqw %ymm2, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [6:1.00]
; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i16>, <16 x i16>* %vp
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15>
@ -7321,7 +7321,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask1(<16 x i16>* %vp, <16
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [6:1.00]
; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i16>, <16 x i16>* %vp
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15>
@ -7342,7 +7342,7 @@ define <16 x i16> @test_masked_16xi16_perm_high_mem_mask2(<16 x i16>* %vp, <16 x
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqw %ymm2, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [6:1.00]
; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i16>, <16 x i16>* %vp
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 13, i32 14>
@ -7363,7 +7363,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask2(<16 x i16>* %vp, <16
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [6:1.00]
; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i16>, <16 x i16>* %vp
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 13, i32 14>
@ -7398,7 +7398,7 @@ define <16 x i16> @test_masked_16xi16_perm_low_mem_mask3(<16 x i16>* %vp, <16 x
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqw %ymm2, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [6:1.00]
; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i16>, <16 x i16>* %vp
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15>
@ -7419,7 +7419,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask3(<16 x i16>* %vp, <16
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [6:1.00]
; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i16>, <16 x i16>* %vp
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15>
@ -7440,7 +7440,7 @@ define <16 x i16> @test_masked_16xi16_perm_high_mem_mask4(<16 x i16>* %vp, <16 x
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqw %ymm2, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [6:1.00]
; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i16>, <16 x i16>* %vp
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 14, i32 15>
@ -7461,7 +7461,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask4(<16 x i16>* %vp, <16
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [6:1.00]
; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i16>, <16 x i16>* %vp
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 14, i32 15>
@ -7482,7 +7482,7 @@ define <16 x i16> @test_masked_16xi16_perm_low_mem_mask5(<16 x i16>* %vp, <16 x
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqw %ymm2, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [6:1.00]
; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i16>, <16 x i16>* %vp
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
@ -7503,7 +7503,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask5(<16 x i16>* %vp, <16
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [6:1.00]
; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i16>, <16 x i16>* %vp
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
@ -7538,7 +7538,7 @@ define <16 x i16> @test_masked_16xi16_perm_high_mem_mask6(<16 x i16>* %vp, <16 x
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqw %ymm2, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [6:1.00]
; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i16>, <16 x i16>* %vp
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 5, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 12, i32 13>
@ -7559,7 +7559,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask6(<16 x i16>* %vp, <16
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [6:1.00]
; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i16>, <16 x i16>* %vp
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 5, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 12, i32 13>
@ -7580,7 +7580,7 @@ define <16 x i16> @test_masked_16xi16_perm_low_mem_mask7(<16 x i16>* %vp, <16 x
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqw %ymm2, %ymm1, %k1 # sched: [3:1.00]
; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [6:1.00]
; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i16>, <16 x i16>* %vp
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
@ -7601,7 +7601,7 @@ define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask7(<16 x i16>* %vp, <16
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %k1 # sched: [3:1.00]
; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [6:1.00]
; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%vec = load <16 x i16>, <16 x i16>* %vp
%shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>