[X86][AVX] matchShuffleAsBlend - use isElementEquivalent to help match broadcast/repeated elements
Extend matchShuffleAsBlend to not only match against known in-place elements for BLEND shuffles, but also use isElementEquivalent to determine whether the shuffle mask's referenced element is the same as the in-place element. This allows us to replace a number of insertps instructions with more general blendps instructions (better opportunities for commutation, concatenation, etc.).
parent 96fb3eef66
commit 352df10a23
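To make the effect of the change concrete, here is a minimal standalone sketch of the matching idea, not the LLVM code itself: when testing whether a shuffle mask is really a blend, a lane that references a different element of the same operand is still acceptable if that element is known to hold the same value (for example, when the operand is a broadcast). The names (elementsEquivalent, matchShuffleAsBlendSketch), the splat-only equivalence test, and the plain integer vectors standing in for SDValue operands are assumptions made purely for this illustration.

#include <cstdint>
#include <optional>
#include <vector>

constexpr int SentinelUndef = -1; // stand-in for SM_SentinelUndef

// Hypothetical stand-in for isElementEquivalent: two lanes of an operand are
// treated as equivalent when they are the same lane or the operand is a splat
// (every lane holds the same value), which covers the broadcast cases.
static bool elementsEquivalent(const std::vector<int> &Op, int A, int B) {
  if (A == B)
    return true;
  for (int V : Op)
    if (V != Op[0])
      return false;
  return true;
}

// Returns the blend mask (bit i set = take lane i from V2) if the shuffle of
// the concatenation V1|V2 described by Mask is expressible as a per-lane
// blend, or std::nullopt if some lane would still need a real permute.
static std::optional<uint64_t> matchShuffleAsBlendSketch(
    const std::vector<int> &V1, const std::vector<int> &V2,
    const std::vector<int> &Mask) {
  const int Size = static_cast<int>(Mask.size());
  uint64_t BlendMask = 0;
  for (int i = 0; i < Size; ++i) {
    const int M = Mask[i];
    if (M == SentinelUndef)
      continue;
    // Lane i comes from V1: either exactly in place, or from a V1 lane that
    // holds an equivalent value (the relaxed check this commit introduces).
    if (M == i || (0 <= M && M < Size && elementsEquivalent(V1, M, i)))
      continue;
    // Lane i comes from V2 under the same relaxed in-place rule.
    if (M == i + Size ||
        (Size <= M && M < 2 * Size && elementsEquivalent(V2, M - Size, i))) {
      BlendMask |= 1ull << i;
      continue;
    }
    return std::nullopt; // not expressible as a blend
  }
  return BlendMask;
}

For example, with Mask = {0, 1, 2, 4} and a broadcast V2, lane 3 references V2 element 0 rather than element 3, but the two are equivalent, so the shuffle is accepted as a blend with BlendMask = 0b1000; this corresponds to the vinsertps ... xmm4[0] checks below becoming vblendps $8 ... xmm4[3].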
@@ -12248,10 +12248,15 @@ static bool matchShuffleAsBlend(SDValue V1, SDValue V2,
     int M = Mask[i];
     if (M == SM_SentinelUndef)
      continue;
-    if (M == i)
+    if (M == i ||
+        (0 <= M && M < Size && IsElementEquivalent(Size, V1, V1, M, i))) {
+      Mask[i] = i;
       continue;
-    if (M == i + Size) {
+    }
+    if (M == (i + Size) ||
+        (Size <= M && IsElementEquivalent(Size, V2, V2, M - Size, i))) {
       BlendMask |= 1ull << i;
+      Mask[i] = i + Size;
       continue;
     }
     if (Zeroable[i]) {
@@ -138,18 +138,17 @@ define <4 x float> @insertps_from_broadcast_loadv4f32(<4 x float> %a, <4 x float
   ret <4 x float> %7
 }
 
-;; FIXME: We're emitting an extraneous pshufd/vbroadcast.
 define <4 x float> @insertps_from_broadcast_multiple_use(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, float* nocapture readonly %fb, i64 %index) {
 ; X86-LABEL: insertps_from_broadcast_multiple_use:
 ; X86: ## %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: vbroadcastss (%ecx,%eax,4), %xmm4
-; X86-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[0]
-; X86-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[0]
+; X86-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[3]
+; X86-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[3]
 ; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0
-; X86-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm4[0]
-; X86-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0,1,2],xmm4[0]
+; X86-NEXT: vblendps {{.*#+}} xmm1 = xmm2[0,1,2],xmm4[3]
+; X86-NEXT: vblendps {{.*#+}} xmm2 = xmm3[0,1,2],xmm4[3]
 ; X86-NEXT: vaddps %xmm2, %xmm1, %xmm1
 ; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0
 ; X86-NEXT: retl
@@ -157,11 +156,11 @@ define <4 x float> @insertps_from_broadcast_multiple_use(<4 x float> %a, <4 x fl
 ; X64-LABEL: insertps_from_broadcast_multiple_use:
 ; X64: ## %bb.0:
 ; X64-NEXT: vbroadcastss (%rdi,%rsi,4), %xmm4
-; X64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[0]
-; X64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[0]
+; X64-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[3]
+; X64-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[3]
 ; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0
-; X64-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm4[0]
-; X64-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0,1,2],xmm4[0]
+; X64-NEXT: vblendps {{.*#+}} xmm1 = xmm2[0,1,2],xmm4[3]
+; X64-NEXT: vblendps {{.*#+}} xmm2 = xmm3[0,1,2],xmm4[3]
 ; X64-NEXT: vaddps %xmm2, %xmm1, %xmm1
 ; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0
 ; X64-NEXT: retq
@@ -39,7 +39,7 @@ define <4 x float> @pair_sum_v4f32_v4f32(<4 x float> %0, <4 x float> %1, <4 x fl
 ; AVX1-SLOW-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,2],xmm1[0,1]
 ; AVX1-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,1]
 ; AVX1-SLOW-NEXT: vhaddps %xmm3, %xmm3, %xmm1
-; AVX1-SLOW-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[1]
+; AVX1-SLOW-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
 ; AVX1-SLOW-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0]
 ; AVX1-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0
 ; AVX1-SLOW-NEXT: retq
@@ -58,7 +58,7 @@ define <4 x float> @pair_sum_v4f32_v4f32(<4 x float> %0, <4 x float> %1, <4 x fl
 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,2],xmm1[0,3]
 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,1]
 ; AVX2-SLOW-NEXT: vhaddps %xmm3, %xmm3, %xmm1
-; AVX2-SLOW-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[1]
+; AVX2-SLOW-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
 ; AVX2-SLOW-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0]
 ; AVX2-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0
 ; AVX2-SLOW-NEXT: retq
@@ -227,7 +227,7 @@ define <8 x float> @pair_sum_v8f32_v4f32(<4 x float> %0, <4 x float> %1, <4 x fl
 ; AVX1-SLOW-NEXT: vshufps {{.*#+}} xmm3 = xmm2[0,2],xmm1[0,1]
 ; AVX1-SLOW-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1,2],xmm4[0]
 ; AVX1-SLOW-NEXT: vshufps {{.*#+}} xmm1 = xmm2[1,3],xmm1[1,3]
-; AVX1-SLOW-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[1]
+; AVX1-SLOW-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[3]
 ; AVX1-SLOW-NEXT: vaddps %xmm1, %xmm3, %xmm1
 ; AVX1-SLOW-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX1-SLOW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
@@ -248,7 +248,7 @@ define <8 x float> @pair_sum_v8f32_v4f32(<4 x float> %0, <4 x float> %1, <4 x fl
 ; AVX1-FAST-NEXT: vshufps {{.*#+}} xmm3 = xmm2[0,2],xmm1[0,1]
 ; AVX1-FAST-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1,2],xmm4[0]
 ; AVX1-FAST-NEXT: vshufps {{.*#+}} xmm1 = xmm2[1,3],xmm1[1,3]
-; AVX1-FAST-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[1]
+; AVX1-FAST-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[3]
 ; AVX1-FAST-NEXT: vaddps %xmm1, %xmm3, %xmm1
 ; AVX1-FAST-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX1-FAST-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
@@ -271,7 +271,7 @@ define <8 x float> @pair_sum_v8f32_v4f32(<4 x float> %0, <4 x float> %1, <4 x fl
 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm3 = xmm2[0,2],xmm1[0,3]
 ; AVX2-SLOW-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1,2],xmm4[0]
 ; AVX2-SLOW-NEXT: vshufps {{.*#+}} xmm1 = xmm2[1,3],xmm1[1,3]
-; AVX2-SLOW-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[1]
+; AVX2-SLOW-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[3]
 ; AVX2-SLOW-NEXT: vaddps %xmm1, %xmm3, %xmm1
 ; AVX2-SLOW-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX2-SLOW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
@@ -292,7 +292,7 @@ define <8 x float> @pair_sum_v8f32_v4f32(<4 x float> %0, <4 x float> %1, <4 x fl
 ; AVX2-FAST-NEXT: vshufps {{.*#+}} xmm3 = xmm2[0,2],xmm1[0,3]
 ; AVX2-FAST-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1,2],xmm4[0]
 ; AVX2-FAST-NEXT: vshufps {{.*#+}} xmm1 = xmm2[1,3],xmm1[1,3]
-; AVX2-FAST-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[1]
+; AVX2-FAST-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[3]
 ; AVX2-FAST-NEXT: vaddps %xmm1, %xmm3, %xmm1
 ; AVX2-FAST-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX2-FAST-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
@@ -1661,15 +1661,15 @@ define <4 x float> @insertps_from_broadcast_multiple_use(<4 x float> %a, <4 x fl
 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
 ; X86-AVX1-NEXT: vbroadcastss (%ecx,%eax,4), %xmm4 ## encoding: [0xc4,0xe2,0x79,0x18,0x24,0x81]
-; X86-AVX1-NEXT: vinsertps $48, %xmm4, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0xc4,0x30]
-; X86-AVX1-NEXT: ## xmm0 = xmm0[0,1,2],xmm4[0]
-; X86-AVX1-NEXT: vinsertps $48, %xmm4, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0xcc,0x30]
-; X86-AVX1-NEXT: ## xmm1 = xmm1[0,1,2],xmm4[0]
+; X86-AVX1-NEXT: vblendps $8, %xmm4, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc4,0x08]
+; X86-AVX1-NEXT: ## xmm0 = xmm0[0,1,2],xmm4[3]
+; X86-AVX1-NEXT: vblendps $8, %xmm4, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x0c,0xcc,0x08]
+; X86-AVX1-NEXT: ## xmm1 = xmm1[0,1,2],xmm4[3]
 ; X86-AVX1-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x58,0xc1]
-; X86-AVX1-NEXT: vinsertps $48, %xmm4, %xmm2, %xmm1 ## encoding: [0xc4,0xe3,0x69,0x21,0xcc,0x30]
-; X86-AVX1-NEXT: ## xmm1 = xmm2[0,1,2],xmm4[0]
-; X86-AVX1-NEXT: vinsertps $48, %xmm4, %xmm3, %xmm2 ## encoding: [0xc4,0xe3,0x61,0x21,0xd4,0x30]
-; X86-AVX1-NEXT: ## xmm2 = xmm3[0,1,2],xmm4[0]
+; X86-AVX1-NEXT: vblendps $8, %xmm4, %xmm2, %xmm1 ## encoding: [0xc4,0xe3,0x69,0x0c,0xcc,0x08]
+; X86-AVX1-NEXT: ## xmm1 = xmm2[0,1,2],xmm4[3]
+; X86-AVX1-NEXT: vblendps $8, %xmm4, %xmm3, %xmm2 ## encoding: [0xc4,0xe3,0x61,0x0c,0xd4,0x08]
+; X86-AVX1-NEXT: ## xmm2 = xmm3[0,1,2],xmm4[3]
 ; X86-AVX1-NEXT: vaddps %xmm2, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x58,0xca]
 ; X86-AVX1-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x58,0xc1]
 ; X86-AVX1-NEXT: retl ## encoding: [0xc3]
@@ -1679,16 +1679,16 @@ define <4 x float> @insertps_from_broadcast_multiple_use(<4 x float> %a, <4 x fl
 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
 ; X86-AVX512-NEXT: vbroadcastss (%ecx,%eax,4), %xmm4 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x24,0x81]
-; X86-AVX512-NEXT: vinsertps $48, %xmm4, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc4,0x30]
-; X86-AVX512-NEXT: ## xmm0 = xmm0[0,1,2],xmm4[0]
-; X86-AVX512-NEXT: vinsertps $48, %xmm4, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x71,0x21,0xcc,0x30]
-; X86-AVX512-NEXT: ## xmm1 = xmm1[0,1,2],xmm4[0]
+; X86-AVX512-NEXT: vblendps $8, %xmm4, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc4,0x08]
+; X86-AVX512-NEXT: ## xmm0 = xmm0[0,1,2],xmm4[3]
+; X86-AVX512-NEXT: vblendps $8, %xmm4, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x0c,0xcc,0x08]
+; X86-AVX512-NEXT: ## xmm1 = xmm1[0,1,2],xmm4[3]
+; X86-AVX512-NEXT: vblendps $8, %xmm4, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x0c,0xd4,0x08]
+; X86-AVX512-NEXT: ## xmm2 = xmm2[0,1,2],xmm4[3]
+; X86-AVX512-NEXT: vblendps $8, %xmm4, %xmm3, %xmm3 ## encoding: [0xc4,0xe3,0x61,0x0c,0xdc,0x08]
+; X86-AVX512-NEXT: ## xmm3 = xmm3[0,1,2],xmm4[3]
 ; X86-AVX512-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
-; X86-AVX512-NEXT: vinsertps $48, %xmm4, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x69,0x21,0xcc,0x30]
-; X86-AVX512-NEXT: ## xmm1 = xmm2[0,1,2],xmm4[0]
-; X86-AVX512-NEXT: vinsertps $48, %xmm4, %xmm3, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x61,0x21,0xd4,0x30]
-; X86-AVX512-NEXT: ## xmm2 = xmm3[0,1,2],xmm4[0]
-; X86-AVX512-NEXT: vaddps %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xca]
+; X86-AVX512-NEXT: vaddps %xmm3, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xcb]
 ; X86-AVX512-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
 ; X86-AVX512-NEXT: retl ## encoding: [0xc3]
 ;
@@ -1712,15 +1712,15 @@ define <4 x float> @insertps_from_broadcast_multiple_use(<4 x float> %a, <4 x fl
 ; X64-AVX1-LABEL: insertps_from_broadcast_multiple_use:
 ; X64-AVX1: ## %bb.0:
 ; X64-AVX1-NEXT: vbroadcastss (%rdi,%rsi,4), %xmm4 ## encoding: [0xc4,0xe2,0x79,0x18,0x24,0xb7]
-; X64-AVX1-NEXT: vinsertps $48, %xmm4, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0xc4,0x30]
-; X64-AVX1-NEXT: ## xmm0 = xmm0[0,1,2],xmm4[0]
-; X64-AVX1-NEXT: vinsertps $48, %xmm4, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0xcc,0x30]
-; X64-AVX1-NEXT: ## xmm1 = xmm1[0,1,2],xmm4[0]
+; X64-AVX1-NEXT: vblendps $8, %xmm4, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc4,0x08]
+; X64-AVX1-NEXT: ## xmm0 = xmm0[0,1,2],xmm4[3]
+; X64-AVX1-NEXT: vblendps $8, %xmm4, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x0c,0xcc,0x08]
+; X64-AVX1-NEXT: ## xmm1 = xmm1[0,1,2],xmm4[3]
 ; X64-AVX1-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x58,0xc1]
-; X64-AVX1-NEXT: vinsertps $48, %xmm4, %xmm2, %xmm1 ## encoding: [0xc4,0xe3,0x69,0x21,0xcc,0x30]
-; X64-AVX1-NEXT: ## xmm1 = xmm2[0,1,2],xmm4[0]
-; X64-AVX1-NEXT: vinsertps $48, %xmm4, %xmm3, %xmm2 ## encoding: [0xc4,0xe3,0x61,0x21,0xd4,0x30]
-; X64-AVX1-NEXT: ## xmm2 = xmm3[0,1,2],xmm4[0]
+; X64-AVX1-NEXT: vblendps $8, %xmm4, %xmm2, %xmm1 ## encoding: [0xc4,0xe3,0x69,0x0c,0xcc,0x08]
+; X64-AVX1-NEXT: ## xmm1 = xmm2[0,1,2],xmm4[3]
+; X64-AVX1-NEXT: vblendps $8, %xmm4, %xmm3, %xmm2 ## encoding: [0xc4,0xe3,0x61,0x0c,0xd4,0x08]
+; X64-AVX1-NEXT: ## xmm2 = xmm3[0,1,2],xmm4[3]
 ; X64-AVX1-NEXT: vaddps %xmm2, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x58,0xca]
 ; X64-AVX1-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x58,0xc1]
 ; X64-AVX1-NEXT: retq ## encoding: [0xc3]
@@ -1728,16 +1728,16 @@ define <4 x float> @insertps_from_broadcast_multiple_use(<4 x float> %a, <4 x fl
 ; X64-AVX512-LABEL: insertps_from_broadcast_multiple_use:
 ; X64-AVX512: ## %bb.0:
 ; X64-AVX512-NEXT: vbroadcastss (%rdi,%rsi,4), %xmm4 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x24,0xb7]
-; X64-AVX512-NEXT: vinsertps $48, %xmm4, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc4,0x30]
-; X64-AVX512-NEXT: ## xmm0 = xmm0[0,1,2],xmm4[0]
-; X64-AVX512-NEXT: vinsertps $48, %xmm4, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x71,0x21,0xcc,0x30]
-; X64-AVX512-NEXT: ## xmm1 = xmm1[0,1,2],xmm4[0]
+; X64-AVX512-NEXT: vblendps $8, %xmm4, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc4,0x08]
+; X64-AVX512-NEXT: ## xmm0 = xmm0[0,1,2],xmm4[3]
+; X64-AVX512-NEXT: vblendps $8, %xmm4, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x0c,0xcc,0x08]
+; X64-AVX512-NEXT: ## xmm1 = xmm1[0,1,2],xmm4[3]
+; X64-AVX512-NEXT: vblendps $8, %xmm4, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x0c,0xd4,0x08]
+; X64-AVX512-NEXT: ## xmm2 = xmm2[0,1,2],xmm4[3]
+; X64-AVX512-NEXT: vblendps $8, %xmm4, %xmm3, %xmm3 ## encoding: [0xc4,0xe3,0x61,0x0c,0xdc,0x08]
+; X64-AVX512-NEXT: ## xmm3 = xmm3[0,1,2],xmm4[3]
 ; X64-AVX512-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
-; X64-AVX512-NEXT: vinsertps $48, %xmm4, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x69,0x21,0xcc,0x30]
-; X64-AVX512-NEXT: ## xmm1 = xmm2[0,1,2],xmm4[0]
-; X64-AVX512-NEXT: vinsertps $48, %xmm4, %xmm3, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x61,0x21,0xd4,0x30]
-; X64-AVX512-NEXT: ## xmm2 = xmm3[0,1,2],xmm4[0]
-; X64-AVX512-NEXT: vaddps %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xca]
+; X64-AVX512-NEXT: vaddps %xmm3, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xcb]
 ; X64-AVX512-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
 ; X64-AVX512-NEXT: retq ## encoding: [0xc3]
 %1 = getelementptr inbounds float, float* %fb, i64 %index