forked from OSchip/llvm-project
[X86][SSE] matchShuffleWithSHUFPD - add support for unary shuffles.
This causes one minor test change (on X64-SSE2, test_mm_storeh_pi now emits shufps $78 instead of pshufd $78), but it is mainly necessary for an upcoming patch.
This commit is contained in:
parent
417fe39be5
commit
e6a7e3b5e3
|
@ -15942,6 +15942,7 @@ static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
|
||||||
// Mask for V8F64: 0/1, 8/9, 2/3, 10/11, 4/5, ..
|
// Mask for V8F64: 0/1, 8/9, 2/3, 10/11, 4/5, ..
|
||||||
// Mask for V4F64; 0/1, 4/5, 2/3, 6/7..
|
// Mask for V4F64; 0/1, 4/5, 2/3, 6/7..
|
||||||
ShuffleImm = 0;
|
ShuffleImm = 0;
|
||||||
|
bool UnaryMask = isUndefOrZeroOrInRange(Mask, 0, NumElts);
|
||||||
bool ShufpdMask = true;
|
bool ShufpdMask = true;
|
||||||
bool CommutableMask = true;
|
bool CommutableMask = true;
|
||||||
for (int i = 0; i < NumElts; ++i) {
|
for (int i = 0; i < NumElts; ++i) {
|
||||||
|
@ -15949,7 +15950,7 @@ static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
|
||||||
continue;
|
continue;
|
||||||
if (Mask[i] < 0)
|
if (Mask[i] < 0)
|
||||||
return false;
|
return false;
|
||||||
int Val = (i & 6) + NumElts * (i & 1);
|
int Val = (i & 6) + (UnaryMask ? 0 : (NumElts * (i & 1)));
|
||||||
int CommutVal = (i & 0xe) + NumElts * ((i & 1) ^ 1);
|
int CommutVal = (i & 0xe) + NumElts * ((i & 1) ^ 1);
|
||||||
if (Mask[i] < Val || Mask[i] > Val + 1)
|
if (Mask[i] < Val || Mask[i] > Val + 1)
|
||||||
ShufpdMask = false;
|
ShufpdMask = false;
|
||||||
|
@ -15961,7 +15962,9 @@ static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
|
||||||
if (!ShufpdMask && !CommutableMask)
|
if (!ShufpdMask && !CommutableMask)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (!ShufpdMask && CommutableMask)
|
if (UnaryMask)
|
||||||
|
V2 = V1;
|
||||||
|
else if (!ShufpdMask && CommutableMask)
|
||||||
std::swap(V1, V2);
|
std::swap(V1, V2);
|
||||||
|
|
||||||
ForceV1Zero = ZeroLane[0];
|
ForceV1Zero = ZeroLane[0];
|
||||||
|
|
|
@ -2791,7 +2791,7 @@ define void @test_mm_storeh_pi(x86_mmx *%a0, <4 x float> %a1) nounwind {
|
||||||
;
|
;
|
||||||
; X64-SSE2-LABEL: test_mm_storeh_pi:
|
; X64-SSE2-LABEL: test_mm_storeh_pi:
|
||||||
; X64-SSE2: # %bb.0:
|
; X64-SSE2: # %bb.0:
|
||||||
; X64-SSE2-NEXT: pshufd $78, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x4e]
|
; X64-SSE2-NEXT: shufps $78, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x4e]
|
||||||
; X64-SSE2-NEXT: # xmm0 = xmm0[2,3,0,1]
|
; X64-SSE2-NEXT: # xmm0 = xmm0[2,3,0,1]
|
||||||
; X64-SSE2-NEXT: movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0]
|
; X64-SSE2-NEXT: movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0]
|
||||||
; X64-SSE2-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
|
; X64-SSE2-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07]
|
||||||
|
|
Loading…
Reference in New Issue