forked from OSchip/llvm-project
[X86][SSE] combineTargetShuffle - permilps(shufps(load(),x)) --> permilps(shufps(x,load()))
Moves the lowerShuffleWithSHUFPS commutation code from rG30fcd29fe479 into combineTargetShuffle so that these cases are caught during combine.
This commit is contained in:
parent
a8c3608a27
commit
3fd5d1c6e7
|
@ -13316,12 +13316,10 @@ static SDValue lowerV2I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
|||
/// It makes no assumptions about whether this is the *best* lowering, it simply
|
||||
/// uses it.
|
||||
static SDValue lowerShuffleWithSHUFPS(const SDLoc &DL, MVT VT,
|
||||
ArrayRef<int> OriginalMask, SDValue V1,
|
||||
ArrayRef<int> Mask, SDValue V1,
|
||||
SDValue V2, SelectionDAG &DAG) {
|
||||
SDValue LowV = V1, HighV = V2;
|
||||
SmallVector<int, 4> Mask(OriginalMask.begin(), OriginalMask.end());
|
||||
SmallVector<int, 4> NewMask = Mask;
|
||||
|
||||
SmallVector<int, 4> NewMask(Mask.begin(), Mask.end());
|
||||
int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });
|
||||
|
||||
if (NumV2Elements == 1) {
|
||||
|
@ -13358,14 +13356,6 @@ static SDValue lowerShuffleWithSHUFPS(const SDLoc &DL, MVT VT,
|
|||
NewMask[V2Index] = 0; // We shifted the V2 element into V2[0].
|
||||
}
|
||||
} else if (NumV2Elements == 2) {
|
||||
// If we are likely to fold V1 but not V2, then commute the shuffle.
|
||||
if (MayFoldLoad(V1) && !MayFoldLoad(V2)) {
|
||||
ShuffleVectorSDNode::commuteMask(Mask);
|
||||
NewMask = Mask;
|
||||
std::swap(V1, V2);
|
||||
std::swap(LowV, HighV);
|
||||
}
|
||||
|
||||
if (Mask[0] < 4 && Mask[1] < 4) {
|
||||
// Handle the easy case where we have V1 in the low lanes and V2 in the
|
||||
// high lanes.
|
||||
|
@ -34598,6 +34588,28 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
|
|||
}
|
||||
}
|
||||
|
||||
// Attempt to commute shufps LHS loads:
|
||||
// permilps(shufps(load(),x)) --> permilps(shufps(x,load()))
|
||||
if (VT == MVT::v4f32 &&
|
||||
(X86ISD::VPERMILPI == Opcode ||
|
||||
(X86ISD::SHUFP == Opcode && N.getOperand(0) == N.getOperand(1)))) {
|
||||
SDValue N0 = N.getOperand(0);
|
||||
unsigned Imm = N.getConstantOperandVal(X86ISD::VPERMILPI == Opcode ? 1 : 2);
|
||||
if (N0.getOpcode() == X86ISD::SHUFP && N->isOnlyUserOf(N0.getNode())) {
|
||||
SDValue N00 = N0.getOperand(0);
|
||||
SDValue N01 = N0.getOperand(1);
|
||||
if (MayFoldLoad(peekThroughOneUseBitcasts(N00)) &&
|
||||
!MayFoldLoad(peekThroughOneUseBitcasts(N01))) {
|
||||
unsigned Imm1 = N0.getConstantOperandVal(2);
|
||||
Imm1 = ((Imm1 & 0x0F) << 4) | ((Imm1 & 0xF0) >> 4);
|
||||
SDValue NewN0 = DAG.getNode(X86ISD::SHUFP, DL, VT, N01, N00,
|
||||
DAG.getTargetConstant(Imm1, DL, MVT::i8));
|
||||
return DAG.getNode(X86ISD::SHUFP, DL, VT, NewN0, NewN0,
|
||||
DAG.getTargetConstant(Imm ^ 0xAA, DL, MVT::i8));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
switch (Opcode) {
|
||||
case X86ISD::VBROADCAST: {
|
||||
SDValue Src = N.getOperand(0);
|
||||
|
|
|
@ -9,22 +9,22 @@ define void @PR15298(<4 x float>* nocapture %source, <8 x float>* nocapture %des
|
|||
; SSE-32: # %bb.0: # %L.entry
|
||||
; SSE-32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; SSE-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; SSE-32-NEXT: movaps 304(%ecx), %xmm0
|
||||
; SSE-32-NEXT: xorps %xmm0, %xmm0
|
||||
; SSE-32-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,1]
|
||||
; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0,1,3]
|
||||
; SSE-32-NEXT: movups %xmm1, 624(%eax)
|
||||
; SSE-32-NEXT: movups %xmm0, 608(%eax)
|
||||
; SSE-32-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],mem[0,0]
|
||||
; SSE-32-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,3,1]
|
||||
; SSE-32-NEXT: movups %xmm0, 624(%eax)
|
||||
; SSE-32-NEXT: movups %xmm1, 608(%eax)
|
||||
; SSE-32-NEXT: retl
|
||||
;
|
||||
; SSE-64-LABEL: PR15298:
|
||||
; SSE-64: # %bb.0: # %L.entry
|
||||
; SSE-64-NEXT: movaps 304(%rdi), %xmm0
|
||||
; SSE-64-NEXT: xorps %xmm0, %xmm0
|
||||
; SSE-64-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,1]
|
||||
; SSE-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0,1,3]
|
||||
; SSE-64-NEXT: movups %xmm1, 624(%rsi)
|
||||
; SSE-64-NEXT: movups %xmm0, 608(%rsi)
|
||||
; SSE-64-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],mem[0,0]
|
||||
; SSE-64-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,3,1]
|
||||
; SSE-64-NEXT: movups %xmm0, 624(%rsi)
|
||||
; SSE-64-NEXT: movups %xmm1, 608(%rsi)
|
||||
; SSE-64-NEXT: retq
|
||||
;
|
||||
; AVX-32-LABEL: PR15298:
|
||||
|
|
Loading…
Reference in New Issue