forked from OSchip/llvm-project
[X86][SSE] Add support for combining PINSRW into a target shuffle.
Also add the ability to recognise PINSRW(Vec, 0, Idx). Target shuffle combines won't replace multiple insertions with a bit mask until a depth of 3 or more, so we avoid codesize bloat. The unnecessary vpblendw in clearupper8xi16a will be fixed in an upcoming patch. llvm-svn: 293627
This commit is contained in:
parent
2f2a6ab991
commit
c29eab52e8
|
@ -5770,12 +5770,21 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
|
|||
return true;
|
||||
}
|
||||
case X86ISD::PINSRW: {
|
||||
// Attempt to recognise a PINSRW(ASSERTZEXT(PEXTRW)) shuffle pattern.
|
||||
// TODO: Expand this to support PINSRB/INSERT_VECTOR_ELT/etc.
|
||||
SDValue InVec = N.getOperand(0);
|
||||
SDValue InScl = N.getOperand(1);
|
||||
uint64_t InIdx = N.getConstantOperandVal(2);
|
||||
assert(InIdx < NumElts && "Illegal insertion index");
|
||||
|
||||
// Attempt to recognise a PINSRW(VEC, 0, Idx) shuffle pattern.
|
||||
if (X86::isZeroNode(InScl)) {
|
||||
Ops.push_back(InVec);
|
||||
for (unsigned i = 0; i != NumElts; ++i)
|
||||
Mask.push_back(i == InIdx ? SM_SentinelZero : i);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Attempt to recognise a PINSRW(ASSERTZEXT(PEXTRW)) shuffle pattern.
|
||||
// TODO: Expand this to support PINSRB/INSERT_VECTOR_ELT/etc.
|
||||
if (InScl.getOpcode() != ISD::AssertZext ||
|
||||
InScl.getOperand(0).getOpcode() != X86ISD::PEXTRW)
|
||||
return false;
|
||||
|
@ -30597,6 +30606,24 @@ static SDValue combineVectorShift(SDNode *N, SelectionDAG &DAG,
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
/// DAG combine entry point for X86ISD::PINSRB / X86ISD::PINSRW nodes.
///
/// Hands the insertion node to the recursive target-shuffle combiner so that
/// insertion patterns can be merged into a shuffle.
///
/// \param N         The PINSRB (v16i8) or PINSRW (v8i16) node being combined.
/// \param DAG       The selection DAG that owns \p N.
/// \param DCI       Combiner state forwarded to the shuffle combiner.
/// \param Subtarget The X86 subtarget forwarded to the shuffle combiner.
/// \returns Always an empty SDValue; this function never returns a
///          replacement node itself.
static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const X86Subtarget &Subtarget) {
  // The opcode is queried inside the assert itself so that builds with
  // assertions disabled are not left with an unused local variable.
  assert(((X86ISD::PINSRB == N->getOpcode() &&
           N->getValueType(0) == MVT::v16i8) ||
          (X86ISD::PINSRW == N->getOpcode() &&
           N->getValueType(0) == MVT::v8i16)) &&
         "Unexpected vector insertion");

  // Attempt to combine PINSRB/PINSRW patterns to a shuffle.
  SDValue Op(N, 0);
  SmallVector<int, 1> NonceMask; // Just a placeholder.
  NonceMask.push_back(0);
  // The helper's result is intentionally not inspected: an empty SDValue is
  // returned either way.
  // NOTE(review): presumably any replacement is applied through DCI by the
  // helper itself - confirm against combineX86ShufflesRecursively.
  combineX86ShufflesRecursively({Op}, 0, Op, NonceMask,
                                /*Depth*/ 1, /*HasVarMask*/ false, DAG,
                                DCI, Subtarget);
  return SDValue();
}
|
||||
|
||||
/// Recognize the distinctive (AND (setcc ...) (setcc ..)) where both setccs
|
||||
/// reference the same FP CMP, and rewrite for CMPEQSS and friends. Likewise for
|
||||
/// OR -> CMPNEQSS.
|
||||
|
@ -34159,6 +34186,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
|||
case X86ISD::VSRLI: return combineVectorShift(N, DAG, DCI, Subtarget);
|
||||
case X86ISD::VSEXT:
|
||||
case X86ISD::VZEXT: return combineVSZext(N, DAG, DCI, Subtarget);
|
||||
case X86ISD::PINSRB:
|
||||
case X86ISD::PINSRW: return combineVectorInsert(N, DAG, DCI, Subtarget);
|
||||
case X86ISD::SHUFP: // Handle all target specific shuffles
|
||||
case X86ISD::INSERTPS:
|
||||
case X86ISD::PALIGNR:
|
||||
|
|
|
@ -94,7 +94,8 @@ define <8 x i16> @_clearupper8xi16a(<8 x i16>) nounwind {
|
|||
;
|
||||
; AVX-LABEL: _clearupper8xi16a:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7]
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
%x0 = extractelement <8 x i16> %0, i32 0
|
||||
%x1 = extractelement <8 x i16> %0, i32 1
|
||||
|
@ -317,11 +318,7 @@ define <2 x i64> @_clearupper2xi64b(<2 x i64>) nounwind {
|
|||
define <4 x i32> @_clearupper4xi32b(<4 x i32>) nounwind {
|
||||
; SSE-LABEL: _clearupper4xi32b:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: xorl %eax, %eax
|
||||
; SSE-NEXT: pinsrw $1, %eax, %xmm0
|
||||
; SSE-NEXT: pinsrw $3, %eax, %xmm0
|
||||
; SSE-NEXT: pinsrw $5, %eax, %xmm0
|
||||
; SSE-NEXT: pinsrw $7, %eax, %xmm0
|
||||
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: _clearupper4xi32b:
|
||||
|
|
Loading…
Reference in New Issue