merge consecutive stores of extracted vector elements

Add a path to DAGCombiner::MergeConsecutiveStores() 
to combine multiple scalar stores when the store operands
are extracted vector elements. This is a partial fix for
PR21711 ( http://llvm.org/bugs/show_bug.cgi?id=21711 ).

For the new test case, codegen improves from:

   vmovss  %xmm0, (%rdi)
   vextractps      $1, %xmm0, 4(%rdi)
   vextractps      $2, %xmm0, 8(%rdi)
   vextractps      $3, %xmm0, 12(%rdi)
   vextractf128    $1, %ymm0, %xmm0
   vmovss  %xmm0, 16(%rdi)
   vextractps      $1, %xmm0, 20(%rdi)
   vextractps      $2, %xmm0, 24(%rdi)
   vextractps      $3, %xmm0, 28(%rdi)
   vzeroupper
   retq

To:

   vmovups	%ymm0, (%rdi)
   vzeroupper
   retq

Patch reviewed by Nadav Rotem.

Differential Revision: http://reviews.llvm.org/D6698

llvm-svn: 224611
This commit is contained in:
Sanjay Patel 2014-12-19 20:23:41 +00:00
parent 38ce8cd2e2
commit 0428a5786e
2 changed files with 108 additions and 4 deletions

View File

@ -9498,11 +9498,14 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
return false;
// Perform an early exit check. Do not bother looking at stored values that
// are not constants or loads.
// are not constants, loads, or extracted vector elements.
SDValue StoredVal = St->getValue();
bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) &&
!IsLoadSrc)
bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
isa<ConstantFPSDNode>(StoredVal);
bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);
if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
return false;
// Only look at ends of store sequences.
@ -9644,7 +9647,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
// Store the constants into memory as one consecutive store.
if (!IsLoadSrc) {
if (IsConstantSrc) {
unsigned LastLegalType = 0;
unsigned LastLegalVectorType = 0;
bool NonZero = false;
@ -9774,6 +9777,74 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
return true;
}
// When extracting multiple vector elements, try to store them
// in one vector store rather than a sequence of scalar stores.
if (IsExtractVecEltSrc) {
unsigned NumElem = 0;
for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
// Find a legal type for the vector store.
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
if (TLI.isTypeLegal(Ty))
NumElem = i + 1;
}
// Make sure we have a legal type and something to merge.
if (NumElem < 2)
return false;
unsigned EarliestNodeUsed = 0;
for (unsigned i=0; i < NumElem; ++i) {
// Find a chain for the new wide-store operand. Notice that some
// of the store nodes that we found may not be selected for inclusion
// in the wide store. The chain we use needs to be the chain of the
// earliest store node which is *used* and replaced by the wide store.
if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
EarliestNodeUsed = i;
}
// The earliest Node in the DAG.
LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
SDLoc DL(StoreNodes[0].MemNode);
SDValue StoredVal;
// Find a legal type for the vector store.
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumElem ; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
SDValue Val = St->getValue();
// All of the operands of a BUILD_VECTOR must have the same type.
if (Val.getValueType() != MemVT)
return false;
Ops.push_back(Val);
}
// Build the extracted vector elements back into a vector.
StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops);
SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(),
false, false,
FirstInChain->getAlignment());
// Replace the first store with the new store
CombineTo(EarliestOp, NewStore);
// Erase all other stores.
for (unsigned i = 0; i < NumElem ; ++i) {
if (StoreNodes[i].MemNode == EarliestOp)
continue;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
while (!St->use_empty())
DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
deleteAndRecombine(St);
}
return true;
}
// Below we handle the case of multiple consecutive stores that
// come from multiple consecutive loads. We merge them into a single
// wide load and a single wide store.

View File

@ -434,3 +434,36 @@ define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) {
; <label>:14
ret void
}
define void @merge_vec_element_store(<8 x float> %v, float* %ptr) {
%vecext0 = extractelement <8 x float> %v, i32 0
%vecext1 = extractelement <8 x float> %v, i32 1
%vecext2 = extractelement <8 x float> %v, i32 2
%vecext3 = extractelement <8 x float> %v, i32 3
%vecext4 = extractelement <8 x float> %v, i32 4
%vecext5 = extractelement <8 x float> %v, i32 5
%vecext6 = extractelement <8 x float> %v, i32 6
%vecext7 = extractelement <8 x float> %v, i32 7
%arrayidx1 = getelementptr inbounds float* %ptr, i64 1
%arrayidx2 = getelementptr inbounds float* %ptr, i64 2
%arrayidx3 = getelementptr inbounds float* %ptr, i64 3
%arrayidx4 = getelementptr inbounds float* %ptr, i64 4
%arrayidx5 = getelementptr inbounds float* %ptr, i64 5
%arrayidx6 = getelementptr inbounds float* %ptr, i64 6
%arrayidx7 = getelementptr inbounds float* %ptr, i64 7
store float %vecext0, float* %ptr, align 4
store float %vecext1, float* %arrayidx1, align 4
store float %vecext2, float* %arrayidx2, align 4
store float %vecext3, float* %arrayidx3, align 4
store float %vecext4, float* %arrayidx4, align 4
store float %vecext5, float* %arrayidx5, align 4
store float %vecext6, float* %arrayidx6, align 4
store float %vecext7, float* %arrayidx7, align 4
ret void
; CHECK-LABEL: merge_vec_element_store
; CHECK: vmovups
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
}