forked from OSchip/llvm-project
[AVX] If the data which is going to be saved is already in two XMM registers
(for example, after an integer operation), do not pack the registers into a YMM before saving. It's better to save as two XMM registers. Before: vinsertf128 $1, %xmm3, %ymm0, %ymm3 vinsertf128 $0, %xmm1, %ymm3, %ymm1 vmovaps %ymm1, 416(%rsp) After: vmovaps %xmm3, 416+16(%rsp) vmovaps %xmm1, 416(%rsp) llvm-svn: 137308
This commit is contained in:
parent
aa51684318
commit
1542d5a00a
|
@ -12589,6 +12589,29 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
|
||||||
EVT VT = St->getValue().getValueType();
|
EVT VT = St->getValue().getValueType();
|
||||||
EVT StVT = St->getMemoryVT();
|
EVT StVT = St->getMemoryVT();
|
||||||
DebugLoc dl = St->getDebugLoc();
|
DebugLoc dl = St->getDebugLoc();
|
||||||
|
SDValue StoredVal = St->getOperand(1);
|
||||||
|
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||||
|
|
||||||
|
// If we are saving a concatenation of two XMM registers, perform two stores.
|
||||||
|
if (VT.getSizeInBits() == 256 &&
|
||||||
|
StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS &&
|
||||||
|
StoredVal.getNumOperands() == 2) {
|
||||||
|
|
||||||
|
SDValue Value0 = StoredVal.getOperand(0);
|
||||||
|
SDValue Value1 = StoredVal.getOperand(1);
|
||||||
|
|
||||||
|
SDValue Stride = DAG.getConstant(16, TLI.getPointerTy());
|
||||||
|
SDValue Ptr0 = St->getBasePtr();
|
||||||
|
SDValue Ptr1 = DAG.getNode(ISD::ADD, dl, Ptr0.getValueType(), Ptr0, Stride);
|
||||||
|
|
||||||
|
SDValue Ch0 = DAG.getStore(St->getChain(), dl, Value0, Ptr0,
|
||||||
|
St->getPointerInfo(), St->isVolatile(),
|
||||||
|
St->isNonTemporal(), St->getAlignment());
|
||||||
|
SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1,
|
||||||
|
St->getPointerInfo(), St->isVolatile(),
|
||||||
|
St->isNonTemporal(), St->getAlignment());
|
||||||
|
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
|
||||||
|
}
|
||||||
|
|
||||||
// Optimize trunc store (of multiple scalars) to shuffle and store.
|
// Optimize trunc store (of multiple scalars) to shuffle and store.
|
||||||
// First, pack all of the elements in one place. Next, store to memory
|
// First, pack all of the elements in one place. Next, store to memory
|
||||||
|
|
|
@ -0,0 +1,17 @@
|
||||||
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
|
||||||
|
|
||||||
|
; It is faster to make two saves, if the data is already in XMM registers. For
|
||||||
|
; example, after making an integer operation.
|
||||||
|
define void @double_save(<4 x i32>* %Ap, <4 x i32>* %Bp, <8 x i32>* %P) nounwind ssp {
|
||||||
|
entry:
|
||||||
|
; CHECK: movaps
|
||||||
|
; CHECK: movaps
|
||||||
|
; CHECK: movaps
|
||||||
|
; CHECK: movaps
|
||||||
|
%A = load <4 x i32>* %Ap
|
||||||
|
%B = load <4 x i32>* %Bp
|
||||||
|
%Z = shufflevector <4 x i32>%A, <4 x i32>%B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||||
|
store <8 x i32> %Z, <8 x i32>* %P, align 16
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue