forked from OSchip/llvm-project
AArch64: avoid splitting vector truncating stores.
We have code to split vector splats (of zero and non-zero) for performance reasons, but it ignores the fact that a store might be truncating. In practice, truncating stores are formed for vNi8 and vNi16 types. Since the truncation is from a legal type, the size of the store is always <= 64 bits, so such stores don't benefit from being split up anyway; this patch therefore just disables that transformation for truncating stores. llvm-svn: 350620
This commit is contained in:
parent
b12738d932
commit
964eea7ad2
|
@ -10053,6 +10053,7 @@ static SDValue performExtendCombine(SDNode *N,
|
|||
|
||||
static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
|
||||
SDValue SplatVal, unsigned NumVecElts) {
|
||||
assert(!St.isTruncatingStore() && "cannot split truncating vector store");
|
||||
unsigned OrigAlignment = St.getAlignment();
|
||||
unsigned EltOffset = SplatVal.getValueType().getSizeInBits() / 8;
|
||||
|
||||
|
@ -10127,6 +10128,11 @@ static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
|
|||
if (!StVal.hasOneUse())
|
||||
return SDValue();
|
||||
|
||||
// If the store is truncating then it's going down to i16 or smaller, which
|
||||
// means it can be implemented in a single store anyway.
|
||||
if (St.isTruncatingStore())
|
||||
return SDValue();
|
||||
|
||||
// If the immediate offset of the address operand is too large for the stp
|
||||
// instruction, then bail out.
|
||||
if (DAG.isBaseWithConstantOffset(St.getBasePtr())) {
|
||||
|
@ -10177,6 +10183,11 @@ static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
|
|||
if (NumVecElts != 4 && NumVecElts != 2)
|
||||
return SDValue();
|
||||
|
||||
// If the store is truncating then it's going down to i16 or smaller, which
|
||||
// means it can be implemented in a single store anyway.
|
||||
if (St.isTruncatingStore())
|
||||
return SDValue();
|
||||
|
||||
// Check that this is a splat.
|
||||
// Make sure that each of the relevant vector element locations are inserted
|
||||
// to, i.e. 0 and 1 for v2i64 and 0, 1, 2, 3 for v4i32.
|
||||
|
|
|
@ -1681,3 +1681,19 @@ entry:
|
|||
%add = add i64 %ld, 1
|
||||
ret i64 %add
|
||||
}
|
||||
|
||||
; CHECK-LABEL: trunc_splat_zero:
|
||||
; CHECK-DAG: strh wzr, [x0]
|
||||
define void @trunc_splat_zero(<2 x i8>* %ptr) {
|
||||
store <2 x i8> zeroinitializer, <2 x i8>* %ptr, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: trunc_splat:
|
||||
; CHECK: mov [[VAL:w[0-9]+]], #42
|
||||
; CHECK: movk [[VAL]], #42, lsl #16
|
||||
; CHECK: str [[VAL]], [x0]
|
||||
define void @trunc_splat(<2 x i16>* %ptr) {
|
||||
store <2 x i16> <i16 42, i16 42>, <2 x i16>* %ptr, align 4
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue