forked from OSchip/llvm-project
[AArch64][GlobalISel] Split vector stores of zero.
This results in a very minor improvement in most cases, generating stores of xzr instead of moving zero to a vector register. Differential Revision: https://reviews.llvm.org/D115479
This commit is contained in:
parent
50f3380290
commit
98095afbcb
|
@ -196,6 +196,13 @@ def mutate_anyext_to_zext : GICombineRule<
|
|||
(apply [{ applyMutateAnyExtToZExt(*${d}, MRI, B, Observer); }])
|
||||
>;
|
||||
|
||||
// Combine rule rooted at a G_STORE: matchSplitStoreZero128 decides whether the
// store is a 128-bit vector store of zero, and applySplitStoreZero128 rewrites
// it as two 64-bit stores.
def split_store_zero_128 : GICombineRule<
|
||||
(defs root:$d),
|
||||
(match (wip_match_opcode G_STORE):$d,
|
||||
[{ return matchSplitStoreZero128(*${d}, MRI); }]),
|
||||
(apply [{ applySplitStoreZero128(*${d}, MRI, B, Observer); }])
|
||||
>;
|
||||
|
||||
// Post-legalization combines which should happen at all optimization levels.
|
||||
// (E.g. ones that facilitate matching for the selector) For example, matching
|
||||
// pseudos.
|
||||
|
@ -220,6 +227,7 @@ def AArch64PostLegalizerCombinerHelper
|
|||
icmp_to_true_false_known_bits, merge_unmerge,
|
||||
select_combines, fold_merge_to_zext,
|
||||
constant_fold, identity_combines,
|
||||
ptr_add_immed_chain, overlapping_and]> {
|
||||
ptr_add_immed_chain, overlapping_and,
|
||||
split_store_zero_128]> {
|
||||
let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
|
||||
}
|
||||
|
|
|
@ -289,6 +289,44 @@ static void applyMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI,
|
|||
Observer.changedInstr(MI);
|
||||
}
|
||||
|
||||
/// Match a 128b store of zero and split it into two 64 bit stores, for
|
||||
/// size/performance reasons.
|
||||
/// Decide whether \p MI (a G_STORE) is a candidate for being split into two
/// 64-bit stores of zero. All of the following must hold:
///   - the store is simple;
///   - the stored value is a vector that is exactly 128 bits wide;
///   - the memory access is as wide as the value (i.e. not truncating);
///   - the store is the only non-debug user of the value register;
///   - the value is a constant (or constant splat) equal to zero.
static bool matchSplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI) {
  GStore &St = cast<GStore>(MI);
  if (!St.isSimple())
    return false;

  const Register StoredReg = St.getValueReg();
  const LLT StoredTy = MRI.getType(StoredReg);

  // Only full-width 128-bit vectors are of interest.
  if (!StoredTy.isVector())
    return false;
  if (StoredTy.getSizeInBits() != 128)
    return false;

  // Don't split truncating stores.
  if (StoredTy.getSizeInBits() != St.getMemSizeInBits())
    return false;

  // The value register must have this store as its sole non-debug use.
  if (!MRI.hasOneNonDBGUse(StoredReg))
    return false;

  auto SplatVal =
      isConstantOrConstantSplatVector(*MRI.getVRegDef(StoredReg), MRI);
  if (!SplatVal)
    return false;
  return SplatVal->isZero();
}
|
||||
|
||||
/// Rewrite a store previously accepted by matchSplitStoreZero128 as two s64
/// stores of zero: one at the original pointer and one at the pointer plus 8
/// bytes. The original store is erased afterwards.
///
/// \param MI       The G_STORE to replace.
/// \param MRI      Used to query register types.
/// \param B        Builder used to emit the replacement instructions; it is
///                 positioned at \p MI and inherits its debug location.
/// \param Observer Not used by this function.
static void applySplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI,
                                   MachineIRBuilder &B,
                                   GISelChangeObserver &Observer) {
  B.setInstrAndDebugLoc(MI);
  GStore &Store = cast<GStore>(MI);
  // Query the type inside the assert itself: a named local that is only read
  // by the assert would be an unused variable when asserts are compiled out.
  assert(MRI.getType(Store.getValueReg()).isVector() &&
         "Expected a vector store value");

  LLT NewTy = LLT::scalar(64);
  Register PtrReg = Store.getPointerReg();

  // One s64 zero is shared by both halves.
  auto Zero = B.buildConstant(NewTy, 0);
  // Address of the upper half: original pointer advanced by 8 bytes.
  auto HighPtr = B.buildPtrAdd(MRI.getType(PtrReg), PtrReg,
                               B.buildConstant(LLT::scalar(64), 8));

  // Derive the memory operands of the two halves from the original one, at
  // byte offsets 0 and 8, each with the s64 memory type.
  auto &MF = *MI.getMF();
  auto *LowMMO = MF.getMachineMemOperand(&Store.getMMO(), 0, NewTy);
  auto *HighMMO = MF.getMachineMemOperand(&Store.getMMO(), 8, NewTy);

  B.buildStore(Zero, PtrReg, *LowMMO);
  B.buildStore(Zero, HighPtr, *HighMMO);
  Store.eraseFromParent();
}
|
||||
|
||||
#define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
|
||||
#include "AArch64GenPostLegalizeGICombiner.inc"
|
||||
#undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
|
||||
|
|
|
@ -0,0 +1,200 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
|
||||
|
||||
...
|
||||
---
|
||||
name: v2s64_split
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1.entry:
|
||||
liveins: $x0
|
||||
|
||||
; Split a store of <2 x i64> into two scalar stores.
|
||||
|
||||
; CHECK-LABEL: name: v2s64_split
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
|
||||
; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
|
||||
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
|
||||
; CHECK-NEXT: G_STORE %zero(s64), [[COPY]](p0) :: (store (s64), align 16)
|
||||
; CHECK-NEXT: G_STORE %zero(s64), [[PTR_ADD]](p0) :: (store (s64) into unknown-address + 8)
|
||||
; CHECK-NEXT: RET_ReallyLR
|
||||
%0:_(p0) = COPY $x0
|
||||
%zero:_(s64) = G_CONSTANT i64 0
|
||||
%zerovec:_(<2 x s64>) = G_BUILD_VECTOR %zero, %zero
|
||||
G_STORE %zerovec(<2 x s64>), %0(p0) :: (store (<2 x s64>))
|
||||
RET_ReallyLR
|
||||
|
||||
...
|
||||
---
|
||||
name: v4i32_split
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1.entry:
|
||||
liveins: $x0
|
||||
|
||||
; CHECK-LABEL: name: v4i32_split
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
|
||||
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
|
||||
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; CHECK-NEXT: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64), align 16)
|
||||
; CHECK-NEXT: G_STORE [[C]](s64), [[PTR_ADD]](p0) :: (store (s64) into unknown-address + 8)
|
||||
; CHECK-NEXT: RET_ReallyLR
|
||||
%0:_(p0) = COPY $x0
|
||||
%zero:_(s32) = G_CONSTANT i32 0
|
||||
%zerovec:_(<4 x s32>) = G_BUILD_VECTOR %zero, %zero, %zero, %zero
|
||||
G_STORE %zerovec(<4 x s32>), %0(p0) :: (store (<4 x s32>))
|
||||
RET_ReallyLR
|
||||
|
||||
...
|
||||
---
|
||||
name: v8i16_split
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1.entry:
|
||||
liveins: $x0
|
||||
|
||||
; CHECK-LABEL: name: v8i16_split
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
|
||||
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
|
||||
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
|
||||
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
|
||||
; CHECK-NEXT: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64), align 16)
|
||||
; CHECK-NEXT: G_STORE [[C]](s64), [[PTR_ADD]](p0) :: (store (s64) into unknown-address + 8)
|
||||
; CHECK-NEXT: RET_ReallyLR
|
||||
%0:_(p0) = COPY $x0
|
||||
%zero:_(s16) = G_CONSTANT i16 0
|
||||
%zerovec:_(<8 x s16>) = G_BUILD_VECTOR %zero, %zero, %zero, %zero, %zero, %zero, %zero, %zero
|
||||
G_STORE %zerovec(<8 x s16>), %0(p0) :: (store (<8 x s16>))
|
||||
RET_ReallyLR
|
||||
|
||||
...
|
||||
|
||||
# Negative tests
|
||||
---
|
||||
name: v2i32_nosplit
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1.entry:
|
||||
liveins: $x0
|
||||
|
||||
; CHECK-LABEL: name: v2i32_nosplit
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
|
||||
; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
|
||||
; CHECK-NEXT: %zerovec:_(<2 x s32>) = G_BUILD_VECTOR %zero(s32), %zero(s32)
|
||||
; CHECK-NEXT: G_STORE %zerovec(<2 x s32>), [[COPY]](p0) :: (store (<2 x s32>))
|
||||
; CHECK-NEXT: RET_ReallyLR
|
||||
%0:_(p0) = COPY $x0
|
||||
%zero:_(s32) = G_CONSTANT i32 0
|
||||
%zerovec:_(<2 x s32>) = G_BUILD_VECTOR %zero, %zero
|
||||
G_STORE %zerovec(<2 x s32>), %0(p0) :: (store (<2 x s32>))
|
||||
RET_ReallyLR
|
||||
|
||||
...
|
||||
---
|
||||
name: multiple_uses
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1.entry:
|
||||
liveins: $x0
|
||||
|
||||
; CHECK-LABEL: name: multiple_uses
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
|
||||
; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
|
||||
; CHECK-NEXT: %zerovec:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
|
||||
; CHECK-NEXT: G_STORE %zerovec(<2 x s64>), [[COPY]](p0) :: (store (<2 x s64>))
|
||||
; CHECK-NEXT: $q0 = COPY %zerovec(<2 x s64>)
|
||||
; CHECK-NEXT: RET_ReallyLR
|
||||
%0:_(p0) = COPY $x0
|
||||
%zero:_(s64) = G_CONSTANT i64 0
|
||||
%zerovec:_(<2 x s64>) = G_BUILD_VECTOR %zero, %zero
|
||||
G_STORE %zerovec(<2 x s64>), %0(p0) :: (store (<2 x s64>))
|
||||
$q0 = COPY %zerovec
|
||||
RET_ReallyLR
|
||||
|
||||
...
|
||||
---
|
||||
name: truncating
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1.entry:
|
||||
liveins: $x0
|
||||
|
||||
; CHECK-LABEL: name: truncating
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
|
||||
; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
|
||||
; CHECK-NEXT: %zerovec:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
|
||||
; CHECK-NEXT: G_STORE %zerovec(<2 x s64>), [[COPY]](p0) :: (store (<2 x s32>))
|
||||
; CHECK-NEXT: RET_ReallyLR
|
||||
%0:_(p0) = COPY $x0
|
||||
%zero:_(s64) = G_CONSTANT i64 0
|
||||
%zerovec:_(<2 x s64>) = G_BUILD_VECTOR %zero, %zero
|
||||
G_STORE %zerovec(<2 x s64>), %0(p0) :: (store (<2 x s32>))
|
||||
RET_ReallyLR
|
||||
|
||||
...
|
||||
---
|
||||
name: volatile
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1.entry:
|
||||
liveins: $x0
|
||||
|
||||
; CHECK-LABEL: name: volatile
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
|
||||
; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
|
||||
; CHECK-NEXT: %zerovec:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
|
||||
; CHECK-NEXT: G_STORE %zerovec(<2 x s64>), [[COPY]](p0) :: (volatile store (<4 x s32>))
|
||||
; CHECK-NEXT: RET_ReallyLR
|
||||
%0:_(p0) = COPY $x0
|
||||
%zero:_(s64) = G_CONSTANT i64 0
|
||||
%zerovec:_(<2 x s64>) = G_BUILD_VECTOR %zero, %zero
|
||||
G_STORE %zerovec(<2 x s64>), %0(p0) :: (volatile store (<4 x s32>))
|
||||
RET_ReallyLR
|
||||
|
||||
...
|
||||
---
|
||||
name: s128_scalar
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1.entry:
|
||||
liveins: $x0
|
||||
|
||||
; Negative test: a scalar s128 store of zero is not a vector store, so it is not split.
|
||||
|
||||
; CHECK-LABEL: name: s128_scalar
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
|
||||
; CHECK-NEXT: %zero:_(s128) = G_CONSTANT i128 0
|
||||
; CHECK-NEXT: G_STORE %zero(s128), [[COPY]](p0) :: (store (s128))
|
||||
; CHECK-NEXT: RET_ReallyLR
|
||||
%0:_(p0) = COPY $x0
|
||||
%zero:_(s128) = G_CONSTANT i128 0
|
||||
G_STORE %zero(s128), %0(p0) :: (store (s128))
|
||||
RET_ReallyLR
|
||||
|
||||
...
|
Loading…
Reference in New Issue