[AArch64][GlobalISel] Split vector stores of zero.

This results in a very minor improvement in most cases: we generate stores
of xzr instead of materializing zero in a vector register and storing it.
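
For illustration, a minimal C++ reproducer of the kind of store this combine
targets. This is a sketch, not code from the patch: the function name is made
up, and the instruction sequences in the comments are the AArch64 lowerings one
would typically expect before and after the change, not output captured from
this commit.

// Build for AArch64 with GlobalISel, e.g.:
//   clang --target=aarch64-linux-gnu -O2 -fglobal-isel -S store_zero.cpp
#include <arm_neon.h>

void store_zero(int32_t *p) {
  // A 128-bit store of an all-zero vector; after IR translation this becomes
  // a G_STORE of a zero G_BUILD_VECTOR, which is what the new combine matches.
  //
  // Before (assumed): materialize zero in a vector register, then store it:
  //   movi v0.2d, #0
  //   str  q0, [x0]
  // After splitting into two 64-bit stores of zero, the selector can store
  // xzr directly, and load/store pairing typically yields:
  //   stp  xzr, xzr, [x0]
  vst1q_s32(p, vdupq_n_s32(0));
}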

Differential Revision: https://reviews.llvm.org/D115479
Amara Emerson, 2021-12-09 16:05:14 -08:00
parent 50f3380290
commit 98095afbcb
3 changed files with 247 additions and 1 deletion

@@ -196,6 +196,13 @@ def mutate_anyext_to_zext : GICombineRule<
  (apply [{ applyMutateAnyExtToZExt(*${d}, MRI, B, Observer); }])
>;

def split_store_zero_128 : GICombineRule<
  (defs root:$d),
  (match (wip_match_opcode G_STORE):$d,
         [{ return matchSplitStoreZero128(*${d}, MRI); }]),
  (apply [{ applySplitStoreZero128(*${d}, MRI, B, Observer); }])
>;

// Post-legalization combines which should happen at all optimization levels.
// (E.g. ones that facilitate matching for the selector) For example, matching
// pseudos.
@@ -220,6 +227,7 @@ def AArch64PostLegalizerCombinerHelper
                        icmp_to_true_false_known_bits, merge_unmerge,
                        select_combines, fold_merge_to_zext,
                        constant_fold, identity_combines,
                        ptr_add_immed_chain, overlapping_and]> {
                        ptr_add_immed_chain, overlapping_and,
                        split_store_zero_128]> {
  let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
}

@@ -289,6 +289,44 @@ static void applyMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI,
  Observer.changedInstr(MI);
}

/// Match a 128-bit store of zero and split it into two 64-bit stores, for
/// size/performance reasons.
static bool matchSplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI) {
  GStore &Store = cast<GStore>(MI);
  if (!Store.isSimple())
    return false;
  LLT ValTy = MRI.getType(Store.getValueReg());
  if (!ValTy.isVector() || ValTy.getSizeInBits() != 128)
    return false;
  if (ValTy.getSizeInBits() != Store.getMemSizeInBits())
    return false; // Don't split truncating stores.
  if (!MRI.hasOneNonDBGUse(Store.getValueReg()))
    return false;
  auto MaybeCst = isConstantOrConstantSplatVector(
      *MRI.getVRegDef(Store.getValueReg()), MRI);
  return MaybeCst && MaybeCst->isZero();
}

static void applySplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI,
                                   MachineIRBuilder &B,
                                   GISelChangeObserver &Observer) {
  B.setInstrAndDebugLoc(MI);
  GStore &Store = cast<GStore>(MI);
  LLT ValTy = MRI.getType(Store.getValueReg());
  assert(ValTy.isVector() && "Expected a vector store value");
  LLT NewTy = LLT::scalar(64);
  Register PtrReg = Store.getPointerReg();
  auto Zero = B.buildConstant(NewTy, 0);
  auto HighPtr = B.buildPtrAdd(MRI.getType(PtrReg), PtrReg,
                               B.buildConstant(LLT::scalar(64), 8));
  auto &MF = *MI.getMF();
  auto *LowMMO = MF.getMachineMemOperand(&Store.getMMO(), 0, NewTy);
  auto *HighMMO = MF.getMachineMemOperand(&Store.getMMO(), 8, NewTy);
  B.buildStore(Zero, PtrReg, *LowMMO);
  B.buildStore(Zero, HighPtr, *HighMMO);
  Store.eraseFromParent();
}

#define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AArch64GenPostLegalizeGICombiner.inc"
#undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS

@@ -0,0 +1,200 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
...
---
name: v2s64_split
legalized: true
tracksRegLiveness: true
body: |
  bb.1.entry:
    liveins: $x0

    ; Split a store of <2 x i64> into two scalar stores.
    ; CHECK-LABEL: name: v2s64_split
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
    ; CHECK-NEXT: G_STORE %zero(s64), [[COPY]](p0) :: (store (s64), align 16)
    ; CHECK-NEXT: G_STORE %zero(s64), [[PTR_ADD]](p0) :: (store (s64) into unknown-address + 8)
    ; CHECK-NEXT: RET_ReallyLR
    %0:_(p0) = COPY $x0
    %zero:_(s64) = G_CONSTANT i64 0
    %zerovec:_(<2 x s64>) = G_BUILD_VECTOR %zero, %zero
    G_STORE %zerovec(<2 x s64>), %0(p0) :: (store (<2 x s64>))
    RET_ReallyLR
...
---
name: v4i32_split
legalized: true
tracksRegLiveness: true
body: |
  bb.1.entry:
    liveins: $x0

    ; CHECK-LABEL: name: v4i32_split
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
    ; CHECK-NEXT: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64), align 16)
    ; CHECK-NEXT: G_STORE [[C]](s64), [[PTR_ADD]](p0) :: (store (s64) into unknown-address + 8)
    ; CHECK-NEXT: RET_ReallyLR
    %0:_(p0) = COPY $x0
    %zero:_(s32) = G_CONSTANT i32 0
    %zerovec:_(<4 x s32>) = G_BUILD_VECTOR %zero, %zero, %zero, %zero
    G_STORE %zerovec(<4 x s32>), %0(p0) :: (store (<4 x s32>))
    RET_ReallyLR
...
---
name: v8i16_split
legalized: true
tracksRegLiveness: true
body: |
  bb.1.entry:
    liveins: $x0

    ; CHECK-LABEL: name: v8i16_split
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
    ; CHECK-NEXT: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64), align 16)
    ; CHECK-NEXT: G_STORE [[C]](s64), [[PTR_ADD]](p0) :: (store (s64) into unknown-address + 8)
    ; CHECK-NEXT: RET_ReallyLR
    %0:_(p0) = COPY $x0
    %zero:_(s16) = G_CONSTANT i16 0
    %zerovec:_(<8 x s16>) = G_BUILD_VECTOR %zero, %zero, %zero, %zero, %zero, %zero, %zero, %zero
    G_STORE %zerovec(<8 x s16>), %0(p0) :: (store (<8 x s16>))
    RET_ReallyLR
...
# Negative tests
---
name: v2i32_nosplit
legalized: true
tracksRegLiveness: true
body: |
  bb.1.entry:
    liveins: $x0

    ; CHECK-LABEL: name: v2i32_nosplit
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
    ; CHECK-NEXT: %zerovec:_(<2 x s32>) = G_BUILD_VECTOR %zero(s32), %zero(s32)
    ; CHECK-NEXT: G_STORE %zerovec(<2 x s32>), [[COPY]](p0) :: (store (<2 x s32>))
    ; CHECK-NEXT: RET_ReallyLR
    %0:_(p0) = COPY $x0
    %zero:_(s32) = G_CONSTANT i32 0
    %zerovec:_(<2 x s32>) = G_BUILD_VECTOR %zero, %zero
    G_STORE %zerovec(<2 x s32>), %0(p0) :: (store (<2 x s32>))
    RET_ReallyLR
...
---
name: multiple_uses
legalized: true
tracksRegLiveness: true
body: |
  bb.1.entry:
    liveins: $x0

    ; CHECK-LABEL: name: multiple_uses
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
    ; CHECK-NEXT: %zerovec:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
    ; CHECK-NEXT: G_STORE %zerovec(<2 x s64>), [[COPY]](p0) :: (store (<2 x s64>))
    ; CHECK-NEXT: $q0 = COPY %zerovec(<2 x s64>)
    ; CHECK-NEXT: RET_ReallyLR
    %0:_(p0) = COPY $x0
    %zero:_(s64) = G_CONSTANT i64 0
    %zerovec:_(<2 x s64>) = G_BUILD_VECTOR %zero, %zero
    G_STORE %zerovec(<2 x s64>), %0(p0) :: (store (<2 x s64>))
    $q0 = COPY %zerovec
    RET_ReallyLR
...
---
name: truncating
legalized: true
tracksRegLiveness: true
body: |
  bb.1.entry:
    liveins: $x0

    ; CHECK-LABEL: name: truncating
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
    ; CHECK-NEXT: %zerovec:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
    ; CHECK-NEXT: G_STORE %zerovec(<2 x s64>), [[COPY]](p0) :: (store (<2 x s32>))
    ; CHECK-NEXT: RET_ReallyLR
    %0:_(p0) = COPY $x0
    %zero:_(s64) = G_CONSTANT i64 0
    %zerovec:_(<2 x s64>) = G_BUILD_VECTOR %zero, %zero
    G_STORE %zerovec(<2 x s64>), %0(p0) :: (store (<2 x s32>))
    RET_ReallyLR
...
---
name: volatile
legalized: true
tracksRegLiveness: true
body: |
  bb.1.entry:
    liveins: $x0

    ; CHECK-LABEL: name: volatile
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
    ; CHECK-NEXT: %zerovec:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
    ; CHECK-NEXT: G_STORE %zerovec(<2 x s64>), [[COPY]](p0) :: (volatile store (<4 x s32>))
    ; CHECK-NEXT: RET_ReallyLR
    %0:_(p0) = COPY $x0
    %zero:_(s64) = G_CONSTANT i64 0
    %zerovec:_(<2 x s64>) = G_BUILD_VECTOR %zero, %zero
    G_STORE %zerovec(<2 x s64>), %0(p0) :: (volatile store (<4 x s32>))
    RET_ReallyLR
...
---
name: s128_scalar
legalized: true
tracksRegLiveness: true
body: |
  bb.1.entry:
    liveins: $x0

    ; A 128-bit scalar (non-vector) store of zero is not split.
    ; CHECK-LABEL: name: s128_scalar
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK-NEXT: %zero:_(s128) = G_CONSTANT i128 0
    ; CHECK-NEXT: G_STORE %zero(s128), [[COPY]](p0) :: (store (s128))
    ; CHECK-NEXT: RET_ReallyLR
    %0:_(p0) = COPY $x0
    %zero:_(s128) = G_CONSTANT i128 0
    G_STORE %zero(s128), %0(p0) :: (store (s128))
    RET_ReallyLR
...