[AArch64][GlobalISel] Add post-legalize combine for sext_inreg(trunc(sextload)) -> copy
If the source of a G_SEXT_INREG already carries enough sign bits (for example because it comes from a narrower G_SEXTLOAD), the extension is a no-op and can be replaced with a copy. Without this combine, AArch64 generates redundant G_SEXTs or G_SEXT_INREGs for these patterns.

Differential Revision: https://reviews.llvm.org/D81993
This commit is contained in: parent 830a7c2ad4 · commit 64eb3a4915
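The redundant pattern, taken from the first case of the new MIR test added below:

    %0:_(p0) = COPY $x0
    %1:_(s64) = G_SEXTLOAD %0(p0) :: (load 2)
    %2:_(s32) = G_TRUNC %1(s64)
    %3:_(s32) = G_SEXT_INREG %2(s32), 16    ; %2 already has at least 16 sign bits
    $w0 = COPY %3(s32)

After the combine, the G_SEXT_INREG is replaced with a plain COPY of %2.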
@@ -107,6 +107,9 @@ public:
   bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo);
   void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo);
 
+  bool matchSextAlreadyExtended(MachineInstr &MI);
+  bool applySextAlreadyExtended(MachineInstr &MI);
+
   bool matchElideBrByInvertingCond(MachineInstr &MI);
   void applyElideBrByInvertingCond(MachineInstr &MI);
   bool tryElideBrByInvertingCond(MachineInstr &MI);
@@ -125,6 +125,12 @@ def extending_loads : GICombineRule<
   (apply [{ Helper.applyCombineExtendingLoads(*${root}, ${matchinfo}); }])>;
 def combines_for_extload: GICombineGroup<[extending_loads]>;
 
+def sext_already_extended : GICombineRule<
+  (defs root:$d),
+  (match (wip_match_opcode G_SEXT_INREG):$d,
+         [{ return Helper.matchSextAlreadyExtended(*${d}); }]),
+  (apply [{ Helper.applySextAlreadyExtended(*${d}); }])>;
+
 def combine_indexed_load_store : GICombineRule<
   (defs root:$root, indexed_load_store_matchdata:$matchinfo),
   (match (wip_match_opcode G_LOAD, G_SEXTLOAD, G_ZEXTLOAD, G_STORE):$root,
@@ -576,6 +576,24 @@ bool CombinerHelper::dominates(const MachineInstr &DefMI,
   return isPredecessor(DefMI, UseMI);
 }
 
+bool CombinerHelper::matchSextAlreadyExtended(MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
+  Register SrcReg = MI.getOperand(1).getReg();
+  unsigned SrcSignBits = KB->computeNumSignBits(SrcReg);
+  unsigned NumSextBits =
+      MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() -
+      MI.getOperand(2).getImm();
+  return SrcSignBits >= NumSextBits;
+}
+
+bool CombinerHelper::applySextAlreadyExtended(MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
+  MachineIRBuilder MIB(MI);
+  MIB.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
+  MI.eraseFromParent();
+  return true;
+}
+
 bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
                                             Register &Base, Register &Offset) {
   auto &MF = *MI.getParent()->getParent();
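To put numbers on the check above, using the tests added below: a G_SEXTLOAD of (load 2) into s32 yields 32 - 16 = 16 sign bits under the new G_SEXTLOAD case in computeNumSignBits. G_SEXT_INREG %x, 16 on an s32 destination needs 32 - 16 = 16 of them, so 16 >= 16 holds and the instruction is rewritten to a copy; G_SEXT_INREG %x, 8 needs 32 - 8 = 24, so with only 16 available it is left in place (the not_enough_src_signbits test).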
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -441,6 +442,16 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
     unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
     return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp;
   }
+  case TargetOpcode::G_SEXTLOAD: {
+    Register Dst = MI.getOperand(0).getReg();
+    LLT Ty = MRI.getType(Dst);
+    // TODO: add vector support
+    if (Ty.isVector())
+      break;
+    if (MI.hasOneMemOperand())
+      return Ty.getSizeInBits() - (*MI.memoperands_begin())->getSizeInBits();
+    break;
+  }
   case TargetOpcode::G_TRUNC: {
     Register Src = MI.getOperand(1).getReg();
     LLT SrcTy = MRI.getType(Src);
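With the G_SEXTLOAD case in place, the sext_trunc test also falls out: the s64 G_SEXTLOAD of (load 2) reports 64 - 16 = 48 sign bits, and, assuming the existing G_TRUNC handling subtracts the truncated high bits as SelectionDAG's ComputeNumSignBits does, the s32 G_TRUNC result keeps 48 - 32 = 16, which is exactly enough for the G_SEXT_INREG ..., 16 in that test to be combined away.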
@@ -79,6 +79,6 @@ def shuffle_vector_pseudos : GICombineGroup<[dup, rev, ext, zip, uzp, trn]>;
 def AArch64PostLegalizerCombinerHelper
     : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
                        [erase_undef_store, combines_for_extload,
-                        shuffle_vector_pseudos]> {
+                        sext_already_extended, shuffle_vector_pseudos]> {
   let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
 }
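One way to isolate the new rule when debugging, assuming the option generated from DisableRuleOption accepts rule names (an assumption about the GICombiner backend rather than anything shown in this patch), would be to pass -aarch64postlegalizercombiner-disable-rule=sext_already_extended to llc to switch off only this combine.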
@@ -0,0 +1,81 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+---
+name: test_combine_sext_trunc_of_sextload
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_combine_sext_trunc_of_sextload
+    ; CHECK: liveins: $x0
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s64) = G_SEXTLOAD [[COPY]](p0) :: (load 2)
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[SEXTLOAD]](s64)
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32)
+    ; CHECK: $w0 = COPY [[COPY1]](s32)
+    %0:_(p0) = COPY $x0
+    %1:_(s64) = G_SEXTLOAD %0:_(p0) :: (load 2)
+    %2:_(s32) = G_TRUNC %1:_(s64)
+    %3:_(s32) = G_SEXT_INREG %2:_(s32), 16
+    $w0 = COPY %3(s32)
+...
+---
+name: test_combine_sext_of_sextload
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_combine_sext_of_sextload
+    ; CHECK: liveins: $x0
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 2)
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SEXTLOAD]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; CHECK: $w0 = COPY [[COPY2]](s32)
+    %0:_(p0) = COPY $x0
+    %1:_(s32) = G_SEXTLOAD %0:_(p0) :: (load 2)
+    %2:_(s32) = COPY %1:_(s32)
+    %3:_(s32) = G_SEXT_INREG %2:_(s32), 16
+    $w0 = COPY %3(s32)
+...
+---
+name: test_combine_sext_of_sextload_not_matching
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; Here we're trying to extend from a larger width than was extended in the load.
+    ; CHECK-LABEL: name: test_combine_sext_of_sextload_not_matching
+    ; CHECK: liveins: $x0
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 2)
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SEXTLOAD]](s32)
+    ; CHECK: $w0 = COPY [[COPY1]](s32)
+    %0:_(p0) = COPY $x0
+    %1:_(s32) = G_SEXTLOAD %0:_(p0) :: (load 2)
+    %2:_(s32) = G_SEXT_INREG %1:_(s32), 24
+    $w0 = COPY %2(s32)
+...
+---
+name: test_combine_sext_of_sextload_not_enough_src_signbits
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; Here we're trying to extend from a smaller width than was extended in the load.
+    ; Don't perform the combine.
+    ; CHECK-LABEL: name: test_combine_sext_of_sextload_not_enough_src_signbits
+    ; CHECK: liveins: $x0
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 2)
+    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SEXTLOAD]], 8
+    ; CHECK: $w0 = COPY [[SEXT_INREG]](s32)
+    %0:_(p0) = COPY $x0
+    %1:_(s32) = G_SEXTLOAD %0:_(p0) :: (load 2)
+    %2:_(s32) = G_SEXT_INREG %1:_(s32), 8
+    $w0 = COPY %2(s32)
+...