[AArch64][GlobalISel] Add a combine to fold away truncate in: G_ICMP EQ/NE (G_TRUNC(v), 0)

We try to do this optimization if we can determine that comparing the wide
value against zero with an eq/ne predicate gives the same result as comparing
the truncated low bits, i.e. when the bits removed by the truncate are all
sign bits.
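
As a standalone illustration of why the fold is sound (a minimal plain-C++
sketch, not part of the patch): once the bits dropped by the truncate are all
copies of the sign bit, as after a sign extension, an eq/ne test of the
narrow value against zero agrees with the same test on the wide value.

#include <cassert>
#include <cstdint>

int main() {
  const int32_t Narrow[] = {0, 1, -1, INT32_MIN, INT32_MAX};
  for (int32_t N : Narrow) {
    int64_t Wide = N; // sign-extend: the top 32 bits are copies of bit 31
    // Truncating back to 32 bits and testing eq/ne against zero agrees with
    // testing the wide value directly, so the G_TRUNC is redundant.
    assert((static_cast<int32_t>(Wide) == 0) == (Wide == 0));
    assert((static_cast<int32_t>(Wide) != 0) == (Wide != 0));
  }
  return 0;
}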

Differential Revision: https://reviews.llvm.org/D95645
Author: Amara Emerson
Date:   2021-01-26 23:39:56 -08:00
Commit: be62b3ba34 (parent: e9dc94291e)

3 changed files with 170 additions and 1 deletions


@@ -17,9 +17,17 @@ def fconstant_to_constant : GICombineRule<
[{ return matchFConstantToConstant(*${root}, MRI); }]),
(apply [{ applyFConstantToConstant(*${root}); }])>;
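// Fold away the truncate in: G_ICMP eq/ne (G_TRUNC(v), 0), when the bits
// removed by the truncate are known to be sign bits.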
def icmp_redundant_trunc_matchdata : GIDefMatchData<"Register">;
def icmp_redundant_trunc : GICombineRule<
(defs root:$root, icmp_redundant_trunc_matchdata:$matchinfo),
(match (wip_match_opcode G_ICMP):$root,
[{ return matchICmpRedundantTrunc(*${root}, MRI, Helper.getKnownBits(), ${matchinfo}); }]),
(apply [{ applyICmpRedundantTrunc(*${root}, MRI, B, Observer, ${matchinfo}); }])>;
def AArch64PreLegalizerCombinerHelper: GICombinerHelper<
"AArch64GenPreLegalizerCombinerHelper", [all_combines,
-fconstant_to_constant]> {
+fconstant_to_constant,
+icmp_redundant_trunc]> {
let DisableRuleOption = "aarch64prelegalizercombiner-disable-rule";
let StateClass = "AArch64PreLegalizerCombinerHelperState";
let AdditionalArguments = [];


@@ -17,9 +17,13 @@
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "aarch64-prelegalizer-combiner"
@@ -53,6 +57,56 @@ static void applyFConstantToConstant(MachineInstr &MI) {
MI.eraseFromParent();
}
/// Try to match a G_ICMP of a G_TRUNC with zero, in which the truncated bits
/// are sign bits. In this case, we can transform the G_ICMP to directly compare
/// the wide value with a zero.
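/// For example, if %wide is an s64 produced by a 32-bit sign-extending load,
/// then in:
///   %t:(s32) = G_TRUNC %wide ; %cmp:(s1) = G_ICMP intpred(ne), %t, 0
/// the compare can instead test %wide against an s64 zero.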
static bool matchICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
GISelKnownBits *KB, Register &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_ICMP && KB);
auto Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
if (!ICmpInst::isEquality(Pred))
return false;
Register LHS = MI.getOperand(2).getReg();
LLT LHSTy = MRI.getType(LHS);
if (!LHSTy.isScalar())
return false;
Register RHS = MI.getOperand(3).getReg();
Register WideReg;
if (!mi_match(LHS, MRI, m_GTrunc(m_Reg(WideReg))) ||
!mi_match(RHS, MRI, m_SpecificICst(0)))
return false;
LLT WideTy = MRI.getType(WideReg);
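// The truncate drops the top (wide - narrow) bits. They are all copies of
// the narrow value's sign bit only if the wide value has strictly more sign
// bits than that, i.e. computeNumSignBits(Wide) > WideSize - NarrowSize.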
if (KB->computeNumSignBits(WideReg) <=
WideTy.getSizeInBits() - LHSTy.getSizeInBits())
return false;
MatchInfo = WideReg;
return true;
}
static bool applyICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &Builder,
GISelChangeObserver &Observer,
Register &WideReg) {
assert(MI.getOpcode() == TargetOpcode::G_ICMP);
LLT WideTy = MRI.getType(WideReg);
// We're going to directly use the wide register as the LHS, and then use an
// equivalent size zero for RHS.
Builder.setInstrAndDebugLoc(MI);
auto WideZero = Builder.buildConstant(WideTy, 0);
Observer.changingInstr(MI);
MI.getOperand(2).setReg(WideReg);
MI.getOperand(3).setReg(WideZero.getReg(0));
Observer.changedInstr(MI);
return true;
}
class AArch64PreLegalizerCombinerHelperState {
protected:
CombinerHelper &Helper;


@@ -0,0 +1,107 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple aarch64-apple-ios -run-pass=aarch64-prelegalizer-combiner %s -o - -verify-machineinstrs | FileCheck %s
# This test checks that the combine removes the G_TRUNC when we can determine it's redundant.
---
name: icmp_trunc_sextload
tracksRegLiveness: true
body: |
bb.1:
liveins: $x0
; CHECK-LABEL: name: icmp_trunc_sextload
; CHECK: liveins: $x0
; CHECK: %v:_(p0) = COPY $x0
; CHECK: %load:_(s64) = G_SEXTLOAD %v(p0) :: (load 4)
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK: %cmp:_(s1) = G_ICMP intpred(ne), %load(s64), [[C]]
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %cmp(s1)
; CHECK: $w0 = COPY [[ANYEXT]](s32)
; CHECK: RET_ReallyLR implicit $w0
%v:_(p0) = COPY $x0
%load:_(s64) = G_SEXTLOAD %v:_(p0) :: (load 4)
%trunc:_(s32) = G_TRUNC %load(s64)
%zero:_(s32) = G_CONSTANT i32 0
%cmp:_(s1) = G_ICMP intpred(ne), %trunc(s32), %zero
%5:_(s32) = G_ANYEXT %cmp
$w0 = COPY %5(s32)
RET_ReallyLR implicit $w0
...
---
name: icmp_trunc_sextload_eq
tracksRegLiveness: true
body: |
bb.1:
liveins: $x0
; CHECK-LABEL: name: icmp_trunc_sextload_eq
; CHECK: liveins: $x0
; CHECK: %v:_(p0) = COPY $x0
; CHECK: %load:_(s64) = G_SEXTLOAD %v(p0) :: (load 4)
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK: %cmp:_(s1) = G_ICMP intpred(eq), %load(s64), [[C]]
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %cmp(s1)
; CHECK: $w0 = COPY [[ANYEXT]](s32)
; CHECK: RET_ReallyLR implicit $w0
%v:_(p0) = COPY $x0
%load:_(s64) = G_SEXTLOAD %v:_(p0) :: (load 4)
%trunc:_(s32) = G_TRUNC %load(s64)
%zero:_(s32) = G_CONSTANT i32 0
%cmp:_(s1) = G_ICMP intpred(eq), %trunc(s32), %zero
%5:_(s32) = G_ANYEXT %cmp
$w0 = COPY %5(s32)
RET_ReallyLR implicit $w0
...
---
name: icmp_trunc_sextload_wrongpred
tracksRegLiveness: true
body: |
bb.1:
liveins: $x0
; CHECK-LABEL: name: icmp_trunc_sextload_wrongpred
; CHECK: liveins: $x0
; CHECK: %v:_(p0) = COPY $x0
; CHECK: %load:_(s64) = G_SEXTLOAD %v(p0) :: (load 4)
; CHECK: %trunc:_(s32) = G_TRUNC %load(s64)
; CHECK: %zero:_(s32) = G_CONSTANT i32 0
; CHECK: %cmp:_(s1) = G_ICMP intpred(slt), %trunc(s32), %zero
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %cmp(s1)
; CHECK: $w0 = COPY [[ANYEXT]](s32)
; CHECK: RET_ReallyLR implicit $w0
%v:_(p0) = COPY $x0
%load:_(s64) = G_SEXTLOAD %v:_(p0) :: (load 4)
%trunc:_(s32) = G_TRUNC %load(s64)
%zero:_(s32) = G_CONSTANT i32 0
%cmp:_(s1) = G_ICMP intpred(slt), %trunc(s32), %zero
%5:_(s32) = G_ANYEXT %cmp
$w0 = COPY %5(s32)
RET_ReallyLR implicit $w0
...
---
name: icmp_trunc_sextload_extend_mismatch
tracksRegLiveness: true
body: |
bb.1:
liveins: $x0
; CHECK-LABEL: name: icmp_trunc_sextload_extend_mismatch
; CHECK: liveins: $x0
; CHECK: %v:_(p0) = COPY $x0
; CHECK: %load:_(s64) = G_SEXTLOAD %v(p0) :: (load 4)
; CHECK: %trunc:_(s16) = G_TRUNC %load(s64)
; CHECK: %zero:_(s16) = G_CONSTANT i16 0
; CHECK: %cmp:_(s1) = G_ICMP intpred(ne), %trunc(s16), %zero
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %cmp(s1)
; CHECK: $w0 = COPY [[ANYEXT]](s32)
; CHECK: RET_ReallyLR implicit $w0
%v:_(p0) = COPY $x0
%load:_(s64) = G_SEXTLOAD %v:_(p0) :: (load 4)
%trunc:_(s16) = G_TRUNC %load(s64)
%zero:_(s16) = G_CONSTANT i16 0
%cmp:_(s1) = G_ICMP intpred(ne), %trunc(s16), %zero
%5:_(s32) = G_ANYEXT %cmp
$w0 = COPY %5(s32)
RET_ReallyLR implicit $w0
...