[AArch64][GlobalISel] Add a combine to fold away the truncate in G_ICMP EQ/NE (G_TRUNC(v), 0)

We try this optimization when we can determine that testing the wide value against zero with an eq/ne predicate gives the same result as testing only the lower bits that survive the truncate.

Differential Revision: https://reviews.llvm.org/D95645
commit be62b3ba34 (parent e9dc94291e)
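A quick standalone illustration of the fact the combine relies on (an editor's sketch, not part of the patch; numSignBits below is a hand-rolled stand-in for GISelKnownBits::computeNumSignBits, and all other names are invented for the example): if a 64-bit value has strictly more than 64 - 32 = 32 sign bits, every bit a truncate to 32 bits would drop is a copy of bit 31, so the truncated value is zero exactly when the wide value is zero.

#include <cassert>
#include <cstdint>
#include <initializer_list>

// Hand-rolled stand-in for GISelKnownBits::computeNumSignBits: the number
// of leading bits equal to the sign bit, counting the sign bit itself.
static unsigned numSignBits(uint64_t V) {
  uint64_t Sign = (V >> 63) & 1;
  unsigned N = 1;
  for (int Bit = 62; Bit >= 0 && ((V >> Bit) & 1) == Sign; --Bit)
    ++N;
  return N;
}

int main() {
  // A 4-byte G_SEXTLOAD into an s64 always yields at least 33 sign bits.
  uint64_t Loaded = (uint64_t)(int64_t)(int32_t)0xFFFF1234u;
  assert(numSignBits(Loaded) >= 33);

  for (uint64_t V : {Loaded, uint64_t(0), ~uint64_t(0), uint64_t(42)}) {
    // The combine's test: strictly more sign bits than the 64 - 32 bits a
    // truncate to 32 bits discards. When it holds, comparing the low 32
    // bits with zero is equivalent to comparing the whole value with zero.
    if (numSignBits(V) > 64 - 32)
      assert(((uint32_t)V == 0) == (V == 0));
  }
  return 0;
}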
llvm/lib/Target/AArch64/AArch64Combine.td

@@ -17,9 +17,17 @@ def fconstant_to_constant : GICombineRule<
          [{ return matchFConstantToConstant(*${root}, MRI); }]),
   (apply [{ applyFConstantToConstant(*${root}); }])>;
 
+def icmp_redundant_trunc_matchdata : GIDefMatchData<"Register">;
+def icmp_redundant_trunc : GICombineRule<
+  (defs root:$root, icmp_redundant_trunc_matchdata:$matchinfo),
+  (match (wip_match_opcode G_ICMP):$root,
+         [{ return matchICmpRedundantTrunc(*${root}, MRI, Helper.getKnownBits(), ${matchinfo}); }]),
+  (apply [{ applyICmpRedundantTrunc(*${root}, MRI, B, Observer, ${matchinfo}); }])>;
+
 def AArch64PreLegalizerCombinerHelper: GICombinerHelper<
   "AArch64GenPreLegalizerCombinerHelper", [all_combines,
-                                         fconstant_to_constant]> {
+                                         fconstant_to_constant,
+                                         icmp_redundant_trunc]> {
   let DisableRuleOption = "aarch64prelegalizercombiner-disable-rule";
   let StateClass = "AArch64PreLegalizerCombinerHelperState";
   let AdditionalArguments = [];
llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp

@@ -17,9 +17,13 @@
 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/Support/Debug.h"
 
 #define DEBUG_TYPE "aarch64-prelegalizer-combiner"
@@ -53,6 +57,56 @@ static void applyFConstantToConstant(MachineInstr &MI) {
   MI.eraseFromParent();
 }
 
+/// Try to match a G_ICMP of a G_TRUNC with zero, in which the truncated bits
+/// are sign bits. In this case, we can transform the G_ICMP to directly
+/// compare the wide value with a zero.
+static bool matchICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                    GISelKnownBits *KB, Register &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_ICMP && KB);
+
+  auto Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
+  if (!ICmpInst::isEquality(Pred))
+    return false;
+
+  Register LHS = MI.getOperand(2).getReg();
+  LLT LHSTy = MRI.getType(LHS);
+  if (!LHSTy.isScalar())
+    return false;
+
+  Register RHS = MI.getOperand(3).getReg();
+  Register WideReg;
+
+  if (!mi_match(LHS, MRI, m_GTrunc(m_Reg(WideReg))) ||
+      !mi_match(RHS, MRI, m_SpecificICst(0)))
+    return false;
+
+  LLT WideTy = MRI.getType(WideReg);
+  if (KB->computeNumSignBits(WideReg) <=
+      WideTy.getSizeInBits() - LHSTy.getSizeInBits())
+    return false;
+
+  MatchInfo = WideReg;
+  return true;
+}
+
+static bool applyICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                    MachineIRBuilder &Builder,
+                                    GISelChangeObserver &Observer,
+                                    Register &WideReg) {
+  assert(MI.getOpcode() == TargetOpcode::G_ICMP);
+
+  LLT WideTy = MRI.getType(WideReg);
+  // We're going to directly use the wide register as the LHS, and then use an
+  // equivalent size zero for RHS.
+  Builder.setInstrAndDebugLoc(MI);
+  auto WideZero = Builder.buildConstant(WideTy, 0);
+  Observer.changingInstr(MI);
+  MI.getOperand(2).setReg(WideReg);
+  MI.getOperand(3).setReg(WideZero.getReg(0));
+  Observer.changedInstr(MI);
+  return true;
+}
+
 class AArch64PreLegalizerCombinerHelperState {
 protected:
   CombinerHelper &Helper;
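To make the sign-bit condition concrete (a worked example from the editor, using the same numbers the tests below rely on):

computeNumSignBits(%load) = 33   ; s64 from a 4-byte G_SEXTLOAD: bits 31..63 are all copies of the loaded sign bit
trunc to s32 drops 64 - 32 = 32 bits; 33 > 32, so the match succeeds
trunc to s16 drops 64 - 16 = 48 bits; 33 <= 48, so the match fails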
llvm/test/CodeGen/AArch64/GlobalISel/ (new test file)

@@ -0,0 +1,107 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple aarch64-apple-ios -run-pass=aarch64-prelegalizer-combiner %s -o - -verify-machineinstrs | FileCheck %s

# This test checks the optimization to remove the G_TRUNC if we can determine it's redundant.
---
name:            icmp_trunc_sextload
tracksRegLiveness: true
body:             |
  bb.1:
    liveins: $x0

    ; CHECK-LABEL: name: icmp_trunc_sextload
    ; CHECK: liveins: $x0
    ; CHECK: %v:_(p0) = COPY $x0
    ; CHECK: %load:_(s64) = G_SEXTLOAD %v(p0) :: (load 4)
    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
    ; CHECK: %cmp:_(s1) = G_ICMP intpred(ne), %load(s64), [[C]]
    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %cmp(s1)
    ; CHECK: $w0 = COPY [[ANYEXT]](s32)
    ; CHECK: RET_ReallyLR implicit $w0
    %v:_(p0) = COPY $x0
    %load:_(s64) = G_SEXTLOAD %v:_(p0) :: (load 4)
    %trunc:_(s32) = G_TRUNC %load(s64)
    %zero:_(s32) = G_CONSTANT i32 0
    %cmp:_(s1) = G_ICMP intpred(ne), %trunc(s32), %zero
    %5:_(s32) = G_ANYEXT %cmp
    $w0 = COPY %5(s32)
    RET_ReallyLR implicit $w0
...
---
name:            icmp_trunc_sextload_eq
tracksRegLiveness: true
body:             |
  bb.1:
    liveins: $x0

    ; CHECK-LABEL: name: icmp_trunc_sextload_eq
    ; CHECK: liveins: $x0
    ; CHECK: %v:_(p0) = COPY $x0
    ; CHECK: %load:_(s64) = G_SEXTLOAD %v(p0) :: (load 4)
    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
    ; CHECK: %cmp:_(s1) = G_ICMP intpred(eq), %load(s64), [[C]]
    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %cmp(s1)
    ; CHECK: $w0 = COPY [[ANYEXT]](s32)
    ; CHECK: RET_ReallyLR implicit $w0
    %v:_(p0) = COPY $x0
    %load:_(s64) = G_SEXTLOAD %v:_(p0) :: (load 4)
    %trunc:_(s32) = G_TRUNC %load(s64)
    %zero:_(s32) = G_CONSTANT i32 0
    %cmp:_(s1) = G_ICMP intpred(eq), %trunc(s32), %zero
    %5:_(s32) = G_ANYEXT %cmp
    $w0 = COPY %5(s32)
    RET_ReallyLR implicit $w0
...
---
name:            icmp_trunc_sextload_wrongpred
tracksRegLiveness: true
body:             |
  bb.1:
    liveins: $x0

    ; CHECK-LABEL: name: icmp_trunc_sextload_wrongpred
    ; CHECK: liveins: $x0
    ; CHECK: %v:_(p0) = COPY $x0
    ; CHECK: %load:_(s64) = G_SEXTLOAD %v(p0) :: (load 4)
    ; CHECK: %trunc:_(s32) = G_TRUNC %load(s64)
    ; CHECK: %zero:_(s32) = G_CONSTANT i32 0
    ; CHECK: %cmp:_(s1) = G_ICMP intpred(slt), %trunc(s32), %zero
    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %cmp(s1)
    ; CHECK: $w0 = COPY [[ANYEXT]](s32)
    ; CHECK: RET_ReallyLR implicit $w0
    %v:_(p0) = COPY $x0
    %load:_(s64) = G_SEXTLOAD %v:_(p0) :: (load 4)
    %trunc:_(s32) = G_TRUNC %load(s64)
    %zero:_(s32) = G_CONSTANT i32 0
    %cmp:_(s1) = G_ICMP intpred(slt), %trunc(s32), %zero
    %5:_(s32) = G_ANYEXT %cmp
    $w0 = COPY %5(s32)
    RET_ReallyLR implicit $w0
...
---
name:            icmp_trunc_sextload_extend_mismatch
tracksRegLiveness: true
body:             |
  bb.1:
    liveins: $x0

    ; CHECK-LABEL: name: icmp_trunc_sextload_extend_mismatch
    ; CHECK: liveins: $x0
    ; CHECK: %v:_(p0) = COPY $x0
    ; CHECK: %load:_(s64) = G_SEXTLOAD %v(p0) :: (load 4)
    ; CHECK: %trunc:_(s16) = G_TRUNC %load(s64)
    ; CHECK: %zero:_(s16) = G_CONSTANT i16 0
    ; CHECK: %cmp:_(s1) = G_ICMP intpred(ne), %trunc(s16), %zero
    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %cmp(s1)
    ; CHECK: $w0 = COPY [[ANYEXT]](s32)
    ; CHECK: RET_ReallyLR implicit $w0
    %v:_(p0) = COPY $x0
    %load:_(s64) = G_SEXTLOAD %v:_(p0) :: (load 4)
    %trunc:_(s16) = G_TRUNC %load(s64)
    %zero:_(s16) = G_CONSTANT i16 0
    %cmp:_(s1) = G_ICMP intpred(ne), %trunc(s16), %zero
    %5:_(s32) = G_ANYEXT %cmp
    $w0 = COPY %5(s32)
    RET_ReallyLR implicit $w0
...
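Of the four cases, only the first two combine: icmp_trunc_sextload_wrongpred keeps its G_TRUNC because the rule only fires for equality predicates (ICmpInst::isEquality rejects intpred(slt)), and icmp_trunc_sextload_extend_mismatch keeps it because the load's 33 sign bits do not cover the 48 bits dropped when truncating s64 to s16.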