[AArch64][GlobalISel] Add a post-legalize combine for lowering vector-immediate G_ASHR/G_LSHR.

In order to select the immediate forms using the imported patterns, we need to
lower them into new G_VASHR/G_VLSHR target generic ops. Add a combine that does
this by matching a G_BUILD_VECTOR of constant splat operands.

With this, we get selection for free.
Amara Emerson 2020-09-21 15:03:29 -07:00
parent 3a799deed7
commit a513fdec90
5 changed files with 255 additions and 1 deletion
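
At a high level, the combine fires on a post-legalization vector G_ASHR or
G_LSHR whose shift amount is a constant-splat G_BUILD_VECTOR in the encodable
range, and rewrites it to one of the new pseudos with a scalar immediate
operand. For orientation, the match/apply helpers added below boil down to the
following condensed sketch (matchAndApplyVShrImm is an illustrative name only;
the actual patch keeps the two halves separate so the tablegen-generated
combiner can drive them):

// Condensed, illustrative fusion of matchVAshrLshrImm and applyVAshrLshrImm
// from the C++ hunk below; not a drop-in replacement for the tablegen rule.
static bool matchAndApplyVShrImm(MachineInstr &MI, MachineRegisterInfo &MRI) {
  unsigned Opc = MI.getOpcode();
  if (Opc != TargetOpcode::G_ASHR && Opc != TargetOpcode::G_LSHR)
    return false;
  LLT Ty = MRI.getType(MI.getOperand(1).getReg());
  if (!Ty.isVector())
    return false;
  // The shift amount must be a G_BUILD_VECTOR splat of a single constant,
  // with 1 <= imm <= element width (the encodable range for right shifts).
  MachineInstr *AmtDef = MRI.getVRegDef(MI.getOperand(2).getReg());
  auto Cst = getBuildVectorConstantSplat(*AmtDef, MRI);
  int64_t ElementBits = Ty.getScalarSizeInBits();
  if (!Cst || *Cst < 1 || *Cst > ElementBits)
    return false;
  // Rewrite to the target pseudo; the immediate becomes a scalar s32
  // G_CONSTANT, which is what the imported selection patterns expect.
  unsigned NewOpc =
      Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
  MachineIRBuilder MIB(MI);
  auto Imm = MIB.buildConstant(LLT::scalar(32), *Cst);
  MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), Imm});
  MI.eraseFromParent();
  return true;
}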

@@ -75,11 +75,20 @@ def ext: GICombineRule <
// instruction.
def shuffle_vector_pseudos : GICombineGroup<[dup, rev, ext, zip, uzp, trn]>;

def vashr_vlshr_imm_matchdata : GIDefMatchData<"int64_t">;
def vashr_vlshr_imm : GICombineRule<
  (defs root:$root, vashr_vlshr_imm_matchdata:$matchinfo),
  (match (wip_match_opcode G_ASHR, G_LSHR):$root,
         [{ return matchVAshrLshrImm(*${root}, MRI, ${matchinfo}); }]),
  (apply [{ applyVAshrLshrImm(*${root}, MRI, ${matchinfo}); }])
>;

def AArch64PostLegalizerCombinerHelper
    : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
                       [copy_prop, erase_undef_store, combines_for_extload,
                        sext_trunc_sextload, shuffle_vector_pseudos,
                        hoist_logic_op_with_same_opcode_hands,
-                       and_trivial_mask]> {
+                       and_trivial_mask, vashr_vlshr_imm]> {
  let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
}

@@ -111,6 +111,18 @@ def G_EXT: AArch64GenericInstruction {
  let InOperandList = (ins type0:$v1, type0:$v2, untyped_imm_0:$imm);
}

// Represents a vector G_ASHR with an immediate.
def G_VASHR : AArch64GenericInstruction {
  let OutOperandList = (outs type0:$dst);
  let InOperandList = (ins type0:$src1, untyped_imm_0:$imm);
}

// Represents a vector G_LSHR with an immediate.
def G_VLSHR : AArch64GenericInstruction {
  let OutOperandList = (outs type0:$dst);
  let InOperandList = (ins type0:$src1, untyped_imm_0:$imm);
}

def : GINodeEquiv<G_REV16, AArch64rev16>;
def : GINodeEquiv<G_REV32, AArch64rev32>;
def : GINodeEquiv<G_REV64, AArch64rev64>;
@@ -122,3 +134,5 @@ def : GINodeEquiv<G_DUP, AArch64dup>;
def : GINodeEquiv<G_TRN1, AArch64trn1>;
def : GINodeEquiv<G_TRN2, AArch64trn2>;
def : GINodeEquiv<G_EXT, AArch64ext>;
def : GINodeEquiv<G_VASHR, AArch64vashr>;
def : GINodeEquiv<G_VLSHR, AArch64vlshr>;

@@ -15,14 +15,19 @@
//===----------------------------------------------------------------------===//
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
@@ -368,6 +373,45 @@ static bool applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
  return true;
}

/// isVShiftRImm - Check if this is a valid vector for the immediate
/// operand of a vector shift right operation. The value must be in the range:
/// 1 <= Value <= ElementBits for a right shift.
static bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
                         int64_t &Cnt) {
  assert(Ty.isVector() && "vector shift count is not a vector type");
  MachineInstr *MI = MRI.getVRegDef(Reg);
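  // The shift amount must come from a G_BUILD_VECTOR whose elements are all
  // the same constant, i.e. a splat; anything else is rejected (see the
  // imm_not_splat test below).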
  auto Cst = getBuildVectorConstantSplat(*MI, MRI);
  if (!Cst)
    return false;
  Cnt = *Cst;
  int64_t ElementBits = Ty.getScalarSizeInBits();
  return Cnt >= 1 && Cnt <= ElementBits;
}

/// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
static bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
                              int64_t &Imm) {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR);
  LLT Ty = MRI.getType(MI.getOperand(1).getReg());
  if (!Ty.isVector())
    return false;
  return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
}

static bool applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
                              int64_t &Imm) {
  unsigned Opc = MI.getOpcode();
  assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
  unsigned NewOpc =
      Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
  MachineIRBuilder MIB(MI);
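  // Materialize the shift amount as a scalar s32 G_CONSTANT; the pseudo takes
  // a scalar immediate even for smaller element types (note the <8 x s16>
  // test below still produces an s32 immediate).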
  auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm);
  MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef});
  MI.eraseFromParent();
  return true;
}

#define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AArch64GenPostLegalizeGICombiner.inc"
#undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
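
As a standalone illustration of the immediate-range rule that drives the
negative tests below (plain C++ with no LLVM dependency; isValidVShiftRImm is
a hypothetical helper mirroring isVShiftRImm above):

#include <cassert>
#include <cstdint>

// Right-shift immediates on AArch64 vectors must satisfy
// 1 <= imm <= element-size-in-bits.
static bool isValidVShiftRImm(int64_t Cnt, int64_t ElementBits) {
  return Cnt >= 1 && Cnt <= ElementBits;
}

int main() {
  // 32-bit elements, as in the <4 x s32> tests below.
  assert(isValidVShiftRImm(5, 32));    // combined into G_VASHR/G_VLSHR
  assert(!isValidVShiftRImm(40, 32));  // imm_too_large: G_LSHR left alone
  assert(!isValidVShiftRImm(0, 32));   // imm_zero: G_LSHR left alone
  return 0;
}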

@@ -0,0 +1,147 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
...
---
name: ashr_v4s32
alignment: 4
legalized: true
tracksRegLiveness: true
body: |
  bb.1.entry:
    liveins: $d0, $d1

    ; CHECK-LABEL: name: ashr_v4s32
    ; CHECK: liveins: $d0, $d1
    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
    ; CHECK: [[VASHR:%[0-9]+]]:_(<4 x s32>) = G_VASHR [[COPY]], [[C]](s32)
    ; CHECK: $q0 = COPY [[VASHR]](<4 x s32>)
    ; CHECK: RET_ReallyLR implicit $q0
    %0:_(<4 x s32>) = COPY $q0
    %1:_(s32) = G_CONSTANT i32 5
    %2:_(<4 x s32>) = G_BUILD_VECTOR %1(s32), %1(s32), %1(s32), %1(s32)
    %3:_(<4 x s32>) = G_ASHR %0, %2(<4 x s32>)
    $q0 = COPY %3(<4 x s32>)
    RET_ReallyLR implicit $q0
...
---
name: lshr_v4s32
alignment: 4
legalized: true
tracksRegLiveness: true
body: |
  bb.1.entry:
    liveins: $d0, $d1

    ; CHECK-LABEL: name: lshr_v4s32
    ; CHECK: liveins: $d0, $d1
    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
    ; CHECK: [[VLSHR:%[0-9]+]]:_(<4 x s32>) = G_VLSHR [[COPY]], [[C]](s32)
    ; CHECK: $q0 = COPY [[VLSHR]](<4 x s32>)
    ; CHECK: RET_ReallyLR implicit $q0
    %0:_(<4 x s32>) = COPY $q0
    %1:_(s32) = G_CONSTANT i32 5
    %2:_(<4 x s32>) = G_BUILD_VECTOR %1(s32), %1(s32), %1(s32), %1(s32)
    %3:_(<4 x s32>) = G_LSHR %0, %2(<4 x s32>)
    $q0 = COPY %3(<4 x s32>)
    RET_ReallyLR implicit $q0
...
---
name: lshr_v8s16
alignment: 4
legalized: true
tracksRegLiveness: true
body: |
  bb.1.entry:
    liveins: $d0, $d1

    ; CHECK-LABEL: name: lshr_v8s16
    ; CHECK: liveins: $d0, $d1
    ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
    ; CHECK: [[VLSHR:%[0-9]+]]:_(<8 x s16>) = G_VLSHR [[COPY]], [[C]](s32)
    ; CHECK: $q0 = COPY [[VLSHR]](<8 x s16>)
    ; CHECK: RET_ReallyLR implicit $q0
    %0:_(<8 x s16>) = COPY $q0
    %1:_(s16) = G_CONSTANT i16 5
    %2:_(<8 x s16>) = G_BUILD_VECTOR %1(s16), %1(s16), %1(s16), %1(s16), %1(s16), %1(s16), %1(s16), %1(s16)
    %3:_(<8 x s16>) = G_LSHR %0, %2(<8 x s16>)
    $q0 = COPY %3(<8 x s16>)
    RET_ReallyLR implicit $q0
...
---
name: imm_too_large
alignment: 4
legalized: true
tracksRegLiveness: true
body: |
  bb.1.entry:
    liveins: $d0, $d1

    ; CHECK-LABEL: name: imm_too_large
    ; CHECK: liveins: $d0, $d1
    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
    ; CHECK: [[LSHR:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[COPY]], [[BUILD_VECTOR]](<4 x s32>)
    ; CHECK: $q0 = COPY [[LSHR]](<4 x s32>)
    ; CHECK: RET_ReallyLR implicit $q0
    %0:_(<4 x s32>) = COPY $q0
    %1:_(s32) = G_CONSTANT i32 40
    %2:_(<4 x s32>) = G_BUILD_VECTOR %1(s32), %1(s32), %1(s32), %1(s32)
    %3:_(<4 x s32>) = G_LSHR %0, %2(<4 x s32>)
    $q0 = COPY %3(<4 x s32>)
    RET_ReallyLR implicit $q0
...
---
name: imm_zero
alignment: 4
legalized: true
tracksRegLiveness: true
body: |
  bb.1.entry:
    liveins: $d0, $d1

    ; CHECK-LABEL: name: imm_zero
    ; CHECK: liveins: $d0, $d1
    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
    ; CHECK: [[LSHR:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[COPY]], [[BUILD_VECTOR]](<4 x s32>)
    ; CHECK: $q0 = COPY [[LSHR]](<4 x s32>)
    ; CHECK: RET_ReallyLR implicit $q0
    %0:_(<4 x s32>) = COPY $q0
    %1:_(s32) = G_CONSTANT i32 0
    %2:_(<4 x s32>) = G_BUILD_VECTOR %1(s32), %1(s32), %1(s32), %1(s32)
    %3:_(<4 x s32>) = G_LSHR %0, %2(<4 x s32>)
    $q0 = COPY %3(<4 x s32>)
    RET_ReallyLR implicit $q0
...
---
name: imm_not_splat
alignment: 4
legalized: true
tracksRegLiveness: true
body: |
  bb.1.entry:
    liveins: $d0, $d1

    ; CHECK-LABEL: name: imm_not_splat
    ; CHECK: liveins: $d0, $d1
    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C]](s32), [[C]](s32)
    ; CHECK: [[LSHR:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[COPY]], [[BUILD_VECTOR]](<4 x s32>)
    ; CHECK: $q0 = COPY [[LSHR]](<4 x s32>)
    ; CHECK: RET_ReallyLR implicit $q0
    %0:_(<4 x s32>) = COPY $q0
    %1:_(s32) = G_CONSTANT i32 4
    %4:_(s32) = G_CONSTANT i32 6
    %2:_(<4 x s32>) = G_BUILD_VECTOR %1(s32), %4(s32), %1(s32), %1(s32)
    %3:_(<4 x s32>) = G_LSHR %0, %2(<4 x s32>)
    $q0 = COPY %3(<4 x s32>)
    RET_ReallyLR implicit $q0
...

@@ -372,3 +372,43 @@ body: |
    $d0 = COPY %2(<4 x s16>)
    RET_ReallyLR implicit $d0
...
---
name: vashr_v4i16_imm
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
  bb.1:
    liveins: $d0, $d1

    ; CHECK-LABEL: name: vashr_v4i16_imm
    ; CHECK: liveins: $d0, $d1
    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
    ; CHECK: [[SSHRv4i16_shift:%[0-9]+]]:fpr64 = SSHRv4i16_shift [[COPY]], 5
    ; CHECK: $d0 = COPY [[SSHRv4i16_shift]]
    ; CHECK: RET_ReallyLR implicit $d0
    %0:fpr(<4 x s16>) = COPY $d0
    %1:gpr(s32) = G_CONSTANT i32 5
    %2:fpr(<4 x s16>) = G_VASHR %0, %1
    $d0 = COPY %2(<4 x s16>)
    RET_ReallyLR implicit $d0
...
---
name: vlshr_v4i16_imm
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
  bb.1:
    liveins: $d0, $d1

    ; CHECK-LABEL: name: vlshr_v4i16_imm
    ; CHECK: liveins: $d0, $d1
    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
    ; CHECK: [[USHRv4i16_shift:%[0-9]+]]:fpr64 = USHRv4i16_shift [[COPY]], 5
    ; CHECK: $d0 = COPY [[USHRv4i16_shift]]
    ; CHECK: RET_ReallyLR implicit $d0
    %0:fpr(<4 x s16>) = COPY $d0
    %1:gpr(s32) = G_CONSTANT i32 5
    %2:fpr(<4 x s16>) = G_VLSHR %0, %1
    $d0 = COPY %2(<4 x s16>)
    RET_ReallyLR implicit $d0
...