forked from OSchip/llvm-project
[AArch64][GlobalISel] Add a post-legalize combine for lowering vector-immediate G_ASHR/G_LSHR.
In order to select the immediate forms using the imported patterns, we need to lower them into new G_VASHR/G_VLSHR target generic ops. Add a combine that performs this lowering by matching a G_BUILD_VECTOR of constant operands as the shift amount. With this, we get selection for free.
This commit is contained in:
parent
3a799deed7
commit
a513fdec90
|
@ -75,11 +75,20 @@ def ext: GICombineRule <
|
|||
// instruction.
|
||||
def shuffle_vector_pseudos : GICombineGroup<[dup, rev, ext, zip, uzp, trn]>;
|
||||
|
||||
// Matchdata holding the immediate shift amount extracted by the match
// function below.
def vashr_vlshr_imm_matchdata : GIDefMatchData<"int64_t">;

// Lower a vector G_ASHR/G_LSHR whose shift amount is a constant-splat
// G_BUILD_VECTOR into the immediate-form G_VASHR/G_VLSHR pseudos, so the
// imported SelectionDAG patterns can select them.
def vashr_vlshr_imm : GICombineRule<
  (defs root:$root, vashr_vlshr_imm_matchdata:$matchinfo),
  (match (wip_match_opcode G_ASHR, G_LSHR):$root,
         [{ return matchVAshrLshrImm(*${root}, MRI, ${matchinfo}); }]),
  (apply [{ applyVAshrLshrImm(*${root}, MRI, ${matchinfo}); }])
>;
|
||||
|
||||
|
||||
// Post-legalization combiner for AArch64. Runs the generic combines listed
// here plus the AArch64-specific shuffle-vector and vector-shift lowerings.
def AArch64PostLegalizerCombinerHelper
    : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
                       [copy_prop, erase_undef_store, combines_for_extload,
                        sext_trunc_sextload, shuffle_vector_pseudos,
                        hoist_logic_op_with_same_opcode_hands,
                        and_trivial_mask, vashr_vlshr_imm]> {
  let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
}
|
||||
|
|
|
@ -111,6 +111,18 @@ def G_EXT: AArch64GenericInstruction {
|
|||
let InOperandList = (ins type0:$v1, type0:$v2, untyped_imm_0:$imm);
|
||||
}
|
||||
|
||||
// Represents a vector G_ASHR with an immediate.
def G_VASHR : AArch64GenericInstruction {
  let OutOperandList = (outs type0:$dst);
  let InOperandList = (ins type0:$src1, untyped_imm_0:$imm);
}

// Represents a vector G_LSHR with an immediate.
def G_VLSHR : AArch64GenericInstruction {
  let OutOperandList = (outs type0:$dst);
  let InOperandList = (ins type0:$src1, untyped_imm_0:$imm);
}
|
||||
|
||||
def : GINodeEquiv<G_REV16, AArch64rev16>;
def : GINodeEquiv<G_REV32, AArch64rev32>;
def : GINodeEquiv<G_REV64, AArch64rev64>;
|
||||
|
@ -122,3 +134,5 @@ def : GINodeEquiv<G_DUP, AArch64dup>;
|
|||
def : GINodeEquiv<G_TRN1, AArch64trn1>;
def : GINodeEquiv<G_TRN2, AArch64trn2>;
def : GINodeEquiv<G_EXT, AArch64ext>;
// Map the immediate-shift pseudos onto the corresponding SDAG nodes so the
// imported patterns for AArch64vashr/AArch64vlshr can select them.
def : GINodeEquiv<G_VASHR, AArch64vashr>;
def : GINodeEquiv<G_VLSHR, AArch64vlshr>;
|
|
@ -15,14 +15,19 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AArch64TargetMachine.h"
|
||||
#include "MCTargetDesc/AArch64MCTargetDesc.h"
|
||||
#include "llvm/CodeGen/GlobalISel/Combiner.h"
|
||||
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
|
||||
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
|
||||
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
|
||||
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
|
||||
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
|
||||
#include "llvm/CodeGen/GlobalISel/Utils.h"
|
||||
#include "llvm/CodeGen/MachineDominators.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/TargetOpcodes.h"
|
||||
#include "llvm/CodeGen/TargetPassConfig.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
|
||||
|
@ -368,6 +373,45 @@ static bool applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
|
|||
return true;
|
||||
}
|
||||
|
||||
/// isVShiftRImm - Check if this is a valid vector for the immediate
|
||||
/// operand of a vector shift right operation. The value must be in the range:
|
||||
/// 1 <= Value <= ElementBits for a right shift.
|
||||
static bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
|
||||
int64_t &Cnt) {
|
||||
assert(Ty.isVector() && "vector shift count is not a vector type");
|
||||
MachineInstr *MI = MRI.getVRegDef(Reg);
|
||||
auto Cst = getBuildVectorConstantSplat(*MI, MRI);
|
||||
if (!Cst)
|
||||
return false;
|
||||
Cnt = *Cst;
|
||||
int64_t ElementBits = Ty.getScalarSizeInBits();
|
||||
return Cnt >= 1 && Cnt <= ElementBits;
|
||||
}
|
||||
|
||||
/// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
|
||||
static bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
|
||||
int64_t &Imm) {
|
||||
assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
|
||||
MI.getOpcode() == TargetOpcode::G_LSHR);
|
||||
LLT Ty = MRI.getType(MI.getOperand(1).getReg());
|
||||
if (!Ty.isVector())
|
||||
return false;
|
||||
return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
|
||||
}
|
||||
|
||||
/// Replace a matched vector G_ASHR/G_LSHR with the immediate-form
/// G_VASHR/G_VLSHR pseudo, using the splat constant \p Imm found by
/// matchVAshrLshrImm. The original instruction is erased.
static bool applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
                              int64_t &Imm) {
  unsigned Opc = MI.getOpcode();
  assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
  unsigned NewOpc =
      Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
  MachineIRBuilder MIB(MI);
  // The pseudo takes the shift amount as a scalar s32 immediate operand
  // rather than a vector, matching the SDAG AArch64vashr/AArch64vlshr nodes.
  auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm);
  MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef});
  MI.eraseFromParent();
  return true;
}
|
||||
|
||||
#define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
|
||||
#include "AArch64GenPostLegalizeGICombiner.inc"
|
||||
#undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
|
||||
|
|
|
@ -0,0 +1,147 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s

...
---
name: ashr_v4s32
alignment: 4
legalized: true
tracksRegLiveness: true
body: |
  bb.1.entry:
    liveins: $d0, $d1

    ; CHECK-LABEL: name: ashr_v4s32
    ; CHECK: liveins: $d0, $d1
    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
    ; CHECK: [[VASHR:%[0-9]+]]:_(<4 x s32>) = G_VASHR [[COPY]], [[C]](s32)
    ; CHECK: $q0 = COPY [[VASHR]](<4 x s32>)
    ; CHECK: RET_ReallyLR implicit $q0
    %0:_(<4 x s32>) = COPY $q0
    %1:_(s32) = G_CONSTANT i32 5
    %2:_(<4 x s32>) = G_BUILD_VECTOR %1(s32), %1(s32), %1(s32), %1(s32)
    %3:_(<4 x s32>) = G_ASHR %0, %2(<4 x s32>)
    $q0 = COPY %3(<4 x s32>)
    RET_ReallyLR implicit $q0
...
|
||||
---
name: lshr_v4s32
alignment: 4
legalized: true
tracksRegLiveness: true
body: |
  bb.1.entry:
    liveins: $d0, $d1

    ; CHECK-LABEL: name: lshr_v4s32
    ; CHECK: liveins: $d0, $d1
    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
    ; CHECK: [[VLSHR:%[0-9]+]]:_(<4 x s32>) = G_VLSHR [[COPY]], [[C]](s32)
    ; CHECK: $q0 = COPY [[VLSHR]](<4 x s32>)
    ; CHECK: RET_ReallyLR implicit $q0
    %0:_(<4 x s32>) = COPY $q0
    %1:_(s32) = G_CONSTANT i32 5
    %2:_(<4 x s32>) = G_BUILD_VECTOR %1(s32), %1(s32), %1(s32), %1(s32)
    %3:_(<4 x s32>) = G_LSHR %0, %2(<4 x s32>)
    $q0 = COPY %3(<4 x s32>)
    RET_ReallyLR implicit $q0
...
|
||||
---
name: lshr_v8s16
alignment: 4
legalized: true
tracksRegLiveness: true
body: |
  bb.1.entry:
    liveins: $d0, $d1

    ; CHECK-LABEL: name: lshr_v8s16
    ; CHECK: liveins: $d0, $d1
    ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
    ; CHECK: [[VLSHR:%[0-9]+]]:_(<8 x s16>) = G_VLSHR [[COPY]], [[C]](s32)
    ; CHECK: $q0 = COPY [[VLSHR]](<8 x s16>)
    ; CHECK: RET_ReallyLR implicit $q0
    %0:_(<8 x s16>) = COPY $q0
    %1:_(s16) = G_CONSTANT i16 5
    %2:_(<8 x s16>) = G_BUILD_VECTOR %1(s16), %1(s16), %1(s16), %1(s16), %1(s16), %1(s16), %1(s16), %1(s16)
    %3:_(<8 x s16>) = G_LSHR %0, %2(<8 x s16>)
    $q0 = COPY %3(<8 x s16>)
    RET_ReallyLR implicit $q0
...
|
||||
---
# Negative test: a shift of 40 is out of range for s32 elements (max 32),
# so the combine must not fire and the G_LSHR is left untouched.
name: imm_too_large
alignment: 4
legalized: true
tracksRegLiveness: true
body: |
  bb.1.entry:
    liveins: $d0, $d1

    ; CHECK-LABEL: name: imm_too_large
    ; CHECK: liveins: $d0, $d1
    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
    ; CHECK: [[LSHR:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[COPY]], [[BUILD_VECTOR]](<4 x s32>)
    ; CHECK: $q0 = COPY [[LSHR]](<4 x s32>)
    ; CHECK: RET_ReallyLR implicit $q0
    %0:_(<4 x s32>) = COPY $q0
    %1:_(s32) = G_CONSTANT i32 40
    %2:_(<4 x s32>) = G_BUILD_VECTOR %1(s32), %1(s32), %1(s32), %1(s32)
    %3:_(<4 x s32>) = G_LSHR %0, %2(<4 x s32>)
    $q0 = COPY %3(<4 x s32>)
    RET_ReallyLR implicit $q0
...
|
||||
---
# Negative test: a shift of 0 is below the valid range [1, element bits],
# so the combine must not fire.
name: imm_zero
alignment: 4
legalized: true
tracksRegLiveness: true
body: |
  bb.1.entry:
    liveins: $d0, $d1

    ; CHECK-LABEL: name: imm_zero
    ; CHECK: liveins: $d0, $d1
    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
    ; CHECK: [[LSHR:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[COPY]], [[BUILD_VECTOR]](<4 x s32>)
    ; CHECK: $q0 = COPY [[LSHR]](<4 x s32>)
    ; CHECK: RET_ReallyLR implicit $q0
    %0:_(<4 x s32>) = COPY $q0
    %1:_(s32) = G_CONSTANT i32 0
    %2:_(<4 x s32>) = G_BUILD_VECTOR %1(s32), %1(s32), %1(s32), %1(s32)
    %3:_(<4 x s32>) = G_LSHR %0, %2(<4 x s32>)
    $q0 = COPY %3(<4 x s32>)
    RET_ReallyLR implicit $q0
...
|
||||
---
# Negative test: the build_vector is not a splat (4, 6, 4, 4), so there is
# no single immediate and the combine must not fire.
name: imm_not_splat
alignment: 4
legalized: true
tracksRegLiveness: true
body: |
  bb.1.entry:
    liveins: $d0, $d1

    ; CHECK-LABEL: name: imm_not_splat
    ; CHECK: liveins: $d0, $d1
    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C]](s32), [[C]](s32)
    ; CHECK: [[LSHR:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[COPY]], [[BUILD_VECTOR]](<4 x s32>)
    ; CHECK: $q0 = COPY [[LSHR]](<4 x s32>)
    ; CHECK: RET_ReallyLR implicit $q0
    %0:_(<4 x s32>) = COPY $q0
    %1:_(s32) = G_CONSTANT i32 4
    %4:_(s32) = G_CONSTANT i32 6
    %2:_(<4 x s32>) = G_BUILD_VECTOR %1(s32), %4(s32), %1(s32), %1(s32)
    %3:_(<4 x s32>) = G_LSHR %0, %2(<4 x s32>)
    $q0 = COPY %3(<4 x s32>)
    RET_ReallyLR implicit $q0
...
|
|
@ -372,3 +372,43 @@ body: |
|
|||
$d0 = COPY %2(<4 x s16>)
|
||||
RET_ReallyLR implicit $d0
|
||||
...
|
||||
---
# Selection test: G_VASHR with an immediate selects the SSHR shift form.
name: vashr_v4i16_imm
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
  bb.1:
    liveins: $d0, $d1
    ; CHECK-LABEL: name: vashr_v4i16_imm
    ; CHECK: liveins: $d0, $d1
    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
    ; CHECK: [[SSHRv4i16_shift:%[0-9]+]]:fpr64 = SSHRv4i16_shift [[COPY]], 5
    ; CHECK: $d0 = COPY [[SSHRv4i16_shift]]
    ; CHECK: RET_ReallyLR implicit $d0
    %0:fpr(<4 x s16>) = COPY $d0
    %1:gpr(s32) = G_CONSTANT i32 5
    %2:fpr(<4 x s16>) = G_VASHR %0, %1
    $d0 = COPY %2(<4 x s16>)
    RET_ReallyLR implicit $d0
...
|
||||
---
# Selection test: G_VLSHR with an immediate selects the USHR shift form.
name: vlshr_v4i16_imm
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
  bb.1:
    liveins: $d0, $d1
    ; CHECK-LABEL: name: vlshr_v4i16_imm
    ; CHECK: liveins: $d0, $d1
    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
    ; CHECK: [[USHRv4i16_shift:%[0-9]+]]:fpr64 = USHRv4i16_shift [[COPY]], 5
    ; CHECK: $d0 = COPY [[USHRv4i16_shift]]
    ; CHECK: RET_ReallyLR implicit $d0
    %0:fpr(<4 x s16>) = COPY $d0
    %1:gpr(s32) = G_CONSTANT i32 5
    %2:fpr(<4 x s16>) = G_VLSHR %0, %1
    $d0 = COPY %2(<4 x s16>)
    RET_ReallyLR implicit $d0
...
|
||||
|
|
Loading…
Reference in New Issue