[AArch64][GlobalISel] Add combine for G_EXTRACT_VECTOR_ELT to allow selection of pairwise FADD.

For the <2 x float> case, instead of adding another combine or legalization to
get it into a <4 x float> form, I'm just adding a GISel-specific selection
pattern to cover it.

Differential Revision: https://reviews.llvm.org/D90699
This commit is contained in:
Amara Emerson 2020-11-03 11:17:31 -08:00
parent c298824f9c
commit 393b55380a
5 changed files with 337 additions and 2 deletions

View File

@ -94,6 +94,14 @@ def adjust_icmp_imm : GICombineRule <
def icmp_lowering : GICombineGroup<[adjust_icmp_imm]>;
// Match data handed from the matcher to the applier of the pairwise-add
// combine. matchExtractVecEltPairwiseAdd fills it with, in order: the opcode
// of the scalar add to build, the scalar result type, and the source vector
// register.
def extractvecelt_pairwise_add_matchdata : GIDefMatchData<"std::tuple<unsigned, LLT, Register>">;
// Combine rooted at G_EXTRACT_VECTOR_ELT. Matching and rewriting are both
// delegated to the C++ helpers matchExtractVecEltPairwiseAdd and
// applyExtractVecEltPairwiseAdd.
def extractvecelt_pairwise_add : GICombineRule<
(defs root:$root, extractvecelt_pairwise_add_matchdata:$matchinfo),
(match (wip_match_opcode G_EXTRACT_VECTOR_ELT):$root,
[{ return matchExtractVecEltPairwiseAdd(*${root}, MRI, ${matchinfo}); }]),
(apply [{ applyExtractVecEltPairwiseAdd(*${root}, MRI, B, ${matchinfo}); }])
>;
// Post-legalization combines which should happen at all optimization levels.
// (E.g. ones that facilitate matching for the selector) For example, matching
// pseudos.
@ -110,6 +118,7 @@ def AArch64PostLegalizerCombinerHelper
[copy_prop, erase_undef_store, combines_for_extload,
sext_trunc_sextload,
hoist_logic_op_with_same_opcode_hands,
and_trivial_mask, xor_of_and_with_same_reg]> {
and_trivial_mask, xor_of_and_with_same_reg,
extractvecelt_pairwise_add]> {
let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
}

View File

@ -135,4 +135,9 @@ def : GINodeEquiv<G_TRN1, AArch64trn1>;
def : GINodeEquiv<G_TRN2, AArch64trn2>;
def : GINodeEquiv<G_EXT, AArch64ext>;
def : GINodeEquiv<G_VASHR, AArch64vashr>;
def : GINodeEquiv<G_VLSHR, AArch64vlshr>;
def : GINodeEquiv<G_VLSHR, AArch64vlshr>;
// These are patterns that we only use for GlobalISel via the importer.
// A scalar f32 fadd of lanes 0 and 1 extracted from the same v2f32 register
// is selected as a single FADDPv2i32p on that register.
def : Pat<(f32 (fadd (vector_extract (v2f32 FPR64:$Rn), (i64 0)),
(vector_extract (v2f32 FPR64:$Rn), (i64 1)))),
(f32 (FADDPv2i32p (v2f32 FPR64:$Rn)))>;

View File

@ -24,8 +24,11 @@
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
@ -33,6 +36,74 @@
using namespace llvm;
/// This combine tries to do what performExtractVectorEltCombine does in SDAG.
/// Rewrite for pairwise fadd pattern
///   (s32 (g_extract_vector_elt
///           (g_fadd (vXs32 Other)
///                   (g_vector_shuffle (vXs32 Other) undef <1,X,...> )) 0))
/// ->
///   (s32 (g_fadd (g_extract_vector_elt (vXs32 Other) 0)
///                (g_extract_vector_elt (vXs32 Other) 1)))
///
/// \param MI        The G_EXTRACT_VECTOR_ELT being inspected.
/// \param MRI       Register info used to look up types and definitions.
/// \param MatchInfo On success, set to (G_FADD, scalar destination type,
///                  source vector register "Other").
/// \returns true if \p MI extracts lane 0 of a G_FADD whose operands are a
///          vector and a shuffle of that same vector that moves lane 1 into
///          lane 0 (in either operand order).
bool matchExtractVecEltPairwiseAdd(
    MachineInstr &MI, MachineRegisterInfo &MRI,
    std::tuple<unsigned, LLT, Register> &MatchInfo) {
  Register Src1 = MI.getOperand(1).getReg();
  Register Src2 = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  // Only an extract of lane 0 can be turned into a pairwise add.
  auto Cst = getConstantVRegValWithLookThrough(Src2, MRI);
  if (!Cst || Cst->Value != 0)
    return false;
  // SDAG also checks for FullFP16, but this looks to be beneficial anyway.

  // Now check for an fadd operation. TODO: expand this for integer add?
  auto *FAddMI = getOpcodeDef(TargetOpcode::G_FADD, Src1, MRI);
  if (!FAddMI)
    return false;

  // If we add support for integer add, must restrict these types to just s64.
  unsigned DstSize = DstTy.getSizeInBits();
  if (DstSize != 16 && DstSize != 32 && DstSize != 64)
    return false;

  // The shuffle may be either operand of the fadd; the remaining operand is
  // the candidate source vector.
  Register Src1Op1 = FAddMI->getOperand(1).getReg();
  Register Src1Op2 = FAddMI->getOperand(2).getReg();
  MachineInstr *Shuffle =
      getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op2, MRI);
  Register Other = Src1Op1;
  if (!Shuffle) {
    Shuffle = getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op1, MRI);
    Other = Src1Op2;
  }
  if (!Shuffle)
    return false;

  // We're looking for a shuffle that moves the second element to index 0.
  if (Shuffle->getOperand(3).getShuffleMask()[0] != 1)
    return false;

  // The shuffle must read the very same vector value that is the other fadd
  // operand. Compare the registers rather than their defining instructions:
  // an instruction with several results (e.g. G_UNMERGE_VALUES) defines
  // distinct values that must not be treated as the same vector.
  if (Shuffle->getOperand(1).getReg() != Other)
    return false;

  std::get<0>(MatchInfo) = TargetOpcode::G_FADD;
  std::get<1>(MatchInfo) = DstTy;
  std::get<2>(MatchInfo) = Other;
  return true;
}
/// Rewrite step of the pairwise-add combine. Using the (opcode, scalar type,
/// source vector) recorded in \p MatchInfo, builds extracts of lanes 0 and 1
/// of the source vector in front of \p MI, combines them with the recorded
/// opcode into \p MI's destination register, and erases \p MI.
///
/// \returns true always.
bool applyExtractVecEltPairwiseAdd(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
    std::tuple<unsigned, LLT, Register> &MatchInfo) {
  const unsigned Opc = std::get<0>(MatchInfo);
  assert(Opc == TargetOpcode::G_FADD && "Unexpected opcode!");
  const LLT EltTy = std::get<1>(MatchInfo);
  const Register VecReg = std::get<2>(MatchInfo);
  const LLT IdxTy = LLT::scalar(64);
  const Register DstReg = MI.getOperand(0).getReg();

  // Emit the replacement right where the original extract lives.
  B.setInstrAndDebugLoc(MI);

  // Lane 0, then lane 1: each index constant is created just before the
  // extract that consumes it.
  auto Idx0 = B.buildConstant(IdxTy, 0);
  auto Lane0 = B.buildExtractVectorElement(EltTy, VecReg, Idx0);
  auto Idx1 = B.buildConstant(IdxTy, 1);
  auto Lane1 = B.buildExtractVectorElement(EltTy, VecReg, Idx1);

  // The scalar add takes over the destination register of the old extract.
  B.buildInstr(Opc, {DstReg}, {Lane0, Lane1});
  MI.eraseFromParent();
  return true;
}
#define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AArch64GenPostLegalizeGICombiner.inc"
#undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS

View File

@ -0,0 +1,188 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
# Positive test on <2 x s64>: extracting lane 0 of G_FADD(shuffle, %0), where
# the shuffle of %0 has mask (1, undef), is rewritten into a scalar G_FADD of
# lanes 0 and 1 of %0.
---
name: f64_faddp
alignment: 4
legalized: true
tracksRegLiveness: true
liveins:
- { reg: '$q0' }
body: |
bb.1:
liveins: $q0
; CHECK-LABEL: name: f64_faddp
; CHECK: liveins: $q0
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s64>), [[C]](s64)
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK: [[EVEC1:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s64>), [[C1]](s64)
; CHECK: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[EVEC]], [[EVEC1]]
; CHECK: $d0 = COPY [[FADD]](s64)
; CHECK: RET_ReallyLR implicit $d0
%0:_(<2 x s64>) = COPY $q0
%2:_(<2 x s64>) = G_IMPLICIT_DEF
%5:_(s64) = G_CONSTANT i64 0
%1:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %2, shufflemask(1, undef)
%3:_(<2 x s64>) = G_FADD %1, %0
%4:_(s64) = G_EXTRACT_VECTOR_ELT %3(<2 x s64>), %5(s64)
$d0 = COPY %4(s64)
RET_ReallyLR implicit $d0
...
# Same as f64_faddp, but with the G_FADD operands swapped (the shuffle is the
# second operand); the same rewrite is expected.
---
name: f64_faddp_commuted
alignment: 4
legalized: true
tracksRegLiveness: true
liveins:
- { reg: '$q0' }
body: |
bb.1:
liveins: $q0
; CHECK-LABEL: name: f64_faddp_commuted
; CHECK: liveins: $q0
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s64>), [[C]](s64)
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK: [[EVEC1:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s64>), [[C1]](s64)
; CHECK: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[EVEC]], [[EVEC1]]
; CHECK: $d0 = COPY [[FADD]](s64)
; CHECK: RET_ReallyLR implicit $d0
%0:_(<2 x s64>) = COPY $q0
%2:_(<2 x s64>) = G_IMPLICIT_DEF
%5:_(s64) = G_CONSTANT i64 0
%1:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %2, shufflemask(1, undef)
%3:_(<2 x s64>) = G_FADD %0, %1
%4:_(s64) = G_EXTRACT_VECTOR_ELT %3(<2 x s64>), %5(s64)
$d0 = COPY %4(s64)
RET_ReallyLR implicit $d0
...
# Positive test on <2 x s32>: extracting lane 0 of G_FADD(shuffle, %0), where
# the shuffle of %0 has mask (1, undef), is rewritten into a scalar s32 G_FADD
# of lanes 0 and 1 of %0.
---
name: f32_faddp
alignment: 4
legalized: true
tracksRegLiveness: true
liveins:
- { reg: '$d0' }
body: |
bb.1:
liveins: $d0
; CHECK-LABEL: name: f32_faddp
; CHECK: liveins: $d0
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s64)
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C1]](s64)
; CHECK: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[EVEC]], [[EVEC1]]
; CHECK: $s0 = COPY [[FADD]](s32)
; CHECK: RET_ReallyLR implicit $s0
%0:_(<2 x s32>) = COPY $d0
%2:_(<2 x s32>) = G_IMPLICIT_DEF
%5:_(s64) = G_CONSTANT i64 0
%1:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %2, shufflemask(1, undef)
%3:_(<2 x s32>) = G_FADD %1, %0
%4:_(s32) = G_EXTRACT_VECTOR_ELT %3(<2 x s32>), %5(s64)
$s0 = COPY %4(s32)
RET_ReallyLR implicit $s0
...
# Same as f32_faddp, but with the G_FADD operands swapped (the shuffle is the
# second operand); the same rewrite is expected.
---
name: f32_faddp_commuted
alignment: 4
legalized: true
tracksRegLiveness: true
liveins:
- { reg: '$d0' }
body: |
bb.1:
liveins: $d0
; CHECK-LABEL: name: f32_faddp_commuted
; CHECK: liveins: $d0
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s64)
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C1]](s64)
; CHECK: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[EVEC]], [[EVEC1]]
; CHECK: $s0 = COPY [[FADD]](s32)
; CHECK: RET_ReallyLR implicit $s0
%0:_(<2 x s32>) = COPY $d0
%2:_(<2 x s32>) = G_IMPLICIT_DEF
%5:_(s64) = G_CONSTANT i64 0
%1:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %2, shufflemask(1, undef)
%3:_(<2 x s32>) = G_FADD %0, %1
%4:_(s32) = G_EXTRACT_VECTOR_ELT %3(<2 x s32>), %5(s64)
$s0 = COPY %4(s32)
RET_ReallyLR implicit $s0
...
# Negative test: the extract index is the constant 1 rather than 0, so the
# combine must not fire and the instructions are expected to stay as written.
---
name: wrong_extract_idx
alignment: 4
legalized: true
tracksRegLiveness: true
liveins:
- { reg: '$q0' }
body: |
bb.1:
liveins: $q0
; CHECK-LABEL: name: wrong_extract_idx
; CHECK: liveins: $q0
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s64>), [[DEF]], shufflemask(1, undef)
; CHECK: [[FADD:%[0-9]+]]:_(<2 x s64>) = G_FADD [[SHUF]], [[COPY]]
; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[FADD]](<2 x s64>), [[C]](s64)
; CHECK: $d0 = COPY [[EVEC]](s64)
; CHECK: RET_ReallyLR implicit $d0
%0:_(<2 x s64>) = COPY $q0
%2:_(<2 x s64>) = G_IMPLICIT_DEF
%5:_(s64) = G_CONSTANT i64 1
%1:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %2, shufflemask(1, undef)
%3:_(<2 x s64>) = G_FADD %1, %0
%4:_(s64) = G_EXTRACT_VECTOR_ELT %3(<2 x s64>), %5(s64)
$d0 = COPY %4(s64)
RET_ReallyLR implicit $d0
...
# Negative test: the shuffle mask is (0, undef), i.e. it does not move lane 1
# into lane 0, so the combine must not fire and the instructions are expected
# to stay as written.
---
name: wrong_shuffle_mask
alignment: 4
legalized: true
tracksRegLiveness: true
liveins:
- { reg: '$q0' }
body: |
bb.1:
liveins: $q0
; CHECK-LABEL: name: wrong_shuffle_mask
; CHECK: liveins: $q0
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s64>), [[DEF]], shufflemask(0, undef)
; CHECK: [[FADD:%[0-9]+]]:_(<2 x s64>) = G_FADD [[SHUF]], [[COPY]]
; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[FADD]](<2 x s64>), [[C]](s64)
; CHECK: $d0 = COPY [[EVEC]](s64)
; CHECK: RET_ReallyLR implicit $d0
%0:_(<2 x s64>) = COPY $q0
%2:_(<2 x s64>) = G_IMPLICIT_DEF
%5:_(s64) = G_CONSTANT i64 0
%1:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %2, shufflemask(0, undef)
%3:_(<2 x s64>) = G_FADD %1, %0
%4:_(s64) = G_EXTRACT_VECTOR_ELT %3(<2 x s64>), %5(s64)
$d0 = COPY %4(s64)
RET_ReallyLR implicit $d0
...

View File

@ -0,0 +1,62 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-unknown -run-pass=instruction-select %s -o - | FileCheck %s
# Selection test: a scalar s64 G_FADD of lanes 0 and 1 extracted from the same
# <2 x s64> register is expected to select to a single FADDPv2i64p.
---
name: f64_faddp
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
liveins:
- { reg: '$q0' }
frameInfo:
maxAlignment: 1
body: |
bb.1:
liveins: $q0
; CHECK-LABEL: name: f64_faddp
; CHECK: liveins: $q0
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
; CHECK: [[FADDPv2i64p:%[0-9]+]]:fpr64 = FADDPv2i64p [[COPY]]
; CHECK: $d0 = COPY [[FADDPv2i64p]]
; CHECK: RET_ReallyLR implicit $d0
%0:fpr(<2 x s64>) = COPY $q0
%6:gpr(s64) = G_CONSTANT i64 0
%7:fpr(s64) = G_EXTRACT_VECTOR_ELT %0(<2 x s64>), %6(s64)
%8:gpr(s64) = G_CONSTANT i64 1
%9:fpr(s64) = G_EXTRACT_VECTOR_ELT %0(<2 x s64>), %8(s64)
%4:fpr(s64) = G_FADD %7, %9
$d0 = COPY %4(s64)
RET_ReallyLR implicit $d0
...
# Selection test: a scalar s32 G_FADD of lanes 0 and 1 extracted from the same
# <2 x s32> register is expected to select to a single FADDPv2i32p.
---
name: f32_faddp
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
liveins:
- { reg: '$d0' }
frameInfo:
maxAlignment: 1
body: |
bb.1:
liveins: $d0
; CHECK-LABEL: name: f32_faddp
; CHECK: liveins: $d0
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
; CHECK: [[FADDPv2i32p:%[0-9]+]]:fpr32 = FADDPv2i32p [[COPY]]
; CHECK: $s0 = COPY [[FADDPv2i32p]]
; CHECK: RET_ReallyLR implicit $s0
%0:fpr(<2 x s32>) = COPY $d0
%6:gpr(s64) = G_CONSTANT i64 0
%7:fpr(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %6(s64)
%8:gpr(s64) = G_CONSTANT i64 1
%9:fpr(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %8(s64)
%4:fpr(s32) = G_FADD %7, %9
$s0 = COPY %4(s32)
RET_ReallyLR implicit $s0
...