[AArch64][GlobalISel] Add combine for G_EXTRACT_VECTOR_ELT to allow selection of pairwise FADD.
For the <2 x float> case, instead of adding another combine or legalization to get it into a <4 x float> form, I'm just adding a GISel-specific selection pattern to cover it.

Differential Revision: https://reviews.llvm.org/D90699
commit 393b55380a
parent c298824f9c
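For orientation (not part of the commit), here is a minimal LLVM IR sketch of the pairwise-add idiom this combine is after; the function name and the choice of <2 x double> are illustrative, and the shape mirrors the f64_faddp MIR test added below (a shuffle that moves element 1 into lane 0, a vector fadd, then an extract of lane 0):

    define double @pairwise_fadd(<2 x double> %v) {
      ; Lane-swapping shuffle: element 1 of %v ends up in lane 0.
      %rev = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
      ; Vector fadd whose lane 0 is %v[0] + %v[1].
      %add = fadd <2 x double> %rev, %v
      ; Only lane 0 is used, so the whole computation is a pairwise add.
      %res = extractelement <2 x double> %add, i64 0
      ret double %res
    }

Once the combine rewrites the extract into two scalar extracts feeding a G_FADD, the FADDP selection patterns can pick this up as a single faddp, as the instruction-select test at the end of this patch checks.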
@@ -94,6 +94,14 @@ def adjust_icmp_imm : GICombineRule <
def icmp_lowering : GICombineGroup<[adjust_icmp_imm]>;

def extractvecelt_pairwise_add_matchdata : GIDefMatchData<"std::tuple<unsigned, LLT, Register>">;
def extractvecelt_pairwise_add : GICombineRule<
  (defs root:$root, extractvecelt_pairwise_add_matchdata:$matchinfo),
  (match (wip_match_opcode G_EXTRACT_VECTOR_ELT):$root,
         [{ return matchExtractVecEltPairwiseAdd(*${root}, MRI, ${matchinfo}); }]),
  (apply [{ applyExtractVecEltPairwiseAdd(*${root}, MRI, B, ${matchinfo}); }])
>;

// Post-legalization combines which should happen at all optimization levels.
// (E.g. ones that facilitate matching for the selector) For example, matching
// pseudos.
@@ -110,6 +118,7 @@ def AArch64PostLegalizerCombinerHelper
     [copy_prop, erase_undef_store, combines_for_extload,
      sext_trunc_sextload,
      hoist_logic_op_with_same_opcode_hands,
-     and_trivial_mask, xor_of_and_with_same_reg]> {
+     and_trivial_mask, xor_of_and_with_same_reg,
+     extractvecelt_pairwise_add]> {
  let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
}
@@ -135,4 +135,9 @@ def : GINodeEquiv<G_TRN1, AArch64trn1>;
def : GINodeEquiv<G_TRN2, AArch64trn2>;
def : GINodeEquiv<G_EXT, AArch64ext>;
def : GINodeEquiv<G_VASHR, AArch64vashr>;
def : GINodeEquiv<G_VLSHR, AArch64vlshr>;

// These are patterns that we only use for GlobalISel via the importer.
def : Pat<(f32 (fadd (vector_extract (v2f32 FPR64:$Rn), (i64 0)),
                     (vector_extract (v2f32 FPR64:$Rn), (i64 1)))),
           (f32 (FADDPv2i32p (v2f32 FPR64:$Rn)))>;
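As a companion sketch (again not part of the commit, names illustrative), this is the <2 x float> source pattern the importer-only Pat above is meant to cover once the combine has produced the two extracts:

    define float @pairwise_fadd_f32(<2 x float> %v) {
      ; Extract both lanes and add them; under GlobalISel this becomes two
      ; G_EXTRACT_VECTOR_ELTs feeding a G_FADD.
      %e0 = extractelement <2 x float> %v, i64 0
      %e1 = extractelement <2 x float> %v, i64 1
      %sum = fadd float %e0, %e1
      ret float %sum
    }

The f32_faddp instruction-select test below checks that this shape selects to FADDPv2i32p directly, rather than requiring the vector to be widened to a <4 x float> form first.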
@@ -24,8 +24,11 @@
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
@@ -33,6 +36,74 @@

using namespace llvm;

/// This combine tries to do what performExtractVectorEltCombine does in SDAG.
/// Rewrite for pairwise fadd pattern
///   (s32 (g_extract_vector_elt
///           (g_fadd (vXs32 Other)
///                   (g_vector_shuffle (vXs32 Other) undef <1,X,...>)) 0))
/// ->
///   (s32 (g_fadd (g_extract_vector_elt (vXs32 Other) 0)
///                (g_extract_vector_elt (vXs32 Other) 1)))
bool matchExtractVecEltPairwiseAdd(
    MachineInstr &MI, MachineRegisterInfo &MRI,
    std::tuple<unsigned, LLT, Register> &MatchInfo) {
  Register Src1 = MI.getOperand(1).getReg();
  Register Src2 = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  auto Cst = getConstantVRegValWithLookThrough(Src2, MRI);
  if (!Cst || Cst->Value != 0)
    return false;
  // SDAG also checks for FullFP16, but this looks to be beneficial anyway.

  // Now check for an fadd operation. TODO: expand this for integer add?
  auto *FAddMI = getOpcodeDef(TargetOpcode::G_FADD, Src1, MRI);
  if (!FAddMI)
    return false;

  // If we add support for integer add, must restrict these types to just s64.
  unsigned DstSize = DstTy.getSizeInBits();
  if (DstSize != 16 && DstSize != 32 && DstSize != 64)
    return false;

  Register Src1Op1 = FAddMI->getOperand(1).getReg();
  Register Src1Op2 = FAddMI->getOperand(2).getReg();
  MachineInstr *Shuffle =
      getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op2, MRI);
  MachineInstr *Other = MRI.getVRegDef(Src1Op1);
  if (!Shuffle) {
    Shuffle = getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op1, MRI);
    Other = MRI.getVRegDef(Src1Op2);
  }

  // We're looking for a shuffle that moves the second element to index 0.
  if (Shuffle && Shuffle->getOperand(3).getShuffleMask()[0] == 1 &&
      Other == MRI.getVRegDef(Shuffle->getOperand(1).getReg())) {
    std::get<0>(MatchInfo) = TargetOpcode::G_FADD;
    std::get<1>(MatchInfo) = DstTy;
    std::get<2>(MatchInfo) = Other->getOperand(0).getReg();
    return true;
  }
  return false;
}

bool applyExtractVecEltPairwiseAdd(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
    std::tuple<unsigned, LLT, Register> &MatchInfo) {
  unsigned Opc = std::get<0>(MatchInfo);
  assert(Opc == TargetOpcode::G_FADD && "Unexpected opcode!");
  // We want to generate two extracts of elements 0 and 1, and add them.
  LLT Ty = std::get<1>(MatchInfo);
  Register Src = std::get<2>(MatchInfo);
  LLT s64 = LLT::scalar(64);
  B.setInstrAndDebugLoc(MI);
  auto Elt0 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 0));
  auto Elt1 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 1));
  B.buildInstr(Opc, {MI.getOperand(0).getReg()}, {Elt0, Elt1});
  MI.eraseFromParent();
  return true;
}

#define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AArch64GenPostLegalizeGICombiner.inc"
#undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
@@ -0,0 +1,188 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
---
name: f64_faddp
alignment: 4
legalized: true
tracksRegLiveness: true
liveins:
  - { reg: '$q0' }
body: |
  bb.1:
    liveins: $q0

    ; CHECK-LABEL: name: f64_faddp
    ; CHECK: liveins: $q0
    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
    ; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s64>), [[C]](s64)
    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
    ; CHECK: [[EVEC1:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s64>), [[C1]](s64)
    ; CHECK: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[EVEC]], [[EVEC1]]
    ; CHECK: $d0 = COPY [[FADD]](s64)
    ; CHECK: RET_ReallyLR implicit $d0
    %0:_(<2 x s64>) = COPY $q0
    %2:_(<2 x s64>) = G_IMPLICIT_DEF
    %5:_(s64) = G_CONSTANT i64 0
    %1:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %2, shufflemask(1, undef)
    %3:_(<2 x s64>) = G_FADD %1, %0
    %4:_(s64) = G_EXTRACT_VECTOR_ELT %3(<2 x s64>), %5(s64)
    $d0 = COPY %4(s64)
    RET_ReallyLR implicit $d0

...
---
name: f64_faddp_commuted
alignment: 4
legalized: true
tracksRegLiveness: true
liveins:
  - { reg: '$q0' }
body: |
  bb.1:
    liveins: $q0

    ; CHECK-LABEL: name: f64_faddp_commuted
    ; CHECK: liveins: $q0
    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
    ; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s64>), [[C]](s64)
    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
    ; CHECK: [[EVEC1:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s64>), [[C1]](s64)
    ; CHECK: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[EVEC]], [[EVEC1]]
    ; CHECK: $d0 = COPY [[FADD]](s64)
    ; CHECK: RET_ReallyLR implicit $d0
    %0:_(<2 x s64>) = COPY $q0
    %2:_(<2 x s64>) = G_IMPLICIT_DEF
    %5:_(s64) = G_CONSTANT i64 0
    %1:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %2, shufflemask(1, undef)
    %3:_(<2 x s64>) = G_FADD %0, %1
    %4:_(s64) = G_EXTRACT_VECTOR_ELT %3(<2 x s64>), %5(s64)
    $d0 = COPY %4(s64)
    RET_ReallyLR implicit $d0

...
---
name: f32_faddp
alignment: 4
legalized: true
tracksRegLiveness: true
liveins:
  - { reg: '$d0' }
body: |
  bb.1:
    liveins: $d0

    ; CHECK-LABEL: name: f32_faddp
    ; CHECK: liveins: $d0
    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s64)
    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
    ; CHECK: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C1]](s64)
    ; CHECK: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[EVEC]], [[EVEC1]]
    ; CHECK: $s0 = COPY [[FADD]](s32)
    ; CHECK: RET_ReallyLR implicit $s0
    %0:_(<2 x s32>) = COPY $d0
    %2:_(<2 x s32>) = G_IMPLICIT_DEF
    %5:_(s64) = G_CONSTANT i64 0
    %1:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %2, shufflemask(1, undef)
    %3:_(<2 x s32>) = G_FADD %1, %0
    %4:_(s32) = G_EXTRACT_VECTOR_ELT %3(<2 x s32>), %5(s64)
    $s0 = COPY %4(s32)
    RET_ReallyLR implicit $s0

...
---
name: f32_faddp_commuted
alignment: 4
legalized: true
tracksRegLiveness: true
liveins:
  - { reg: '$d0' }
body: |
  bb.1:
    liveins: $d0

    ; CHECK-LABEL: name: f32_faddp_commuted
    ; CHECK: liveins: $d0
    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s64)
    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
    ; CHECK: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C1]](s64)
    ; CHECK: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[EVEC]], [[EVEC1]]
    ; CHECK: $s0 = COPY [[FADD]](s32)
    ; CHECK: RET_ReallyLR implicit $s0
    %0:_(<2 x s32>) = COPY $d0
    %2:_(<2 x s32>) = G_IMPLICIT_DEF
    %5:_(s64) = G_CONSTANT i64 0
    %1:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %2, shufflemask(1, undef)
    %3:_(<2 x s32>) = G_FADD %0, %1
    %4:_(s32) = G_EXTRACT_VECTOR_ELT %3(<2 x s32>), %5(s64)
    $s0 = COPY %4(s32)
    RET_ReallyLR implicit $s0

...
---
name: wrong_extract_idx
alignment: 4
legalized: true
tracksRegLiveness: true
liveins:
  - { reg: '$q0' }
body: |
  bb.1:
    liveins: $q0

    ; CHECK-LABEL: name: wrong_extract_idx
    ; CHECK: liveins: $q0
    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
    ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
    ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s64>), [[DEF]], shufflemask(1, undef)
    ; CHECK: [[FADD:%[0-9]+]]:_(<2 x s64>) = G_FADD [[SHUF]], [[COPY]]
    ; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[FADD]](<2 x s64>), [[C]](s64)
    ; CHECK: $d0 = COPY [[EVEC]](s64)
    ; CHECK: RET_ReallyLR implicit $d0
    %0:_(<2 x s64>) = COPY $q0
    %2:_(<2 x s64>) = G_IMPLICIT_DEF
    %5:_(s64) = G_CONSTANT i64 1
    %1:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %2, shufflemask(1, undef)
    %3:_(<2 x s64>) = G_FADD %1, %0
    %4:_(s64) = G_EXTRACT_VECTOR_ELT %3(<2 x s64>), %5(s64)
    $d0 = COPY %4(s64)
    RET_ReallyLR implicit $d0

...
---
name: wrong_shuffle_mask
alignment: 4
legalized: true
tracksRegLiveness: true
liveins:
  - { reg: '$q0' }
body: |
  bb.1:
    liveins: $q0

    ; CHECK-LABEL: name: wrong_shuffle_mask
    ; CHECK: liveins: $q0
    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
    ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
    ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s64>), [[DEF]], shufflemask(0, undef)
    ; CHECK: [[FADD:%[0-9]+]]:_(<2 x s64>) = G_FADD [[SHUF]], [[COPY]]
    ; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[FADD]](<2 x s64>), [[C]](s64)
    ; CHECK: $d0 = COPY [[EVEC]](s64)
    ; CHECK: RET_ReallyLR implicit $d0
    %0:_(<2 x s64>) = COPY $q0
    %2:_(<2 x s64>) = G_IMPLICIT_DEF
    %5:_(s64) = G_CONSTANT i64 0
    %1:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %2, shufflemask(0, undef)
    %3:_(<2 x s64>) = G_FADD %1, %0
    %4:_(s64) = G_EXTRACT_VECTOR_ELT %3(<2 x s64>), %5(s64)
    $d0 = COPY %4(s64)
    RET_ReallyLR implicit $d0

...
@@ -0,0 +1,62 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-unknown -run-pass=instruction-select %s -o - | FileCheck %s
---
name: f64_faddp
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
liveins:
  - { reg: '$q0' }
frameInfo:
  maxAlignment: 1
body: |
  bb.1:
    liveins: $q0

    ; CHECK-LABEL: name: f64_faddp
    ; CHECK: liveins: $q0
    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
    ; CHECK: [[FADDPv2i64p:%[0-9]+]]:fpr64 = FADDPv2i64p [[COPY]]
    ; CHECK: $d0 = COPY [[FADDPv2i64p]]
    ; CHECK: RET_ReallyLR implicit $d0
    %0:fpr(<2 x s64>) = COPY $q0
    %6:gpr(s64) = G_CONSTANT i64 0
    %7:fpr(s64) = G_EXTRACT_VECTOR_ELT %0(<2 x s64>), %6(s64)
    %8:gpr(s64) = G_CONSTANT i64 1
    %9:fpr(s64) = G_EXTRACT_VECTOR_ELT %0(<2 x s64>), %8(s64)
    %4:fpr(s64) = G_FADD %7, %9
    $d0 = COPY %4(s64)
    RET_ReallyLR implicit $d0

...
---
name: f32_faddp
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
liveins:
  - { reg: '$d0' }
frameInfo:
  maxAlignment: 1
body: |
  bb.1:
    liveins: $d0

    ; CHECK-LABEL: name: f32_faddp
    ; CHECK: liveins: $d0
    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
    ; CHECK: [[FADDPv2i32p:%[0-9]+]]:fpr32 = FADDPv2i32p [[COPY]]
    ; CHECK: $s0 = COPY [[FADDPv2i32p]]
    ; CHECK: RET_ReallyLR implicit $s0
    %0:fpr(<2 x s32>) = COPY $d0
    %6:gpr(s64) = G_CONSTANT i64 0
    %7:fpr(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %6(s64)
    %8:gpr(s64) = G_CONSTANT i64 1
    %9:fpr(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %8(s64)
    %4:fpr(s32) = G_FADD %7, %9
    $s0 = COPY %4(s32)
    RET_ReallyLR implicit $s0

...