[AArch64][GlobalISel] Implement custom legalization for s32/s64 G_FCOPYSIGN
This is intended to be equivalent to the s32 + s64 cases in AArch64TargetLowering::LowerFCOPYSIGN: widen everything, use G_BIT + a mask to handle the actual copysign operation, then narrow back down to s32/s64.

I wasn't sure what the best/most canonical INSERT_SUBREG-selectable pattern is. I chose G_INSERT_VECTOR_ELT + an undef vector because it produces reasonably okay codegen. (It doesn't produce INSERT_SUBREG right now, though.) If there's a better way to do this, I'm happy to change it.

We also have a couple of codegen deficiencies in how we emit vector constants right now. (We need a GISel equivalent to the tryAdvSIMDModImm64 handling.)

Differential Revision: https://reviews.llvm.org/D108725
commit a7aaafde2e
parent ba9cc6537c
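As background on why a sign-bit mask plus a bit-select implements copysign: as I understand the BIT semantics, the G_BIT used below computes, per bit, (val & ~mask) | (sign & mask), i.e. it keeps the first operand's bits where the mask is clear and takes the second operand's bits where the mask is set. A minimal scalar sketch of that identity in plain C++ (illustrative only, not part of this patch; the helper name is made up):

#include <cstdint>
#include <cstring>

// Bit-select model: keep Val's bits where Mask is clear, take Sign's bits
// where Mask is set. With Mask covering only the sign bit, this is copysign.
double copysignViaMask(double Val, double Sign) {
  uint64_t V, S;
  std::memcpy(&V, &Val, sizeof(V));
  std::memcpy(&S, &Sign, sizeof(S));
  const uint64_t Mask = 0x8000000000000000ULL; // high (sign) bit only
  const uint64_t R = (V & ~Mask) | (S & Mask);
  double Res;
  std::memcpy(&Res, &R, sizeof(Res));
  return Res; // e.g. copysignViaMask(1.5, -0.0) == -1.5
}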
@@ -814,6 +814,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
  getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
      .legalFor({{s64, s32}, {s64, s64}});

  // TODO: Custom legalization for vector types.
  // TODO: Custom legalization for mismatched types.
  // TODO: s16 support.
  getActionDefinitionsBuilder(G_FCOPYSIGN).customFor({{s32, s32}, {s64, s64}});

  getLegacyLegalizerInfo().computeTables();
  verify(*ST.getInstrInfo());
}
@@ -856,6 +861,8 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET:
    return legalizeMemOps(MI, Helper);
  case TargetOpcode::G_FCOPYSIGN:
    return legalizeFCopySign(MI, Helper);
  }

  llvm_unreachable("expected switch to return");
@@ -1438,3 +1445,63 @@ bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,

  return false;
}

bool AArch64LegalizerInfo::legalizeFCopySign(MachineInstr &MI,
                                             LegalizerHelper &Helper) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  Register Dst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst);
  assert(DstTy.isScalar() && "Only expected scalars right now!");
  const unsigned DstSize = DstTy.getSizeInBits();
  assert((DstSize == 32 || DstSize == 64) && "Unexpected dst type!");
  assert(MRI.getType(MI.getOperand(2).getReg()) == DstTy &&
         "Expected homogeneous types!");

  // We want to materialize a mask with the high bit set.
  uint64_t EltMask;
  LLT VecTy;

  // TODO: s16 support.
  switch (DstSize) {
  default:
    llvm_unreachable("Unexpected type for G_FCOPYSIGN!");
  case 64: {
    // AdvSIMD immediate moves cannot materialize our mask in a single
    // instruction for 64-bit elements. Instead, materialize zero and then
    // negate it.
    EltMask = 0;
    VecTy = LLT::fixed_vector(2, DstTy);
    break;
  }
  case 32:
    EltMask = 0x80000000ULL;
    VecTy = LLT::fixed_vector(4, DstTy);
    break;
  }

  // Widen In1 and In2 to 128 bits. We want these to eventually become
  // INSERT_SUBREGs.
  auto Undef = MIRBuilder.buildUndef(VecTy);
  auto Zero = MIRBuilder.buildConstant(DstTy, 0);
  auto Ins1 = MIRBuilder.buildInsertVectorElement(
      VecTy, Undef, MI.getOperand(1).getReg(), Zero);
  auto Ins2 = MIRBuilder.buildInsertVectorElement(
      VecTy, Undef, MI.getOperand(2).getReg(), Zero);

  // Construct the mask.
  auto Mask = MIRBuilder.buildConstant(VecTy, EltMask);
  if (DstSize == 64)
    Mask = MIRBuilder.buildFNeg(VecTy, Mask);

  auto Sel = MIRBuilder.buildInstr(AArch64::G_BIT, {VecTy}, {Ins1, Ins2, Mask});

  // Build an unmerge whose 0th elt is the original G_FCOPYSIGN destination. We
  // want this to eventually become an EXTRACT_SUBREG.
  SmallVector<Register, 2> DstRegs(1, Dst);
  for (unsigned I = 1, E = VecTy.getNumElements(); I < E; ++I)
    DstRegs.push_back(MRI.createGenericVirtualRegister(DstTy));
  MIRBuilder.buildUnmerge(DstRegs, Sel);
  MI.eraseFromParent();
  return true;
}
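A note on the 64-bit path in legalizeFCopySign above: a single AdvSIMD immediate move can't materialize the 64-bit sign mask, so the code builds a zero vector and negates it, relying on fneg flipping only the sign bit: -(+0.0) has exactly the bit pattern 0x8000000000000000. A small standalone check of that identity in plain C++ (illustrative only, not part of this patch):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // Negating +0.0 flips only the sign bit, so the result's bit pattern is
  // exactly the per-element 64-bit sign mask the legalizer wants.
  double Zero = 0.0;
  double NegZero = -Zero;
  uint64_t Bits;
  std::memcpy(&Bits, &NegZero, sizeof(Bits));
  assert(Bits == 0x8000000000000000ULL);
  return 0;
}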
@@ -57,6 +57,7 @@ private:
                        LegalizerHelper &Helper) const;
  bool legalizeCTTZ(MachineInstr &MI, LegalizerHelper &Helper) const;
  bool legalizeMemOps(MachineInstr &MI, LegalizerHelper &Helper) const;
  bool legalizeFCopySign(MachineInstr &MI, LegalizerHelper &Helper) const;
  const AArch64Subtarget *ST;
};
} // End llvm namespace.
@@ -0,0 +1,56 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s

...
---
name:            legalize_s32
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $s0, $s1
    ; CHECK-LABEL: name: legalize_s32
    ; CHECK: liveins: $s0, $s1
    ; CHECK: %val:_(s32) = COPY $s0
    ; CHECK: %sign:_(s32) = COPY $s1
    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
    ; CHECK: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %val(s32), [[C]](s32)
    ; CHECK: [[IVEC1:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %sign(s32), [[C]](s32)
    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
    ; CHECK: [[BIT:%[0-9]+]]:_(<4 x s32>) = G_BIT [[IVEC]], [[IVEC1]], [[BUILD_VECTOR]]
    ; CHECK: %fcopysign:_(s32), %10:_(s32), %11:_(s32), %12:_(s32) = G_UNMERGE_VALUES [[BIT]](<4 x s32>)
    ; CHECK: $s0 = COPY %fcopysign(s32)
    ; CHECK: RET_ReallyLR implicit $s0
    %val:_(s32) = COPY $s0
    %sign:_(s32) = COPY $s1
    %fcopysign:_(s32) = G_FCOPYSIGN %val, %sign(s32)
    $s0 = COPY %fcopysign(s32)
    RET_ReallyLR implicit $s0

...
---
name:            legalize_s64
tracksRegLiveness: true
body:             |
  bb.0:
    liveins: $d0, $d1
    ; CHECK-LABEL: name: legalize_s64
    ; CHECK: liveins: $d0, $d1
    ; CHECK: %val:_(s64) = COPY $d0
    ; CHECK: %sign:_(s64) = COPY $d1
    ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
    ; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], %val(s64), [[C]](s64)
    ; CHECK: [[IVEC1:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], %sign(s64), [[C]](s64)
    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
    ; CHECK: [[FNEG:%[0-9]+]]:_(<2 x s64>) = G_FNEG [[BUILD_VECTOR]]
    ; CHECK: [[BIT:%[0-9]+]]:_(<2 x s64>) = G_BIT [[IVEC]], [[IVEC1]], [[FNEG]]
    ; CHECK: %fcopysign:_(s64), %10:_(s64) = G_UNMERGE_VALUES [[BIT]](<2 x s64>)
    ; CHECK: $d0 = COPY %fcopysign(s64)
    ; CHECK: RET_ReallyLR implicit $d0
    %val:_(s64) = COPY $d0
    %sign:_(s64) = COPY $d1
    %fcopysign:_(s64) = G_FCOPYSIGN %val, %sign(s64)
    $d0 = COPY %fcopysign(s64)
    RET_ReallyLR implicit $d0
@@ -487,8 +487,8 @@
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: G_FCOPYSIGN (opcode {{[0-9]+}}): 2 type indices
-# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
-# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. the first uncovered type index: 2, OK
+# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
 # DEBUG-NEXT: G_IS_FPCLASS (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
 # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined