//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
#include "AArch64LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
#include "llvm/Target/TargetOpcodes.h"

using namespace llvm;

AArch64LegalizerInfo::AArch64LegalizerInfo() {
  using namespace TargetOpcode;
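  // LLT shorthands for the scalar, pointer, and vector types referenced below.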
  const LLT p0 = LLT::pointer(0, 64);
  const LLT s1 = LLT::scalar(1);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  const LLT v2s32 = LLT::vector(2, 32);
  const LLT v4s32 = LLT::vector(4, 32);
  const LLT v2s64 = LLT::vector(2, 64);

  for (auto Ty : {p0, s1, s8, s16, s32, s64})
    setAction({G_IMPLICIT_DEF, Ty}, Legal);

  for (auto Ty : {s16, s32, s64})
    setAction({G_PHI, Ty}, Legal);

  for (auto Ty : {s1, s8})
    setAction({G_PHI, Ty}, WidenScalar);
  for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR, G_SHL}) {
    // These operations naturally get the right answer when used on
    // GPR32, even if the actual type is narrower.
    for (auto Ty : {s32, s64, v2s32, v4s32, v2s64})
      setAction({BinOp, Ty}, Legal);

    for (auto Ty : {s1, s8, s16})
      setAction({BinOp, Ty}, WidenScalar);
  }
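
  // G_GEP offsets must be pointer-sized (s64); narrower index types are
  // widened before selection.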
  setAction({G_GEP, p0}, Legal);
  setAction({G_GEP, 1, s64}, Legal);

  for (auto Ty : {s1, s8, s16, s32})
    setAction({G_GEP, 1, Ty}, WidenScalar);
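
  // G_PTR_MASK clears the low bits of a pointer; legalizeVaArg below relies
  // on it to realign over-aligned va_arg slots.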
  setAction({G_PTR_MASK, p0}, Legal);

  for (unsigned BinOp : {G_LSHR, G_ASHR, G_SDIV, G_UDIV}) {
    for (auto Ty : {s32, s64})
      setAction({BinOp, Ty}, Legal);

    for (auto Ty : {s1, s8, s16})
      setAction({BinOp, Ty}, WidenScalar);
  }
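
  // AArch64 has no integer remainder instruction, so G_SREM/G_UREM are
  // lowered generically (typically to a divide plus multiply-and-subtract).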
  for (unsigned BinOp : {G_SREM, G_UREM})
    for (auto Ty : { s1, s8, s16, s32, s64 })
      setAction({BinOp, Ty}, Lower);
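
  // 64-bit multiply-with-overflow is also lowered, presumably in terms of a
  // high-half multiply plus compare (G_SMULH/G_UMULH are kept legal below).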
  for (unsigned Op : {G_SMULO, G_UMULO})
    setAction({Op, s64}, Lower);
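
  // Carry/overflow-producing arithmetic is legal on 32- and 64-bit scalars;
  // the carry/overflow bit (type index 1) is an s1.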
  for (unsigned Op : {G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_SMULH, G_UMULH}) {
    for (auto Ty : { s32, s64 })
      setAction({Op, Ty}, Legal);

    setAction({Op, 1, s1}, Legal);
  }

  for (unsigned BinOp : {G_FADD, G_FSUB, G_FMA, G_FMUL, G_FDIV})
    for (auto Ty : {s32, s64})
      setAction({BinOp, Ty}, Legal);
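
  // There are no AArch64 instructions for frem/fpow, so these become runtime
  // library calls (fmod/pow and their float variants).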
  for (unsigned BinOp : {G_FREM, G_FPOW}) {
    setAction({BinOp, s32}, Libcall);
    setAction({BinOp, s64}, Libcall);
  }

  for (auto Ty : {s32, s64, p0}) {
    setAction({G_INSERT, Ty}, Legal);
    setAction({G_INSERT, 1, Ty}, Legal);
  }
  for (auto Ty : {s1, s8, s16}) {
    setAction({G_INSERT, Ty}, WidenScalar);
    setAction({G_INSERT, 1, Ty}, Legal);
    // FIXME: Can't widen the sources because that violates the constraints on
    // G_INSERT (It seems entirely reasonable that inputs shouldn't overlap).
  }

  for (auto Ty : {s1, s8, s16, s32, s64, p0})
    setAction({G_EXTRACT, Ty}, Legal);

  for (auto Ty : {s32, s64})
    setAction({G_EXTRACT, 1, Ty}, Legal);

  for (unsigned MemOp : {G_LOAD, G_STORE}) {
    for (auto Ty : {s8, s16, s32, s64, p0, v2s32})
      setAction({MemOp, Ty}, Legal);

    setAction({MemOp, s1}, WidenScalar);

    // And everything's fine in addrspace 0.
    setAction({MemOp, 1, p0}, Legal);
  }

  // Constants
  for (auto Ty : {s32, s64}) {
    setAction({TargetOpcode::G_CONSTANT, Ty}, Legal);
    setAction({TargetOpcode::G_FCONSTANT, Ty}, Legal);
  }

  setAction({G_CONSTANT, p0}, Legal);

  for (auto Ty : {s1, s8, s16})
    setAction({TargetOpcode::G_CONSTANT, Ty}, WidenScalar);

  setAction({TargetOpcode::G_FCONSTANT, s16}, WidenScalar);
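
  // Comparisons produce a result that is widened to s32; 32/64-bit integer
  // and pointer operands can be compared directly.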
  setAction({G_ICMP, 1, s32}, Legal);
  setAction({G_ICMP, 1, s64}, Legal);
  setAction({G_ICMP, 1, p0}, Legal);

  for (auto Ty : {s1, s8, s16}) {
    setAction({G_ICMP, Ty}, WidenScalar);
    setAction({G_FCMP, Ty}, WidenScalar);
    setAction({G_ICMP, 1, Ty}, WidenScalar);
  }

  setAction({G_ICMP, s32}, Legal);
  setAction({G_FCMP, s32}, Legal);
  setAction({G_FCMP, 1, s32}, Legal);
  setAction({G_FCMP, 1, s64}, Legal);

  // Extensions
  for (auto Ty : { s1, s8, s16, s32, s64 }) {
    setAction({G_ZEXT, Ty}, Legal);
    setAction({G_SEXT, Ty}, Legal);
    setAction({G_ANYEXT, Ty}, Legal);
  }

  for (auto Ty : { s1, s8, s16, s32 }) {
    setAction({G_ZEXT, 1, Ty}, Legal);
    setAction({G_SEXT, 1, Ty}, Legal);
    setAction({G_ANYEXT, 1, Ty}, Legal);
  }

  // FP conversions
  for (auto Ty : { s16, s32 }) {
    setAction({G_FPTRUNC, Ty}, Legal);
    setAction({G_FPEXT, 1, Ty}, Legal);
  }

  for (auto Ty : { s32, s64 }) {
    setAction({G_FPTRUNC, 1, Ty}, Legal);
    setAction({G_FPEXT, Ty}, Legal);
  }

  for (auto Ty : { s1, s8, s16, s32 })
    setAction({G_TRUNC, Ty}, Legal);

  for (auto Ty : { s8, s16, s32, s64 })
    setAction({G_TRUNC, 1, Ty}, Legal);

  // Conversions
  for (auto Ty : { s32, s64 }) {
    setAction({G_FPTOSI, 0, Ty}, Legal);
    setAction({G_FPTOUI, 0, Ty}, Legal);
    setAction({G_SITOFP, 1, Ty}, Legal);
    setAction({G_UITOFP, 1, Ty}, Legal);
  }
  for (auto Ty : { s1, s8, s16 }) {
    setAction({G_FPTOSI, 0, Ty}, WidenScalar);
    setAction({G_FPTOUI, 0, Ty}, WidenScalar);
    setAction({G_SITOFP, 1, Ty}, WidenScalar);
    setAction({G_UITOFP, 1, Ty}, WidenScalar);
  }

  for (auto Ty : { s32, s64 }) {
    setAction({G_FPTOSI, 1, Ty}, Legal);
    setAction({G_FPTOUI, 1, Ty}, Legal);
    setAction({G_SITOFP, 0, Ty}, Legal);
    setAction({G_UITOFP, 0, Ty}, Legal);
  }

  // Control-flow
  for (auto Ty : {s1, s8, s16, s32})
    setAction({G_BRCOND, Ty}, Legal);
  setAction({G_BRINDIRECT, p0}, Legal);

  // Select
  for (auto Ty : {s1, s8, s16})
    setAction({G_SELECT, Ty}, WidenScalar);

  for (auto Ty : {s32, s64, p0})
    setAction({G_SELECT, Ty}, Legal);

  setAction({G_SELECT, 1, s1}, Legal);

  // Pointer-handling
  setAction({G_FRAME_INDEX, p0}, Legal);
  setAction({G_GLOBAL_VALUE, p0}, Legal);

  for (auto Ty : {s1, s8, s16, s32, s64})
    setAction({G_PTRTOINT, 0, Ty}, Legal);

  setAction({G_PTRTOINT, 1, p0}, Legal);

  setAction({G_INTTOPTR, 0, p0}, Legal);
  setAction({G_INTTOPTR, 1, s64}, Legal);

  // Casts for 32 and 64-bit width types are just copies.
  for (auto Ty : {s1, s8, s16, s32, s64}) {
    setAction({G_BITCAST, 0, Ty}, Legal);
    setAction({G_BITCAST, 1, Ty}, Legal);
  }

  // For the sake of copying bits around, the type does not really
  // matter as long as it fits a register.
  for (int EltSize = 8; EltSize <= 64; EltSize *= 2) {
    setAction({G_BITCAST, 0, LLT::vector(128/EltSize, EltSize)}, Legal);
    setAction({G_BITCAST, 1, LLT::vector(128/EltSize, EltSize)}, Legal);
    if (EltSize >= 64)
      continue;

    setAction({G_BITCAST, 0, LLT::vector(64/EltSize, EltSize)}, Legal);
    setAction({G_BITCAST, 1, LLT::vector(64/EltSize, EltSize)}, Legal);
    if (EltSize >= 32)
      continue;

    setAction({G_BITCAST, 0, LLT::vector(32/EltSize, EltSize)}, Legal);
    setAction({G_BITCAST, 1, LLT::vector(32/EltSize, EltSize)}, Legal);
  }

  setAction({G_VASTART, p0}, Legal);

  // va_list must be a pointer, but most sized types are pretty easy to handle
  // as the destination.
  setAction({G_VAARG, 1, p0}, Legal);
  for (auto Ty : {s8, s16, s32, s64, p0})
    setAction({G_VAARG, Ty}, Custom);
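
  // Build the final legality tables from the actions registered above.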
  computeTables();
}

bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
                                          MachineRegisterInfo &MRI,
                                          MachineIRBuilder &MIRBuilder) const {
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
    return false;
  case TargetOpcode::G_VAARG:
    return legalizeVaArg(MI, MRI, MIRBuilder);
  }

  llvm_unreachable("expected switch to return");
}
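
// Expand G_VAARG by hand. The code below assumes the simple pointer-bump form
// of va_list (a single pointer into the argument save area, as on Darwin):
// load the current pointer, realign it if the type needs it, load the value,
// then advance the pointer and store it back.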
bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &MIRBuilder) const {
  MIRBuilder.setInstr(MI);
  MachineFunction &MF = MIRBuilder.getMF();
  unsigned Align = MI.getOperand(2).getImm();
  unsigned Dst = MI.getOperand(0).getReg();
  unsigned ListPtr = MI.getOperand(1).getReg();

  LLT PtrTy = MRI.getType(ListPtr);
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
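
  // Load the current head of the va_list, i.e. the address of the next
  // argument slot.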
  unsigned List = MRI.createGenericVirtualRegister(PtrTy);
  MIRBuilder.buildLoad(
      List, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               PtrSize, /* Align = */ PtrSize));
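
  // If the requested alignment exceeds the pointer-sized slot alignment,
  // round the list pointer up: add Align - 1, then clear the low bits with
  // G_PTR_MASK.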
  unsigned DstPtr;
  if (Align > PtrSize) {
    // Realign the list to the actual required alignment.
    auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1);

    unsigned ListTmp = MRI.createGenericVirtualRegister(PtrTy);
    MIRBuilder.buildGEP(ListTmp, List, AlignMinus1->getOperand(0).getReg());

    DstPtr = MRI.createGenericVirtualRegister(PtrTy);
    MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align));
  } else
    DstPtr = List;
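
  // Load the argument value itself from the (possibly realigned) slot.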
  uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8;
  MIRBuilder.buildLoad(
      Dst, DstPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               ValSize, std::max(Align, PtrSize)));
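
  // Advance the pointer past the slot, rounding the value size up to a
  // multiple of the pointer size, and store the new head back to the va_list.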
  unsigned SizeReg = MRI.createGenericVirtualRegister(IntPtrTy);
  MIRBuilder.buildConstant(SizeReg, alignTo(ValSize, PtrSize));

  unsigned NewList = MRI.createGenericVirtualRegister(PtrTy);
  MIRBuilder.buildGEP(NewList, DstPtr, SizeReg);

  MIRBuilder.buildStore(
      NewList, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
                               PtrSize, /* Align = */ PtrSize));

  MI.eraseFromParent();
  return true;
}