llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

//===- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator ---*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the IRTranslator class.
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <string>
#include <utility>
#include <vector>

#define DEBUG_TYPE "irtranslator"

using namespace llvm;

// Storage for the pass identifier; it is handed to the MachineFunctionPass
// base-class constructor by IRTranslator's constructor.
char IRTranslator::ID = 0;

// Register the pass under the DEBUG_TYPE name ("irtranslator") and declare
// TargetPassConfig as a pass it depends on.
INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
                false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
                false, false)

/// Flag \p MF as having failed instruction selection and report \p R.
///
/// When \p TPC has the GlobalISel abort enabled the remark's message is raised
/// as a fatal error; otherwise the remark is emitted through \p ORE.
static void reportTranslationError(MachineFunction &MF,
                                   const TargetPassConfig &TPC,
                                   OptimizationRemarkEmitter &ORE,
                                   OptimizationRemarkMissed &R) {
  MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);

  const bool AbortOnFailure = TPC.isGlobalISelAbortEnabled();

  // A remark without a debug location does not say where the failure is, and
  // a raw fatal error never does, so name the function explicitly in both
  // cases.
  if (AbortOnFailure || !R.getLocation().isValid())
    R << (" (in function: " + MF.getName() + ")").str();

  if (!AbortOnFailure) {
    ORE.emit(R);
    return;
  }
  report_fatal_error(R.getMsg());
}

/// Construct the pass and make sure it is registered with the global pass
/// registry.
IRTranslator::IRTranslator() : MachineFunctionPass(ID) {
  PassRegistry &Registry = *PassRegistry::getPassRegistry();
  initializeIRTranslatorPass(Registry);
}

#ifndef NDEBUG
/// Verify that every instruction created has the same DILocation as the
/// instruction being translated.
class DILocationVerifier : MachineFunction::Delegate {
  MachineFunction &MF;
  const Instruction *CurrInst = nullptr;

public:
  DILocationVerifier(MachineFunction &MF) : MF(MF) { MF.setDelegate(this); }
  ~DILocationVerifier() { MF.resetDelegate(this); }

  const Instruction *getCurrentInst() const { return CurrInst; }
  void setCurrentInst(const Instruction *Inst) { CurrInst = Inst; }

  void MF_HandleInsertion(const MachineInstr &MI) override {
    assert(getCurrentInst() && "Inserted instruction without a current MI");

    // Only print the check message if we're actually checking it.
#ifndef NDEBUG
    LLVM_DEBUG(dbgs() << "Checking DILocation from " << *CurrInst
                      << " was copied to " << MI);
#endif
    assert(CurrInst->getDebugLoc() == MI.getDebugLoc() &&
           "Line info was not transferred to all instructions");
  }
  void MF_HandleRemoval(const MachineInstr &MI) override {}
};
#endif // ifndef NDEBUG


/// Declare the analyses this pass depends on: StackProtector and
/// TargetPassConfig are required directly, then the SelectionDAG-fallback
/// helper and the MachineFunctionPass base class add their own requirements
/// to \p AU.
void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<StackProtector>();
  AU.addRequired<TargetPassConfig>();
  getSelectionDAGFallbackAnalysisUsage(AU);
  MachineFunctionPass::getAnalysisUsage(AU);
}

static void computeValueLLTs(const DataLayout &DL, Type &Ty,
                             SmallVectorImpl<LLT> &ValueTys,
                             SmallVectorImpl<uint64_t> *Offsets = nullptr,
                             uint64_t StartingOffset = 0) {
  // Given a struct type, recursively traverse the elements.
  if (StructType *STy = dyn_cast<StructType>(&Ty)) {
    const StructLayout *SL = DL.getStructLayout(STy);
    for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I)
      computeValueLLTs(DL, *STy->getElementType(I), ValueTys, Offsets,
                       StartingOffset + SL->getElementOffset(I));
    return;
  }
  // Given an array type, recursively traverse the elements.
  if (ArrayType *ATy = dyn_cast<ArrayType>(&Ty)) {
    Type *EltTy = ATy->getElementType();
    uint64_t EltSize = DL.getTypeAllocSize(EltTy);
    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
      computeValueLLTs(DL, *EltTy, ValueTys, Offsets,
                       StartingOffset + i * EltSize);
    return;
  }
  // Interpret void as zero return values.
  if (Ty.isVoidTy())
    return;
  // Base case: we can get an LLT for this LLVM IR type.
  ValueTys.push_back(getLLTForType(Ty, DL));
  if (Offsets != nullptr)
    Offsets->push_back(StartingOffset * 8);
}

/// Reserve one register slot per leaf member of \p Val in the value map and
/// return the list of slots.
///
/// Every slot is initialised to register 0; no virtual register is created
/// here. \p Val must not already have an entry in the map.
IRTranslator::ValueToVRegInfo::VRegListT &
IRTranslator::allocateVRegs(const Value &Val) {
  assert(!VMap.contains(Val) && "Value already allocated in VMap");
  auto &RegList = *VMap.getVRegs(Val);
  auto *OffsetList = VMap.getOffsets(Val);

  // Offsets are only computed when none have been recorded for Val yet.
  SmallVector<LLT, 4> PartTys;
  computeValueLLTs(*DL, *Val.getType(), PartTys,
                   OffsetList->empty() ? OffsetList : nullptr);

  for (size_t Remaining = PartTys.size(); Remaining != 0; --Remaining)
    RegList.push_back(0);
  return RegList;
}

/// Return the virtual registers holding \p Val, creating them on first use.
///
/// Non-constant values get one fresh generic vreg per leaf member. Constant
/// aggregates are built from the registers of their elements. Any other
/// constant gets a single vreg and is translated immediately; if that
/// translation fails, the failure is reported and the (untranslated) register
/// list is still returned.
ArrayRef<unsigned> IRTranslator::getOrCreateVRegs(const Value &Val) {
  // Already known: hand back the recorded registers.
  auto Known = VMap.findVRegs(Val);
  if (Known != VMap.vregs_end())
    return *Known->second;

  // Void values own an (empty) register list and nothing else.
  if (Val.getType()->isVoidTy())
    return *VMap.getVRegs(Val);

  // Create the map entries for this value.
  auto &RegList = *VMap.getVRegs(Val);
  auto *OffsetList = VMap.getOffsets(Val);

  assert(Val.getType()->isSized() &&
         "Don't know how to create an empty vreg");

  SmallVector<LLT, 4> PartTys;
  computeValueLLTs(*DL, *Val.getType(), PartTys,
                   OffsetList->empty() ? OffsetList : nullptr);

  const auto *C = dyn_cast<Constant>(&Val);
  if (!C) {
    for (const LLT &PartTy : PartTys)
      RegList.push_back(MRI->createGenericVirtualRegister(PartTy));
    return RegList;
  }

  if (Val.getType()->isAggregateType()) {
    // UndefValue, ConstantAggregateZero: concatenate the registers of every
    // element, in element order.
    for (unsigned Idx = 0; auto *Elt = C->getAggregateElement(Idx); ++Idx) {
      ArrayRef<unsigned> EltRegs = getOrCreateVRegs(*Elt);
      llvm::copy(EltRegs, std::back_inserter(RegList));
    }
    return RegList;
  }

  assert(PartTys.size() == 1 && "unexpectedly split LLT");
  RegList.push_back(MRI->createGenericVirtualRegister(PartTys[0]));
  if (translate(*C, RegList.front()))
    return RegList;

  OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                             MF->getFunction().getSubprogram(),
                             &MF->getFunction().getEntryBlock());
  R << "unable to translate constant: " << ore::NV("Type", Val.getType());
  reportTranslationError(*MF, *TPC, *ORE, R);
  return RegList;
}

/// Return the frame index of the stack object backing \p AI, creating the
/// stack object on first request.
///
/// The array size of \p AI must be a ConstantInt. The object is at least one
/// byte large and uses the alloca's explicit alignment, falling back to the
/// ABI alignment of the allocated type when none is given.
int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
  auto Existing = FrameIndices.find(&AI);
  if (Existing != FrameIndices.end())
    return Existing->second;

  // Consecutive array elements are one *alloc* size apart, so the store size
  // would under-allocate arrays of types that carry tail padding. Keep the
  // arithmetic in 64 bits: CreateStackObject takes a 64-bit size and a large
  // alloca must not be silently truncated.
  uint64_t ElementSize = DL->getTypeAllocSize(AI.getAllocatedType());
  uint64_t Size =
      ElementSize * cast<ConstantInt>(AI.getArraySize())->getZExtValue();

  // Always allocate at least one byte.
  Size = std::max<uint64_t>(Size, 1);

  unsigned Alignment = AI.getAlignment();
  if (!Alignment)
    Alignment = DL->getABITypeAlignment(AI.getAllocatedType());

  int &FI = FrameIndices[&AI];
  FI = MF->getFrameInfo().CreateStackObject(Size, Alignment, false, &AI);
  return FI;
}

unsigned IRTranslator::getMemOpAlignment(const Instruction &I) {
  unsigned Alignment = 0;
  Type *ValTy = nullptr;
  if (const StoreInst *SI = dyn_cast<StoreInst>(&I)) {
    Alignment = SI->getAlignment();
    ValTy = SI->getValueOperand()->getType();
  } else if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) {
    Alignment = LI->getAlignment();
    ValTy = LI->getType();
  } else if (const AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) {
    // TODO(PR27168): This instruction has no alignment attribute, but unlike
    // the default alignment for load/store, the default here is to assume
    // it has NATURAL alignment, not DataLayout-specified alignment.
    const DataLayout &DL = AI->getModule()->getDataLayout();
    Alignment = DL.getTypeStoreSize(AI->getCompareOperand()->getType());
    ValTy = AI->getCompareOperand()->getType();
  } else if (const AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) {
    // TODO(PR27168): This instruction has no alignment attribute, but unlike
    // the default alignment for load/store, the default here is to assume
    // it has NATURAL alignment, not DataLayout-specified alignment.
    const DataLayout &DL = AI->getModule()->getDataLayout();
    Alignment = DL.getTypeStoreSize(AI->getValOperand()->getType());
    ValTy = AI->getType();
  } else {
    OptimizationRemarkMissed R("gisel-irtranslator", "", &I);
    R << "unable to translate memop: " << ore::NV("Opcode", &I);
    reportTranslationError(*MF, *TPC, *ORE, R);
    return 1;
  }

  return Alignment ? Alignment : DL->getABITypeAlignment(ValTy);
}

/// Return the machine basic block recorded for \p BB. The block must already
/// have been registered in BBToMBB.
MachineBasicBlock &IRTranslator::getMBB(const BasicBlock &BB) {
  MachineBasicBlock *Mapped = BBToMBB[&BB];
  assert(Mapped && "BasicBlock was not encountered before");
  return *Mapped;
}

/// Record \p NewPred as one more machine-level predecessor for the IR CFG
/// edge \p Edge.
void IRTranslator::addMachineCFGPred(CFGEdge Edge, MachineBasicBlock *NewPred) {
  assert(NewPred && "new predecessor must be a real MachineBasicBlock");
  auto &PredsForEdge = MachinePreds[Edge];
  PredsForEdge.push_back(NewPred);
}

bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
                                     MachineIRBuilder &MIRBuilder) {
  // FIXME: handle signed/unsigned wrapping flags.

  // Get or create a virtual register for each value.
  // Unless the value is a Constant => loadimm cst?
  // or inline constant each time?
  // Creation of a virtual register needs to have a size.
  unsigned Op0 = getOrCreateVReg(*U.getOperand(0));
  unsigned Op1 = getOrCreateVReg(*U.getOperand(1));
  unsigned Res = getOrCreateVReg(U);
  auto FBinOp = MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op0).addUse(Op1);
  if (isa<Instruction>(U)) {
    MachineInstr *FBinOpMI = FBinOp.getInstr();
    const Instruction &I = cast<Instruction>(U);
    FBinOpMI->copyIRFlags(I);
  }
  return true;
}

bool IRTranslator::translateFSub(const User &U, MachineIRBuilder &MIRBuilder) {
  // -0.0 - X --> G_FNEG
  if (isa<Constant>(U.getOperand(0)) &&
      U.getOperand(0) == ConstantFP::getZeroValueForNegation(U.getType())) {
    MIRBuilder.buildInstr(TargetOpcode::G_FNEG)
        .addDef(getOrCreateVReg(U))
        .addUse(getOrCreateVReg(*U.getOperand(1)));
    return true;
  }
  return translateBinaryOp(TargetOpcode::G_FSUB, U, MIRBuilder);
}

/// Translate a unary floating-point negation into G_FNEG.
///
/// \returns true; the translation cannot fail.
bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
  // fneg is a unary operator: its single operand lives at index 0. (Index 1
  // is only the negated value in the "fsub -0.0, X" idiom, which is handled
  // by translateFSub.)
  MIRBuilder.buildInstr(TargetOpcode::G_FNEG)
      .addDef(getOrCreateVReg(U))
      .addUse(getOrCreateVReg(*U.getOperand(0)));
  return true;
}

bool IRTranslator::translateCompare(const User &U,
                                    MachineIRBuilder &MIRBuilder) {
  const CmpInst *CI = dyn_cast<CmpInst>(&U);
  unsigned Op0 = getOrCreateVReg(*U.getOperand(0));
  unsigned Op1 = getOrCreateVReg(*U.getOperand(1));
  unsigned Res = getOrCreateVReg(U);
  CmpInst::Predicate Pred =
      CI ? CI->getPredicate() : static_cast<CmpInst::Predicate>(
                                    cast<ConstantExpr>(U).getPredicate());
  if (CmpInst::isIntPredicate(Pred))
    MIRBuilder.buildICmp(Pred, Res, Op0, Op1);
  else if (Pred == CmpInst::FCMP_FALSE)
    MIRBuilder.buildCopy(
        Res, getOrCreateVReg(*Constant::getNullValue(CI->getType())));
  else if (Pred == CmpInst::FCMP_TRUE)
    MIRBuilder.buildCopy(
        Res, getOrCreateVReg(*Constant::getAllOnesValue(CI->getType())));
  else
    MIRBuilder.buildFCmp(Pred, Res, Op0, Op1);

  return true;
}

/// Translate a return by delegating to the target's call lowering. A returned
/// value whose type has a store size of zero is treated like a void return.
bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
  const Value *Ret = cast<ReturnInst>(U).getReturnValue();
  const bool ReturnsData = Ret && DL->getTypeStoreSize(Ret->getType()) != 0;
  if (!ReturnsData)
    Ret = nullptr;

  ArrayRef<unsigned> VRegs;
  if (Ret)
    VRegs = getOrCreateVRegs(*Ret);

  // The target may move the insertion point while lowering. That is harmless:
  // a return is the last instruction of its block anyway.
  return CLI->lowerReturn(MIRBuilder, Ret, VRegs);
}

bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
  const BranchInst &BrInst = cast<BranchInst>(U);
  unsigned Succ = 0;
  if (!BrInst.isUnconditional()) {
    // We want a G_BRCOND to the true BB followed by an unconditional branch.
    unsigned Tst = getOrCreateVReg(*BrInst.getCondition());
    const BasicBlock &TrueTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ++));
    MachineBasicBlock &TrueBB = getMBB(TrueTgt);
    MIRBuilder.buildBrCond(Tst, TrueBB);
  }

  const BasicBlock &BrTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ));
  MachineBasicBlock &TgtBB = getMBB(BrTgt);
  MachineBasicBlock &CurBB = MIRBuilder.getMBB();

  // If the unconditional target is the layout successor, fallthrough.
  if (!CurBB.isLayoutSuccessor(&TgtBB))
    MIRBuilder.buildBr(TgtBB);

  // Link successors.
  for (const BasicBlock *Succ : successors(&BrInst))
    CurBB.addSuccessor(&getMBB(*Succ));
  return true;
}

bool IRTranslator::translateSwitch(const User &U,
                                   MachineIRBuilder &MIRBuilder) {
  // For now, just translate as a chain of conditional branches.
  // FIXME: could we share most of the logic/code in
  // SelectionDAGBuilder::visitSwitch between SelectionDAG and GlobalISel?
  // At first sight, it seems most of the logic in there is independent of
  // SelectionDAG-specifics and a lot of work went in to optimize switch
  // lowering in there.

  const SwitchInst &SwInst = cast<SwitchInst>(U);
  const unsigned SwCondValue = getOrCreateVReg(*SwInst.getCondition());
  const BasicBlock *OrigBB = SwInst.getParent();

  LLT LLTi1 = getLLTForType(*Type::getInt1Ty(U.getContext()), *DL);
  for (auto &CaseIt : SwInst.cases()) {
    const unsigned CaseValueReg = getOrCreateVReg(*CaseIt.getCaseValue());
    const unsigned Tst = MRI->createGenericVirtualRegister(LLTi1);
    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, Tst, CaseValueReg, SwCondValue);
    MachineBasicBlock &CurMBB = MIRBuilder.getMBB();
    const BasicBlock *TrueBB = CaseIt.getCaseSuccessor();
    MachineBasicBlock &TrueMBB = getMBB(*TrueBB);

    MIRBuilder.buildBrCond(Tst, TrueMBB);
    CurMBB.addSuccessor(&TrueMBB);
    addMachineCFGPred({OrigBB, TrueBB}, &CurMBB);

    MachineBasicBlock *FalseMBB =
        MF->CreateMachineBasicBlock(SwInst.getParent());
    // Insert the comparison blocks one after the other.
    MF->insert(std::next(CurMBB.getIterator()), FalseMBB);
    MIRBuilder.buildBr(*FalseMBB);
    CurMBB.addSuccessor(FalseMBB);

    MIRBuilder.setMBB(*FalseMBB);
  }
  // handle default case
  const BasicBlock *DefaultBB = SwInst.getDefaultDest();
  MachineBasicBlock &DefaultMBB = getMBB(*DefaultBB);
  MIRBuilder.buildBr(DefaultMBB);
  MachineBasicBlock &CurMBB = MIRBuilder.getMBB();
  CurMBB.addSuccessor(&DefaultMBB);
  addMachineCFGPred({OrigBB, DefaultBB}, &CurMBB);

  return true;
}

bool IRTranslator::translateIndirectBr(const User &U,
                                       MachineIRBuilder &MIRBuilder) {
  const IndirectBrInst &BrInst = cast<IndirectBrInst>(U);

  const unsigned Tgt = getOrCreateVReg(*BrInst.getAddress());
  MIRBuilder.buildBrIndirect(Tgt);

  // Link successors.
  MachineBasicBlock &CurBB = MIRBuilder.getMBB();
  for (const BasicBlock *Succ : successors(&BrInst))
    CurBB.addSuccessor(&getMBB(*Succ));

  return true;
}

bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
  const LoadInst &LI = cast<LoadInst>(U);

  auto Flags = LI.isVolatile() ? MachineMemOperand::MOVolatile
                               : MachineMemOperand::MONone;
  Flags |= MachineMemOperand::MOLoad;

  if (DL->getTypeStoreSize(LI.getType()) == 0)
    return true;

  ArrayRef<unsigned> Regs = getOrCreateVRegs(LI);
  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(LI);
  unsigned Base = getOrCreateVReg(*LI.getPointerOperand());

  for (unsigned i = 0; i < Regs.size(); ++i) {
    unsigned Addr = 0;
    MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offsets[i] / 8);

    MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8);
    unsigned BaseAlign = getMemOpAlignment(LI);
    auto MMO = MF->getMachineMemOperand(
        Ptr, Flags, (MRI->getType(Regs[i]).getSizeInBits() + 7) / 8,
        MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), nullptr,
        LI.getSyncScopeID(), LI.getOrdering());
    MIRBuilder.buildLoad(Regs[i], Addr, *MMO);
  }

  return true;
}

bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
  const StoreInst &SI = cast<StoreInst>(U);
  auto Flags = SI.isVolatile() ? MachineMemOperand::MOVolatile
                               : MachineMemOperand::MONone;
  Flags |= MachineMemOperand::MOStore;

  if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0)
    return true;

  ArrayRef<unsigned> Vals = getOrCreateVRegs(*SI.getValueOperand());
  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*SI.getValueOperand());
  unsigned Base = getOrCreateVReg(*SI.getPointerOperand());

  for (unsigned i = 0; i < Vals.size(); ++i) {
    unsigned Addr = 0;
    MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offsets[i] / 8);

    MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8);
    unsigned BaseAlign = getMemOpAlignment(SI);
    auto MMO = MF->getMachineMemOperand(
        Ptr, Flags, (MRI->getType(Vals[i]).getSizeInBits() + 7) / 8,
        MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), nullptr,
        SI.getSyncScopeID(), SI.getOrdering());
    MIRBuilder.buildStore(Vals[i], Addr, *MMO);
  }
  return true;
}

static uint64_t getOffsetFromIndices(const User &U, const DataLayout &DL) {
  const Value *Src = U.getOperand(0);
  Type *Int32Ty = Type::getInt32Ty(U.getContext());

  // getIndexedOffsetInType is designed for GEPs, so the first index is the
  // usual array element rather than looking into the actual aggregate.
  SmallVector<Value *, 1> Indices;
  Indices.push_back(ConstantInt::get(Int32Ty, 0));

  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&U)) {
    for (auto Idx : EVI->indices())
      Indices.push_back(ConstantInt::get(Int32Ty, Idx));
  } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&U)) {
    for (auto Idx : IVI->indices())
      Indices.push_back(ConstantInt::get(Int32Ty, Idx));
  } else {
    for (unsigned i = 1; i < U.getNumOperands(); ++i)
      Indices.push_back(U.getOperand(i));
  }

  return 8 * static_cast<uint64_t>(
                 DL.getIndexedOffsetInType(Src->getType(), Indices));
}

bool IRTranslator::translateExtractValue(const User &U,
                                         MachineIRBuilder &MIRBuilder) {
  const Value *Src = U.getOperand(0);
  uint64_t Offset = getOffsetFromIndices(U, *DL);
  ArrayRef<unsigned> SrcRegs = getOrCreateVRegs(*Src);
  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*Src);
  unsigned Idx = std::lower_bound(Offsets.begin(), Offsets.end(), Offset) -
                 Offsets.begin();
  auto &DstRegs = allocateVRegs(U);

  for (unsigned i = 0; i < DstRegs.size(); ++i)
    DstRegs[i] = SrcRegs[Idx++];

  return true;
}

bool IRTranslator::translateInsertValue(const User &U,
                                        MachineIRBuilder &MIRBuilder) {
  const Value *Src = U.getOperand(0);
  uint64_t Offset = getOffsetFromIndices(U, *DL);
  auto &DstRegs = allocateVRegs(U);
  ArrayRef<uint64_t> DstOffsets = *VMap.getOffsets(U);
  ArrayRef<unsigned> SrcRegs = getOrCreateVRegs(*Src);
  ArrayRef<unsigned> InsertedRegs = getOrCreateVRegs(*U.getOperand(1));
  auto InsertedIt = InsertedRegs.begin();

  for (unsigned i = 0; i < DstRegs.size(); ++i) {
    if (DstOffsets[i] >= Offset && InsertedIt != InsertedRegs.end())
      DstRegs[i] = *InsertedIt++;
    else
      DstRegs[i] = SrcRegs[i];
  }

  return true;
}

bool IRTranslator::translateSelect(const User &U,
                                   MachineIRBuilder &MIRBuilder) {
  unsigned Tst = getOrCreateVReg(*U.getOperand(0));
  ArrayRef<unsigned> ResRegs = getOrCreateVRegs(U);
  ArrayRef<unsigned> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
  ArrayRef<unsigned> Op1Regs = getOrCreateVRegs(*U.getOperand(2));

  for (unsigned i = 0; i < ResRegs.size(); ++i)
    MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i]);

  return true;
}

bool IRTranslator::translateBitCast(const User &U,
                                    MachineIRBuilder &MIRBuilder) {
  // If we're bitcasting to the source type, we can reuse the source vreg.
  if (getLLTForType(*U.getOperand(0)->getType(), *DL) ==
      getLLTForType(*U.getType(), *DL)) {
    unsigned SrcReg = getOrCreateVReg(*U.getOperand(0));
    auto &Regs = *VMap.getVRegs(U);
    // If we already assigned a vreg for this bitcast, we can't change that.
    // Emit a copy to satisfy the users we already emitted.
    if (!Regs.empty())
      MIRBuilder.buildCopy(Regs[0], SrcReg);
    else {
      Regs.push_back(SrcReg);
      VMap.getOffsets(U)->push_back(0);
    }
    return true;
  }
  return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder);
}

/// Emit the generic single-operand instruction \p Opcode, reading the vreg of
/// operand 0 and defining the vreg of \p U.
bool IRTranslator::translateCast(unsigned Opcode, const User &U,
                                 MachineIRBuilder &MIRBuilder) {
  const unsigned SrcReg = getOrCreateVReg(*U.getOperand(0));
  const unsigned DstReg = getOrCreateVReg(U);
  auto Cast = MIRBuilder.buildInstr(Opcode);
  Cast.addDef(DstReg).addUse(SrcReg);
  return true;
}

/// Translate a getelementptr into a sequence of G_GEP instructions.
///
/// Constant contributions (struct field offsets and constant-integer indices)
/// are accumulated into a single byte offset; each non-constant index first
/// flushes the accumulated offset into a G_GEP and then adds
/// "index * element size" with another G_GEP. If no offset remains at the end
/// the result is a plain copy of the last base register.
///
/// \returns false for GEPs producing a vector (not supported yet), true
/// otherwise.
bool IRTranslator::translateGetElementPtr(const User &U,
                                          MachineIRBuilder &MIRBuilder) {
  // FIXME: support vector GEPs.
  if (U.getType()->isVectorTy())
    return false;

  Value &Op0 = *U.getOperand(0);
  unsigned BaseReg = getOrCreateVReg(Op0);
  Type *PtrIRTy = Op0.getType();
  LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
  // Offsets use the integer type that matches the pointer type.
  Type *OffsetIRTy = DL->getIntPtrType(PtrIRTy);
  LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);

  // Constant byte offset accumulated so far and not yet emitted.
  int64_t Offset = 0;
  for (gep_type_iterator GTI = gep_type_begin(&U), E = gep_type_end(&U);
       GTI != E; ++GTI) {
    const Value *Idx = GTI.getOperand();
    if (StructType *StTy = GTI.getStructTypeOrNull()) {
      // Indexing into a struct: the index is a constant field number, so the
      // field's layout offset can be folded into the constant offset.
      unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
      Offset += DL->getStructLayout(StTy)->getElementOffset(Field);
      continue;
    } else {
      uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());

      // If the index is a constant integer, fold it into the constant offset
      // and move on.
      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
        Offset += ElementSize * CI->getSExtValue();
        continue;
      }

      // Variable index: emit the pending constant offset first so that it is
      // applied before the variable part.
      if (Offset != 0) {
        unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
        unsigned OffsetReg =
            getOrCreateVReg(*ConstantInt::get(OffsetIRTy, Offset));
        MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetReg);

        BaseReg = NewBaseReg;
        Offset = 0;
      }

      // Sign-extend or truncate the index to the offset type if needed.
      unsigned IdxReg = getOrCreateVReg(*Idx);
      if (MRI->getType(IdxReg) != OffsetTy) {
        unsigned NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy);
        MIRBuilder.buildSExtOrTrunc(NewIdxReg, IdxReg);
        IdxReg = NewIdxReg;
      }

      // N = N + Idx * ElementSize;
      // Avoid doing it for ElementSize of 1.
      unsigned GepOffsetReg;
      if (ElementSize != 1) {
        unsigned ElementSizeReg =
            getOrCreateVReg(*ConstantInt::get(OffsetIRTy, ElementSize));

        GepOffsetReg = MRI->createGenericVirtualRegister(OffsetTy);
        MIRBuilder.buildMul(GepOffsetReg, ElementSizeReg, IdxReg);
      } else
        GepOffsetReg = IdxReg;

      unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
      MIRBuilder.buildGEP(NewBaseReg, BaseReg, GepOffsetReg);
      BaseReg = NewBaseReg;
    }
  }

  // Emit whatever constant offset is still pending directly into the result.
  if (Offset != 0) {
    unsigned OffsetReg = getOrCreateVReg(*ConstantInt::get(OffsetIRTy, Offset));
    MIRBuilder.buildGEP(getOrCreateVReg(U), BaseReg, OffsetReg);
    return true;
  }

  // Nothing left to add: the result is just the current base.
  MIRBuilder.buildCopy(getOrCreateVReg(U), BaseReg);
  return true;
}

bool IRTranslator::translateMemfunc(const CallInst &CI,
                                    MachineIRBuilder &MIRBuilder,
                                    unsigned ID) {
  LLT SizeTy = getLLTForType(*CI.getArgOperand(2)->getType(), *DL);
  Type *DstTy = CI.getArgOperand(0)->getType();
  if (cast<PointerType>(DstTy)->getAddressSpace() != 0 ||
      SizeTy.getSizeInBits() != DL->getPointerSizeInBits(0))
    return false;

  SmallVector<CallLowering::ArgInfo, 8> Args;
  for (int i = 0; i < 3; ++i) {
    const auto &Arg = CI.getArgOperand(i);
    Args.emplace_back(getOrCreateVReg(*Arg), Arg->getType());
  }

  const char *Callee;
  switch (ID) {
  case Intrinsic::memmove:
  case Intrinsic::memcpy: {
    Type *SrcTy = CI.getArgOperand(1)->getType();
    if(cast<PointerType>(SrcTy)->getAddressSpace() != 0)
      return false;
    Callee = ID == Intrinsic::memcpy ? "memcpy" : "memmove";
    break;
  }
  case Intrinsic::memset:
    Callee = "memset";
    break;
  default:
    return false;
  }

  return CLI->lowerCall(MIRBuilder, CI.getCallingConv(),
                        MachineOperand::CreateES(Callee),
                        CallLowering::ArgInfo(0, CI.getType()), Args);
}

void IRTranslator::getStackGuard(unsigned DstReg,
                                 MachineIRBuilder &MIRBuilder) {
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  MRI->setRegClass(DstReg, TRI->getPointerRegClass(*MF));
  auto MIB = MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD);
  MIB.addDef(DstReg);

  auto &TLI = *MF->getSubtarget().getTargetLowering();
  Value *Global = TLI.getSDagStackGuard(*MF->getFunction().getParent());
  if (!Global)
    return;

  MachinePointerInfo MPInfo(Global);
  auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
               MachineMemOperand::MODereferenceable;
  MachineMemOperand *MemRef =
      MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8,
                               DL->getPointerABIAlignment(0));
  MIB.setMemRefs({MemRef});
}

bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
                                              MachineIRBuilder &MIRBuilder) {
  ArrayRef<unsigned> ResRegs = getOrCreateVRegs(CI);
  MIRBuilder.buildInstr(Op)
      .addDef(ResRegs[0])
      .addDef(ResRegs[1])
      .addUse(getOrCreateVReg(*CI.getOperand(0)))
      .addUse(getOrCreateVReg(*CI.getOperand(1)));

  return true;
}

/// Translate a call to one of the intrinsics this translator knows how to
/// lower directly.
///
/// \param CI the intrinsic call being translated.
/// \param ID the intrinsic identifier selected by the caller.
/// \param MIRBuilder builder used to emit the resulting generic instructions.
/// \returns true when \p ID was handled here (including intrinsics that are
/// deliberately dropped); false when \p ID has no case below, when a lifetime
/// marker is seen at an optimization level other than None, or when a helper
/// such as translateMemfunc reports failure.
bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
                                           MachineIRBuilder &MIRBuilder) {
  switch (ID) {
  default:
    break;
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end:
    // Stack coloring is not enabled in O0 (which we care about now) so we can
    // drop these. Make sure someone notices when we start compiling at higher
    // opts though.
    if (MF->getTarget().getOptLevel() != CodeGenOpt::None)
      return false;
    return true;
  case Intrinsic::dbg_declare: {
    const DbgDeclareInst &DI = cast<DbgDeclareInst>(CI);
    assert(DI.getVariable() && "Missing variable");

    // Without a usable address there is nothing to describe; the intrinsic is
    // dropped but still counts as handled.
    const Value *Address = DI.getAddress();
    if (!Address || isa<UndefValue>(Address)) {
      LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
      return true;
    }

    assert(DI.getVariable()->isValidLocationForIntrinsic(
               MIRBuilder.getDebugLoc()) &&
           "Expected inlined-at fields to agree");
    auto AI = dyn_cast<AllocaInst>(Address);
    if (AI && AI->isStaticAlloca()) {
      // Static allocas are tracked at the MF level, no need for DBG_VALUE
      // instructions (in fact, they get ignored if they *do* exist).
      MF->setVariableDbgInfo(DI.getVariable(), DI.getExpression(),
                             getOrCreateFrameIndex(*AI), DI.getDebugLoc());
    } else {
      // A dbg.declare describes the address of a source variable, so lower it
      // into an indirect DBG_VALUE.
      MIRBuilder.buildIndirectDbgValue(getOrCreateVReg(*Address),
                                       DI.getVariable(), DI.getExpression());
    }
    return true;
  }
  case Intrinsic::dbg_label: {
    const DbgLabelInst &DI = cast<DbgLabelInst>(CI);
    assert(DI.getLabel() && "Missing label");

    assert(DI.getLabel()->isValidLocationForIntrinsic(
               MIRBuilder.getDebugLoc()) &&
           "Expected inlined-at fields to agree");

    MIRBuilder.buildDbgLabel(DI.getLabel());
    return true;
  }
  case Intrinsic::vaend:
    // No target I know of cares about va_end. Certainly no in-tree target
    // does. Simplest intrinsic ever!
    return true;
  case Intrinsic::vastart: {
    auto &TLI = *MF->getSubtarget().getTargetLowering();
    Value *Ptr = CI.getArgOperand(0);
    // Size of the va_list object in bytes, as reported by the target.
    unsigned ListSize = TLI.getVaListSizeInBits(*DL) / 8;

    MIRBuilder.buildInstr(TargetOpcode::G_VASTART)
        .addUse(getOrCreateVReg(*Ptr))
        .addMemOperand(MF->getMachineMemOperand(
            MachinePointerInfo(Ptr), MachineMemOperand::MOStore, ListSize, 0));
    return true;
  }
  case Intrinsic::dbg_value: {
    // This form of DBG_VALUE is target-independent.
    const DbgValueInst &DI = cast<DbgValueInst>(CI);
    const Value *V = DI.getValue();
    assert(DI.getVariable()->isValidLocationForIntrinsic(
               MIRBuilder.getDebugLoc()) &&
           "Expected inlined-at fields to agree");
    if (!V) {
      // Currently the optimizer can produce this; insert an undef to
      // help debugging.  Probably the optimizer should not do this.
      MIRBuilder.buildIndirectDbgValue(0, DI.getVariable(), DI.getExpression());
    } else if (const auto *CI = dyn_cast<Constant>(V)) {
      MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression());
    } else {
      unsigned Reg = getOrCreateVReg(*V);
      // FIXME: This does not handle register-indirect values at offset 0. The
      // direct/indirect thing shouldn't really be handled by something as
      // implicit as reg+noreg vs reg+imm in the first place, but it seems
      // pretty baked in right now.
      MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression());
    }
    return true;
  }
  // Arithmetic-with-overflow intrinsics map one-to-one onto generic opcodes.
  case Intrinsic::uadd_with_overflow:
    return translateOverflowIntrinsic(CI, TargetOpcode::G_UADDO, MIRBuilder);
  case Intrinsic::sadd_with_overflow:
    return translateOverflowIntrinsic(CI, TargetOpcode::G_SADDO, MIRBuilder);
  case Intrinsic::usub_with_overflow:
    return translateOverflowIntrinsic(CI, TargetOpcode::G_USUBO, MIRBuilder);
  case Intrinsic::ssub_with_overflow:
    return translateOverflowIntrinsic(CI, TargetOpcode::G_SSUBO, MIRBuilder);
  case Intrinsic::umul_with_overflow:
    return translateOverflowIntrinsic(CI, TargetOpcode::G_UMULO, MIRBuilder);
  case Intrinsic::smul_with_overflow:
    return translateOverflowIntrinsic(CI, TargetOpcode::G_SMULO, MIRBuilder);
  // Floating-point math intrinsics: one def (the call's register) followed by
  // the call arguments as uses.
  case Intrinsic::pow:
    MIRBuilder.buildInstr(TargetOpcode::G_FPOW)
        .addDef(getOrCreateVReg(CI))
        .addUse(getOrCreateVReg(*CI.getArgOperand(0)))
        .addUse(getOrCreateVReg(*CI.getArgOperand(1)));
    return true;
  case Intrinsic::exp:
    MIRBuilder.buildInstr(TargetOpcode::G_FEXP)
        .addDef(getOrCreateVReg(CI))
        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
    return true;
  case Intrinsic::exp2:
    MIRBuilder.buildInstr(TargetOpcode::G_FEXP2)
        .addDef(getOrCreateVReg(CI))
        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
    return true;
  case Intrinsic::log:
    MIRBuilder.buildInstr(TargetOpcode::G_FLOG)
        .addDef(getOrCreateVReg(CI))
        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
    return true;
  case Intrinsic::log2:
    MIRBuilder.buildInstr(TargetOpcode::G_FLOG2)
        .addDef(getOrCreateVReg(CI))
        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
    return true;
  case Intrinsic::log10:
    MIRBuilder.buildInstr(TargetOpcode::G_FLOG10)
        .addDef(getOrCreateVReg(CI))
        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
    return true;
  case Intrinsic::fabs:
    MIRBuilder.buildInstr(TargetOpcode::G_FABS)
        .addDef(getOrCreateVReg(CI))
        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
    return true;
  case Intrinsic::trunc:
    MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_TRUNC)
        .addDef(getOrCreateVReg(CI))
        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
    return true;
  case Intrinsic::round:
    MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND)
        .addDef(getOrCreateVReg(CI))
        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
    return true;
  case Intrinsic::fma:
    MIRBuilder.buildInstr(TargetOpcode::G_FMA)
        .addDef(getOrCreateVReg(CI))
        .addUse(getOrCreateVReg(*CI.getArgOperand(0)))
        .addUse(getOrCreateVReg(*CI.getArgOperand(1)))
        .addUse(getOrCreateVReg(*CI.getArgOperand(2)));
    return true;
  case Intrinsic::fmuladd: {
    const TargetMachine &TM = MF->getTarget();
    const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
    unsigned Dst = getOrCreateVReg(CI);
    unsigned Op0 = getOrCreateVReg(*CI.getArgOperand(0));
    unsigned Op1 = getOrCreateVReg(*CI.getArgOperand(1));
    unsigned Op2 = getOrCreateVReg(*CI.getArgOperand(2));
    // Emit a fused G_FMA only when fusion is not Strict and the target says
    // FMA is faster for this type; otherwise emit a separate G_FMUL + G_FADD.
    if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
        TLI.isFMAFasterThanFMulAndFAdd(TLI.getValueType(*DL, CI.getType()))) {
      // TODO: Revisit this to see if we should move this part of the
      // lowering to the combiner.
      MIRBuilder.buildInstr(TargetOpcode::G_FMA, Dst, Op0, Op1, Op2);
    } else {
      LLT Ty = getLLTForType(*CI.getType(), *DL);
      auto FMul = MIRBuilder.buildInstr(TargetOpcode::G_FMUL, Ty, Op0, Op1);
      MIRBuilder.buildInstr(TargetOpcode::G_FADD, Dst, FMul, Op2);
    }
    return true;
  }
  case Intrinsic::memcpy:
  case Intrinsic::memmove:
  case Intrinsic::memset:
    return translateMemfunc(CI, MIRBuilder, ID);
  case Intrinsic::eh_typeid_for: {
    GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0));
    unsigned Reg = getOrCreateVReg(CI);
    unsigned TypeID = MF->getTypeIDFor(GV);
    MIRBuilder.buildConstant(Reg, TypeID);
    return true;
  }
  case Intrinsic::objectsize: {
    // If we don't know by now, we're never going to know.
    const ConstantInt *Min = cast<ConstantInt>(CI.getArgOperand(1));

    // Fold to all-ones when the second argument is zero, and to 0 otherwise.
    MIRBuilder.buildConstant(getOrCreateVReg(CI), Min->isZero() ? -1ULL : 0);
    return true;
  }
  case Intrinsic::is_constant:
    // If this wasn't constant-folded away by now, then it's not a
    // constant.
    MIRBuilder.buildConstant(getOrCreateVReg(CI), 0);
    return true;
  case Intrinsic::stackguard:
    getStackGuard(getOrCreateVReg(CI), MIRBuilder);
    return true;
  case Intrinsic::stackprotector: {
    // Load the guard value into a fresh register, then store it volatilely
    // into the stack slot named by the second argument.
    LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
    unsigned GuardVal = MRI->createGenericVirtualRegister(PtrTy);
    getStackGuard(GuardVal, MIRBuilder);

    // NOTE(review): the trailing 8 looks like a hard-coded alignment for the
    // store's memory operand -- confirm it suits every supported target.
    AllocaInst *Slot = cast<AllocaInst>(CI.getArgOperand(1));
    MIRBuilder.buildStore(
        GuardVal, getOrCreateVReg(*Slot),
        *MF->getMachineMemOperand(
            MachinePointerInfo::getFixedStack(*MF,
                                              getOrCreateFrameIndex(*Slot)),
            MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
            PtrTy.getSizeInBits() / 8, 8));
    return true;
  }
  case Intrinsic::cttz:
  case Intrinsic::ctlz: {
    // A non-zero second argument selects the *_ZERO_UNDEF form of the opcode.
    ConstantInt *Cst = cast<ConstantInt>(CI.getArgOperand(1));
    bool isTrailing = ID == Intrinsic::cttz;
    unsigned Opcode = isTrailing
                          ? Cst->isZero() ? TargetOpcode::G_CTTZ
                                          : TargetOpcode::G_CTTZ_ZERO_UNDEF
                          : Cst->isZero() ? TargetOpcode::G_CTLZ
                                          : TargetOpcode::G_CTLZ_ZERO_UNDEF;
    MIRBuilder.buildInstr(Opcode)
        .addDef(getOrCreateVReg(CI))
        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
    return true;
  }
  case Intrinsic::ctpop: {
    MIRBuilder.buildInstr(TargetOpcode::G_CTPOP)
        .addDef(getOrCreateVReg(CI))
        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
    return true;
  }
  case Intrinsic::invariant_start: {
    // NOTE(review): the undef is built into a fresh register that is not used
    // again in this function and is not the register mapped to CI -- confirm
    // that this is intentional.
    LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
    unsigned Undef = MRI->createGenericVirtualRegister(PtrTy);
    MIRBuilder.buildUndef(Undef);
    return true;
  }
  case Intrinsic::invariant_end:
    return true;
  }
  return false;
}

bool IRTranslator::translateInlineAsm(const CallInst &CI,
                                      MachineIRBuilder &MIRBuilder) {
  const InlineAsm &IA = cast<InlineAsm>(*CI.getCalledValue());
  if (!IA.getConstraintString().empty())
    return false;

  unsigned ExtraInfo = 0;
  if (IA.hasSideEffects())
    ExtraInfo |= InlineAsm::Extra_HasSideEffects;
  if (IA.getDialect() == InlineAsm::AD_Intel)
    ExtraInfo |= InlineAsm::Extra_AsmDialect;

  MIRBuilder.buildInstr(TargetOpcode::INLINEASM)
    .addExternalSymbol(IA.getAsmString().c_str())
    .addImm(ExtraInfo);

  return true;
}

unsigned IRTranslator::packRegs(const Value &V,
                                  MachineIRBuilder &MIRBuilder) {
  ArrayRef<unsigned> Regs = getOrCreateVRegs(V);
  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(V);
  LLT BigTy = getLLTForType(*V.getType(), *DL);

  if (Regs.size() == 1)
    return Regs[0];

  unsigned Dst = MRI->createGenericVirtualRegister(BigTy);
  MIRBuilder.buildUndef(Dst);
  for (unsigned i = 0; i < Regs.size(); ++i) {
    unsigned NewDst = MRI->createGenericVirtualRegister(BigTy);
    MIRBuilder.buildInsert(NewDst, Dst, Regs[i], Offsets[i]);
    Dst = NewDst;
  }
  return Dst;
}

void IRTranslator::unpackRegs(const Value &V, unsigned Src,
                                MachineIRBuilder &MIRBuilder) {
  ArrayRef<unsigned> Regs = getOrCreateVRegs(V);
  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(V);

  for (unsigned i = 0; i < Regs.size(); ++i)
    MIRBuilder.buildExtract(Regs[i], Src, Offsets[i]);
}

bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
  const CallInst &CI = cast<CallInst>(U);
  auto TII = MF->getTarget().getIntrinsicInfo();
  const Function *F = CI.getCalledFunction();

  // FIXME: support Windows dllimport function calls.
  if (F && F->hasDLLImportStorageClass())
    return false;

  if (CI.isInlineAsm())
    return translateInlineAsm(CI, MIRBuilder);

  Intrinsic::ID ID = Intrinsic::not_intrinsic;
  if (F && F->isIntrinsic()) {
    ID = F->getIntrinsicID();
    if (TII && ID == Intrinsic::not_intrinsic)
      ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));
  }

  bool IsSplitType = valueIsSplit(CI);
  if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) {
    unsigned Res = IsSplitType ? MRI->createGenericVirtualRegister(
                                     getLLTForType(*CI.getType(), *DL))
                               : getOrCreateVReg(CI);

    SmallVector<unsigned, 8> Args;
    for (auto &Arg: CI.arg_operands())
      Args.push_back(packRegs(*Arg, MIRBuilder));

    MF->getFrameInfo().setHasCalls(true);
    bool Success = CLI->lowerCall(MIRBuilder, &CI, Res, Args, [&]() {
      return getOrCreateVReg(*CI.getCalledValue());
    });

    if (IsSplitType)
      unpackRegs(CI, Res, MIRBuilder);
    return Success;
  }

  assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic");

  if (translateKnownIntrinsic(CI, ID, MIRBuilder))
    return true;

  unsigned Res = 0;
  if (!CI.getType()->isVoidTy()) {
    if (IsSplitType)
      Res =
          MRI->createGenericVirtualRegister(getLLTForType(*CI.getType(), *DL));
    else
      Res = getOrCreateVReg(CI);
  }
  MachineInstrBuilder MIB =
      MIRBuilder.buildIntrinsic(ID, Res, !CI.doesNotAccessMemory());

  for (auto &Arg : CI.arg_operands()) {
    // Some intrinsics take metadata parameters. Reject them.
    if (isa<MetadataAsValue>(Arg))
      return false;
    MIB.addUse(packRegs(*Arg, MIRBuilder));
  }

  if (IsSplitType)
    unpackRegs(CI, Res, MIRBuilder);

  // Add a MachineMemOperand if it is a target mem intrinsic.
  const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
  TargetLowering::IntrinsicInfo Info;
  // TODO: Add a GlobalISel version of getTgtMemIntrinsic.
  if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) {
    uint64_t Size = Info.memVT.getStoreSize();
    MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal),
                                               Info.flags, Size, Info.align));
  }

  return true;
}

bool IRTranslator::translateInvoke(const User &U,
                                   MachineIRBuilder &MIRBuilder) {
  const InvokeInst &I = cast<InvokeInst>(U);
  MCContext &Context = MF->getContext();

  const BasicBlock *ReturnBB = I.getSuccessor(0);
  const BasicBlock *EHPadBB = I.getSuccessor(1);

  const Value *Callee = I.getCalledValue();
  const Function *Fn = dyn_cast<Function>(Callee);
  if (isa<InlineAsm>(Callee))
    return false;

  // FIXME: support invoking patchpoint and statepoint intrinsics.
  if (Fn && Fn->isIntrinsic())
    return false;

  // FIXME: support whatever these are.
  if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
    return false;

  // FIXME: support Windows exception handling.
  if (!isa<LandingPadInst>(EHPadBB->front()))
    return false;

  // Emit the actual call, bracketed by EH_LABELs so that the MF knows about
  // the region covered by the try.
  MCSymbol *BeginSymbol = Context.createTempSymbol();
  MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);

  unsigned Res =
        MRI->createGenericVirtualRegister(getLLTForType(*I.getType(), *DL));
  SmallVector<unsigned, 8> Args;
  for (auto &Arg: I.arg_operands())
    Args.push_back(packRegs(*Arg, MIRBuilder));

  if (!CLI->lowerCall(MIRBuilder, &I, Res, Args,
                      [&]() { return getOrCreateVReg(*I.getCalledValue()); }))
    return false;

  unpackRegs(I, Res, MIRBuilder);

  MCSymbol *EndSymbol = Context.createTempSymbol();
  MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);

  // FIXME: track probabilities.
  MachineBasicBlock &EHPadMBB = getMBB(*EHPadBB),
                    &ReturnMBB = getMBB(*ReturnBB);
  MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol);
  MIRBuilder.getMBB().addSuccessor(&ReturnMBB);
  MIRBuilder.getMBB().addSuccessor(&EHPadMBB);
  MIRBuilder.buildBr(ReturnMBB);

  return true;
}

bool IRTranslator::translateLandingPad(const User &U,
                                       MachineIRBuilder &MIRBuilder) {
  const LandingPadInst &LP = cast<LandingPadInst>(U);

  MachineBasicBlock &MBB = MIRBuilder.getMBB();

  MBB.setIsEHPad();

  // If there aren't registers to copy the values into (e.g., during SjLj
  // exceptions), then don't bother.
  auto &TLI = *MF->getSubtarget().getTargetLowering();
  const Constant *PersonalityFn = MF->getFunction().getPersonalityFn();
  if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
      TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
    return true;

  // If landingpad's return type is token type, we don't create DAG nodes
  // for its exception pointer and selector value. The extraction of exception
  // pointer or selector value from token type landingpads is not currently
  // supported.
  if (LP.getType()->isTokenTy())
    return true;

  // Add a label to mark the beginning of the landing pad.  Deletion of the
  // landing pad can thus be detected via the MachineModuleInfo.
  MIRBuilder.buildInstr(TargetOpcode::EH_LABEL)
    .addSym(MF->addLandingPad(&MBB));

  LLT Ty = getLLTForType(*LP.getType(), *DL);
  unsigned Undef = MRI->createGenericVirtualRegister(Ty);
  MIRBuilder.buildUndef(Undef);

  SmallVector<LLT, 2> Tys;
  for (Type *Ty : cast<StructType>(LP.getType())->elements())
    Tys.push_back(getLLTForType(*Ty, *DL));
  assert(Tys.size() == 2 && "Only two-valued landingpads are supported");

  // Mark exception register as live in.
  unsigned ExceptionReg = TLI.getExceptionPointerRegister(PersonalityFn);
  if (!ExceptionReg)
    return false;

  MBB.addLiveIn(ExceptionReg);
  ArrayRef<unsigned> ResRegs = getOrCreateVRegs(LP);
  MIRBuilder.buildCopy(ResRegs[0], ExceptionReg);

  unsigned SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn);
  if (!SelectorReg)
    return false;

  MBB.addLiveIn(SelectorReg);
  unsigned PtrVReg = MRI->createGenericVirtualRegister(Tys[0]);
  MIRBuilder.buildCopy(PtrVReg, SelectorReg);
  MIRBuilder.buildCast(ResRegs[1], PtrVReg);

  return true;
}

bool IRTranslator::translateAlloca(const User &U,
                                   MachineIRBuilder &MIRBuilder) {
  auto &AI = cast<AllocaInst>(U);

  if (AI.isSwiftError())
    return false;

  if (AI.isStaticAlloca()) {
    unsigned Res = getOrCreateVReg(AI);
    int FI = getOrCreateFrameIndex(AI);
    MIRBuilder.buildFrameIndex(Res, FI);
    return true;
  }

  // FIXME: support stack probing for Windows.
  if (MF->getTarget().getTargetTriple().isOSWindows())
    return false;

  // Now we're in the harder dynamic case.
  Type *Ty = AI.getAllocatedType();
  unsigned Align =
      std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI.getAlignment());

  unsigned NumElts = getOrCreateVReg(*AI.getArraySize());

  Type *IntPtrIRTy = DL->getIntPtrType(AI.getType());
  LLT IntPtrTy = getLLTForType(*IntPtrIRTy, *DL);
  if (MRI->getType(NumElts) != IntPtrTy) {
    unsigned ExtElts = MRI->createGenericVirtualRegister(IntPtrTy);
    MIRBuilder.buildZExtOrTrunc(ExtElts, NumElts);
    NumElts = ExtElts;
  }

  unsigned AllocSize = MRI->createGenericVirtualRegister(IntPtrTy);
  unsigned TySize =
      getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, -DL->getTypeAllocSize(Ty)));
  MIRBuilder.buildMul(AllocSize, NumElts, TySize);

  LLT PtrTy = getLLTForType(*AI.getType(), *DL);
  auto &TLI = *MF->getSubtarget().getTargetLowering();
  unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();

  unsigned SPTmp = MRI->createGenericVirtualRegister(PtrTy);
  MIRBuilder.buildCopy(SPTmp, SPReg);

  unsigned AllocTmp = MRI->createGenericVirtualRegister(PtrTy);
  MIRBuilder.buildGEP(AllocTmp, SPTmp, AllocSize);

  // Handle alignment. We have to realign if the allocation granule was smaller
  // than stack alignment, or the specific alloca requires more than stack
  // alignment.
  unsigned StackAlign =
      MF->getSubtarget().getFrameLowering()->getStackAlignment();
  Align = std::max(Align, StackAlign);
  if (Align > StackAlign || DL->getTypeAllocSize(Ty) % StackAlign != 0) {
    // Round the size of the allocation up to the stack alignment size
    // by add SA-1 to the size. This doesn't overflow because we're computing
    // an address inside an alloca.
    unsigned AlignedAlloc = MRI->createGenericVirtualRegister(PtrTy);
    MIRBuilder.buildPtrMask(AlignedAlloc, AllocTmp, Log2_32(Align));
    AllocTmp = AlignedAlloc;
  }

  MIRBuilder.buildCopy(SPReg, AllocTmp);
  MIRBuilder.buildCopy(getOrCreateVReg(AI), AllocTmp);

  MF->getFrameInfo().CreateVariableSizedObject(Align ? Align : 1, &AI);
  assert(MF->getFrameInfo().hasVarSizedObjects());
  return true;
}

/// Translate a va_arg instruction into G_VAARG: one def for the result, one
/// use for the va_list operand and an immediate holding the ABI alignment of
/// the result type. Always returns true.
bool IRTranslator::translateVAArg(const User &U, MachineIRBuilder &MIRBuilder) {
  // FIXME: We may need more info about the type. Because of how LLT works,
  // we're completely discarding the i64/double distinction here (amongst
  // others). Fortunately the ABIs I know of where that matters don't use va_arg
  // anyway but that's not guaranteed.
  auto VAArg = MIRBuilder.buildInstr(TargetOpcode::G_VAARG);
  VAArg.addDef(getOrCreateVReg(U));
  VAArg.addUse(getOrCreateVReg(*U.getOperand(0)));
  VAArg.addImm(DL->getABITypeAlignment(U.getType()));
  return true;
}

/// Translate an insertelement instruction. For <1 x Ty> results the inserted
/// scalar itself stands in for the vector; otherwise a generic vector-element
/// insert is built. Always returns true.
bool IRTranslator::translateInsertElement(const User &U,
                                          MachineIRBuilder &MIRBuilder) {
  // If it is a <1 x Ty> vector, use the scalar as it is
  // not a legal vector type in LLT.
  const bool IsSingleElement = U.getType()->getVectorNumElements() == 1;
  if (IsSingleElement) {
    unsigned Scalar = getOrCreateVReg(*U.getOperand(1));
    auto &ResRegs = *VMap.getVRegs(U);
    if (!ResRegs.empty()) {
      // A register already exists for the result: copy the scalar into it.
      MIRBuilder.buildCopy(ResRegs[0], Scalar);
      return true;
    }
    // No register yet: make the scalar's register the result, at offset 0.
    ResRegs.push_back(Scalar);
    VMap.getOffsets(U)->push_back(0);
    return true;
  }

  unsigned ResReg = getOrCreateVReg(U);
  unsigned VecReg = getOrCreateVReg(*U.getOperand(0));
  unsigned EltReg = getOrCreateVReg(*U.getOperand(1));
  unsigned IdxReg = getOrCreateVReg(*U.getOperand(2));
  MIRBuilder.buildInsertVectorElement(ResReg, VecReg, EltReg, IdxReg);
  return true;
}

bool IRTranslator::translateExtractElement(const User &U,
                                           MachineIRBuilder &MIRBuilder) {
  // If it is a <1 x Ty> vector, use the scalar as it is
  // not a legal vector type in LLT.
  if (U.getOperand(0)->getType()->getVectorNumElements() == 1) {
    unsigned Elt = getOrCreateVReg(*U.getOperand(0));
    auto &Regs = *VMap.getVRegs(U);
    if (Regs.empty()) {
      Regs.push_back(Elt);
      VMap.getOffsets(U)->push_back(0);
    } else {
      MIRBuilder.buildCopy(Regs[0], Elt);
    }
    return true;
  }
  unsigned Res = getOrCreateVReg(U);
  unsigned Val = getOrCreateVReg(*U.getOperand(0));
  const auto &TLI = *MF->getSubtarget().getTargetLowering();
  unsigned PreferredVecIdxWidth = TLI.getVectorIdxTy(*DL).getSizeInBits();
  unsigned Idx = 0;
  if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(1))) {
    if (CI->getBitWidth() != PreferredVecIdxWidth) {
      APInt NewIdx = CI->getValue().sextOrTrunc(PreferredVecIdxWidth);
      auto *NewIdxCI = ConstantInt::get(CI->getContext(), NewIdx);
      Idx = getOrCreateVReg(*NewIdxCI);
    }
  }
  if (!Idx)
    Idx = getOrCreateVReg(*U.getOperand(1));
  if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) {
    const LLT &VecIdxTy = LLT::scalar(PreferredVecIdxWidth);
    Idx = MIRBuilder.buildSExtOrTrunc(VecIdxTy, Idx)->getOperand(0).getReg();
  }
  MIRBuilder.buildExtractVectorElement(Res, Val, Idx);
  return true;
}

/// Translate a shufflevector instruction into G_SHUFFLE_VECTOR, defining the
/// result register and using the registers of all three operands in order.
/// Always returns true.
bool IRTranslator::translateShuffleVector(const User &U,
                                          MachineIRBuilder &MIRBuilder) {
  auto Shuffle = MIRBuilder.buildInstr(TargetOpcode::G_SHUFFLE_VECTOR);
  Shuffle.addDef(getOrCreateVReg(U));
  for (unsigned OpIdx = 0; OpIdx != 3; ++OpIdx)
    Shuffle.addUse(getOrCreateVReg(*U.getOperand(OpIdx)));
  return true;
}

bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) {
  const PHINode &PI = cast<PHINode>(U);

  SmallVector<MachineInstr *, 4> Insts;
  for (auto Reg : getOrCreateVRegs(PI)) {
    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, Reg);
    Insts.push_back(MIB.getInstr());
  }

  PendingPHIs.emplace_back(&PI, std::move(Insts));
  return true;
}

bool IRTranslator::translateAtomicCmpXchg(const User &U,
                                          MachineIRBuilder &MIRBuilder) {
  const AtomicCmpXchgInst &I = cast<AtomicCmpXchgInst>(U);

  if (I.isWeak())
    return false;

  auto Flags = I.isVolatile() ? MachineMemOperand::MOVolatile
                              : MachineMemOperand::MONone;
  Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;

  Type *ResType = I.getType();
  Type *ValType = ResType->Type::getStructElementType(0);

  auto Res = getOrCreateVRegs(I);
  unsigned OldValRes = Res[0];
  unsigned SuccessRes = Res[1];
  unsigned Addr = getOrCreateVReg(*I.getPointerOperand());
  unsigned Cmp = getOrCreateVReg(*I.getCompareOperand());
  unsigned NewVal = getOrCreateVReg(*I.getNewValOperand());

  MIRBuilder.buildAtomicCmpXchgWithSuccess(
      OldValRes, SuccessRes, Addr, Cmp, NewVal,
      *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
                                Flags, DL->getTypeStoreSize(ValType),
                                getMemOpAlignment(I), AAMDNodes(), nullptr,
                                I.getSyncScopeID(), I.getSuccessOrdering(),
                                I.getFailureOrdering()));
  return true;
}

bool IRTranslator::translateAtomicRMW(const User &U,
                                      MachineIRBuilder &MIRBuilder) {
  const AtomicRMWInst &I = cast<AtomicRMWInst>(U);

  auto Flags = I.isVolatile() ? MachineMemOperand::MOVolatile
                              : MachineMemOperand::MONone;
  Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;

  Type *ResType = I.getType();

  unsigned Res = getOrCreateVReg(I);
  unsigned Addr = getOrCreateVReg(*I.getPointerOperand());
  unsigned Val = getOrCreateVReg(*I.getValOperand());

  unsigned Opcode = 0;
  switch (I.getOperation()) {
  default:
    llvm_unreachable("Unknown atomicrmw op");
    return false;
  case AtomicRMWInst::Xchg:
    Opcode = TargetOpcode::G_ATOMICRMW_XCHG;
    break;
  case AtomicRMWInst::Add:
    Opcode = TargetOpcode::G_ATOMICRMW_ADD;
    break;
  case AtomicRMWInst::Sub:
    Opcode = TargetOpcode::G_ATOMICRMW_SUB;
    break;
  case AtomicRMWInst::And:
    Opcode = TargetOpcode::G_ATOMICRMW_AND;
    break;
  case AtomicRMWInst::Nand:
    Opcode = TargetOpcode::G_ATOMICRMW_NAND;
    break;
  case AtomicRMWInst::Or:
    Opcode = TargetOpcode::G_ATOMICRMW_OR;
    break;
  case AtomicRMWInst::Xor:
    Opcode = TargetOpcode::G_ATOMICRMW_XOR;
    break;
  case AtomicRMWInst::Max:
    Opcode = TargetOpcode::G_ATOMICRMW_MAX;
    break;
  case AtomicRMWInst::Min:
    Opcode = TargetOpcode::G_ATOMICRMW_MIN;
    break;
  case AtomicRMWInst::UMax:
    Opcode = TargetOpcode::G_ATOMICRMW_UMAX;
    break;
  case AtomicRMWInst::UMin:
    Opcode = TargetOpcode::G_ATOMICRMW_UMIN;
    break;
  }

  MIRBuilder.buildAtomicRMW(
      Opcode, Res, Addr, Val,
      *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
                                Flags, DL->getTypeStoreSize(ResType),
                                getMemOpAlignment(I), AAMDNodes(), nullptr,
                                I.getSyncScopeID(), I.getOrdering()));
  return true;
}

void IRTranslator::finishPendingPhis() {
#ifndef NDEBUG
  DILocationVerifier Verifier(*MF);
#endif // ifndef NDEBUG
  for (auto &Phi : PendingPHIs) {
    const PHINode *PI = Phi.first;
    ArrayRef<MachineInstr *> ComponentPHIs = Phi.second;
    EntryBuilder.setDebugLoc(PI->getDebugLoc());
#ifndef NDEBUG
    Verifier.setCurrentInst(PI);
#endif // ifndef NDEBUG

    // All MachineBasicBlocks exist, add them to the PHI. We assume IRTranslator
    // won't create extra control flow here, otherwise we need to find the
    // dominating predecessor here (or perhaps force the weirder IRTranslators
    // to provide a simple boundary).
    SmallSet<const BasicBlock *, 4> HandledPreds;

    for (unsigned i = 0; i < PI->getNumIncomingValues(); ++i) {
      auto IRPred = PI->getIncomingBlock(i);
      if (HandledPreds.count(IRPred))
        continue;

      HandledPreds.insert(IRPred);
      ArrayRef<unsigned> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i));
      for (auto Pred : getMachinePredBBs({IRPred, PI->getParent()})) {
        assert(Pred->isSuccessor(ComponentPHIs[0]->getParent()) &&
               "incorrect CFG at MachineBasicBlock level");
        for (unsigned j = 0; j < ValRegs.size(); ++j) {
          MachineInstrBuilder MIB(*MF, ComponentPHIs[j]);
          MIB.addUse(ValRegs[j]);
          MIB.addMBB(Pred);
        }
      }
    }
  }
}

bool IRTranslator::valueIsSplit(const Value &V,
                                SmallVectorImpl<uint64_t> *Offsets) {
  SmallVector<LLT, 4> SplitTys;
  if (Offsets && !Offsets->empty())
    Offsets->clear();
  computeValueLLTs(*DL, *V.getType(), SplitTys, Offsets);
  return SplitTys.size() > 1;
}

/// Translate a single IR instruction at the current insertion point.
///
/// Both builders take over the instruction's debug location, then the opcode
/// is dispatched to the matching translate<Opcode> member; the case labels are
/// generated from llvm/IR/Instruction.def through HANDLE_INST.
///
/// \returns the result of the selected translate<Opcode> member, or false for
/// an opcode that has no case.
bool IRTranslator::translate(const Instruction &Inst) {
  CurBuilder.setDebugLoc(Inst.getDebugLoc());
  EntryBuilder.setDebugLoc(Inst.getDebugLoc());
  switch(Inst.getOpcode()) {
#define HANDLE_INST(NUM, OPCODE, CLASS) \
    case Instruction::OPCODE: return translate##OPCODE(Inst, CurBuilder);
#include "llvm/IR/Instruction.def"
  default:
    return false;
  }
}

/// Materialize the constant \p C into \p Reg using EntryBuilder.
///
/// Handles integer and floating-point constants, undef, null pointers, global
/// values, vector zero-initializers, constant data vectors, constant
/// expressions (dispatched per opcode), constant vectors and block addresses.
/// Single-element vectors are translated as their only element.
///
/// \returns false for non-vector zero-initializers, for constant expressions
/// whose opcode has no case or whose translation fails, and for any other
/// kind of constant; true otherwise.
bool IRTranslator::translate(const Constant &C, unsigned Reg) {
  if (auto CI = dyn_cast<ConstantInt>(&C)) {
    EntryBuilder.buildConstant(Reg, *CI);
    return true;
  }

  if (auto CF = dyn_cast<ConstantFP>(&C)) {
    EntryBuilder.buildFConstant(Reg, *CF);
    return true;
  }

  if (isa<UndefValue>(C)) {
    EntryBuilder.buildUndef(Reg);
    return true;
  }

  if (isa<ConstantPointerNull>(C)) {
    // As we are trying to build a constant val of 0 into a pointer,
    // insert a cast to make them correct with respect to types.
    unsigned NullSize = DL->getTypeSizeInBits(C.getType());
    auto *ZeroTy = Type::getIntNTy(C.getContext(), NullSize);
    auto *ZeroVal = ConstantInt::get(ZeroTy, 0);
    unsigned ZeroReg = getOrCreateVReg(*ZeroVal);
    EntryBuilder.buildCast(Reg, ZeroReg);
    return true;
  }

  if (auto GV = dyn_cast<GlobalValue>(&C)) {
    EntryBuilder.buildGlobalValue(Reg, GV);
    return true;
  }

  if (auto CAZ = dyn_cast<ConstantAggregateZero>(&C)) {
    if (!CAZ->getType()->isVectorTy())
      return false;
    // Return the scalar if it is a <1 x Ty> vector.
    if (CAZ->getNumElements() == 1)
      return translate(*CAZ->getElementValue(0u), Reg);
    SmallVector<unsigned, 4> EltRegs;
    for (unsigned Idx = 0, NumElts = CAZ->getNumElements(); Idx != NumElts;
         ++Idx)
      EltRegs.push_back(getOrCreateVReg(*CAZ->getElementValue(Idx)));
    EntryBuilder.buildMerge(Reg, EltRegs);
    return true;
  }

  if (auto CDV = dyn_cast<ConstantDataVector>(&C)) {
    // Return the scalar if it is a <1 x Ty> vector.
    if (CDV->getNumElements() == 1)
      return translate(*CDV->getElementAsConstant(0), Reg);
    SmallVector<unsigned, 4> EltRegs;
    for (unsigned Idx = 0, NumElts = CDV->getNumElements(); Idx != NumElts;
         ++Idx)
      EltRegs.push_back(getOrCreateVReg(*CDV->getElementAsConstant(Idx)));
    EntryBuilder.buildMerge(Reg, EltRegs);
    return true;
  }

  if (auto CE = dyn_cast<ConstantExpr>(&C)) {
    // Constant expressions reuse the per-opcode instruction translators, but
    // emit through EntryBuilder.
    switch(CE->getOpcode()) {
#define HANDLE_INST(NUM, OPCODE, CLASS)                         \
      case Instruction::OPCODE: return translate##OPCODE(*CE, EntryBuilder);
#include "llvm/IR/Instruction.def"
    default:
      return false;
    }
  }

  if (auto CV = dyn_cast<ConstantVector>(&C)) {
    if (CV->getNumOperands() == 1)
      return translate(*CV->getOperand(0), Reg);
    SmallVector<unsigned, 4> EltRegs;
    for (unsigned Idx = 0, NumOps = CV->getNumOperands(); Idx != NumOps; ++Idx)
      EltRegs.push_back(getOrCreateVReg(*CV->getOperand(Idx)));
    EntryBuilder.buildMerge(Reg, EltRegs);
    return true;
  }

  if (auto *BA = dyn_cast<BlockAddress>(&C)) {
    EntryBuilder.buildBlockAddress(Reg, BA);
    return true;
  }

  return false;
}

/// Drop every piece of per-function state accumulated while translating, so
/// that nothing belonging to the function just processed is kept alive.
void IRTranslator::finalizeFunction() {
  // Replace both builders with default-constructed ones. The DebugLoc stored
  // inside a MachineIRBuilder can outlive the DILocation it refers to;
  // resetting the builders here avoids touching freed memory on the next
  // runOnMachineFunction() and avoids destroying the location twice (once in
  // ~IRTranslator() and once in ~LLVMContext()).
  CurBuilder = MachineIRBuilder();
  EntryBuilder = MachineIRBuilder();

  // Give back the memory held by the bookkeeping containers that were filled
  // in during translation.
  MachinePreds.clear();
  FrameIndices.clear();
  VMap.reset();
  PendingPHIs.clear();
}

/// Translate the LLVM-IR function attached to \p CurMF into generic machine
/// instructions.
///
/// The translation creates a temporary entry block for argument lowering and
/// constants, creates one MachineBasicBlock per IR basic block, lowers the
/// formal arguments, translates every instruction in reverse post-order and
/// finally folds the temporary entry block into the real one.
///
/// Any failure is reported through reportTranslationError() and ends the
/// translation immediately. Per-function state is released through
/// finalizeFunction() on every exit path taken after the initial setup.
///
/// \returns always false.
bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
  MF = &CurMF;
  const Function &F = MF->getFunction();
  if (F.empty())
    return false;
  CLI = MF->getSubtarget().getCallLowering();
  CurBuilder.setMF(*MF);
  EntryBuilder.setMF(*MF);
  MRI = &MF->getRegInfo();
  DL = &F.getParent()->getDataLayout();
  TPC = &getAnalysis<TargetPassConfig>();
  ORE = llvm::make_unique<OptimizationRemarkEmitter>(&F);

  assert(PendingPHIs.empty() && "stale PHIs");

  // Release the per-function state when we return, whether we succeeded or not.
  // This is set up before the first failure exit so that every early return
  // below goes through finalizeFunction().
  auto FinalizeOnReturn = make_scope_exit([this]() { finalizeFunction(); });

  if (!DL->isLittleEndian()) {
    // Currently we don't properly handle big endian code.
    OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                               F.getSubprogram(), &F.getEntryBlock());
    R << "unable to translate in big endian mode";
    reportTranslationError(*MF, *TPC, *ORE, R);
    // Like every other failure below, stop here instead of translating a
    // function we have just reported as untranslatable.
    return false;
  }

  // Setup a separate basic-block for the arguments and constants
  MachineBasicBlock *EntryBB = MF->CreateMachineBasicBlock();
  MF->push_back(EntryBB);
  EntryBuilder.setMBB(*EntryBB);

  // Create all blocks, in IR order, to preserve the layout.
  for (const BasicBlock &BB: F) {
    auto *&MBB = BBToMBB[&BB];

    MBB = MF->CreateMachineBasicBlock(&BB);
    MF->push_back(MBB);

    if (BB.hasAddressTaken())
      MBB->setHasAddressTaken();
  }

  // Make our arguments/constants entry block fallthrough to the IR entry block.
  EntryBB->addSuccessor(&getMBB(F.front()));

  // Zero sized arguments get no virtual register. The same predicate is used
  // both when creating the registers and when binding them to IR arguments so
  // the two walks cannot get out of step.
  auto IsZeroSizedArg = [this](const Argument &Arg) {
    return DL->getTypeStoreSize(Arg.getType()) == 0;
  };

  // Lower the actual args into this basic block.
  SmallVector<unsigned, 8> VRegArgs;
  for (const Argument &Arg: F.args()) {
    if (IsZeroSizedArg(Arg))
      continue; // Don't handle zero sized types.
    VRegArgs.push_back(
        MRI->createGenericVirtualRegister(getLLTForType(*Arg.getType(), *DL)));
  }

  // We don't currently support translating swifterror or swiftself functions.
  for (auto &Arg : F.args()) {
    if (Arg.hasSwiftErrorAttr() || Arg.hasSwiftSelfAttr()) {
      OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                                 F.getSubprogram(), &F.getEntryBlock());
      R << "unable to lower arguments due to swifterror/swiftself: "
        << ore::NV("Prototype", F.getType());
      reportTranslationError(*MF, *TPC, *ORE, R);
      return false;
    }
  }

  if (!CLI->lowerFormalArguments(EntryBuilder, F, VRegArgs)) {
    OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                               F.getSubprogram(), &F.getEntryBlock());
    R << "unable to lower arguments: " << ore::NV("Prototype", F.getType());
    reportTranslationError(*MF, *TPC, *ORE, R);
    return false;
  }

  // Bind each lowered virtual register to the IR argument it was created for.
  // Zero sized arguments were skipped when VRegArgs was built, so they have to
  // be skipped here too; otherwise every register following such an argument
  // would be attached to the wrong IR value.
  auto VRegIt = VRegArgs.begin();
  for (const Argument &Arg : F.args()) {
    if (IsZeroSizedArg(Arg))
      continue;
    assert(VRegIt != VRegArgs.end() && "Missing vreg for a sized argument");
    unsigned VArg = *VRegIt++;
    // If the argument is an unsplit scalar then don't use unpackRegs to avoid
    // creating redundant copies.
    if (!valueIsSplit(Arg, VMap.getOffsets(Arg))) {
      auto &VRegs = *VMap.getVRegs(cast<Value>(Arg));
      assert(VRegs.empty() && "VRegs already populated?");
      VRegs.push_back(VArg);
    } else {
      unpackRegs(Arg, VArg, EntryBuilder);
    }
  }
  assert(VRegIt == VRegArgs.end() && "Unused argument vregs left over");

  // Need to visit defs before uses when translating instructions.
  {
    ReversePostOrderTraversal<const Function *> RPOT(&F);
#ifndef NDEBUG
    DILocationVerifier Verifier(*MF);
#endif // ifndef NDEBUG
    for (const BasicBlock *BB : RPOT) {
      MachineBasicBlock &MBB = getMBB(*BB);
      // Set the insertion point of all the following translations to
      // the end of this basic block.
      CurBuilder.setMBB(MBB);

      for (const Instruction &Inst : *BB) {
#ifndef NDEBUG
        Verifier.setCurrentInst(&Inst);
#endif // ifndef NDEBUG
        if (translate(Inst))
          continue;

        OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                                   Inst.getDebugLoc(), BB);
        R << "unable to translate instruction: " << ore::NV("Opcode", &Inst);

        // Only pay for printing the instruction when extra analysis was
        // requested for this pass.
        if (ORE->allowExtraAnalysis("gisel-irtranslator")) {
          std::string InstStrStorage;
          raw_string_ostream InstStr(InstStrStorage);
          InstStr << Inst;

          R << ": '" << InstStr.str() << "'";
        }

        reportTranslationError(*MF, *TPC, *ORE, R);
        return false;
      }
    }
  }

  finishPendingPhis();

  // Merge the argument lowering and constants block with its single
  // successor, the LLVM-IR entry block.  We want the basic block to
  // be maximal.
  assert(EntryBB->succ_size() == 1 &&
         "Custom BB used for lowering should have only one successor");
  // Get the successor of the current entry block.
  MachineBasicBlock &NewEntryBB = **EntryBB->succ_begin();
  assert(NewEntryBB.pred_size() == 1 &&
         "LLVM-IR entry block has a predecessor!?");
  // Move all the instructions from the current entry block to the
  // new entry block.
  NewEntryBB.splice(NewEntryBB.begin(), EntryBB, EntryBB->begin(),
                    EntryBB->end());

  // Update the live-in information for the new entry block.
  for (const MachineBasicBlock::RegisterMaskPair &LiveIn : EntryBB->liveins())
    NewEntryBB.addLiveIn(LiveIn);
  NewEntryBB.sortUniqueLiveIns();

  // Get rid of the now empty basic block.
  EntryBB->removeSuccessor(&NewEntryBB);
  MF->remove(EntryBB);
  MF->DeleteMachineBasicBlock(EntryBB);

  assert(&MF->front() == &NewEntryBB &&
         "New entry wasn't next in the list of basic block!");

  // Initialize stack protector information.
  StackProtector &SP = getAnalysis<StackProtector>();
  SP.copyToMachineFrameInfo(MF->getFrameInfo());

  return false;
}
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								//===- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator ---*- C++ -*-==//
-												[GlobalISel] Add the proper cmake plumbing.

This patch adds the necessary plumbing to cmake to build the sources related to
GlobalISel.

To build the sources related to GlobalISel, we need to add -DBUILD_GLOBAL_ISEL=ON.
By default, this is OFF, thus GlobalISel sources will not impact people that do
not explicitly opt-in.

Differential Revision: http://reviews.llvm.org/D15983

llvm-svn: 258344

											
										
										
											2016-01-21 04:58:56 +08:00
+								//
 								//                     The LLVM Compiler Infrastructure
 								//
 								// This file is distributed under the University of Illinois Open Source
 								// License. See LICENSE.TXT for details.
 								//
 								//===----------------------------------------------------------------------===//
 								/// \file
 								/// This file implements the IRTranslator class.
 								//===----------------------------------------------------------------------===//
 								#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
-												[GlobalISel][IRTranslator] Use RPO traversal when visiting blocks to translate.

Previously we were just visiting the blocks in the function in IR order, which
is rather arbitrary. Therefore we wouldn't always visit defs before uses, but
the translation code relies on this assumption in some places.

Only codegen change seen in tests is an elision of a redundant copy.

Fixes PR38396

llvm-svn: 338476

											
										
										
											2018-08-01 10:17:42 +08:00
+								#include "llvm/ADT/PostOrderIterator.h"
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								#include "llvm/ADT/STLExtras.h"
-												[GlobalISel] Finalize translated function on scope exit. NFC.

This is the compromise between having a per-function IRTranslator
and manually managing the per-function state.

llvm-svn: 296046

											
										
										
											2017-02-24 07:57:28 +08:00
+								#include "llvm/ADT/ScopeExit.h"
-												[GlobalISel] track predecessor mapping during switch lowering.

Correctly populating Machine PHIs relies on knowing exactly how the IR level
CFG was lowered to MachineIR. This needs to be tracked by any translation
phases that meddle (currently only SwitchInst handling).

This reapplies r291973 which was reverted because of testing failures. Fixes:

 + Don't return an ArrayRef to a local temporary.
 + Incorporate Kristof's suggested comment improvements.

llvm-svn: 292278

											
										
										
											2017-01-18 06:13:50 +08:00
+								#include "llvm/ADT/SmallSet.h"
-												[GlobalISel] Add the necessary plumbing to lower formal arguments.

llvm-svn: 260579

											
										
										
											2016-02-12 03:59:41 +08:00
+								#include "llvm/ADT/SmallVector.h"
-												Rename OptimizationDiagnosticInfo.* to OptimizationRemarkEmitter.*

Sync it up with the name of the class actually defined here.  This has been
bothering me for a while...

llvm-svn: 315249

											
										
										
											2017-10-10 07:19:02 +08:00
+								#include "llvm/Analysis/OptimizationRemarkEmitter.h"
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
+								#include "llvm/CodeGen/Analysis.h"
-												Sort the remaining #include lines in include/... and lib/....

I did this a long time ago with a janky python script, but now
clang-format has built-in support for this. I fed clang-format every
line with a #include and let it re-sort things according to the precise
LLVM rules for include ordering baked into clang-format these days.

I've reverted a number of files where the results of sorting includes
isn't healthy. Either places where we have legacy code relying on
particular include ordering (where possible, I'll fix these separately)
or where we have particular formatting around #include lines that
I didn't want to disturb in this patch.

This patch is *entirely* mechanical. If you get merge conflicts or
anything, just ignore the changes in this patch and run clang-format
over your #include lines in the files.

Sorry for any noise here, but it is important to keep these things
stable. I was seeing an increasing number of patches with irrelevant
re-ordering of #include lines because clang-format was used. This patch
at least isolates that churn, makes it easy to skip when resolving
conflicts, and gets us to a clean baseline (again).

llvm-svn: 304787

											
										
										
											2017-06-06 19:49:48 +08:00
+								#include "llvm/CodeGen/GlobalISel/CallLowering.h"
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								#include "llvm/CodeGen/LowLevelType.h"
 								#include "llvm/CodeGen/MachineBasicBlock.h"
-												GlobalISel: implement alloca instruction

llvm-svn: 276433

											
										
										
											2016-07-23 00:59:52 +08:00
+								#include "llvm/CodeGen/MachineFrameInfo.h"
-												Sort the remaining #include lines in include/... and lib/....

I did this a long time ago with a janky python script, but now
clang-format has built-in support for this. I fed clang-format every
line with a #include and let it re-sort things according to the precise
LLVM rules for include ordering baked into clang-format these days.

I've reverted a number of files where the results of sorting includes
isn't healthy. Either places where we have legacy code relying on
particular include ordering (where possible, I'll fix these separately)
or where we have particular formatting around #include lines that
I didn't want to disturb in this patch.

This patch is *entirely* mechanical. If you get merge conflicts or
anything, just ignore the changes in this patch and run clang-format
over your #include lines in the files.

Sorry for any noise here, but it is important to keep these things
stable. I was seeing an increasing number of patches with irrelevant
re-ordering of #include lines because clang-format was used. This patch
at least isolates that churn, makes it easy to skip when resolving
conflicts, and gets us to a clean baseline (again).

llvm-svn: 304787

											
										
										
											2017-06-06 19:49:48 +08:00
+								#include "llvm/CodeGen/MachineFunction.h"
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								#include "llvm/CodeGen/MachineInstrBuilder.h"
 								#include "llvm/CodeGen/MachineMemOperand.h"
 								#include "llvm/CodeGen/MachineOperand.h"
-												[GlobalISel][IRTranslator] Teach the pass how to translate Add instructions.

llvm-svn: 260549

											
										
										
											2016-02-12 01:51:31 +08:00
+								#include "llvm/CodeGen/MachineRegisterInfo.h"
-												CodeGen: Remove pipeline dependencies on StackProtector; NFC

This re-applies r336929 with a fix to accommodate the Mips target
scheduling multiple SelectionDAG instances into the pass pipeline.

PrologEpilogInserter and StackColoring depend on the StackProtector analysis
being alive from the point it is run until PEI, which requires that they are all
scheduled in the same FunctionPassManager. Inserting a (machine) ModulePass
between StackProtector and PEI results in these passes being in separate
FunctionPassManagers and the StackProtector is not available for PEI.

PEI and StackColoring don't use much information from the StackProtector pass,
so transferring the required information to MachineFrameInfo is cleaner than
keeping the StackProtector pass around. This commit moves the SSP layout
information to MFI instead of keeping it in the pass.

This patch set (D37580, D37581, D37582, D37583, D37584, D37585, D37586, D37587)
is a first draft of the pagerando implementation described in
http://lists.llvm.org/pipermail/llvm-dev/2017-June/113794.html.

Patch by Stephen Crane <sjc@immunant.com>

Differential Revision: https://reviews.llvm.org/D49256

llvm-svn: 336964

											
										
										
											2018-07-13 08:08:38 +08:00
+								#include "llvm/CodeGen/StackProtector.h"
-												Fix a bunch more layering of CodeGen headers that are in Target

All these headers already depend on CodeGen headers so moving them into
CodeGen fixes the layering (since CodeGen depends on Target, not the
other way around).

llvm-svn: 318490

											
										
										
											2017-11-17 09:07:10 +08:00
+								#include "llvm/CodeGen/TargetFrameLowering.h"
 								#include "llvm/CodeGen/TargetLowering.h"
-												[IRTranslator] Do not abort when the target wants to fall back.

Every pass in the GlobalISel pipeline will need to do something similar.

llvm-svn: 279886

											
										
										
											2016-08-27 07:49:05 +08:00
+								#include "llvm/CodeGen/TargetPassConfig.h"
-												Fix a bunch more layering of CodeGen headers that are in Target

All these headers already depend on CodeGen headers so moving them into
CodeGen fixes the layering (since CodeGen depends on Target, not the
other way around).

llvm-svn: 318490

											
										
										
											2017-11-17 09:07:10 +08:00
+								#include "llvm/CodeGen/TargetRegisterInfo.h"
 								#include "llvm/CodeGen/TargetSubtargetInfo.h"
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								#include "llvm/IR/BasicBlock.h"
-												[GlobalISel][IRTranslator] Use RPO traversal when visiting blocks to translate.

Previously we were just visiting the blocks in the function in IR order, which
is rather arbitrary. Therefore we wouldn't always visit defs before uses, but
the translation code relies on this assumption in some places.

Only codegen change seen in tests is an elision of a redundant copy.

Fixes PR38396

llvm-svn: 338476

											
										
										
											2018-08-01 10:17:42 +08:00
+								#include "llvm/IR/CFG.h"
-												[GlobalISel][IRTranslator] Teach the pass how to translate Add instructions.

llvm-svn: 260549

											
										
										
											2016-02-12 01:51:31 +08:00
+								#include "llvm/IR/Constant.h"
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								#include "llvm/IR/Constants.h"
 								#include "llvm/IR/DataLayout.h"
-												GlobalISel: support debug intrinsics.

The translation scheme is mostly cribbed from FastISel, and it's not entirely
convincing semantically. But it does seem to work in the common cases and allow
variables to be printed so it can't be all wrong.

llvm-svn: 293228

											
										
										
											2017-01-27 07:39:14 +08:00
+								#include "llvm/IR/DebugInfo.h"
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								#include "llvm/IR/DerivedTypes.h"
-												[GlobalISel] More detailed skeleton for the IRTranslator.

llvm-svn: 260456

											
										
										
											2016-02-11 06:59:27 +08:00
+								#include "llvm/IR/Function.h"
-												GlobalISel: translate GEP instructions.

Unlike SDag, we use a separate G_GEP instruction (much simplified, only taking
a single byte offset) to preserve the pointer type information through
selection.

llvm-svn: 281205

											
										
										
											2016-09-12 19:20:22 +08:00
+								#include "llvm/IR/GetElementPtrTypeIterator.h"
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								#include "llvm/IR/InlineAsm.h"
 								#include "llvm/IR/InstrTypes.h"
 								#include "llvm/IR/Instructions.h"
-												GlobalISel: support translation of intrinsic calls.

These come in two variants for now: G_INTRINSIC and G_INTRINSIC_W_SIDE_EFFECTS.
We may decide to split the latter up with finer-grained restrictions later, if
necessary.

llvm-svn: 277224

											
										
										
											2016-07-30 06:32:36 +08:00
+								#include "llvm/IR/IntrinsicInst.h"
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								#include "llvm/IR/Intrinsics.h"
 								#include "llvm/IR/LLVMContext.h"
 								#include "llvm/IR/Metadata.h"
-												[GlobalISel][IRTranslator] Teach the pass how to translate Add instructions.

llvm-svn: 260549

											
										
										
											2016-02-12 01:51:31 +08:00
+								#include "llvm/IR/Type.h"
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								#include "llvm/IR/User.h"
-												[GlobalISel][IRTranslator] Teach the pass how to translate Add instructions.

llvm-svn: 260549

											
										
										
											2016-02-12 01:51:31 +08:00
+								#include "llvm/IR/Value.h"
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								#include "llvm/MC/MCContext.h"
 								#include "llvm/Pass.h"
 								#include "llvm/Support/Casting.h"
 								#include "llvm/Support/CodeGen.h"
 								#include "llvm/Support/Debug.h"
 								#include "llvm/Support/ErrorHandling.h"
 								#include "llvm/Support/LowLevelTypeImpl.h"
 								#include "llvm/Support/MathExtras.h"
 								#include "llvm/Support/raw_ostream.h"
-												GlobalISel: support translation of intrinsic calls.

These come in two variants for now: G_INTRINSIC and G_INTRINSIC_W_SIDE_EFFECTS.
We may decide to split the latter up with finer-grained restrictions later, if
necessary.

llvm-svn: 277224

											
										
										
											2016-07-30 06:32:36 +08:00
+								#include "llvm/Target/TargetIntrinsicInfo.h"
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								#include "llvm/Target/TargetMachine.h"
 								#include <algorithm>
 								#include <cassert>
 								#include <cstdint>
 								#include <iterator>
 								#include <string>
 								#include <utility>
 								#include <vector>
-												[GlobalISel] More detailed skeleton for the IRTranslator.

llvm-svn: 260456

											
										
										
											2016-02-11 06:59:27 +08:00
 								#define DEBUG_TYPE "irtranslator"
-												[GlobalISel] Add the proper cmake plumbing.

This patch adds the necessary plumbing to cmake to build the sources related to
GlobalISel.

To build the sources related to GlobalISel, we need to add -DBUILD_GLOBAL_ISEL=ON.
By default, this is OFF, thus GlobalISel sources will not impact people that do
not explicitly opt-in.

Differential Revision: http://reviews.llvm.org/D15983

llvm-svn: 258344

											
										
										
											2016-01-21 04:58:56 +08:00
+								using namespace llvm;
 								char IRTranslator::ID = 0;
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
-												[IRTranslator] Do not abort when the target wants to fall back.

Every pass in the GlobalISel pipeline will need to do something similar.

llvm-svn: 279886

											
										
										
											2016-08-27 07:49:05 +08:00
+								INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
 								                false, false)
 								INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
 								INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
-												GlobalISel: remove redundant ';'s. NFC

llvm-svn: 276723

											
										
										
											2016-07-26 11:29:18 +08:00
+								                false, false)
-												[GlobalISel] Add the proper cmake plumbing.

This patch adds the necessary plumbing to cmake to build the sources related to
GlobalISel.

To build the sources related to GlobalISel, we need to add -DBUILD_GLOBAL_ISEL=ON.
By default, this is OFF, thus GlobalISel sources will not impact people that do
not explicitly opt-in.

Differential Revision: http://reviews.llvm.org/D15983

llvm-svn: 258344

											
										
										
											2016-01-21 04:58:56 +08:00
-												[GlobalISel] Emit opt remarks on isel fallbacks.

Having more fine-grained information on the specific construct that
caused us to fallback is valuable for large-scale data collection.

We still have the fallback warning, that's also used for FastISel.
We still need to remove the fallback warning, and teach FastISel to also
emit remarks (it currently has a combination of the warning, stats, and
debug prints: the remarks could unify all three).

The abort-on-fallback path could also be better handled using remarks:
one could imagine a "-Rpass-error", analogous to "-Werror", which would
promote missed/failed remarks to errors.  It's not clear whether that
would be useful for other remarks though, so we're not there yet.

llvm-svn: 296013

											
										
										
											2017-02-24 05:05:42 +08:00
+								static void reportTranslationError(MachineFunction &MF,
 								                                   const TargetPassConfig &TPC,
 								                                   OptimizationRemarkEmitter &ORE,
 								                                   OptimizationRemarkMissed &R) {
 								  MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
 								  // Print the function name explicitly if we don't have a debug location (which
 								  // makes the diagnostic less useful) or if we're going to emit a raw error.
 								  if (!R.getLocation().isValid() || TPC.isGlobalISelAbortEnabled())
 								    R << (" (in function: " + MF.getName() + ")").str();
 								  if (TPC.isGlobalISelAbortEnabled())
 								    report_fatal_error(R.getMsg());
 								  else
 								    ORE.emit(R);
-												GlobalISel: improve error diagnostics when IRTranslation fails.

llvm-svn: 286190

											
										
										
											2016-11-08 09:12:17 +08:00
+								}
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								IRTranslator::IRTranslator() : MachineFunctionPass(ID) {
-												[GlobalISel] Introduce initializer method to support start/stop-after features.

llvm-svn: 262896

											
										
										
											2016-03-08 09:38:55 +08:00
+								  initializeIRTranslatorPass(*PassRegistry::getPassRegistry());
-												[GlobalISel][IRTranslator] Change the ownership of the MIRBuilder field.

llvm-svn: 260551

											
										
										
											2016-02-12 01:53:23 +08:00
+								}
-												[globalisel][irtranslator] Verify that DILocations aren't lost in translation

Summary:
Also fix a couple bugs where DILocations are lost. EntryBuilder wasn't passing
on debug locations for PHI's, constants, GLOBAL_VALUE, etc.

Reviewers: aprantl, vsk, bogner, aditya_nandakumar, volkan, rtereshin, aemerson

Reviewed By: aemerson

Subscribers: aemerson, rovka, kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D53740

llvm-svn: 345743

											
										
										
											2018-11-01 01:31:23 +08:00
+								#ifndef NDEBUG
 								/// Verify that every instruction created has the same DILocation as the
 								/// instruction being translated.
 								class DILocationVerifier : MachineFunction::Delegate {
 								  // Function whose instruction insertions are being observed.
 								  MachineFunction &MF;
 								  // IR instruction currently being translated; null until
 								  // setCurrentInst() is called.
 								  const Instruction *CurrInst = nullptr;
 								public:
 								  // Registers this object as the delegate of MF for its whole lifetime.
 								  DILocationVerifier(MachineFunction &MF) : MF(MF) { MF.setDelegate(this); }
 								  // Unregisters this object from MF again.
 								  ~DILocationVerifier() { MF.resetDelegate(this); }
 								  // Returns the IR instruction set by the last setCurrentInst() call.
 								  const Instruction *getCurrentInst() const { return CurrInst; }
 								  // Records the IR instruction that is about to be translated.
 								  void setCurrentInst(const Instruction *Inst) { CurrInst = Inst; }
 								  // Insertion callback: asserts that a current IR instruction has been
 								  // set and that MI carries the same debug location as that instruction.
 								  void MF_HandleInsertion(const MachineInstr &MI) override {
 								    assert(getCurrentInst() && "Inserted instruction without a current MI");
 								    // Only print the check message if we're actually checking it.
 								#ifndef NDEBUG
 								    LLVM_DEBUG(dbgs() << "Checking DILocation from " << *CurrInst
 								                      << " was copied to " << MI);
 								#endif
 								    assert(CurrInst->getDebugLoc() == MI.getDebugLoc() &&
 								           "Line info was not transferred to all instructions");
 								  }
 								  // Removal callback: intentionally does nothing.
 								  void MF_HandleRemoval(const MachineInstr &MI) override {}
 								};
 								#endif // ifndef NDEBUG
-												[IRTranslator] Do not abort when the target wants to fall back.

Every pass in the GlobalISel pipeline will need to do something similar.

llvm-svn: 279886

											
										
										
											2016-08-27 07:49:05 +08:00
+								void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
-												CodeGen: Remove pipeline dependencies on StackProtector; NFC

This re-applies r336929 with a fix to accommodate the Mips target
scheduling multiple SelectionDAG instances into the pass pipeline.

PrologEpilogInserter and StackColoring depend on the StackProtector analysis
being alive from the point it is run until PEI, which requires that they are all
scheduled in the same FunctionPassManager. Inserting a (machine) ModulePass
between StackProtector and PEI results in these passes being in separate
FunctionPassManagers and the StackProtector is not available for PEI.

PEI and StackColoring don't use much information from the StackProtector pass,
so transferring the required information to MachineFrameInfo is cleaner than
keeping the StackProtector pass around. This commit moves the SSP layout
information to MFI instead of keeping it in the pass.

This patch set (D37580, D37581, D37582, D37583, D37584, D37585, D37586, D37587)
is a first draft of the pagerando implementation described in
http://lists.llvm.org/pipermail/llvm-dev/2017-June/113794.html.

Patch by Stephen Crane <sjc@immunant.com>

Differential Revision: https://reviews.llvm.org/D49256

llvm-svn: 336964

											
										
										
											2018-07-13 08:08:38 +08:00
+								  AU.addRequired<StackProtector>();
-												[IRTranslator] Do not abort when the target wants to fall back.

Every pass in the GlobalISel pipeline will need to do something similar.

llvm-svn: 279886

											
										
										
											2016-08-27 07:49:05 +08:00
+								  AU.addRequired<TargetPassConfig>();
-												CodeGen: Remove pipeline dependencies on StackProtector; NFC

This re-applies r336929 with a fix to accommodate the Mips target
scheduling multiple SelectionDAG instances into the pass pipeline.

PrologEpilogInserter and StackColoring depend on the StackProtector analysis
being alive from the point it is run until PEI, which requires that they are all
scheduled in the same FunctionPassManager. Inserting a (machine) ModulePass
between StackProtector and PEI results in these passes being in separate
FunctionPassManagers and the StackProtector is not available for PEI.

PEI and StackColoring don't use much information from the StackProtector pass,
so transferring the required information to MachineFrameInfo is cleaner than
keeping the StackProtector pass around. This commit moves the SSP layout
information to MFI instead of keeping it in the pass.

This patch set (D37580, D37581, D37582, D37583, D37584, D37585, D37586, D37587)
is a first draft of the pagerando implementation described in
http://lists.llvm.org/pipermail/llvm-dev/2017-June/113794.html.

Patch by Stephen Crane <sjc@immunant.com>

Differential Revision: https://reviews.llvm.org/D49256

llvm-svn: 336964

											
										
										
											2018-07-13 08:08:38 +08:00
+								  getSelectionDAGFallbackAnalysisUsage(AU);
-												[IRTranslator] Do not abort when the target wants to fall back.

Every pass in the GlobalISel pipeline will need to do something similar.

llvm-svn: 279886

											
										
										
											2016-08-27 07:49:05 +08:00
+								  MachineFunctionPass::getAnalysisUsage(AU);
 								}
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								/// Flatten \p Ty into the LLTs of its leaf values, appending them to
 								/// \p ValueTys in traversal order.
 								///
 								/// Struct and array types are walked recursively, void contributes no
 								/// entries, and every other type yields exactly one LLT obtained from
 								/// getLLTForType. When \p Offsets is non-null, each leaf also appends its
 								/// accumulated offset multiplied by 8 (presumably converting a byte offset
 								/// into a bit offset), starting from \p StartingOffset.
 								static void computeValueLLTs(const DataLayout &DL, Type &Ty,
 								                             SmallVectorImpl<LLT> &ValueTys,
 								                             SmallVectorImpl<uint64_t> *Offsets = nullptr,
 								                             uint64_t StartingOffset = 0) {
 								  // Struct: visit every field at the offset the struct layout assigns it.
 								  if (auto *StructTy = dyn_cast<StructType>(&Ty)) {
 								    const StructLayout *Layout = DL.getStructLayout(StructTy);
 								    const unsigned NumFields = StructTy->getNumElements();
 								    for (unsigned Field = 0; Field != NumFields; ++Field) {
 								      const uint64_t FieldOffset =
 								          StartingOffset + Layout->getElementOffset(Field);
 								      computeValueLLTs(DL, *StructTy->getElementType(Field), ValueTys,
 								                       Offsets, FieldOffset);
 								    }
 								    return;
 								  }

 								  // Array: all elements share one type and are spaced by its alloc size.
 								  if (auto *ArrayTy = dyn_cast<ArrayType>(&Ty)) {
 								    Type *ElementTy = ArrayTy->getElementType();
 								    const uint64_t Stride = DL.getTypeAllocSize(ElementTy);
 								    const unsigned NumElements = ArrayTy->getNumElements();
 								    for (unsigned Index = 0; Index != NumElements; ++Index) {
 								      const uint64_t ElementOffset = StartingOffset + Index * Stride;
 								      computeValueLLTs(DL, *ElementTy, ValueTys, Offsets, ElementOffset);
 								    }
 								    return;
 								  }

 								  // Void stands for "no values at all".
 								  if (Ty.isVoidTy())
 								    return;

 								  // Leaf: a single LLT describes this type.
 								  ValueTys.push_back(getLLTForType(Ty, DL));
 								  if (Offsets)
 								    Offsets->push_back(StartingOffset * 8);
 								}
-												GlobalISel: rework getOrCreateVReg to avoid double lookup. NFC.

Thanks to Quentin for suggesting the refactoring.

llvm-svn: 293087

											
										
										
											2017-01-26 04:58:22 +08:00
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								/// Reserve placeholder vreg slots for \p Val, which must not yet be in VMap.
 								///
 								/// One slot holding register number 0 is appended per LLT that
 								/// computeValueLLTs produces for the value's type. The offsets list for
 								/// \p Val is filled in as well, but only if it is currently empty.
 								///
 								/// \returns the vreg list now associated with \p Val.
 								IRTranslator::ValueToVRegInfo::VRegListT &
 								IRTranslator::allocateVRegs(const Value &Val) {
 								  assert(!VMap.contains(Val) && "Value already allocated in VMap");

 								  auto *RegList = VMap.getVRegs(Val);
 								  auto *OffsetList = VMap.getOffsets(Val);

 								  // Only ask for offsets when none have been recorded so far.
 								  auto *OffsetsToFill = OffsetList;
 								  if (!OffsetList->empty())
 								    OffsetsToFill = nullptr;

 								  SmallVector<LLT, 4> LeafTys;
 								  computeValueLLTs(*DL, *Val.getType(), LeafTys, OffsetsToFill);

 								  // One zero placeholder per leaf type.
 								  for (auto Remaining = LeafTys.size(); Remaining != 0; --Remaining)
 								    RegList->push_back(0);

 								  return *RegList;
 								}
 								ArrayRef<unsigned> IRTranslator::getOrCreateVRegs(const Value &Val) {
 								  auto VRegsIt = VMap.findVRegs(Val);
 								  if (VRegsIt != VMap.vregs_end())
 								    return *VRegsIt->second;
 								  if (Val.getType()->isVoidTy())
 								    return *VMap.getVRegs(Val);
 								  // Create entry for this type.
 								  auto *VRegs = VMap.getVRegs(Val);
 								  auto *Offsets = VMap.getOffsets(Val);
-												GlobalISel: rework getOrCreateVReg to avoid double lookup. NFC.

Thanks to Quentin for suggesting the refactoring.

llvm-svn: 293087

											
										
										
											2017-01-26 04:58:22 +08:00
 								  assert(Val.getType()->isSized() &&
 								         "Don't know how to create an empty vreg");
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  SmallVector<LLT, 4> SplitTys;
 								  computeValueLLTs(*DL, *Val.getType(), SplitTys,
 								                   Offsets->empty() ? Offsets : nullptr);
 								  if (!isa<Constant>(Val)) {
 								    for (auto Ty : SplitTys)
 								      VRegs->push_back(MRI->createGenericVirtualRegister(Ty));
 								    return *VRegs;
 								  }
 								  if (Val.getType()->isAggregateType()) {
 								    // UndefValue, ConstantAggregateZero
 								    auto &C = cast<Constant>(Val);
 								    unsigned Idx = 0;
 								    while (auto Elt = C.getAggregateElement(Idx++)) {
 								      auto EltRegs = getOrCreateVRegs(*Elt);
-												Use llvm::copy. NFC

llvm-svn: 347126

											
										
										
											2018-11-17 09:44:25 +08:00
+								      llvm::copy(EltRegs, std::back_inserter(*VRegs));
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								    }
 								  } else {
 								    assert(SplitTys.size() == 1 && "unexpectedly split LLT");
 								    VRegs->push_back(MRI->createGenericVirtualRegister(SplitTys[0]));
 								    bool Success = translate(cast<Constant>(Val), VRegs->front());
-												GlobalISel: rework getOrCreateVReg to avoid double lookup. NFC.

Thanks to Quentin for suggesting the refactoring.

llvm-svn: 293087

											
										
										
											2017-01-26 04:58:22 +08:00
+								    if (!Success) {
-												[GlobalISel] Emit opt remarks on isel fallbacks.

Having more fine-grained information on the specific construct that
caused us to fallback is valuable for large-scale data collection.

We still have the fallback warning, that's also used for FastISel.
We still need to remove the fallback warning, and teach FastISel to also
emit remarks (it currently has a combination of the warning, stats, and
debug prints: the remarks could unify all three).

The abort-on-fallback path could also be better handled using remarks:
one could imagine a "-Rpass-error", analogous to "-Werror", which would
promote missed/failed remarks to errors.  It's not clear whether that
would be useful for other remarks though, so we're not there yet.

llvm-svn: 296013

											
										
										
											2017-02-24 05:05:42 +08:00
+								      OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
-												MachineFunction: Return reference from getFunction(); NFC

The Function can never be nullptr so we can return a reference.

llvm-svn: 320884

											
										
										
											2017-12-16 06:22:58 +08:00
+								                                 MF->getFunction().getSubprogram(),
 								                                 &MF->getFunction().getEntryBlock());
-												[GlobalISel] Emit opt remarks on isel fallbacks.

Having more fine-grained information on the specific construct that
caused us to fallback is valuable for large-scale data collection.

We still have the fallback warning, that's also used for FastISel.
We still need to remove the fallback warning, and teach FastISel to also
emit remarks (it currently has a combination of the warning, stats, and
debug prints: the remarks could unify all three).

The abort-on-fallback path could also be better handled using remarks:
one could imagine a "-Rpass-error", analogous to "-Werror", which would
promote missed/failed remarks to errors.  It's not clear whether that
would be useful for other remarks though, so we're not there yet.

llvm-svn: 296013

											
										
										
											2017-02-24 05:05:42 +08:00
+								      R << "unable to translate constant: " << ore::NV("Type", Val.getType());
 								      reportTranslationError(*MF, *TPC, *ORE, R);
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								      return *VRegs;
-												GlobalISel: first translation support for Constants.

For now put them all in the entry block. This should be correct but may give
poor runtime performance. Hopefully MachineSinking combined with
isReMaterializable can solve those issues, but if not the interface is sound
enough to support alternatives.

llvm-svn: 278168

											
										
										
											2016-08-10 05:28:04 +08:00
+								    }
-												[GlobalISel][IRTranslator] Teach the pass how to translate Add instructions.

llvm-svn: 260549

											
										
										
											2016-02-12 01:51:31 +08:00
+								  }
-												GlobalISel: prevent heap use-after-free when looking up VReg.

Translating the constant can create more VRegs, which can invalidate the
reference into the DenseMap. So we have to look up the value again after all
that's happened.

llvm-svn: 292675

											
										
										
											2017-01-21 07:25:17 +08:00
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  return *VRegs;
-												[GlobalISel][IRTranslator] Teach the pass how to translate Add instructions.

llvm-svn: 260549

											
										
										
											2016-02-12 01:51:31 +08:00
+								}
-												GlobalISel: translate stack protector intrinsics

llvm-svn: 285614

											
										
										
											2016-11-01 02:30:59 +08:00
+								int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
 								  if (FrameIndices.find(&AI) != FrameIndices.end())
 								    return FrameIndices[&AI];
 								  unsigned ElementSize = DL->getTypeStoreSize(AI.getAllocatedType());
 								  unsigned Size =
 								      ElementSize * cast<ConstantInt>(AI.getArraySize())->getZExtValue();
 								  // Always allocate at least one byte.
 								  Size = std::max(Size, 1u);
 								  unsigned Alignment = AI.getAlignment();
 								  if (!Alignment)
 								    Alignment = DL->getABITypeAlignment(AI.getAllocatedType());
 								  int &FI = FrameIndices[&AI];
-												GlobalISel: store the current MachineFunction as direct state. NFC.

Having to ask the MIRBuilder for the current function is a little awkward, and
I'm intending to improve how that's threaded through anyway.

llvm-svn: 288983

											
										
										
											2016-12-08 05:17:47 +08:00
+								  FI = MF->getFrameInfo().CreateStackObject(Size, Alignment, false, &AI);
-												GlobalISel: translate stack protector intrinsics

llvm-svn: 285614

											
										
										
											2016-11-01 02:30:59 +08:00
+								  return FI;
 								}
-												GlobalISel: add generic load and store instructions.

Pretty straightforward, the only oddity is the MachineMemOperand (which it's
surprisingly difficult to share code for).

llvm-svn: 276799

											
										
										
											2016-07-27 04:23:26 +08:00
+								unsigned IRTranslator::getMemOpAlignment(const Instruction &I) {
 								  unsigned Alignment = 0;
 								  Type *ValTy = nullptr;
 								  if (const StoreInst *SI = dyn_cast<StoreInst>(&I)) {
 								    Alignment = SI->getAlignment();
 								    ValTy = SI->getValueOperand()->getType();
 								  } else if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) {
 								    Alignment = LI->getAlignment();
 								    ValTy = LI->getType();
-												[globalisel][irtranslator] Add support for atomicrmw and (strong) cmpxchg

Summary:
This patch adds support for the atomicrmw instructions and the strong
cmpxchg instruction to the IRTranslator.

I've left out weak cmpxchg because LangRef.rst isn't entirely clear on what
difference it makes to the backend. As far as I can tell from the code, it
only matters to AtomicExpandPass which is run at the LLVM-IR level.

Reviewers: ab, t.p.northover, qcolombet, rovka, aditya_nandakumar, volkan, javed.absar

Reviewed By: qcolombet

Subscribers: kristof.beyls, javed.absar, igorb, llvm-commits

Differential Revision: https://reviews.llvm.org/D40092

llvm-svn: 336589

											
										
										
											2018-07-10 03:33:40 +08:00
+								  } else if (const AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) {
 								    // TODO(PR27168): This instruction has no alignment attribute, but unlike
 								    // the default alignment for load/store, the default here is to assume
 								    // it has NATURAL alignment, not DataLayout-specified alignment.
 								    const DataLayout &DL = AI->getModule()->getDataLayout();
 								    Alignment = DL.getTypeStoreSize(AI->getCompareOperand()->getType());
 								    ValTy = AI->getCompareOperand()->getType();
 								  } else if (const AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) {
 								    // TODO(PR27168): This instruction has no alignment attribute, but unlike
 								    // the default alignment for load/store, the default here is to assume
 								    // it has NATURAL alignment, not DataLayout-specified alignment.
 								    const DataLayout &DL = AI->getModule()->getDataLayout();
 								    Alignment = DL.getTypeStoreSize(AI->getValOperand()->getType());
 								    ValTy = AI->getType();
-												[GlobalISel] Emit opt remarks on isel fallbacks.

Having more fine-grained information on the specific construct that
caused us to fallback is valuable for large-scale data collection.

We still have the fallback warning, that's also used for FastISel.
We still need to remove the fallback warning, and teach FastISel to also
emit remarks (it currently has a combination of the warning, stats, and
debug prints: the remarks could unify all three).

The abort-on-fallback path could also be better handled using remarks:
one could imagine a "-Rpass-error", analogous to "-Werror", which would
promote missed/failed remarks to errors.  It's not clear whether that
would be useful for other remarks though, so we're not there yet.

llvm-svn: 296013

											
										
										
											2017-02-24 05:05:42 +08:00
+								  } else {
 								    OptimizationRemarkMissed R("gisel-irtranslator", "", &I);
 								    R << "unable to translate memop: " << ore::NV("Opcode", &I);
 								    reportTranslationError(*MF, *TPC, *ORE, R);
-												[IRTranslator] Do not abort when the target wants to fall back.

Every pass in the GlobalISel pipeline will need to do something similar.

llvm-svn: 279886

											
										
										
											2016-08-27 07:49:05 +08:00
+								    return 1;
-												[GlobalISel] Emit opt remarks on isel fallbacks.

Having more fine-grained information on the specific construct that
caused us to fallback is valuable for large-scale data collection.

We still have the fallback warning, that's also used for FastISel.
We still need to remove the fallback warning, and teach FastISel to also
emit remarks (it currently has a combination of the warning, stats, and
debug prints: the remarks could unify all three).

The abort-on-fallback path could also be better handled using remarks:
one could imagine a "-Rpass-error", analogous to "-Werror", which would
promote missed/failed remarks to errors.  It's not clear whether that
would be useful for other remarks though, so we're not there yet.

llvm-svn: 296013

											
										
										
											2017-02-24 05:05:42 +08:00
+								  }
-												GlobalISel: add generic load and store instructions.

Pretty straightforward, the only oddity is the MachineMemOperand (which it's
surprisingly difficult to share code for).

llvm-svn: 276799

											
										
										
											2016-07-27 04:23:26 +08:00
 								  return Alignment ? Alignment : DL->getABITypeAlignment(ValTy);
 								}
-												[GlobalISel] Preserve IR block layout.

It makes the output function layout more predictable;  the layout has
an effect on performance, we don't want it to be at the mercy of the
translator's visitation order and such.
The predictable output is also easier to digest.

getOrCreateBB isn't appropriately named anymore, as it never needs to
create anything.  Rename it and extract the MBB creation logic out of it.

A couple tests were sensitive to the order. Update them.

llvm-svn: 297868

											
										
										
											2017-03-16 02:22:33 +08:00
+								MachineBasicBlock &IRTranslator::getMBB(const BasicBlock &BB) {
-												[IRTranslator] Update getOrCreateBB API to use references.
A null basic block is invalid, so just pass a reference.

llvm-svn: 263260

											
										
										
											2016-03-12 01:27:43 +08:00
+								  MachineBasicBlock *&MBB = BBToMBB[&BB];
-												[GlobalISel] Preserve IR block layout.

It makes the output function layout more predictable;  the layout has
an effect on performance, we don't want it to be at the mercy of the
translator's visitation order and such.
The predictable output is also easier to digest.

getOrCreateBB isn't appropriately named anymore, as it never needs to
create anything.  Rename it and extract the MBB creation logic out of it.

A couple tests were sensitive to the order. Update them.

llvm-svn: 297868

											
										
										
											2017-03-16 02:22:33 +08:00
+								  assert(MBB && "BasicBlock was not encountered before");
-												[GlobalISel][IRTranslator] Teach the pass how to translate Add instructions.

llvm-svn: 260549

											
										
										
											2016-02-12 01:51:31 +08:00
+								  return *MBB;
 								}
-												[GlobalISel] track predecessor mapping during switch lowering.

Correctly populating Machine PHIs relies on knowing exactly how the IR level
CFG was lowered to MachineIR. This needs to be tracked by any translation
phases that meddle (currently only SwitchInst handling).

This reapplies r291973 which was reverted because of testing failures. Fixes:

 + Don't return an ArrayRef to a local temporary.
 + Incorporate Kristof's suggested comment improvements.

llvm-svn: 292278

											
										
										
											2017-01-18 06:13:50 +08:00
+								/// Record \p NewPred as an additional machine-level predecessor for the
 								/// IR CFG edge \p Edge. \p NewPred must be non-null.
 								void IRTranslator::addMachineCFGPred(CFGEdge Edge, MachineBasicBlock *NewPred) {
 								  assert(NewPred && "new predecessor must be a real MachineBasicBlock");
 								  // operator[] creates the edge's predecessor list on first use.
 								  auto &PredsForEdge = MachinePreds[Edge];
 								  PredsForEdge.push_back(NewPred);
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
 								                                     MachineIRBuilder &MIRBuilder) {
-												GlobalISel: make translate* functions take the most specialized class possible.

NFC.

llvm-svn: 277188

											
										
										
											2016-07-30 02:11:21 +08:00
+								  // FIXME: handle signed/unsigned wrapping flags.
-												[GlobalISel] More detailed skeleton for the IRTranslator.

llvm-svn: 260456

											
										
										
											2016-02-11 06:59:27 +08:00
+								  // Get or create a virtual register for each value.
 								  // Unless the value is a Constant => loadimm cst?
 								  // or inline constant each time?
 								  // Creation of a virtual register needs to have a size.
-												GlobalISel: support same ConstantExprs as Instructions.

It's more than just inttoptr, but the others can't be tested until we have
support for non-trivial constants (they currently get unavoidably folded to a
ConstantInt).

llvm-svn: 278303

											
										
										
											2016-08-11 07:02:41 +08:00
+								  unsigned Op0 = getOrCreateVReg(*U.getOperand(0));
 								  unsigned Op1 = getOrCreateVReg(*U.getOperand(1));
 								  unsigned Res = getOrCreateVReg(U);
-												Copy utilities updated and added for MI flags

Summary: This patch adds a GlobalIsel copy utility into MI for flags and updates the instruction emitter for the SDAG path.  Some tests show new behavior and I added one for GlobalIsel which mirrors an SDAG test for handling nsw/nuw.

Reviewers: spatel, wristow, arsenm

Reviewed By: arsenm

Subscribers: wdng

Differential Revision: https://reviews.llvm.org/D52006

llvm-svn: 342576

											
										
										
											2018-09-20 02:52:08 +08:00
+								  auto FBinOp = MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op0).addUse(Op1);
 								  if (isa<Instruction>(U)) {
 								    MachineInstr *FBinOpMI = FBinOp.getInstr();
 								    const Instruction &I = cast<Instruction>(U);
 								    FBinOpMI->copyIRFlags(I);
 								  }
-												[GlobalISel][IRTranslator] Teach the pass how to translate Add instructions.

llvm-svn: 260549

											
										
										
											2016-02-12 01:51:31 +08:00
+								  return true;
-												[GlobalISel] Add the proper cmake plumbing.

This patch adds the necessary plumbing to cmake to build the sources related to
GlobalISel.

To build the sources related to GlobalISel, we need to add -DBUILD_GLOBAL_ISEL=ON.
By default, this is OFF, thus GlobalISel sources will not impact people that do
not explicitly opt-in.

Differential Revision: http://reviews.llvm.org/D15983

llvm-svn: 258344

											
										
										
											2016-01-21 04:58:56 +08:00
+								}
-												[GlobalISel] Translate floating-point negation

Reviewers: qcolombet, javed.absar, aditya_nandakumar, dsanders, t.p.northover, ab

Reviewed By: qcolombet

Subscribers: dberris, rovka, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30671

llvm-svn: 297171

											
										
										
											2017-03-08 02:03:28 +08:00
+								/// Translate a floating-point subtraction.
 								///
 								/// When the left operand is the constant that
 								/// ConstantFP::getZeroValueForNegation returns for the result type (the
 								/// "-0.0 - X" idiom), a single G_FNEG of the right operand is emitted.
 								/// Everything else is forwarded to translateBinaryOp as G_FSUB.
 								bool IRTranslator::translateFSub(const User &U, MachineIRBuilder &MIRBuilder) {
 								  const Value *Minuend = U.getOperand(0);
 								  // Check for a Constant first so the negation-zero constant is only
 								  // requested when the comparison can actually succeed.
 								  const bool IsNegationIdiom =
 								      isa<Constant>(Minuend) &&
 								      Minuend == ConstantFP::getZeroValueForNegation(U.getType());

 								  if (!IsNegationIdiom)
 								    return translateBinaryOp(TargetOpcode::G_FSUB, U, MIRBuilder);

 								  // -0.0 - X --> G_FNEG
 								  MIRBuilder.buildInstr(TargetOpcode::G_FNEG)
 								      .addDef(getOrCreateVReg(U))
 								      .addUse(getOrCreateVReg(*U.getOperand(1)));
 								  return true;
 								}
-												[IR] Add a dedicated FNeg IR Instruction

The IEEE-754 Standard makes it clear that fneg(x) and
fsub(-0.0, x) are two different operations. The former is a bitwise
operation, while the latter is an arithmetic operation. This patch
creates a dedicated FNeg IR Instruction to model that behavior.

Differential Revision: https://reviews.llvm.org/D53877

llvm-svn: 346774

											
										
										
											2018-11-14 02:15:47 +08:00
+								/// Translate a dedicated unary `fneg` into a G_FNEG.
 								///
 								/// The result vreg of \p U is the def and the vreg of its single source
 								/// operand is the use.
 								///
 								/// \returns true unconditionally.
 								bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
 								  // FNeg is a unary operation, so the value being negated is operand 0.
 								  // (Index 1 is only valid for the binary "fsub -0.0, X" form handled by
 								  // translateFSub; using it here would read past the operand list.)
 								  MIRBuilder.buildInstr(TargetOpcode::G_FNEG)
 								      .addDef(getOrCreateVReg(U))
 								      .addUse(getOrCreateVReg(*U.getOperand(0)));
 								  return true;
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateCompare(const User &U,
 								                                    MachineIRBuilder &MIRBuilder) {
-												GlobalISel: translate floating-point comparisons

llvm-svn: 279319

											
										
										
											2016-08-20 04:48:16 +08:00
+								  const CmpInst *CI = dyn_cast<CmpInst>(&U);
 								  unsigned Op0 = getOrCreateVReg(*U.getOperand(0));
 								  unsigned Op1 = getOrCreateVReg(*U.getOperand(1));
 								  unsigned Res = getOrCreateVReg(U);
 								  CmpInst::Predicate Pred =
 								      CI ? CI->getPredicate() : static_cast<CmpInst::Predicate>(
 								                                    cast<ConstantExpr>(U).getPredicate());
 								  if (CmpInst::isIntPredicate(Pred))
-												GlobalISel: move type information to MachineRegisterInfo.

We want each register to have a canonical type, which means the best place to
store this is in MachineRegisterInfo rather than on every MachineInstr that
happens to use or define that register.

Most changes following from this are pretty simple (you need an MRI anyway if
you're going to be doing any transformations, so just check the type there).
But legalization doesn't really want to check redundant operands (when, for
example, a G_ADD only ever has one type) so I've made use of MCInstrDesc's
operand type field to encode these constraints and limit legalization's work.

As an added bonus, more validation is possible, both in MachineVerifier and
MachineIRBuilder (coming soon).

llvm-svn: 281035

											
										
										
											2016-09-09 19:46:34 +08:00
+								    MIRBuilder.buildICmp(Pred, Res, Op0, Op1);
-												GlobalISel: correctly handle trivial fcmp predicates.

It makes sense to only do them once in IRTranslator rather than making everyone
deal with them.

llvm-svn: 297304

											
										
										
											2017-03-09 02:49:54 +08:00
+								  else if (Pred == CmpInst::FCMP_FALSE)
-												[GlobalISel] Avoid translating synthetic constants to new G_CONSTANTS.

Currently, we create a G_CONSTANT for every "synthetic" integer
constant operand (for instance, for the G_GEP offset).
Instead, share the G_CONSTANTs we might have created by going through
the ValueToVReg machinery.

When we're emitting synthetic constants, we do need to get Constants from
the context.  One could argue that we shouldn't modify the context at
all (for instance, this means that we're going to use a tad more memory
if the constant wasn't used elsewhere), but constants are mostly
harmless.  We currently do this for extractvalue and all.

For constant fcmp, this does mean we'll emit an extra COPY, which is not
necessarily more optimal than an extra materialized constant.
But that preserves the current intended design of uniqued G_CONSTANTs,
and the rematerialization problem exists elsewhere and should be
resolved with a single coherent solution.

llvm-svn: 297875

											
										
										
											2017-03-16 03:21:11 +08:00
+								    MIRBuilder.buildCopy(
 								        Res, getOrCreateVReg(*Constant::getNullValue(CI->getType())));
 								  else if (Pred == CmpInst::FCMP_TRUE)
 								    MIRBuilder.buildCopy(
 								        Res, getOrCreateVReg(*Constant::getAllOnesValue(CI->getType())));
-												GlobalISel: translate floating-point comparisons

llvm-svn: 279319

											
										
										
											2016-08-20 04:48:16 +08:00
+								  else
-												GlobalISel: move type information to MachineRegisterInfo.

We want each register to have a canonical type, which means the best place to
store this is in MachineRegisterInfo rather than on every MachineInstr that
happens to use or define that register.

Most changes following from this are pretty simple (you need an MRI anyway if
you're going to be doing any transformations, so just check the type there).
But legalization doesn't really want to check redundant operands (when, for
example, a G_ADD only ever has one type) so I've made use of MCInstrDesc's
operand type field to encode these constraints and limit legalization's work.

As an added bonus, more validation is possible, both in MachineVerifier and
MachineIRBuilder (coming soon).

llvm-svn: 281035

											
										
										
											2016-09-09 19:46:34 +08:00
+								    MIRBuilder.buildFCmp(Pred, Res, Op0, Op1);
-												GlobalISel: translate floating-point comparisons

llvm-svn: 279319

											
										
										
											2016-08-20 04:48:16 +08:00
-												GlobalISel: support irtranslation of icmp instructions.

llvm-svn: 278969

											
										
										
											2016-08-18 04:25:25 +08:00
+								  return true;
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
-												GlobalISel: support same ConstantExprs as Instructions.

It's more than just inttoptr, but the others can't be tested until we have
support for non-trivial constants (they currently get unavoidably folded to a
ConstantInt).

llvm-svn: 278303

											
										
										
											2016-08-11 07:02:41 +08:00
+								  const ReturnInst &RI = cast<ReturnInst>(U);
-												GlobalISel: make translate* functions take the most specialized class possible.

NFC.

llvm-svn: 277188

											
										
										
											2016-07-30 02:11:21 +08:00
+								  const Value *Ret = RI.getReturnValue();
-												[GlobalISel][IRTranslator] Fix crash during translation of zero sized loads/stores/args/returns.

This fixes PR35358.

rdar://35619533

Differential Revision: https://reviews.llvm.org/D40604

llvm-svn: 319465

											
										
										
											2017-12-01 04:06:02 +08:00
+								  if (Ret && DL->getTypeStoreSize(Ret->getType()) == 0)
 								    Ret = nullptr;
-												[GlobalISel] Rewrite CallLowering::lowerReturn to accept multiple VRegs per Value

This is logical continuation of https://reviews.llvm.org/D46018 (r332449)

Differential Revision: https://reviews.llvm.org/D49660

llvm-svn: 338685

											
										
										
											2018-08-02 16:33:31 +08:00
 								  ArrayRef<unsigned> VRegs;
 								  if (Ret)
 								    VRegs = getOrCreateVRegs(*Ret);
-												[GlobalISel] Teach the IRTranslator how to lower returns.

llvm-svn: 260562

											
										
										
											2016-02-12 02:53:28 +08:00
+								  // The target may change the insertion point, but
 								  // this is not important as a return is the last instruction
 								  // of the block anyway.
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
-												[GlobalISel] Rewrite CallLowering::lowerReturn to accept multiple VRegs per Value

This is logical continuation of https://reviews.llvm.org/D46018 (r332449)

Differential Revision: https://reviews.llvm.org/D49660

llvm-svn: 338685

											
										
										
											2018-08-02 16:33:31 +08:00
+								  return CLI->lowerReturn(MIRBuilder, Ret, VRegs);
-												[GlobalISel] Teach the IRTranslator how to lower returns.

llvm-svn: 260562

											
										
										
											2016-02-12 02:53:28 +08:00
+								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
-												GlobalISel: support same ConstantExprs as Instructions.

It's more than just inttoptr, but the others can't be tested until we have
support for non-trivial constants (they currently get unavoidably folded to a
ConstantInt).

llvm-svn: 278303

											
										
										
											2016-08-11 07:02:41 +08:00
+								  const BranchInst &BrInst = cast<BranchInst>(U);
-												GlobalISel: add generic conditional branch.

Just the basic equivalent to DAG's condbr for now, we'll get to things like
br_cc when we start doing more legalization.

llvm-svn: 277184

											
										
										
											2016-07-30 01:58:00 +08:00
+								  unsigned Succ = 0;
 								  if (!BrInst.isUnconditional()) {
 								    // We want a G_BRCOND to the true BB followed by an unconditional branch.
 								    unsigned Tst = getOrCreateVReg(*BrInst.getCondition());
 								    const BasicBlock &TrueTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ++));
-												[GlobalISel] Preserve IR block layout.

It makes the output function layout more predictable;  the layout has
an effect on performance, we don't want it to be at the mercy of the
translator's visitation order and such.
The predictable output is also easier to digest.

getOrCreateBB isn't appropriately named anymore, as it never needs to
create anything.  Rename it and extract the MBB creation logic out of it.

A couple tests were sensitive to the order. Update them.

llvm-svn: 297868

											
										
										
											2017-03-16 02:22:33 +08:00
+								    MachineBasicBlock &TrueBB = getMBB(TrueTgt);
-												GlobalISel: move type information to MachineRegisterInfo.

We want each register to have a canonical type, which means the best place to
store this is in MachineRegisterInfo rather than on every MachineInstr that
happens to use or define that register.

Most changes following from this are pretty simple (you need an MRI anyway if
you're going to be doing any transformations, so just check the type there).
But legalization doesn't really want to check redundant operands (when, for
example, a G_ADD only ever has one type) so I've made use of MCInstrDesc's
operand type field to encode these constraints and limit legalization's work.

As an added bonus, more validation is possible, both in MachineVerifier and
MachineIRBuilder (coming soon).

llvm-svn: 281035

											
										
										
											2016-09-09 19:46:34 +08:00
+								    MIRBuilder.buildBrCond(Tst, TrueBB);
-												[IRTranslator] Translate unconditional branches.

llvm-svn: 263265

											
										
										
											2016-03-12 01:28:03 +08:00
+								  }
-												GlobalISel: add generic conditional branch.

Just the basic equivalent to DAG's condbr for now, we'll get to things like
br_cc when we start doing more legalization.

llvm-svn: 277184

											
										
										
											2016-07-30 01:58:00 +08:00
 								  const BasicBlock &BrTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ));
-												[GlobalISel] Preserve IR block layout.

It makes the output function layout more predictable;  the layout has
an effect on performance, we don't want it to be at the mercy of the
translator's visitation order and such.
The predictable output is also easier to digest.

getOrCreateBB isn't appropriately named anymore, as it never needs to
create anything.  Rename it and extract the MBB creation logic out of it.

A couple tests were sensitive to the order. Update them.

llvm-svn: 297868

											
										
										
											2017-03-16 02:22:33 +08:00
+								  MachineBasicBlock &TgtBB = getMBB(BrTgt);
-												[GlobalISel] Don't translate br to layout successor.

MI can represent fallthrough to layout successor blocks, and our
post-isel representation uses that extensively.

We might as well use it too, to avoid translating and carrying along
unnecessary branches.

llvm-svn: 298459

											
										
										
											2017-03-22 07:42:50 +08:00
+								  MachineBasicBlock &CurBB = MIRBuilder.getMBB();
 								  // If the unconditional target is the layout successor, fallthrough.
 								  if (!CurBB.isLayoutSuccessor(&TgtBB))
 								    MIRBuilder.buildBr(TgtBB);
-												GlobalISel: add generic conditional branch.

Just the basic equivalent to DAG's condbr for now, we'll get to things like
br_cc when we start doing more legalization.

llvm-svn: 277184

											
										
										
											2016-07-30 01:58:00 +08:00
-												[IRTranslator] Translate unconditional branches.

llvm-svn: 263265

											
										
										
											2016-03-12 01:28:03 +08:00
+								  // Link successors.
-												[IR] Begin removal of TerminatorInst by removing successor manipulation.

The core get and set routines move to the `Instruction` class. These
routines are only valid to call on instructions which are terminators.

The iterator and *generic* range based access move to `CFG.h` where all
the other generic successor and predecessor access lives. While moving
the iterator here, simplify it using the iterator utilities LLVM
provides and updates coding style as much as reasonable. The APIs remain
pointer-heavy when they could better use references, and retain the odd
behavior of `operator*` and `operator->` that is common in LLVM
iterators. Adjusting this API, if desired, should be a follow-up step.

Non-generic range iteration is added for the two instructions where
there is an especially easy mechanism and where there was code
attempting to use the range accessor from a specific subclass:
`indirectbr` and `br`. In both cases, the successors are contiguous
operands and can be easily iterated via the operand list.

This is the first major patch in removing the `TerminatorInst` type from
the IR's instruction type hierarchy. This change was discussed in an RFC
here and was pretty clearly positive:
http://lists.llvm.org/pipermail/llvm-dev/2018-May/123407.html

There will be a series of much more mechanical changes following this
one to complete this move.

Differential Revision: https://reviews.llvm.org/D47467

llvm-svn: 340698

											
										
										
											2018-08-26 16:41:15 +08:00
+								  for (const BasicBlock *Succ : successors(&BrInst))
-												[GlobalISel] Preserve IR block layout.

It makes the output function layout more predictable;  the layout has
an effect on performance, we don't want it to be at the mercy of the
translator's visitation order and such.
The predictable output is also easier to digest.

getOrCreateBB isn't appropriately named anymore, as it never needs to
create anything.  Rename it and extract the MBB creation logic out of it.

A couple tests were sensitive to the order. Update them.

llvm-svn: 297868

											
										
										
											2017-03-16 02:22:33 +08:00
+								    CurBB.addSuccessor(&getMBB(*Succ));
-												[IRTranslator] Translate unconditional branches.

llvm-svn: 263265

											
										
										
											2016-03-12 01:28:03 +08:00
+								  return true;
 								}
-												[GlobalISel] Add support for switch statements

This commit does this using a trivial chain of conditional branches.  In the
future, we probably want to reuse the optimized switch lowering used in
SelectionDAG.

Differential Revision: https://reviews.llvm.org/D28176

llvm-svn: 291099

											
										
										
											2017-01-05 19:28:51 +08:00
+								bool IRTranslator::translateSwitch(const User &U,
 								                                   MachineIRBuilder &MIRBuilder) {
 								  // For now, just translate as a chain of conditional branches.
 								  // FIXME: could we share most of the logic/code in
 								  // SelectionDAGBuilder::visitSwitch between SelectionDAG and GlobalISel?
 								  // At first sight, it seems most of the logic in there is independent of
 								  // SelectionDAG-specifics and a lot of work went in to optimize switch
 								  // lowering in there.
 								  const SwitchInst &SwInst = cast<SwitchInst>(U);
 								  const unsigned SwCondValue = getOrCreateVReg(*SwInst.getCondition());
-												[GlobalISel] track predecessor mapping during switch lowering.

Correctly populating Machine PHIs relies on knowing exactly how the IR level
CFG was lowered to MachineIR. This needs to be tracked by any translation
phases that meddle (currently only SwitchInst handling).

This reapplies r291973 which was reverted because of testing failures. Fixes:

 + Don't return an ArrayRef to a local temporary.
 + Incorporate Kristof's suggested comment improvements.

llvm-svn: 292278

											
										
										
											2017-01-18 06:13:50 +08:00
+								  const BasicBlock *OrigBB = SwInst.getParent();
-												[GlobalISel] Add support for switch statements

This commit does this using a trivial chain of conditional branches.  In the
future, we probably want to reuse the optimized switch lowering used in
SelectionDAG.

Differential Revision: https://reviews.llvm.org/D28176

llvm-svn: 291099

											
										
										
											2017-01-05 19:28:51 +08:00
-												Recommit: [globalisel] Change LLT constructor string into an LLT-based object that knows how to generate it.

Summary:
This will allow future patches to inspect the details of the LLT. The implementation is now split between
the Support and CodeGen libraries to allow TableGen to use this class without introducing layering concerns.

Thanks to Ahmed Bougacha for finding a reasonable way to avoid the layering issue and providing the version of this patch without that problem.

The problem with the previous commit appears to have been that TableGen was including CodeGen/LowLevelType.h instead of Support/LowLevelTypeImpl.h.

Reviewers: t.p.northover, qcolombet, rovka, aditya_nandakumar, ab, javed.absar

Subscribers: arsenm, nhaehnle, mgorny, dberris, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30046

llvm-svn: 297241

											
										
										
											2017-03-08 07:20:35 +08:00
+								  LLT LLTi1 = getLLTForType(*Type::getInt1Ty(U.getContext()), *DL);
-												[GlobalISel] Add support for switch statements

This commit does this using a trivial chain of conditional branches.  In the
future, we probably want to reuse the optimized switch lowering used in
SelectionDAG.

Differential Revision: https://reviews.llvm.org/D28176

llvm-svn: 291099

											
										
										
											2017-01-05 19:28:51 +08:00
+								  for (auto &CaseIt : SwInst.cases()) {
 								    const unsigned CaseValueReg = getOrCreateVReg(*CaseIt.getCaseValue());
 								    const unsigned Tst = MRI->createGenericVirtualRegister(LLTi1);
 								    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, Tst, CaseValueReg, SwCondValue);
-												[GlobalISel] track predecessor mapping during switch lowering.

Correctly populating Machine PHIs relies on knowing exactly how the IR level
CFG was lowered to MachineIR. This needs to be tracked by any translation
phases that meddle (currently only SwitchInst handling).

This reapplies r291973 which was reverted because of testing failures. Fixes:

 + Don't return an ArrayRef to a local temporary.
 + Incorporate Kristof's suggested comment improvements.

llvm-svn: 292278

											
										
										
											2017-01-18 06:13:50 +08:00
+								    MachineBasicBlock &CurMBB = MIRBuilder.getMBB();
 								    const BasicBlock *TrueBB = CaseIt.getCaseSuccessor();
-												[GlobalISel] Preserve IR block layout.

It makes the output function layout more predictable;  the layout has
an effect on performance, we don't want it to be at the mercy of the
translator's visitation order and such.
The predictable output is also easier to digest.

getOrCreateBB isn't appropriately named anymore, as it never needs to
create anything.  Rename it and extract the MBB creation logic out of it.

A couple tests were sensitive to the order. Update them.

llvm-svn: 297868

											
										
										
											2017-03-16 02:22:33 +08:00
+								    MachineBasicBlock &TrueMBB = getMBB(*TrueBB);
-												[GlobalISel] Add support for switch statements

This commit does this using a trivial chain of conditional branches.  In the
future, we probably want to reuse the optimized switch lowering used in
SelectionDAG.

Differential Revision: https://reviews.llvm.org/D28176

llvm-svn: 291099

											
										
										
											2017-01-05 19:28:51 +08:00
-												[GlobalISel] track predecessor mapping during switch lowering.

Correctly populating Machine PHIs relies on knowing exactly how the IR level
CFG was lowered to MachineIR. This needs to be tracked by any translation
phases that meddle (currently only SwitchInst handling).

This reapplies r291973 which was reverted because of testing failures. Fixes:

 + Don't return an ArrayRef to a local temporary.
 + Incorporate Kristof's suggested comment improvements.

llvm-svn: 292278

											
										
										
											2017-01-18 06:13:50 +08:00
+								    MIRBuilder.buildBrCond(Tst, TrueMBB);
 								    CurMBB.addSuccessor(&TrueMBB);
 								    addMachineCFGPred({OrigBB, TrueBB}, &CurMBB);
-												[GlobalISel] Add support for switch statements

This commit does this using a trivial chain of conditional branches.  In the
future, we probably want to reuse the optimized switch lowering used in
SelectionDAG.

Differential Revision: https://reviews.llvm.org/D28176

llvm-svn: 291099

											
										
										
											2017-01-05 19:28:51 +08:00
-												[GlobalISel] track predecessor mapping during switch lowering.

Correctly populating Machine PHIs relies on knowing exactly how the IR level
CFG was lowered to MachineIR. This needs to be tracked by any translation
phases that meddle (currently only SwitchInst handling).

This reapplies r291973 which was reverted because of testing failures. Fixes:

 + Don't return an ArrayRef to a local temporary.
 + Incorporate Kristof's suggested comment improvements.

llvm-svn: 292278

											
										
										
											2017-01-18 06:13:50 +08:00
+								    MachineBasicBlock *FalseMBB =
-												[GlobalISel] Add support for switch statements

This commit does this using a trivial chain of conditional branches.  In the
future, we probably want to reuse the optimized switch lowering used in
SelectionDAG.

Differential Revision: https://reviews.llvm.org/D28176

llvm-svn: 291099

											
										
										
											2017-01-05 19:28:51 +08:00
+								        MF->CreateMachineBasicBlock(SwInst.getParent());
-												[GlobalISel] Insert translated switch icmp blocks after switch parent.

Now that we preserve the IR layout, we would end up with all the newly
synthesized switch comparison blocks at the end of the function.
Instead, use a hopefully more reasonable layout, with the comparison
blocks immediately following the switch's parent block.

llvm-svn: 297869

											
										
										
											2017-03-16 02:22:37 +08:00
+								    // Insert the comparison blocks one after the other.
 								    MF->insert(std::next(CurMBB.getIterator()), FalseMBB);
-												[GlobalISel] track predecessor mapping during switch lowering.

Correctly populating Machine PHIs relies on knowing exactly how the IR level
CFG was lowered to MachineIR. This needs to be tracked by any translation
phases that meddle (currently only SwitchInst handling).

This reapplies r291973 which was reverted because of testing failures. Fixes:

 + Don't return an ArrayRef to a local temporary.
 + Incorporate Kristof's suggested comment improvements.

llvm-svn: 292278

											
										
										
											2017-01-18 06:13:50 +08:00
+								    MIRBuilder.buildBr(*FalseMBB);
 								    CurMBB.addSuccessor(FalseMBB);
-												[GlobalISel] Add support for switch statements

This commit does this using a trivial chain of conditional branches.  In the
future, we probably want to reuse the optimized switch lowering used in
SelectionDAG.

Differential Revision: https://reviews.llvm.org/D28176

llvm-svn: 291099

											
										
										
											2017-01-05 19:28:51 +08:00
-												[GlobalISel] track predecessor mapping during switch lowering.

Correctly populating Machine PHIs relies on knowing exactly how the IR level
CFG was lowered to MachineIR. This needs to be tracked by any translation
phases that meddle (currently only SwitchInst handling).

This reapplies r291973 which was reverted because of testing failures. Fixes:

 + Don't return an ArrayRef to a local temporary.
 + Incorporate Kristof's suggested comment improvements.

llvm-svn: 292278

											
										
										
											2017-01-18 06:13:50 +08:00
+								    MIRBuilder.setMBB(*FalseMBB);
-												[GlobalISel] Add support for switch statements

This commit does this using a trivial chain of conditional branches.  In the
future, we probably want to reuse the optimized switch lowering used in
SelectionDAG.

Differential Revision: https://reviews.llvm.org/D28176

llvm-svn: 291099

											
										
										
											2017-01-05 19:28:51 +08:00
+								  }
 								  // handle default case
-												[GlobalISel] track predecessor mapping during switch lowering.

Correctly populating Machine PHIs relies on knowing exactly how the IR level
CFG was lowered to MachineIR. This needs to be tracked by any translation
phases that meddle (currently only SwitchInst handling).

This reapplies r291973 which was reverted because of testing failures. Fixes:

 + Don't return an ArrayRef to a local temporary.
 + Incorporate Kristof's suggested comment improvements.

llvm-svn: 292278

											
										
										
											2017-01-18 06:13:50 +08:00
+								  const BasicBlock *DefaultBB = SwInst.getDefaultDest();
-												[GlobalISel] Preserve IR block layout.

It makes the output function layout more predictable;  the layout has
an effect on performance, we don't want it to be at the mercy of the
translator's visitation order and such.
The predictable output is also easier to digest.

getOrCreateBB isn't appropriately named anymore, as it never needs to
create anything.  Rename it and extract the MBB creation logic out of it.

A couple tests were sensitive to the order. Update them.

llvm-svn: 297868

											
										
										
											2017-03-16 02:22:33 +08:00
+								  MachineBasicBlock &DefaultMBB = getMBB(*DefaultBB);
-												[GlobalISel] track predecessor mapping during switch lowering.

Correctly populating Machine PHIs relies on knowing exactly how the IR level
CFG was lowered to MachineIR. This needs to be tracked by any translation
phases that meddle (currently only SwitchInst handling).

This reapplies r291973 which was reverted because of testing failures. Fixes:

 + Don't return an ArrayRef to a local temporary.
 + Incorporate Kristof's suggested comment improvements.

llvm-svn: 292278

											
										
										
											2017-01-18 06:13:50 +08:00
+								  MIRBuilder.buildBr(DefaultMBB);
 								  MachineBasicBlock &CurMBB = MIRBuilder.getMBB();
 								  CurMBB.addSuccessor(&DefaultMBB);
 								  addMachineCFGPred({OrigBB, DefaultBB}, &CurMBB);
-												[GlobalISel] Add support for switch statements

This commit does this using a trivial chain of conditional branches.  In the
future, we probably want to reuse the optimized switch lowering used in
SelectionDAG.

Differential Revision: https://reviews.llvm.org/D28176

llvm-svn: 291099

											
										
										
											2017-01-05 19:28:51 +08:00
 								  return true;
 								}
-												[GlobalISel] Add support for indirectbr

Differential Revision: https://reviews.llvm.org/D28079

llvm-svn: 293470

											
										
										
											2017-01-30 17:13:18 +08:00
+								bool IRTranslator::translateIndirectBr(const User &U,
 								                                       MachineIRBuilder &MIRBuilder) {
 								  const IndirectBrInst &BrInst = cast<IndirectBrInst>(U);
 								  const unsigned Tgt = getOrCreateVReg(*BrInst.getAddress());
 								  MIRBuilder.buildBrIndirect(Tgt);
 								  // Link successors.
 								  MachineBasicBlock &CurBB = MIRBuilder.getMBB();
-												[IR] Begin removal of TerminatorInst by removing successor manipulation.

The core get and set routines move to the `Instruction` class. These
routines are only valid to call on instructions which are terminators.

The iterator and *generic* range based access move to `CFG.h` where all
the other generic successor and predecessor access lives. While moving
the iterator here, simplify it using the iterator utilities LLVM
provides and updates coding style as much as reasonable. The APIs remain
pointer-heavy when they could better use references, and retain the odd
behavior of `operator*` and `operator->` that is common in LLVM
iterators. Adjusting this API, if desired, should be a follow-up step.

Non-generic range iteration is added for the two instructions where
there is an especially easy mechanism and where there was code
attempting to use the range accessor from a specific subclass:
`indirectbr` and `br`. In both cases, the successors are contiguous
operands and can be easily iterated via the operand list.

This is the first major patch in removing the `TerminatorInst` type from
the IR's instruction type hierarchy. This change was discussed in an RFC
here and was pretty clearly positive:
http://lists.llvm.org/pipermail/llvm-dev/2018-May/123407.html

There will be a series of much more mechanical changes following this
one to complete this move.

Differential Revision: https://reviews.llvm.org/D47467

llvm-svn: 340698

											
										
										
											2018-08-26 16:41:15 +08:00
+								  for (const BasicBlock *Succ : successors(&BrInst))
-												[GlobalISel] Preserve IR block layout.

It makes the output function layout more predictable;  the layout has
an effect on performance, we don't want it to be at the mercy of the
translator's visitation order and such.
The predictable output is also easier to digest.

getOrCreateBB isn't appropriately named anymore, as it never needs to
create anything.  Rename it and extract the MBB creation logic out of it.

A couple tests were sensitive to the order. Update them.

llvm-svn: 297868

											
										
										
											2017-03-16 02:22:33 +08:00
+								    CurBB.addSuccessor(&getMBB(*Succ));
-												[GlobalISel] Add support for indirectbr

Differential Revision: https://reviews.llvm.org/D28079

llvm-svn: 293470

											
										
										
											2017-01-30 17:13:18 +08:00
 								  return true;
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
-												GlobalISel: support same ConstantExprs as Instructions.

It's more than just inttoptr, but the others can't be tested until we have
support for non-trivial constants (they currently get unavoidably folded to a
ConstantInt).

llvm-svn: 278303

											
										
										
											2016-08-11 07:02:41 +08:00
+								  const LoadInst &LI = cast<LoadInst>(U);
-												[IRTranslator] Do not abort when the target wants to fall back.

Every pass in the GlobalISel pipeline will need to do something similar.

llvm-svn: 279886

											
										
										
											2016-08-27 07:49:05 +08:00
-												GlobalISel: support translating volatile loads and stores.

llvm-svn: 284603

											
										
										
											2016-10-19 23:55:06 +08:00
+								  auto Flags = LI.isVolatile() ? MachineMemOperand::MOVolatile
 								                               : MachineMemOperand::MONone;
 								  Flags |= MachineMemOperand::MOLoad;
-												GlobalISel: add generic load and store instructions.

Pretty straightforward, the only oddity is the MachineMemOperand (which it's
surprisingly difficult to share code for).

llvm-svn: 276799

											
										
										
											2016-07-27 04:23:26 +08:00
-												[GlobalISel][IRTranslator] Fix crash during translation of zero sized loads/stores/args/returns.

This fixes PR35358.

rdar://35619533

Differential Revision: https://reviews.llvm.org/D40604

llvm-svn: 319465

											
										
										
											2017-12-01 04:06:02 +08:00
+								  if (DL->getTypeStoreSize(LI.getType()) == 0)
 								    return true;
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  ArrayRef<unsigned> Regs = getOrCreateVRegs(LI);
 								  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(LI);
 								  unsigned Base = getOrCreateVReg(*LI.getPointerOperand());
 								  for (unsigned i = 0; i < Regs.size(); ++i) {
 								    unsigned Addr = 0;
 								    MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offsets[i] / 8);
 								    MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8);
 								    unsigned BaseAlign = getMemOpAlignment(LI);
 								    auto MMO = MF->getMachineMemOperand(
 								        Ptr, Flags, (MRI->getType(Regs[i]).getSizeInBits() + 7) / 8,
 								        MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), nullptr,
 								        LI.getSyncScopeID(), LI.getOrdering());
 								    MIRBuilder.buildLoad(Regs[i], Addr, *MMO);
 								  }
-												Recommit: [globalisel] Change LLT constructor string into an LLT-based object that knows how to generate it.

Summary:
This will allow future patches to inspect the details of the LLT. The implementation is now split between
the Support and CodeGen libraries to allow TableGen to use this class without introducing layering concerns.

Thanks to Ahmed Bougacha for finding a reasonable way to avoid the layering issue and providing the version of this patch without that problem.

The problem with the previous commit appears to have been that TableGen was including CodeGen/LowLevelType.h instead of Support/LowLevelTypeImpl.h.

Reviewers: t.p.northover, qcolombet, rovka, aditya_nandakumar, ab, javed.absar

Subscribers: arsenm, nhaehnle, mgorny, dberris, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30046

llvm-svn: 297241

											
										
										
											2017-03-08 07:20:35 +08:00
-												GlobalISel: add generic load and store instructions.

Pretty straightforward, the only oddity is the MachineMemOperand (which it's
surprisingly difficult to share code for).

llvm-svn: 276799

											
										
										
											2016-07-27 04:23:26 +08:00
+								  return true;
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
-												GlobalISel: support same ConstantExprs as Instructions.

It's more than just inttoptr, but the others can't be tested until we have
support for non-trivial constants (they currently get unavoidably folded to a
ConstantInt).

llvm-svn: 278303

											
										
										
											2016-08-11 07:02:41 +08:00
+								  const StoreInst &SI = cast<StoreInst>(U);
-												GlobalISel: support translating volatile loads and stores.

llvm-svn: 284603

											
										
										
											2016-10-19 23:55:06 +08:00
+								  auto Flags = SI.isVolatile() ? MachineMemOperand::MOVolatile
 								                               : MachineMemOperand::MONone;
 								  Flags |= MachineMemOperand::MOStore;
-												GlobalISel: add generic load and store instructions.

Pretty straightforward, the only oddity is the MachineMemOperand (which it's
surprisingly difficult to share code for).

llvm-svn: 276799

											
										
										
											2016-07-27 04:23:26 +08:00
-												[GlobalISel][IRTranslator] Fix crash during translation of zero sized loads/stores/args/returns.

This fixes PR35358.

rdar://35619533

Differential Revision: https://reviews.llvm.org/D40604

llvm-svn: 319465

											
										
										
											2017-12-01 04:06:02 +08:00
+								  if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0)
 								    return true;
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  ArrayRef<unsigned> Vals = getOrCreateVRegs(*SI.getValueOperand());
 								  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*SI.getValueOperand());
 								  unsigned Base = getOrCreateVReg(*SI.getPointerOperand());
 								  for (unsigned i = 0; i < Vals.size(); ++i) {
 								    unsigned Addr = 0;
 								    MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offsets[i] / 8);
 								    MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8);
 								    unsigned BaseAlign = getMemOpAlignment(SI);
 								    auto MMO = MF->getMachineMemOperand(
 								        Ptr, Flags, (MRI->getType(Vals[i]).getSizeInBits() + 7) / 8,
 								        MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), nullptr,
 								        SI.getSyncScopeID(), SI.getOrdering());
 								    MIRBuilder.buildStore(Vals[i], Addr, *MMO);
 								  }
-												GlobalISel: add generic load and store instructions.

Pretty straightforward, the only oddity is the MachineMemOperand (which it's
surprisingly difficult to share code for).

llvm-svn: 276799

											
										
										
											2016-07-27 04:23:26 +08:00
+								  return true;
 								}
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								static uint64_t getOffsetFromIndices(const User &U, const DataLayout &DL) {
-												GlobalISel: fix insert/extract to work on ConstantExprs too.

No tests yet unfortunately (ConstantFolding reduces all supported constants to
ConstantInts before we get to translation). Soon.

llvm-svn: 279308

											
										
										
											2016-08-20 04:09:03 +08:00
+								  const Value *Src = U.getOperand(0);
 								  Type *Int32Ty = Type::getInt32Ty(U.getContext());
-												[GlobalISel] IRTranslator: Translate ConstantStruct

Reviewers: qcolombet, ab, t.p.northover, aditya_nandakumar, dsanders

Reviewed By: qcolombet

Subscribers: rovka, kristof.beyls, javed.absar, igorb, llvm-commits

Differential Revision: https://reviews.llvm.org/D33317

llvm-svn: 303412

											
										
										
											2017-05-19 17:47:02 +08:00
-												GlobalISel: support translation of extractvalue instructions.

llvm-svn: 279285

											
										
										
											2016-08-20 01:47:05 +08:00
+								  // getIndexedOffsetInType is designed for GEPs, so the first index is the
 								  // usual array element rather than looking into the actual aggregate.
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  SmallVector<Value *, 1> Indices;
-												GlobalISel: support translation of extractvalue instructions.

llvm-svn: 279285

											
										
										
											2016-08-20 01:47:05 +08:00
+								  Indices.push_back(ConstantInt::get(Int32Ty, 0));
-												GlobalISel: fix insert/extract to work on ConstantExprs too.

No tests yet unfortunately (ConstantFolding reduces all supported constants to
ConstantInts before we get to translation). Soon.

llvm-svn: 279308

											
										
										
											2016-08-20 04:09:03 +08:00
 								  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&U)) {
 								    for (auto Idx : EVI->indices())
 								      Indices.push_back(ConstantInt::get(Int32Ty, Idx));
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&U)) {
 								    for (auto Idx : IVI->indices())
 								      Indices.push_back(ConstantInt::get(Int32Ty, Idx));
-												GlobalISel: fix insert/extract to work on ConstantExprs too.

No tests yet unfortunately (ConstantFolding reduces all supported constants to
ConstantInts before we get to translation). Soon.

llvm-svn: 279308

											
										
										
											2016-08-20 04:09:03 +08:00
+								  } else {
 								    for (unsigned i = 1; i < U.getNumOperands(); ++i)
 								      Indices.push_back(U.getOperand(i));
 								  }
-												GlobalISel: support translation of extractvalue instructions.

llvm-svn: 279285

											
										
										
											2016-08-20 01:47:05 +08:00
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  return 8 * static_cast<uint64_t>(
 								                 DL.getIndexedOffsetInType(Src->getType(), Indices));
 								}
-												GlobalISel: support translation of extractvalue instructions.

llvm-svn: 279285

											
										
										
											2016-08-20 01:47:05 +08:00
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								bool IRTranslator::translateExtractValue(const User &U,
 								                                         MachineIRBuilder &MIRBuilder) {
 								  const Value *Src = U.getOperand(0);
 								  uint64_t Offset = getOffsetFromIndices(U, *DL);
 								  ArrayRef<unsigned> SrcRegs = getOrCreateVRegs(*Src);
 								  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*Src);
 								  unsigned Idx = std::lower_bound(Offsets.begin(), Offsets.end(), Offset) -
 								                 Offsets.begin();
 								  auto &DstRegs = allocateVRegs(U);
 								  for (unsigned i = 0; i < DstRegs.size(); ++i)
 								    DstRegs[i] = SrcRegs[Idx++];
-												GlobalISel: support translation of extractvalue instructions.

llvm-svn: 279285

											
										
										
											2016-08-20 01:47:05 +08:00
 								  return true;
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateInsertValue(const User &U,
 								                                        MachineIRBuilder &MIRBuilder) {
-												GlobalISel: fix insert/extract to work on ConstantExprs too.

No tests yet unfortunately (ConstantFolding reduces all supported constants to
ConstantInts before we get to translation). Soon.

llvm-svn: 279308

											
										
										
											2016-08-20 04:09:03 +08:00
+								  const Value *Src = U.getOperand(0);
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  uint64_t Offset = getOffsetFromIndices(U, *DL);
 								  auto &DstRegs = allocateVRegs(U);
 								  ArrayRef<uint64_t> DstOffsets = *VMap.getOffsets(U);
 								  ArrayRef<unsigned> SrcRegs = getOrCreateVRegs(*Src);
 								  ArrayRef<unsigned> InsertedRegs = getOrCreateVRegs(*U.getOperand(1));
 								  auto InsertedIt = InsertedRegs.begin();
 								  for (unsigned i = 0; i < DstRegs.size(); ++i) {
 								    if (DstOffsets[i] >= Offset && InsertedIt != InsertedRegs.end())
 								      DstRegs[i] = *InsertedIt++;
 								    else
 								      DstRegs[i] = SrcRegs[i];
-												GlobalISel: fix insert/extract to work on ConstantExprs too.

No tests yet unfortunately (ConstantFolding reduces all supported constants to
ConstantInts before we get to translation). Soon.

llvm-svn: 279308

											
										
										
											2016-08-20 04:09:03 +08:00
+								  }
-												GlobalISel: translate insertvalue instructions.

This adds a G_INSERT instruction, which technically makes G_SEQUENCE redundant
(it's equivalent to a G_INSERT into an IMPLICIT_DEF). We'll leave G_SEQUENCE
for now though: it's likely to be far more common as it's a fundamental part of
legalization, so avoiding the mess and bloat of the extra IMPLICIT_DEFs is
probably worthwhile.

llvm-svn: 279306

											
										
										
											2016-08-20 04:08:55 +08:00
 								  return true;
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateSelect(const User &U,
 								                                   MachineIRBuilder &MIRBuilder) {
-												[GlobalISel] Remove non-determinism from IRTranslator.

This showed up in r300535/r300537, which were reverted in r300538 due to
some of the introduced tests in there failing on some bots, due to the
non-determinism fixed in this commit.

Re-committing r300535/r300537 will add 2 tests for the change in this
commit.

llvm-svn: 300663

											
										
										
											2017-04-19 14:38:37 +08:00
+								  unsigned Tst = getOrCreateVReg(*U.getOperand(0));
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  ArrayRef<unsigned> ResRegs = getOrCreateVRegs(U);
 								  ArrayRef<unsigned> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
 								  ArrayRef<unsigned> Op1Regs = getOrCreateVRegs(*U.getOperand(2));
 								  for (unsigned i = 0; i < ResRegs.size(); ++i)
 								    MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i]);
-												GlobalISel: support translating select instructions.

llvm-svn: 279309

											
										
										
											2016-08-20 04:09:07 +08:00
+								  return true;
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateBitCast(const User &U,
 								                                    MachineIRBuilder &MIRBuilder) {
-												[GlobalISel] Avoid invalidating ValToVReg when translating no-op bitcast.

When we translate a no-op (same type) bitcast, we try to be clever and
only emit a COPY if we already assigned a vreg to the defined value.
However, when we didn't, we tried to assign to a reference into the
ValToVReg DenseMap, even though the RHS of the assignment
(getOrCreateVReg) could potentially grow that DenseMap, invalidating the
reference.

Avoid that by getting the source vreg first.
I audited the rest of the translator; this is the only tricky case.

The test is quite unwieldy, as the problem is caused by the DenseMap
growing, which happens after the 47th mapped value.

llvm-svn: 297208

											
										
										
											2017-03-08 04:53:06 +08:00
+								  // If we're bitcasting to the source type, we can reuse the source vreg.
-												Recommit: [globalisel] Change LLT constructor string into an LLT-based object that knows how to generate it.

Summary:
This will allow future patches to inspect the details of the LLT. The implementation is now split between
the Support and CodeGen libraries to allow TableGen to use this class without introducing layering concerns.

Thanks to Ahmed Bougacha for finding a reasonable way to avoid the layering issue and providing the version of this patch without that problem.

The problem with the previous commit appears to have been that TableGen was including CodeGen/LowLevelType.h instead of Support/LowLevelTypeImpl.h.

Reviewers: t.p.northover, qcolombet, rovka, aditya_nandakumar, ab, javed.absar

Subscribers: arsenm, nhaehnle, mgorny, dberris, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30046

llvm-svn: 297241

											
										
										
											2017-03-08 07:20:35 +08:00
+								  if (getLLTForType(*U.getOperand(0)->getType(), *DL) ==
 								      getLLTForType(*U.getType(), *DL)) {
-												[GlobalISel] Avoid invalidating ValToVReg when translating no-op bitcast.

When we translate a no-op (same type) bitcast, we try to be clever and
only emit a COPY if we already assigned a vreg to the defined value.
However, when we didn't, we tried to assign to a reference into the
ValToVReg DenseMap, even though the RHS of the assignment
(getOrCreateVReg) could potentially grow that DenseMap, invalidating the
reference.

Avoid that by getting the source vreg first.
I audited the rest of the translator; this is the only tricky case.

The test is quite unwieldy, as the problem is caused by the DenseMap
growing, which happens after the 47th mapped value.

llvm-svn: 297208

											
										
										
											2017-03-08 04:53:06 +08:00
+								    unsigned SrcReg = getOrCreateVReg(*U.getOperand(0));
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								    auto &Regs = *VMap.getVRegs(U);
-												[GlobalISel] Avoid invalidating ValToVReg when translating no-op bitcast.

When we translate a no-op (same type) bitcast, we try to be clever and
only emit a COPY if we already assigned a vreg to the defined value.
However, when we didn't, we tried to assign to a reference into the
ValToVReg DenseMap, even though the RHS of the assignment
(getOrCreateVReg) could potentially grow that DenseMap, invalidating the
reference.

Avoid that by getting the source vreg first.
I audited the rest of the translator; this is the only tricky case.

The test is quite unwieldy, as the problem is caused by the DenseMap
growing, which happens after the 47th mapped value.

llvm-svn: 297208

											
										
										
											2017-03-08 04:53:06 +08:00
+								    // If we already assigned a vreg for this bitcast, we can't change that.
 								    // Emit a copy to satisfy the users we already emitted.
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								    if (!Regs.empty())
 								      MIRBuilder.buildCopy(Regs[0], SrcReg);
 								    else {
 								      Regs.push_back(SrcReg);
 								      VMap.getOffsets(U)->push_back(0);
 								    }
-												GlobalISel: add generic casts to IRTranslator

This adds LLVM's 3 main cast instructions (inttoptr, ptrtoint, bitcast) to the
IRTranslator. The first two are direct translations (with 2 MachineInstr types
each). Since LLT discards information, a bitcast might become trivial and we
emit a COPY in those cases instead.

llvm-svn: 276690

											
										
										
											2016-07-26 05:01:29 +08:00
+								    return true;
 								  }
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								  return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder);
-												GlobalISel: add generic casts to IRTranslator

This adds LLVM's 3 main cast instructions (inttoptr, ptrtoint, bitcast) to the
IRTranslator. The first two are direct translations (with 2 MachineInstr types
each). Since LLT discards information, a bitcast might become trivial and we
emit a COPY in those cases instead.

llvm-svn: 276690

											
										
										
											2016-07-26 05:01:29 +08:00
+								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateCast(unsigned Opcode, const User &U,
 								                                 MachineIRBuilder &MIRBuilder) {
-												GlobalISel: support same ConstantExprs as Instructions.

It's more than just inttoptr, but the others can't be tested until we have
support for non-trivial constants (they currently get unavoidably folded to a
ConstantInt).

llvm-svn: 278303

											
										
										
											2016-08-11 07:02:41 +08:00
+								  unsigned Op = getOrCreateVReg(*U.getOperand(0));
 								  unsigned Res = getOrCreateVReg(U);
-												GlobalISel: move type information to MachineRegisterInfo.

We want each register to have a canonical type, which means the best place to
store this is in MachineRegisterInfo rather than on every MachineInstr that
happens to use or define that register.

Most changes following from this are pretty simple (you need an MRI anyway if
you're going to be doing any transformations, so just check the type there).
But legalization doesn't really want to check redundant operands (when, for
example, a G_ADD only ever has one type) so I've made use of MCInstrDesc's
operand type field to encode these constraints and limit legalization's work.

As an added bonus, more validation is possible, both in MachineVerifier and
MachineIRBuilder (coming soon).

llvm-svn: 281035

											
										
										
											2016-09-09 19:46:34 +08:00
+								  MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op);
-												GlobalISel: add generic casts to IRTranslator

This adds LLVM's 3 main cast instructions (inttoptr, ptrtoint, bitcast) to the
IRTranslator. The first two are direct translations (with 2 MachineInstr types
each). Since LLT discards information, a bitcast might become trivial and we
emit a COPY in those cases instead.

llvm-svn: 276690

											
										
										
											2016-07-26 05:01:29 +08:00
+								  return true;
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateGetElementPtr(const User &U,
 								                                          MachineIRBuilder &MIRBuilder) {
-												GlobalISel: translate GEP instructions.

Unlike SDag, we use a separate G_GEP instruction (much simplified, only taking
a single byte offset) to preserve the pointer type information through
selection.

llvm-svn: 281205

											
										
										
											2016-09-12 19:20:22 +08:00
+								  // FIXME: support vector GEPs.
 								  if (U.getType()->isVectorTy())
 								    return false;
 								  Value &Op0 = *U.getOperand(0);
 								  unsigned BaseReg = getOrCreateVReg(Op0);
-												[GlobalISel] Avoid translating synthetic constants to new G_CONSTANTS.

Currently, we create a G_CONSTANT for every "synthetic" integer
constant operand (for instance, for the G_GEP offset).
Instead, share the G_CONSTANTs we might have created by going through
the ValueToVReg machinery.

When we're emitting synthetic constants, we do need to get Constants from
the context.  One could argue that we shouldn't modify the context at
all (for instance, this means that we're going to use a tad more memory
if the constant wasn't used elsewhere), but constants are mostly
harmless.  We currently do this for extractvalue and all.

For constant fcmp, this does mean we'll emit an extra COPY, which is not
necessarily more optimal than an extra materialized constant.
But that preserves the current intended design of uniqued G_CONSTANTs,
and the rematerialization problem exists elsewhere and should be
resolved with a single coherent solution.

llvm-svn: 297875

											
										
										
											2017-03-16 03:21:11 +08:00
+								  Type *PtrIRTy = Op0.getType();
 								  LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
 								  Type *OffsetIRTy = DL->getIntPtrType(PtrIRTy);
 								  LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
-												GlobalISel: translate GEP instructions.

Unlike SDag, we use a separate G_GEP instruction (much simplified, only taking
a single byte offset) to preserve the pointer type information through
selection.

llvm-svn: 281205

											
										
										
											2016-09-12 19:20:22 +08:00
 								  int64_t Offset = 0;
 								  for (gep_type_iterator GTI = gep_type_begin(&U), E = gep_type_end(&U);
 								       GTI != E; ++GTI) {
 								    const Value *Idx = GTI.getOperand();
-												Fix GlobalISel build.

llvm-svn: 288460

											
										
										
											2016-12-02 10:55:30 +08:00
+								    if (StructType *StTy = GTI.getStructTypeOrNull()) {
-												GlobalISel: translate GEP instructions.

Unlike SDag, we use a separate G_GEP instruction (much simplified, only taking
a single byte offset) to preserve the pointer type information through
selection.

llvm-svn: 281205

											
										
										
											2016-09-12 19:20:22 +08:00
+								      unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
 								      Offset += DL->getStructLayout(StTy)->getElementOffset(Field);
 								      continue;
 								    } else {
 								      uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
 								      // If this is a scalar constant or a splat vector of constants,
 								      // handle it quickly.
 								      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
 								        Offset += ElementSize * CI->getSExtValue();
 								        continue;
 								      }
 								      if (Offset != 0) {
 								        unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
-												[GlobalISel] Avoid translating synthetic constants to new G_CONSTANTS.

Currently, we create a G_CONSTANT for every "synthetic" integer
constant operand (for instance, for the G_GEP offset).
Instead, share the G_CONSTANTs we might have created by going through
the ValueToVReg machinery.

When we're emitting synthetic constants, we do need to get Constants from
the context.  One could argue that we shouldn't modify the context at
all (for instance, this means that we're going to use a tad more memory
if the constant wasn't used elsewhere), but constants are mostly
harmless.  We currently do this for extractvalue and all.

For constant fcmp, this does mean we'll emit an extra COPY, which is not
necessarily more optimal than an extra materialized constant.
But that preserves the current intended design of uniqued G_CONSTANTs,
and the rematerialization problem exists elsewhere and should be
resolved with a single coherent solution.

llvm-svn: 297875

											
										
										
											2017-03-16 03:21:11 +08:00
+								        unsigned OffsetReg =
 								            getOrCreateVReg(*ConstantInt::get(OffsetIRTy, Offset));
-												GlobalISel: translate GEP instructions.

Unlike SDag, we use a separate G_GEP instruction (much simplified, only taking
a single byte offset) to preserve the pointer type information through
selection.

llvm-svn: 281205

											
										
										
											2016-09-12 19:20:22 +08:00
+								        MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetReg);
 								        BaseReg = NewBaseReg;
 								        Offset = 0;
 								      }
 								      unsigned IdxReg = getOrCreateVReg(*Idx);
 								      if (MRI->getType(IdxReg) != OffsetTy) {
 								        unsigned NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy);
 								        MIRBuilder.buildSExtOrTrunc(NewIdxReg, IdxReg);
 								        IdxReg = NewIdxReg;
 								      }
-												[GISel]: Don't create G_MUL with 1 during translation of GEP

When element size is 1, it's just wasteful to create MUL with 1.
https://reviews.llvm.org/D41738

llvm-svn: 321857

											
										
										
											2018-01-05 10:56:28 +08:00
+								      // N = N + Idx * ElementSize;
 								      // Avoid doing it for ElementSize of 1.
 								      unsigned GepOffsetReg;
 								      if (ElementSize != 1) {
 								        unsigned ElementSizeReg =
 								            getOrCreateVReg(*ConstantInt::get(OffsetIRTy, ElementSize));
 								        GepOffsetReg = MRI->createGenericVirtualRegister(OffsetTy);
 								        MIRBuilder.buildMul(GepOffsetReg, ElementSizeReg, IdxReg);
 								      } else
 								        GepOffsetReg = IdxReg;
-												GlobalISel: translate GEP instructions.

Unlike SDag, we use a separate G_GEP instruction (much simplified, only taking
a single byte offset) to preserve the pointer type information through
selection.

llvm-svn: 281205

											
										
										
											2016-09-12 19:20:22 +08:00
 								      unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
-												[GISel]: Don't create G_MUL with 1 during translation of GEP

When element size is 1, it's just wasteful to create MUL with 1.
https://reviews.llvm.org/D41738

llvm-svn: 321857

											
										
										
											2018-01-05 10:56:28 +08:00
+								      MIRBuilder.buildGEP(NewBaseReg, BaseReg, GepOffsetReg);
-												GlobalISel: translate GEP instructions.

Unlike SDag, we use a separate G_GEP instruction (much simplified, only taking
a single byte offset) to preserve the pointer type information through
selection.

llvm-svn: 281205

											
										
										
											2016-09-12 19:20:22 +08:00
+								      BaseReg = NewBaseReg;
 								    }
 								  }
 								  if (Offset != 0) {
-												[GlobalISel] Avoid translating synthetic constants to new G_CONSTANTS.

Currently, we create a G_CONSTANT for every "synthetic" integer
constant operand (for instance, for the G_GEP offset).
Instead, share the G_CONSTANTs we might have created by going through
the ValueToVReg machinery.

When we're emitting synthetic constants, we do need to get Constants from
the context.  One could argue that we shouldn't modify the context at
all (for instance, this means that we're going to use a tad more memory
if the constant wasn't used elsewhere), but constants are mostly
harmless.  We currently do this for extractvalue and all.

For constant fcmp, this does mean we'll emit an extra COPY, which is not
necessarily more optimal than an extra materialized constant.
But that preserves the current intended design of uniqued G_CONSTANTs,
and the rematerialization problem exists elsewhere and should be
resolved with a single coherent solution.

llvm-svn: 297875

											
										
										
											2017-03-16 03:21:11 +08:00
+								    unsigned OffsetReg = getOrCreateVReg(*ConstantInt::get(OffsetIRTy, Offset));
-												GlobalISel: translate GEP instructions.

Unlike SDag, we use a separate G_GEP instruction (much simplified, only taking
a single byte offset) to preserve the pointer type information through
selection.

llvm-svn: 281205

											
										
										
											2016-09-12 19:20:22 +08:00
+								    MIRBuilder.buildGEP(getOrCreateVReg(U), BaseReg, OffsetReg);
 								    return true;
 								  }
 								  MIRBuilder.buildCopy(getOrCreateVReg(U), BaseReg);
 								  return true;
 								}
-												GlobalISel: translate memset & memmove.

llvm-svn: 293541

											
										
										
											2017-01-31 03:33:07 +08:00
+								bool IRTranslator::translateMemfunc(const CallInst &CI,
 								                                    MachineIRBuilder &MIRBuilder,
 								                                    unsigned ID) {
-												Recommit: [globalisel] Change LLT constructor string into an LLT-based object that knows how to generate it.

Summary:
This will allow future patches to inspect the details of the LLT. The implementation is now split between
the Support and CodeGen libraries to allow TableGen to use this class without introducing layering concerns.

Thanks to Ahmed Bougacha for finding a reasonable way to avoid the layering issue and providing the version of this patch without that problem.

The problem with the previous commit appears to have been that TableGen was including CodeGen/LowLevelType.h instead of Support/LowLevelTypeImpl.h.

Reviewers: t.p.northover, qcolombet, rovka, aditya_nandakumar, ab, javed.absar

Subscribers: arsenm, nhaehnle, mgorny, dberris, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30046

llvm-svn: 297241

											
										
										
											2017-03-08 07:20:35 +08:00
+								  LLT SizeTy = getLLTForType(*CI.getArgOperand(2)->getType(), *DL);
-												GlobalISel: translate memset & memmove.

llvm-svn: 293541

											
										
										
											2017-01-31 03:33:07 +08:00
+								  Type *DstTy = CI.getArgOperand(0)->getType();
 								  if (cast<PointerType>(DstTy)->getAddressSpace() != 0 ||
-												GlobalISel: translate memcpy intrinsics.

llvm-svn: 284525

											
										
										
											2016-10-19 04:03:45 +08:00
+								      SizeTy.getSizeInBits() != DL->getPointerSizeInBits(0))
 								    return false;
 								  SmallVector<CallLowering::ArgInfo, 8> Args;
 								  for (int i = 0; i < 3; ++i) {
 								    const auto &Arg = CI.getArgOperand(i);
 								    Args.emplace_back(getOrCreateVReg(*Arg), Arg->getType());
 								  }
-												GlobalISel: translate memset & memmove.

llvm-svn: 293541

											
										
										
											2017-01-31 03:33:07 +08:00
+								  const char *Callee;
 								  switch (ID) {
 								  case Intrinsic::memmove:
 								  case Intrinsic::memcpy: {
 								    Type *SrcTy = CI.getArgOperand(1)->getType();
 								    if(cast<PointerType>(SrcTy)->getAddressSpace() != 0)
 								      return false;
 								    Callee = ID == Intrinsic::memcpy ? "memcpy" : "memmove";
 								    break;
 								  }
 								  case Intrinsic::memset:
 								    Callee = "memset";
 								    break;
 								  default:
 								    return false;
 								  }
-												GlobalISel: translate memcpy intrinsics.

llvm-svn: 284525

											
										
										
											2016-10-19 04:03:45 +08:00
-												[GlobalISel] Use the correct calling conv for calls

This commit adds a parameter that lets us pass in the calling convention
of the call to CallLowering::lowerCall. This allows us to handle
situations where the calling convention of the callee is different from
that of the caller.

Differential Revision: https://reviews.llvm.org/D31039

llvm-svn: 298254

											
										
										
											2017-03-20 22:40:18 +08:00
+								  return CLI->lowerCall(MIRBuilder, CI.getCallingConv(),
 								                        MachineOperand::CreateES(Callee),
-												GlobalISel: translate memcpy intrinsics.

llvm-svn: 284525

											
										
										
											2016-10-19 04:03:45 +08:00
+								                        CallLowering::ArgInfo(0, CI.getType()), Args);
 								}
-												GlobalISel: translate GEP instructions.

Unlike SDag, we use a separate G_GEP instruction (much simplified, only taking
a single byte offset) to preserve the pointer type information through
selection.

llvm-svn: 281205

											
										
										
											2016-09-12 19:20:22 +08:00
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								void IRTranslator::getStackGuard(unsigned DstReg,
 								                                 MachineIRBuilder &MIRBuilder) {
-												GlobalISel: set correct regclass for LOAD_STACK_GUARD.

Since it's not actually a generic MI, its register operands need a RegClass,
which is conveniently the target's pointer RegClass.

llvm-svn: 293335

											
										
										
											2017-01-28 05:31:24 +08:00
+								  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
 								  MRI->setRegClass(DstReg, TRI->getPointerRegClass(*MF));
-												GlobalISel: translate stack protector intrinsics

llvm-svn: 285614

											
										
										
											2016-11-01 02:30:59 +08:00
+								  auto MIB = MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD);
 								  MIB.addDef(DstReg);
-												GlobalISel: store the current MachineFunction as direct state. NFC.

Having to ask the MIRBuilder for the current function is a little awkward, and
I'm intending to improve how that's threaded through anyway.

llvm-svn: 288983

											
										
										
											2016-12-08 05:17:47 +08:00
+								  auto &TLI = *MF->getSubtarget().getTargetLowering();
-												MachineFunction: Return reference from getFunction(); NFC

The Function can never be nullptr so we can return a reference.

llvm-svn: 320884

											
										
										
											2017-12-16 06:22:58 +08:00
+								  Value *Global = TLI.getSDagStackGuard(*MF->getFunction().getParent());
-												GlobalISel: translate stack protector intrinsics

llvm-svn: 285614

											
										
										
											2016-11-01 02:30:59 +08:00
+								  if (!Global)
 								    return;
 								  MachinePointerInfo MPInfo(Global);
 								  auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
 								               MachineMemOperand::MODereferenceable;
-												[MI] Change the array of `MachineMemOperand` pointers to be
a generically extensible collection of extra info attached to
a `MachineInstr`.

The primary change here is cleaning up the APIs used for setting and
manipulating the `MachineMemOperand` pointer arrays so that we can
change how they are allocated.

Then we introduce an extra info object that uses the trailing object
pattern to attach some number of MMOs but also other extra info. The
design of this is specifically so that this extra info has a fixed
necessary cost (the header tracking what extra info is included) and
everything else can be tail allocated. This pattern works especially
well with a `BumpPtrAllocator` which we use here.

I've also added the basic scaffolding for putting interesting pointers
into this, namely pre- and post-instruction symbols. These aren't used
anywhere yet, they're just there to ensure I've actually gotten the data
structure types correct. I'll flesh out support for these in
a subsequent patch (MIR dumping, parsing, the works).

Finally, I've included an optimization where we store any single pointer
inline in the `MachineInstr` to avoid the allocation overhead. This is
expected to be the overwhelmingly most common case and so should avoid
any memory usage growth due to slightly less clever / dense allocation
when dealing with >1 MMO. This did require several ergonomic
improvements to the `PointerSumType` to reasonably support the various
usage models.

This also has a side effect of freeing up 8 bits within the
`MachineInstr` which could be repurposed for something else.

The suggested direction here came largely from Hal Finkel. I hope it was
worth it. ;] It does hopefully clear a path for subsequent extensions
w/o nearly as much leg work. Lots of thanks to Reid and Justin for
careful reviews and ideas about how to do all of this.

Differential Revision: https://reviews.llvm.org/D50701

llvm-svn: 339940

											
										
										
											2018-08-17 05:30:05 +08:00
+								  MachineMemOperand *MemRef =
-												GlobalISel: store the current MachineFunction as direct state. NFC.

Having to ask the MIRBuilder for the current function is a little awkward, and
I'm intending to improve how that's threaded through anyway.

llvm-svn: 288983

											
										
										
											2016-12-08 05:17:47 +08:00
+								      MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8,
-												NFC Remove default argument of DataLayout::getPointerABIAlignment

Differential Revision: https://reviews.llvm.org/D40005

llvm-svn: 318272

											
										
										
											2017-11-15 14:17:32 +08:00
+								                               DL->getPointerABIAlignment(0));
-												[MI] Change the array of `MachineMemOperand` pointers to be
a generically extensible collection of extra info attached to
a `MachineInstr`.

The primary change here is cleaning up the APIs used for setting and
manipulating the `MachineMemOperand` pointer arrays so that we can
change how they are allocated.

Then we introduce an extra info object that uses the trailing object
pattern to attach some number of MMOs but also other extra info. The
design of this is specifically so that this extra info has a fixed
necessary cost (the header tracking what extra info is included) and
everything else can be tail allocated. This pattern works especially
well with a `BumpPtrAllocator` which we use here.

I've also added the basic scaffolding for putting interesting pointers
into this, namely pre- and post-instruction symbols. These aren't used
anywhere yet, they're just there to ensure I've actually gotten the data
structure types correct. I'll flesh out support for these in
a subsequent patch (MIR dumping, parsing, the works).

Finally, I've included an optimization where we store any single pointer
inline in the `MachineInstr` to avoid the allocation overhead. This is
expected to be the overwhelmingly most common case and so should avoid
any memory usage growth due to slightly less clever / dense allocation
when dealing with >1 MMO. This did require several ergonomic
improvements to the `PointerSumType` to reasonably support the various
usage models.

This also has a side effect of freeing up 8 bits within the
`MachineInstr` which could be repurposed for something else.

The suggested direction here came largely from Hal Finkel. I hope it was
worth it. ;] It does hopefully clear a path for subsequent extensions
w/o nearly as much leg work. Lots of thanks to Reid and Justin for
careful reviews and ideas about how to do all of this.

Differential Revision: https://reviews.llvm.org/D50701

llvm-svn: 339940

											
										
										
											2018-08-17 05:30:05 +08:00
+								  MIB.setMemRefs({MemRef});
-												GlobalISel: translate stack protector intrinsics

llvm-svn: 285614

											
										
										
											2016-11-01 02:30:59 +08:00
+								}
-												GlobalISel: factor overflow handling into separate function. NFC.

llvm-svn: 289149

											
										
										
											2016-12-09 06:44:00 +08:00
+								bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
 								                                              MachineIRBuilder &MIRBuilder) {
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  ArrayRef<unsigned> ResRegs = getOrCreateVRegs(CI);
-												[GISel]: Add missing opcodes for overflow intrinsics

https://reviews.llvm.org/D51197

Currently, IRTranslator (and GISel) seems to be arbitrarily picking
which overflow intrinsics get mapped into opcodes which either have a
carry as an input or not.
For intrinsics such as Intrinsic::uadd_with_overflow, translate it to an
opcode (G_UADDO) which doesn't have any carry inputs (similar to LLVM
IR).

This patch adds 4 missing opcodes for completeness - G_UADDO, G_USUBO,
G_SSUBE and G_SADDE.

llvm-svn: 340865

											
										
										
											2018-08-29 02:54:10 +08:00
+								  MIRBuilder.buildInstr(Op)
 								      .addDef(ResRegs[0])
 								      .addDef(ResRegs[1])
 								      .addUse(getOrCreateVReg(*CI.getOperand(0)))
 								      .addUse(getOrCreateVReg(*CI.getOperand(1)));
-												GlobalISel: factor overflow handling into separate function. NFC.

llvm-svn: 289149

											
										
										
											2016-12-09 06:44:00 +08:00
 								  return true;
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
 								                                           MachineIRBuilder &MIRBuilder) {
-												GlobalISel: support overflow arithmetic intrinsics.

Unsigned addition and subtraction can reuse the instructions created to
legalize large width operations (i.e. both produce and consume a carry flag).
Signed operations and multiplies get a dedicated op-with-overflow instruction.

Once this is produced the two values are combined into a struct register (which
will almost always be merged with a corresponding G_EXTRACT as part of
legalization).

llvm-svn: 279278

											
										
										
											2016-08-20 01:17:06 +08:00
+								  switch (ID) {
-												GlobalISel: factor overflow handling into separate function. NFC.

llvm-svn: 289149

											
										
										
											2016-12-09 06:44:00 +08:00
+								  default:
 								    break;
-												GlobalISel: drop lifetime intrinsics during translation.

We don't use them yet and they just cause problems.

llvm-svn: 294770

											
										
										
											2017-02-11 03:10:38 +08:00
+								  case Intrinsic::lifetime_start:
 								  case Intrinsic::lifetime_end:
 								    // Stack coloring is not enabled in O0 (which we care about now) so we can
 								    // drop these. Make sure someone notices when we start compiling at higher
 								    // opts though.
 								    if (MF->getTarget().getOptLevel() != CodeGenOpt::None)
 								      return false;
 								    return true;
-												GlobalISel: support debug intrinsics.

The translation scheme is mostly cribbed from FastISel, and it's not entirely
convincing semantically. But it does seem to work in the common cases and allow
variables to be printed so it can't be all wrong.

llvm-svn: 293228

											
										
										
											2017-01-27 07:39:14 +08:00
+								  case Intrinsic::dbg_declare: {
 								    const DbgDeclareInst &DI = cast<DbgDeclareInst>(CI);
 								    assert(DI.getVariable() && "Missing variable");
 								    const Value *Address = DI.getAddress();
 								    if (!Address || isa<UndefValue>(Address)) {
-												Rename DEBUG macro to LLVM_DEBUG.
    
The DEBUG() macro is very generic so it might clash with other projects.
The renaming was done as follows:
- git grep -l 'DEBUG' | xargs sed -i 's/\bDEBUG\s\?(/LLVM_DEBUG(/g'
- git diff -U0 master | ../clang/tools/clang-format/clang-format-diff.py -i -p1 -style LLVM
- Manual change to APInt
- Manually change DOCS as regex doesn't match it.

In the transition period the DEBUG() macro is still present and aliased
to the LLVM_DEBUG() one.

Differential Revision: https://reviews.llvm.org/D43624

llvm-svn: 332240

											
										
										
											2018-05-14 20:53:11 +08:00
+								      LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
-												GlobalISel: support debug intrinsics.

The translation scheme is mostly cribbed from FastISel, and it's not entirely
convincing semantically. But it does seem to work in the common cases and allow
variables to be printed so it can't be all wrong.

llvm-svn: 293228

											
										
										
											2017-01-27 07:39:14 +08:00
+								      return true;
 								    }
 								    assert(DI.getVariable()->isValidLocationForIntrinsic(
 								               MIRBuilder.getDebugLoc()) &&
 								           "Expected inlined-at fields to agree");
-												GlobalISel: put debug info for static allocas in the MachineFunction.

The good reason to do this is that static allocas are pretty simple to handle
(especially at -O0) and avoiding tracking DBG_VALUEs throughout the pipeline
should give some kind of performance benefit.

The bad reason is that the debug pipeline is an unholy mess of implicit
contracts, where determining whether "DBG_VALUE %reg, imm" actually implies a
load or not involves the services of at least 3 soothsayers and the sacrifice
of at least one chicken.  And it still gets it wrong if the variable is at SP
directly.

llvm-svn: 297410

											
										
										
											2017-03-10 05:12:06 +08:00
+								    auto AI = dyn_cast<AllocaInst>(Address);
 								    if (AI && AI->isStaticAlloca()) {
 								      // Static allocas are tracked at the MF level, no need for DBG_VALUE
 								      // instructions (in fact, they get ignored if they *do* exist).
 								      MF->setVariableDbgInfo(DI.getVariable(), DI.getExpression(),
 								                             getOrCreateFrameIndex(*AI), DI.getDebugLoc());
-												[GlobalISel] Lower dbg.declare into indirect DBG_VALUE

Summary:
D31439 changed the semantics of dbg.declare to take the address of a
variable as the first argument, making it indirect.  It specifically
updated FastISel for this change here:

https://reviews.llvm.org/D31439#change-WVArzi177jPl

GlobalISel needs to follow suit, or else it will be missing a level of
indirection in the generated debuginfo.  This problem was seen in a Rust
debuginfo test on aarch64, since GlobalISel is used at -O0 for aarch64.

https://github.com/rust-lang/rust/issues/49807
https://bugzilla.redhat.com/show_bug.cgi?id=1611597
https://bugzilla.redhat.com/show_bug.cgi?id=1625768

Reviewers: dblaikie, aprantl, t.p.northover, javed.absar, rnk

Reviewed By: rnk

Subscribers: #debug-info, rovka, kristof.beyls, JDevlieghere, llvm-commits, tstellar

Differential Revision: https://reviews.llvm.org/D51749

llvm-svn: 341969

											
										
										
											2018-09-12 01:52:01 +08:00
+								    } else {
 								      // A dbg.declare describes the address of a source variable, so lower it
 								      // into an indirect DBG_VALUE.
 								      MIRBuilder.buildIndirectDbgValue(getOrCreateVReg(*Address),
 								                                       DI.getVariable(), DI.getExpression());
 								    }
-												GlobalISel: fall back gracefully for debug intrinsics.

Supporting them properly is a reasonably complex chunk of work, so to allow bot
testing before then we should at least be able to fall back to DAG ISel.

llvm-svn: 289150

											
										
										
											2016-12-09 06:44:13 +08:00
+								    return true;
-												GlobalISel: support debug intrinsics.

The translation scheme is mostly cribbed from FastISel, and it's not entirely
convincing semantically. But it does seem to work in the common cases and allow
variables to be printed so it can't be all wrong.

llvm-svn: 293228

											
										
										
											2017-01-27 07:39:14 +08:00
+								  }
-												[DebugInfo] Generate DWARF debug information for labels. (Fix leak problems)

There are two forms for label debug information in DWARF format.

1. Labels in a non-inlined function:

DW_TAG_label
  DW_AT_name
  DW_AT_decl_file
  DW_AT_decl_line
  DW_AT_low_pc

2. Labels in an inlined function:

DW_TAG_label
  DW_AT_abstract_origin
  DW_AT_low_pc

We will collect label information from DBG_LABEL. Before every DBG_LABEL,
we will generate a temporary symbol to denote the location of the label.
The symbol could be used to get DW_AT_low_pc afterwards. So, we create a
mapping between 'inlined label' and DBG_LABEL MachineInstr in DebugHandlerBase.
The DBG_LABEL in the mapping is used to query the symbol before it.

The AbstractLabels in DwarfCompileUnit is used to process labels in inlined
functions.

We also keep a mapping between scope and labels in DwarfFile to help to
generate correct tree structure of DIEs.

It also generates label debug information under global isel.

Differential Revision: https://reviews.llvm.org/D45556

llvm-svn: 340039

											
										
										
											2018-08-17 23:22:04 +08:00
+								  case Intrinsic::dbg_label: {
 								    const DbgLabelInst &DI = cast<DbgLabelInst>(CI);
 								    assert(DI.getLabel() && "Missing label");
 								    assert(DI.getLabel()->isValidLocationForIntrinsic(
 								               MIRBuilder.getDebugLoc()) &&
 								           "Expected inlined-at fields to agree");
 								    MIRBuilder.buildDbgLabel(DI.getLabel());
 								    return true;
 								  }
-												GlobalISel: translate @llvm.va_end intrinsic.

Turns out no-one actually cares about this one (at least) in tree so we can
just drop it entirely.

llvm-svn: 294345

											
										
										
											2017-02-08 04:08:59 +08:00
+								  case Intrinsic::vaend:
 								    // No target I know of cares about va_end. Certainly no in-tree target
 								    // does. Simplest intrinsic ever!
 								    return true;
-												GlobalISel: translate @llvm.va_start intrinsic.

Because we need to preserve the memory access being performed we need a
separate instruction to represent this.

llvm-svn: 294492

											
										
										
											2017-02-09 01:57:20 +08:00
+								  case Intrinsic::vastart: {
 								    auto &TLI = *MF->getSubtarget().getTargetLowering();
 								    Value *Ptr = CI.getArgOperand(0);
 								    unsigned ListSize = TLI.getVaListSizeInBits(*DL) / 8;
 								    MIRBuilder.buildInstr(TargetOpcode::G_VASTART)
 								        .addUse(getOrCreateVReg(*Ptr))
 								        .addMemOperand(MF->getMachineMemOperand(
 								            MachinePointerInfo(Ptr), MachineMemOperand::MOStore, ListSize, 0));
 								    return true;
 								  }
-												GlobalISel: support debug intrinsics.

The translation scheme is mostly cribbed from FastISel, and it's not entirely
convincing semantically. But it does seem to work in the common cases and allow
variables to be printed so it can't be all wrong.

llvm-svn: 293228

											
										
										
											2017-01-27 07:39:14 +08:00
+								  case Intrinsic::dbg_value: {
 								    // This form of DBG_VALUE is target-independent.
 								    const DbgValueInst &DI = cast<DbgValueInst>(CI);
 								    const Value *V = DI.getValue();
 								    assert(DI.getVariable()->isValidLocationForIntrinsic(
 								               MIRBuilder.getDebugLoc()) &&
 								           "Expected inlined-at fields to agree");
 								    if (!V) {
 								      // Currently the optimizer can produce this; insert an undef to
 								      // help debugging.  Probably the optimizer should not do this.
-												Remove the unused DBG_VALUE offset parameter from GlobalISel (NFC)

Followup to r309426.
rdar://problem/33580047

llvm-svn: 309449

											
										
										
											2017-07-29 06:46:20 +08:00
+								      MIRBuilder.buildIndirectDbgValue(0, DI.getVariable(), DI.getExpression());
-												GlobalISel: support debug intrinsics.

The translation scheme is mostly cribbed from FastISel, and it's not entirely
convincing semantically. But it does seem to work in the common cases and allow
variables to be printed so it can't be all wrong.

llvm-svn: 293228

											
										
										
											2017-01-27 07:39:14 +08:00
+								    } else if (const auto *CI = dyn_cast<Constant>(V)) {
-												Remove the unused DBG_VALUE offset parameter from GlobalISel (NFC)

Followup to r309426.
rdar://problem/33580047

llvm-svn: 309449

											
										
										
											2017-07-29 06:46:20 +08:00
+								      MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression());
-												GlobalISel: support debug intrinsics.

The translation scheme is mostly cribbed from FastISel, and it's not entirely
convincing semantically. But it does seem to work in the common cases and allow
variables to be printed so it can't be all wrong.

llvm-svn: 293228

											
										
										
											2017-01-27 07:39:14 +08:00
+								    } else {
 								      unsigned Reg = getOrCreateVReg(*V);
 								      // FIXME: This does not handle register-indirect values at offset 0. The
 								      // direct/indirect thing shouldn't really be handled by something as
 								      // implicit as reg+noreg vs reg+imm in the first place, but it seems
 								      // pretty baked in right now.
-												Remove the obsolete offset parameter from @llvm.dbg.value

There is no situation where this rarely-used argument cannot be
substituted with a DIExpression and removing it allows us to simplify
the DWARF backend. Note that this patch does not yet remove any of
the newly dead code.

rdar://problem/33580047
Differential Revision: https://reviews.llvm.org/D35951

llvm-svn: 309426

											
										
										
											2017-07-29 04:21:02 +08:00
+								      MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression());
-												GlobalISel: support debug intrinsics.

The translation scheme is mostly cribbed from FastISel, and it's not entirely
convincing semantically. But it does seem to work in the common cases and allow
variables to be printed so it can't be all wrong.

llvm-svn: 293228

											
										
										
											2017-01-27 07:39:14 +08:00
+								    }
 								    return true;
 								  }
-												GlobalISel: factor overflow handling into separate function. NFC.

llvm-svn: 289149

											
										
										
											2016-12-09 06:44:00 +08:00
+								  case Intrinsic::uadd_with_overflow:
-												[GISel]: Add missing opcodes for overflow intrinsics

https://reviews.llvm.org/D51197

Currently, IRTranslator (and GISel) seems to be arbitrarily picking
which overflow intrinsics get mapped into opcodes which either have a
carry as an input or not.
For intrinsics such as Intrinsic::uadd_with_overflow, translate it to an
opcode (G_UADDO) which doesn't have any carry inputs (similar to LLVM
IR).

This patch adds 4 missing opcodes for completeness - G_UADDO, G_USUBO,
G_SSUBE and G_SADDE.

llvm-svn: 340865

											
										
										
											2018-08-29 02:54:10 +08:00
+								    return translateOverflowIntrinsic(CI, TargetOpcode::G_UADDO, MIRBuilder);
-												GlobalISel: factor overflow handling into separate function. NFC.

llvm-svn: 289149

											
										
										
											2016-12-09 06:44:00 +08:00
+								  case Intrinsic::sadd_with_overflow:
 								    return translateOverflowIntrinsic(CI, TargetOpcode::G_SADDO, MIRBuilder);
 								  case Intrinsic::usub_with_overflow:
-												[GISel]: Add missing opcodes for overflow intrinsics

https://reviews.llvm.org/D51197

Currently, IRTranslator (and GISel) seems to be arbitrarily picking
which overflow intrinsics get mapped into opcodes which either have a
carry as an input or not.
For intrinsics such as Intrinsic::uadd_with_overflow, translate it to an
opcode (G_UADDO) which doesn't have any carry inputs (similar to LLVM
IR).

This patch adds 4 missing opcodes for completeness - G_UADDO, G_USUBO,
G_SSUBE and G_SADDE.

llvm-svn: 340865

											
										
										
											2018-08-29 02:54:10 +08:00
+								    return translateOverflowIntrinsic(CI, TargetOpcode::G_USUBO, MIRBuilder);
-												GlobalISel: factor overflow handling into separate function. NFC.

llvm-svn: 289149

											
										
										
											2016-12-09 06:44:00 +08:00
+								  case Intrinsic::ssub_with_overflow:
 								    return translateOverflowIntrinsic(CI, TargetOpcode::G_SSUBO, MIRBuilder);
 								  case Intrinsic::umul_with_overflow:
 								    return translateOverflowIntrinsic(CI, TargetOpcode::G_UMULO, MIRBuilder);
 								  case Intrinsic::smul_with_overflow:
 								    return translateOverflowIntrinsic(CI, TargetOpcode::G_SMULO, MIRBuilder);
-												GlobalISel: translate @llvm.pow intrinsic to G_FPOW.

It'll usually be immediately legalized back to a libcall, but occasionally
something can be done with it so we'd just as well enable that flexibility from
the start.

llvm-svn: 294530

											
										
										
											2017-02-09 07:23:32 +08:00
+								  case Intrinsic::pow:
 								    MIRBuilder.buildInstr(TargetOpcode::G_FPOW)
 								        .addDef(getOrCreateVReg(CI))
 								        .addUse(getOrCreateVReg(*CI.getArgOperand(0)))
 								        .addUse(getOrCreateVReg(*CI.getArgOperand(1)));
 								    return true;
-												[GISel]: Add G_FEXP, G_FEXP2 opcodes

Also add IRTranslator support.
https://reviews.llvm.org/D34710

llvm-svn: 306475

											
										
										
											2017-06-28 06:19:32 +08:00
+								  case Intrinsic::exp:
 								    MIRBuilder.buildInstr(TargetOpcode::G_FEXP)
 								        .addDef(getOrCreateVReg(CI))
 								        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
 								    return true;
 								  case Intrinsic::exp2:
 								    MIRBuilder.buildInstr(TargetOpcode::G_FEXP2)
 								        .addDef(getOrCreateVReg(CI))
 								        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
 								    return true;
-												[GISel]: New Opcode G_FLOG/G_FLOG2

https://reviews.llvm.org/D34837

llvm-svn: 306766

											
										
										
											2017-06-30 07:43:44 +08:00
+								  case Intrinsic::log:
 								    MIRBuilder.buildInstr(TargetOpcode::G_FLOG)
 								        .addDef(getOrCreateVReg(CI))
 								        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
 								    return true;
 								  case Intrinsic::log2:
 								    MIRBuilder.buildInstr(TargetOpcode::G_FLOG2)
 								        .addDef(getOrCreateVReg(CI))
 								        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
-												GlobalISel: IRTranslate llvm.fabs.* intrinsic

Summary:
Fabs is a common floating-point operation, especially for some expansions. This patch adds
a new generic opcode for llvm.fabs.* intrinsic in order to avoid building/matching this intrinsic.

Reviewers: qcolombet, aditya_nandakumar, dsanders, rovka

Reviewed By: aditya_nandakumar

Subscribers: kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D43864

llvm-svn: 326749

											
										
										
											2018-03-06 06:31:55 +08:00
+								    return true;
-												[GlobalISel] Add IR translation support for the @llvm.log10 intrinsic

This adds IR translation support for @llvm.log10 and updates relevant tests.

https://reviews.llvm.org/D55392

llvm-svn: 348657

											
										
										
											2018-12-08 06:08:02 +08:00
+								  case Intrinsic::log10:
 								    MIRBuilder.buildInstr(TargetOpcode::G_FLOG10)
 								        .addDef(getOrCreateVReg(CI))
 								        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
 								    return true;
-												GlobalISel: IRTranslate llvm.fabs.* intrinsic

Summary:
Fabs is a common floating-point operation, especially for some expansions. This patch adds
a new generic opcode for llvm.fabs.* intrinsic in order to avoid building/matching this intrinsic.

Reviewers: qcolombet, aditya_nandakumar, dsanders, rovka

Reviewed By: aditya_nandakumar

Subscribers: kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D43864

llvm-svn: 326749

											
										
										
											2018-03-06 06:31:55 +08:00
+								  case Intrinsic::fabs:
 								    MIRBuilder.buildInstr(TargetOpcode::G_FABS)
 								        .addDef(getOrCreateVReg(CI))
 								        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
-												[GISel]: New Opcode G_FLOG/G_FLOG2

https://reviews.llvm.org/D34837

llvm-svn: 306766

											
										
										
											2017-06-30 07:43:44 +08:00
+								    return true;
-												Revert "Revert r339977: [GISel]: Add Opcodes for a few LLVM Intrinsics"

This reverts commit 7debc334e6421bb5251ef8f18e97166dfc7dd787.

I missed updating legalizer-info-validation.mir as I had assertions
turned off in my build and that specific test requires asserts. Fixed it
now.

llvm-svn: 340197

											
										
										
											2018-08-21 02:43:19 +08:00
+								  case Intrinsic::trunc:
 								    MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_TRUNC)
 								        .addDef(getOrCreateVReg(CI))
 								        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
 								    return true;
 								  case Intrinsic::round:
 								    MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND)
 								        .addDef(getOrCreateVReg(CI))
 								        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
 								    return true;
-												[GISel]: Add G_FMA opcode for fused multiply adds

https://reviews.llvm.org/D34372

Reviewed by dsanders

llvm-svn: 305824

											
										
										
											2017-06-21 03:25:23 +08:00
+								  case Intrinsic::fma:
 								    MIRBuilder.buildInstr(TargetOpcode::G_FMA)
 								        .addDef(getOrCreateVReg(CI))
 								        .addUse(getOrCreateVReg(*CI.getArgOperand(0)))
 								        .addUse(getOrCreateVReg(*CI.getArgOperand(1)))
 								        .addUse(getOrCreateVReg(*CI.getArgOperand(2)));
 								    return true;
-												GlobalISel: IRTranslate llvm.fmuladd.* intrinsic

Reviewers: qcolombet, ab, dsanders, aditya_nandakumar, bogner

Reviewed By: qcolombet

Subscribers: rovka, kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D43090

llvm-svn: 324971

											
										
										
											2018-02-13 08:47:46 +08:00
+								  case Intrinsic::fmuladd: {
 								    const TargetMachine &TM = MF->getTarget();
 								    const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
 								    unsigned Dst = getOrCreateVReg(CI);
 								    unsigned Op0 = getOrCreateVReg(*CI.getArgOperand(0));
 								    unsigned Op1 = getOrCreateVReg(*CI.getArgOperand(1));
 								    unsigned Op2 = getOrCreateVReg(*CI.getArgOperand(2));
 								    if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
 								        TLI.isFMAFasterThanFMulAndFAdd(TLI.getValueType(*DL, CI.getType()))) {
 								      // TODO: Revisit this to see if we should move this part of the
 								      // lowering to the combiner.
 								      MIRBuilder.buildInstr(TargetOpcode::G_FMA, Dst, Op0, Op1, Op2);
 								    } else {
 								      LLT Ty = getLLTForType(*CI.getType(), *DL);
 								      auto FMul = MIRBuilder.buildInstr(TargetOpcode::G_FMUL, Ty, Op0, Op1);
 								      MIRBuilder.buildInstr(TargetOpcode::G_FADD, Dst, FMul, Op2);
 								    }
 								    return true;
 								  }
-												GlobalISel: translate memcpy intrinsics.

llvm-svn: 284525

											
										
										
											2016-10-19 04:03:45 +08:00
+								  case Intrinsic::memcpy:
-												GlobalISel: translate memset & memmove.

llvm-svn: 293541

											
										
										
											2017-01-31 03:33:07 +08:00
+								  case Intrinsic::memmove:
 								  case Intrinsic::memset:
 								    return translateMemfunc(CI, MIRBuilder, ID);
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
+								  case Intrinsic::eh_typeid_for: {
 								    GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0));
 								    unsigned Reg = getOrCreateVReg(CI);
-												GlobalISel: store the current MachineFunction as direct state. NFC.

Having to ask the MIRBuilder for the current function is a little awkward, and
I'm intending to improve how that's threaded through anyway.

llvm-svn: 288983

											
										
										
											2016-12-08 05:17:47 +08:00
+								    unsigned TypeID = MF->getTypeIDFor(GV);
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
+								    MIRBuilder.buildConstant(Reg, TypeID);
 								    return true;
 								  }
-												GlobalISel: translate the @llvm.objectsize intrinsic.

llvm-svn: 284527

											
										
										
											2016-10-19 04:03:51 +08:00
+								  case Intrinsic::objectsize: {
 								    // If we don't know by now, we're never going to know.
 								    const ConstantInt *Min = cast<ConstantInt>(CI.getArgOperand(1));
 								    MIRBuilder.buildConstant(getOrCreateVReg(CI), Min->isZero() ? -1ULL : 0);
 								    return true;
 								  }
-												Add support for llvm.is.constant intrinsic (PR4898)

This adds the llvm-side support for post-inlining evaluation of the
__builtin_constant_p GCC intrinsic.

Also fixed SCCPSolver::visitCallSite to not blow up when seeing a call
to a function where canConstantFoldTo returns true, and one of the
arguments is a struct.

Updated from patch initially by Janusz Sobczak.

Differential Revision: https://reviews.llvm.org/D4276

llvm-svn: 346322

											
										
										
											2018-11-07 23:24:12 +08:00
+								  case Intrinsic::is_constant:
 								    // If this wasn't constant-folded away by now, then it's not a
 								    // constant.
 								    MIRBuilder.buildConstant(getOrCreateVReg(CI), 0);
 								    return true;
-												GlobalISel: translate stack protector intrinsics

llvm-svn: 285614

											
										
										
											2016-11-01 02:30:59 +08:00
+								  case Intrinsic::stackguard:
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								    getStackGuard(getOrCreateVReg(CI), MIRBuilder);
-												GlobalISel: translate stack protector intrinsics

llvm-svn: 285614

											
										
										
											2016-11-01 02:30:59 +08:00
+								    return true;
 								  case Intrinsic::stackprotector: {
-												Recommit: [globalisel] Change LLT constructor string into an LLT-based object that knows how to generate it.

Summary:
This will allow future patches to inspect the details of the LLT. The implementation is now split between
the Support and CodeGen libraries to allow TableGen to use this class without introducing layering concerns.

Thanks to Ahmed Bougacha for finding a reasonable way to avoid the layering issue and providing the version of this patch without that problem.

The problem with the previous commit appears to have been that TableGen was including CodeGen/LowLevelType.h instead of Support/LowLevelTypeImpl.h.

Reviewers: t.p.northover, qcolombet, rovka, aditya_nandakumar, ab, javed.absar

Subscribers: arsenm, nhaehnle, mgorny, dberris, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30046

llvm-svn: 297241

											
										
										
											2017-03-08 07:20:35 +08:00
+								    LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
-												GlobalISel: translate stack protector intrinsics

llvm-svn: 285614

											
										
										
											2016-11-01 02:30:59 +08:00
+								    unsigned GuardVal = MRI->createGenericVirtualRegister(PtrTy);
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								    getStackGuard(GuardVal, MIRBuilder);
-												GlobalISel: translate stack protector intrinsics

llvm-svn: 285614

											
										
										
											2016-11-01 02:30:59 +08:00
 								    AllocaInst *Slot = cast<AllocaInst>(CI.getArgOperand(1));
 								    MIRBuilder.buildStore(
 								        GuardVal, getOrCreateVReg(*Slot),
-												GlobalISel: store the current MachineFunction as direct state. NFC.

Having to ask the MIRBuilder for the current function is a little awkward, and
I'm intending to improve how that's threaded through anyway.

llvm-svn: 288983

											
										
										
											2016-12-08 05:17:47 +08:00
+								        *MF->getMachineMemOperand(
 								            MachinePointerInfo::getFixedStack(*MF,
 								                                              getOrCreateFrameIndex(*Slot)),
-												GlobalISel: translate stack protector intrinsics

llvm-svn: 285614

											
										
										
											2016-11-01 02:30:59 +08:00
+								            MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
 								            PtrTy.getSizeInBits() / 8, 8));
 								    return true;
 								  }
-												[GISel]: Add Opcodes for CTLZ/CTTZ/CTPOP

https://reviews.llvm.org/D48600

Added IRTranslator support to translate these known intrinsics into GISel opcodes.

llvm-svn: 338944

											
										
										
											2018-08-04 09:22:12 +08:00
+								  case Intrinsic::cttz:
 								  case Intrinsic::ctlz: {
 								    ConstantInt *Cst = cast<ConstantInt>(CI.getArgOperand(1));
 								    bool isTrailing = ID == Intrinsic::cttz;
 								    unsigned Opcode = isTrailing
 								                          ? Cst->isZero() ? TargetOpcode::G_CTTZ
 								                                          : TargetOpcode::G_CTTZ_ZERO_UNDEF
 								                          : Cst->isZero() ? TargetOpcode::G_CTLZ
 								                                          : TargetOpcode::G_CTLZ_ZERO_UNDEF;
 								    MIRBuilder.buildInstr(Opcode)
 								        .addDef(getOrCreateVReg(CI))
 								        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
 								    return true;
 								  }
 								  case Intrinsic::ctpop: {
 								    MIRBuilder.buildInstr(TargetOpcode::G_CTPOP)
 								        .addDef(getOrCreateVReg(CI))
 								        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
 								    return true;
 								  }
-												[GlobalIsel] Add llvm.invariant.start and llvm.invariant.end

Port over the implementation in SelectionDAGBuilder.cpp into the IRTranslator
and update the arm64-irtranslator test.

These were causing fallbacks in CTMark/Bullet (-Rpass-missed=gisel-select),
and this patch fixes that.

https://reviews.llvm.org/D52945

llvm-svn: 343885

											
										
										
											2018-10-06 05:02:46 +08:00
+								  case Intrinsic::invariant_start: {
 								    LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
 								    unsigned Undef = MRI->createGenericVirtualRegister(PtrTy);
 								    MIRBuilder.buildUndef(Undef);
 								    return true;
 								  }
 								  case Intrinsic::invariant_end:
 								    return true;
-												GlobalISel: support overflow arithmetic intrinsics.

Unsigned addition and subtraction can reuse the instructions created to
legalize large width operations (i.e. both produce and consume a carry flag).
Signed operations and multiplies get a dedicated op-with-overflow instruction.

Once this is produced the two values are combined into a struct register (which
will almost always be merged with a corresponding G_EXTRACT as part of
legalization).

llvm-svn: 279278

											
										
										
											2016-08-20 01:17:06 +08:00
+								  }
-												GlobalISel: factor overflow handling into separate function. NFC.

llvm-svn: 289149

											
										
										
											2016-12-09 06:44:00 +08:00
+								  return false;
-												GlobalISel: support overflow arithmetic intrinsics.

Unsigned addition and subtraction can reuse the instructions created to
legalize large width operations (i.e. both produce and consume a carry flag).
Signed operations and multiplies get a dedicated op-with-overflow instruction.

Once this is produced the two values are combined into a struct register (which
will almost always be merged with a corresponding G_EXTRACT as part of
legalization).

llvm-svn: 279278

											
										
										
											2016-08-20 01:17:06 +08:00
+								}
-												GlobalISel: support trivial inlineasm calls.

They're used for nefarious purposes by ObjC.

llvm-svn: 297422

											
										
										
											2017-03-10 07:36:26 +08:00
+								bool IRTranslator::translateInlineAsm(const CallInst &CI,
 								                                      MachineIRBuilder &MIRBuilder) {
 								  // Emit an INLINEASM instruction for a call whose callee is an InlineAsm
 								  // value. Returns true when the instruction was built and false when the
 								  // asm carries a constraint string, which is not handled here.
 								  const InlineAsm &Asm = cast<InlineAsm>(*CI.getCalledValue());
 								  const bool HasConstraints = !Asm.getConstraintString().empty();
 								  if (HasConstraints)
 								    return false;
 								  // Collect the InlineAsm::Extra_* bits that are passed as the immediate
 								  // operand following the asm string.
 								  unsigned Flags = 0;
 								  if (Asm.hasSideEffects())
 								    Flags |= InlineAsm::Extra_HasSideEffects;
 								  if (Asm.getDialect() == InlineAsm::AD_Intel)
 								    Flags |= InlineAsm::Extra_AsmDialect;
 								  // Operands are appended in order: the asm text as an external symbol,
 								  // then the flag word.
 								  auto AsmInst = MIRBuilder.buildInstr(TargetOpcode::INLINEASM);
 								  AsmInst.addExternalSymbol(Asm.getAsmString().c_str());
 								  AsmInst.addImm(Flags);
 								  return true;
 								}
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								unsigned IRTranslator::packRegs(const Value &V,
 								                                  MachineIRBuilder &MIRBuilder) {
 								  // Combine the virtual registers backing V into one register of V's
 								  // full low-level type and return it.
 								  ArrayRef<unsigned> Parts = getOrCreateVRegs(V);
 								  ArrayRef<uint64_t> PartOffsets = *VMap.getOffsets(V);
 								  LLT WholeTy = getLLTForType(*V.getType(), *DL);
 								  // A value held in exactly one register is returned as-is; no new
 								  // instructions are emitted.
 								  if (Parts.size() == 1)
 								    return Parts[0];
 								  // Start from an undef of the whole type, then insert each part at its
 								  // recorded offset. Every insert defines a fresh register that becomes
 								  // the source operand of the next insert.
 								  unsigned Packed = MRI->createGenericVirtualRegister(WholeTy);
 								  MIRBuilder.buildUndef(Packed);
 								  unsigned Idx = 0;
 								  for (unsigned Part : Parts) {
 								    unsigned Next = MRI->createGenericVirtualRegister(WholeTy);
 								    MIRBuilder.buildInsert(Next, Packed, Part, PartOffsets[Idx]);
 								    Packed = Next;
 								    ++Idx;
 								  }
 								  return Packed;
 								}
 								void IRTranslator::unpackRegs(const Value &V, unsigned Src,
 								                                MachineIRBuilder &MIRBuilder) {
 								  ArrayRef<unsigned> Regs = getOrCreateVRegs(V);
 								  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(V);
 								  for (unsigned i = 0; i < Regs.size(); ++i)
 								    MIRBuilder.buildExtract(Regs[i], Src, Offsets[i]);
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
-												GlobalISel: support same ConstantExprs as Instructions.

It's more than just inttoptr, but the others can't be tested until we have
support for non-trivial constants (they currently get unavoidably folded to a
ConstantInt).

llvm-svn: 278303

											
										
										
											2016-08-11 07:02:41 +08:00
+								  const CallInst &CI = cast<CallInst>(U);
-												GlobalISel: store the current MachineFunction as direct state. NFC.

Having to ask the MIRBuilder for the current function is a little awkward, and
I'm intending to improve how that's threaded through anyway.

llvm-svn: 288983

											
										
										
											2016-12-08 05:17:47 +08:00
+								  auto TII = MF->getTarget().getIntrinsicInfo();
-												GlobalISel: implement simple function calls on AArch64.

We're still limited in the arguments we support, but this at least handles the
basic cases.

llvm-svn: 278293

											
										
										
											2016-08-11 05:44:01 +08:00
+								  const Function *F = CI.getCalledFunction();
-												[GlobalISel] Bail out on calls to dllimported functions

Differential Revision: https://reviews.llvm.org/D42568

llvm-svn: 323811

											
										
										
											2018-01-31 03:50:58 +08:00
+								  // FIXME: support Windows dllimport function calls.
 								  if (F && F->hasDLLImportStorageClass())
 								    return false;
-												AArch64: fall back to DAG ISel for inline assembly.

We can't currently handle "calls" to inlineasm strings so it's better to let
the DAG handle it than generate rubbish.

llvm-svn: 292540

											
										
										
											2017-01-20 07:59:35 +08:00
+								  if (CI.isInlineAsm())
-												GlobalISel: support trivial inlineasm calls.

They're used for nefarious purposes by ObjC.

llvm-svn: 297422

											
										
										
											2017-03-10 07:36:26 +08:00
+								    return translateInlineAsm(CI, MIRBuilder);
-												AArch64: fall back to DAG ISel for inline assembly.

We can't currently handle "calls" to inlineasm strings so it's better to let
the DAG handle it than generate rubbish.

llvm-svn: 292540

											
										
										
											2017-01-20 07:59:35 +08:00
-												[AArch64][GlobalISel] Fix assert fail with unknown intrinsic.

A call may have an intrinsic name but not have a valid intrinsic ID,
for example with llvm.invariant.group.barrier. If so, treat it as a
normal call like FastISel does.

llvm-svn: 321662

											
										
										
											2018-01-03 02:56:39 +08:00
+								  Intrinsic::ID ID = Intrinsic::not_intrinsic;
 								  if (F && F->isIntrinsic()) {
 								    ID = F->getIntrinsicID();
 								    if (TII && ID == Intrinsic::not_intrinsic)
 								      ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));
 								  }
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  bool IsSplitType = valueIsSplit(CI);
-												[AArch64][GlobalISel] Fix assert fail with unknown intrinsic.

A call may have an intrinsic name but not have a valid intrinsic ID,
for example with llvm.invariant.group.barrier. If so, treat it as a
normal call like FastISel does.

llvm-svn: 321662

											
										
										
											2018-01-03 02:56:39 +08:00
+								  if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) {
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								    unsigned Res = IsSplitType ? MRI->createGenericVirtualRegister(
 								                                     getLLTForType(*CI.getType(), *DL))
 								                               : getOrCreateVReg(CI);
-												GlobalISel: implement simple function calls on AArch64.

We're still limited in the arguments we support, but this at least handles the
basic cases.

llvm-svn: 278293

											
										
										
											2016-08-11 05:44:01 +08:00
+								    SmallVector<unsigned, 8> Args;
 								    for (auto &Arg: CI.arg_operands())
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								      Args.push_back(packRegs(*Arg, MIRBuilder));
-												GlobalISel: implement simple function calls on AArch64.

We're still limited in the arguments we support, but this at least handles the
basic cases.

llvm-svn: 278293

											
										
										
											2016-08-11 05:44:01 +08:00
-												GlobalISel: inform FrameLowering when we emit a function call.

Amongst other things (I expect) this is necessary to ensure decent backtraces
when an "unreachable" is involved.

llvm-svn: 297413

											
										
										
											2017-03-10 06:00:39 +08:00
+								    MF->getFrameInfo().setHasCalls(true);
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								    bool Success = CLI->lowerCall(MIRBuilder, &CI, Res, Args, [&]() {
-												GlobalISel: rework CallLowering so that it can be used for libcalls too.

There should be no functional change here, I'm just making the implementation
of "frem" (to libcall) legalization easier for a followup.

llvm-svn: 279987

											
										
										
											2016-08-30 03:07:08 +08:00
+								      return getOrCreateVReg(*CI.getCalledValue());
 								    });
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
 								    if (IsSplitType)
 								      unpackRegs(CI, Res, MIRBuilder);
 								    return Success;
-												GlobalISel: implement simple function calls on AArch64.

We're still limited in the arguments we support, but this at least handles the
basic cases.

llvm-svn: 278293

											
										
										
											2016-08-11 05:44:01 +08:00
+								  }
 								  assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic");
-												GlobalISel: support translation of intrinsic calls.

These come in two variants for now: G_INTRINSIC and G_INTRINSIC_W_SIDE_EFFECTS.
We may decide to split the latter up with finer-grained restrictions later, if
necessary.

llvm-svn: 277224

											
										
										
											2016-07-30 06:32:36 +08:00
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								  if (translateKnownIntrinsic(CI, ID, MIRBuilder))
-												GlobalISel: support overflow arithmetic intrinsics.

Unsigned addition and subtraction can reuse the instructions created to
legalize large width operations (i.e. both produce and consume a carry flag).
Signed operations and multiplies get a dedicated op-with-overflow instruction.

Once this is produced the two values are combined into a struct register (which
will almost always be merged with a corresponding G_EXTRACT as part of
legalization).

llvm-svn: 279278

											
										
										
											2016-08-20 01:17:06 +08:00
+								    return true;
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  unsigned Res = 0;
 								  if (!CI.getType()->isVoidTy()) {
 								    if (IsSplitType)
 								      Res =
 								          MRI->createGenericVirtualRegister(getLLTForType(*CI.getType(), *DL));
 								    else
 								      Res = getOrCreateVReg(CI);
 								  }
-												GlobalISel: support translation of intrinsic calls.

These come in two variants for now: G_INTRINSIC and G_INTRINSIC_W_SIDE_EFFECTS.
We may decide to split the latter up with finer-grained restrictions later, if
necessary.

llvm-svn: 277224

											
										
										
											2016-07-30 06:32:36 +08:00
+								  MachineInstrBuilder MIB =
-												GlobalISel: move type information to MachineRegisterInfo.

We want each register to have a canonical type, which means the best place to
store this is in MachineRegisterInfo rather than on every MachineInstr that
happens to use or define that register.

Most changes following from this are pretty simple (you need an MRI anyway if
you're going to be doing any transformations, so just check the type there).
But legalization doesn't really want to check redundant operands (when, for
example, a G_ADD only ever has one type) so I've made use of MCInstrDesc's
operand type field to encode these constraints and limit legalization's work.

As an added bonus, more validation is possible, both in MachineVerifier and
MachineIRBuilder (coming soon).

llvm-svn: 281035

											
										
										
											2016-09-09 19:46:34 +08:00
+								      MIRBuilder.buildIntrinsic(ID, Res, !CI.doesNotAccessMemory());
-												GlobalISel: support translation of intrinsic calls.

These come in two variants for now: G_INTRINSIC and G_INTRINSIC_W_SIDE_EFFECTS.
We may decide to split the latter up with finer-grained restrictions later, if
necessary.

llvm-svn: 277224

											
										
										
											2016-07-30 06:32:36 +08:00
 								  for (auto &Arg : CI.arg_operands()) {
-												[GlobalISel] Don't translate intrinsics with metadata parameters.

Some intrinsics take metadata parameters.  These all need custom
handling of some form, and cannot possibly be lowered generically to
G_INTRINSIC calls with vreg operands.
Reject them, instead of hitting an assert later in getOrCreateVReg.

llvm-svn: 297209

											
										
										
											2017-03-08 04:53:09 +08:00
+								    // Some intrinsics take metadata parameters. Reject them.
 								    if (isa<MetadataAsValue>(Arg))
 								      return false;
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								    MIB.addUse(packRegs(*Arg, MIRBuilder));
-												GlobalISel: support translation of intrinsic calls.

These come in two variants for now: G_INTRINSIC and G_INTRINSIC_W_SIDE_EFFECTS.
We may decide to split the latter up with finer-grained restrictions later, if
necessary.

llvm-svn: 277224

											
										
										
											2016-07-30 06:32:36 +08:00
+								  }
-												[GlobalISel] IRTranslator: Add MachineMemOperand to target memory intrinsics

Reviewers: qcolombet, ab, t.p.northover, aditya_nandakumar, dsanders

Reviewed By: qcolombet

Subscribers: rovka, kristof.beyls, javed.absar, igorb, llvm-commits

Differential Revision: https://reviews.llvm.org/D33724

llvm-svn: 304743

											
										
										
											2017-06-06 06:17:17 +08:00
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  if (IsSplitType)
 								    unpackRegs(CI, Res, MIRBuilder);
-												[GlobalISel] IRTranslator: Add MachineMemOperand to target memory intrinsics

Reviewers: qcolombet, ab, t.p.northover, aditya_nandakumar, dsanders

Reviewed By: qcolombet

Subscribers: rovka, kristof.beyls, javed.absar, igorb, llvm-commits

Differential Revision: https://reviews.llvm.org/D33724

llvm-svn: 304743

											
										
										
											2017-06-06 06:17:17 +08:00
+								  // Add a MachineMemOperand if it is a target mem intrinsic.
 								  const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
 								  TargetLowering::IntrinsicInfo Info;
 								  // TODO: Add a GlobalISel version of getTgtMemIntrinsic.
-												TLI: Allow using PSV for intrinsic mem operands

llvm-svn: 320756

											
										
										
											2017-12-15 06:34:10 +08:00
+								  if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) {
-												Use getStoreSize() in various places instead of 'BitSize >> 3'.

This is needed for cases when the memory access is not as big as the width of
the data type. For instance, storing i1 (1 bit) would be done in a byte (8
bits).

Using 'BitSize >> 3' (or '/ 8') would e.g. give the memory access of an i1 a
size of 0, which for instance makes alias analysis return NoAlias even when
it shouldn't.

There are no tests as this was done as a follow-up to the bugfix for the case
where this was discovered (r318824). This handles more similar cases.

Review: Björn Petterson
https://reviews.llvm.org/D40339

llvm-svn: 319173

											
										
										
											2017-11-28 22:44:32 +08:00
+								    uint64_t Size = Info.memVT.getStoreSize();
-												[GlobalISel] IRTranslator: Add MachineMemOperand to target memory intrinsics

Reviewers: qcolombet, ab, t.p.northover, aditya_nandakumar, dsanders

Reviewed By: qcolombet

Subscribers: rovka, kristof.beyls, javed.absar, igorb, llvm-commits

Differential Revision: https://reviews.llvm.org/D33724

llvm-svn: 304743

											
										
										
											2017-06-06 06:17:17 +08:00
+								    MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal),
-												DAG: Expose all MMO flags in getTgtMemIntrinsic

Rather than adding more bits to express every
MMO flag you could want, just directly use the
MMO flags. Also fixes using a bunch of bool arguments to
getMemIntrinsicNode.

On AMDGPU, buffer and image intrinsics should always
have MODereferenceable set, but currently there is no
way to do that directly during the initial intrinsic
lowering.

llvm-svn: 320746

											
										
										
											2017-12-15 05:39:51 +08:00
+								                                               Info.flags, Size, Info.align));
-												[GlobalISel] IRTranslator: Add MachineMemOperand to target memory intrinsics

Reviewers: qcolombet, ab, t.p.northover, aditya_nandakumar, dsanders

Reviewed By: qcolombet

Subscribers: rovka, kristof.beyls, javed.absar, igorb, llvm-commits

Differential Revision: https://reviews.llvm.org/D33724

llvm-svn: 304743

											
										
										
											2017-06-06 06:17:17 +08:00
+								  }
-												GlobalISel: support translation of intrinsic calls.

These come in two variants for now: G_INTRINSIC and G_INTRINSIC_W_SIDE_EFFECTS.
We may decide to split the latter up with finer-grained restrictions later, if
necessary.

llvm-svn: 277224

											
										
										
											2016-07-30 06:32:36 +08:00
+								  return true;
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateInvoke(const User &U,
 								                                   MachineIRBuilder &MIRBuilder) {
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
+								  const InvokeInst &I = cast<InvokeInst>(U);
-												GlobalISel: store the current MachineFunction as direct state. NFC.

Having to ask the MIRBuilder for the current function is a little awkward, and
I'm intending to improve how that's threaded through anyway.

llvm-svn: 288983

											
										
										
											2016-12-08 05:17:47 +08:00
+								  MCContext &Context = MF->getContext();
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
 								  const BasicBlock *ReturnBB = I.getSuccessor(0);
 								  const BasicBlock *EHPadBB = I.getSuccessor(1);
-												[GlobalISel] Fallback when failing to translate invoke.

We unintentionally stopped falling back in r293670.

While there, change an unusual construct.

llvm-svn: 297425

											
										
										
											2017-03-10 08:25:35 +08:00
+								  const Value *Callee = I.getCalledValue();
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
+								  const Function *Fn = dyn_cast<Function>(Callee);
 								  if (isa<InlineAsm>(Callee))
 								    return false;
 								  // FIXME: support invoking patchpoint and statepoint intrinsics.
 								  if (Fn && Fn->isIntrinsic())
 								    return false;
 								  // FIXME: support whatever these are.
 								  if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
 								    return false;
 								  // FIXME: support Windows exception handling.
 								  if (!isa<LandingPadInst>(EHPadBB->front()))
 								    return false;
-												Move most EH from MachineModuleInfo to MachineFunction

Recommitting r288293 with some extra fixes for GlobalISel code.

Most of the exception handling members in MachineModuleInfo is actually
per function data (talks about the "current function") so it is better
to keep it at the function instead of the module.

This is a necessary step to have machine module passes work properly.

Also:
- Rename TidyLandingPads() to tidyLandingPads()
- Use doxygen member groups instead of "//===- EH ---"... so it is clear
  where a group ends.
- I had to add an ugly const_cast at two places in the AsmPrinter
  because the available MachineFunction pointers are const, but the code
  wants to call tidyLandingPads() in between
  (markFunctionEnd()/endFunction()).

Differential Revision: https://reviews.llvm.org/D27227

llvm-svn: 288405

											
										
										
											2016-12-02 03:32:15 +08:00
+								  // Emit the actual call, bracketed by EH_LABELs so that the MF knows about
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
+								  // the region covered by the try.
-												Move most EH from MachineModuleInfo to MachineFunction

Recommitting r288293 with some extra fixes for GlobalISel code.

Most of the exception handling members in MachineModuleInfo is actually
per function data (talks about the "current function") so it is better
to keep it at the function instead of the module.

This is a necessary step to have machine module passes work properly.

Also:
- Rename TidyLandingPads() to tidyLandingPads()
- Use doxygen member groups instead of "//===- EH ---"... so it is clear
  where a group ends.
- I had to add an ugly const_cast at two places in the AsmPrinter
  because the available MachineFunction pointers are const, but the code
  wants to call tidyLandingPads() in between
  (markFunctionEnd()/endFunction()).

Differential Revision: https://reviews.llvm.org/D27227

llvm-svn: 288405

											
										
										
											2016-12-02 03:32:15 +08:00
+								  MCSymbol *BeginSymbol = Context.createTempSymbol();
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
+								  MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  unsigned Res =
 								        MRI->createGenericVirtualRegister(getLLTForType(*I.getType(), *DL));
-												GlobalISel: merge invoke and call translation paths.

Well, sort of. But the lower-level code that invoke used to be using completely
botched the handling of varargs functions, which hopefully won't be possible if
they're using the same code.

llvm-svn: 293670

											
										
										
											2017-02-01 02:36:11 +08:00
+								  SmallVector<unsigned, 8> Args;
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
+								  for (auto &Arg: I.arg_operands())
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								    Args.push_back(packRegs(*Arg, MIRBuilder));
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
-												[GlobalISel] Use ImmutableCallSite instead of templates. NFC.

ImmutableCallSite abstracts away CallInst and InvokeInst. Use it!

llvm-svn: 297426

											
										
										
											2017-03-10 08:25:44 +08:00
+								  if (!CLI->lowerCall(MIRBuilder, &I, Res, Args,
-												[GlobalISel] Fallback when failing to translate invoke.

We unintentionally stopped falling back in r293670.

While there, change an unusual construct.

llvm-svn: 297425

											
										
										
											2017-03-10 08:25:35 +08:00
+								                      [&]() { return getOrCreateVReg(*I.getCalledValue()); }))
 								    return false;
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  unpackRegs(I, Res, MIRBuilder);
-												Move most EH from MachineModuleInfo to MachineFunction

Recommitting r288293 with some extra fixes for GlobalISel code.

Most of the exception handling members in MachineModuleInfo is actually
per function data (talks about the "current function") so it is better
to keep it at the function instead of the module.

This is a necessary step to have machine module passes work properly.

Also:
- Rename TidyLandingPads() to tidyLandingPads()
- Use doxygen member groups instead of "//===- EH ---"... so it is clear
  where a group ends.
- I had to add an ugly const_cast at two places in the AsmPrinter
  because the available MachineFunction pointers are const, but the code
  wants to call tidyLandingPads() in between
  (markFunctionEnd()/endFunction()).

Differential Revision: https://reviews.llvm.org/D27227

llvm-svn: 288405

											
										
										
											2016-12-02 03:32:15 +08:00
+								  MCSymbol *EndSymbol = Context.createTempSymbol();
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
+								  MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);
 								  // FIXME: track probabilities.
-												[GlobalISel] Preserve IR block layout.

It makes the output function layout more predictable;  the layout has
an effect on performance, we don't want it to be at the mercy of the
translator's visitation order and such.
The predictable output is also easier to digest.

getOrCreateBB isn't appropriately named anymore, as it never needs to
create anything.  Rename it and extract the MBB creation logic out of it.

A couple tests were sensitive to the order. Update them.

llvm-svn: 297868

											
										
										
											2017-03-16 02:22:33 +08:00
+								  MachineBasicBlock &EHPadMBB = getMBB(*EHPadBB),
 								                    &ReturnMBB = getMBB(*ReturnBB);
-												GlobalISel: store the current MachineFunction as direct state. NFC.

Having to ask the MIRBuilder for the current function is a little awkward, and
I'm intending to improve how that's threaded through anyway.

llvm-svn: 288983

											
										
										
											2016-12-08 05:17:47 +08:00
+								  MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol);
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
+								  MIRBuilder.getMBB().addSuccessor(&ReturnMBB);
 								  MIRBuilder.getMBB().addSuccessor(&EHPadMBB);
-												GlobalISel: the translation of an invoke must branch to the good block.

Otherwise bad things happen if the basic block order isn't trivial after an
invoke.

llvm-svn: 293679

											
										
										
											2017-02-01 04:12:18 +08:00
+								  MIRBuilder.buildBr(ReturnMBB);
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
 								  return true;
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateLandingPad(const User &U,
 								                                       MachineIRBuilder &MIRBuilder) {
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
+								  const LandingPadInst &LP = cast<LandingPadInst>(U);
 								  MachineBasicBlock &MBB = MIRBuilder.getMBB();
 								  MBB.setIsEHPad();
 								  // If there aren't registers to copy the values into (e.g., during SjLj
 								  // exceptions), then don't bother.
-												GlobalISel: store the current MachineFunction as direct state. NFC.

Having to ask the MIRBuilder for the current function is a little awkward, and
I'm intending to improve how that's threaded through anyway.

llvm-svn: 288983

											
										
										
											2016-12-08 05:17:47 +08:00
+								  auto &TLI = *MF->getSubtarget().getTargetLowering();
-												MachineFunction: Return reference from getFunction(); NFC

The Function can never be nullptr so we can return a reference.

llvm-svn: 320884

											
										
										
											2017-12-16 06:22:58 +08:00
+								  const Constant *PersonalityFn = MF->getFunction().getPersonalityFn();
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
+								  if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
 								      TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
 								    return true;
 								  // If landingpad's return type is token type, we don't create DAG nodes
 								  // for its exception pointer and selector value. The extraction of exception
 								  // pointer or selector value from token type landingpads is not currently
 								  // supported.
 								  if (LP.getType()->isTokenTy())
 								    return true;
 								  // Add a label to mark the beginning of the landing pad.  Deletion of the
 								  // landing pad can thus be detected via the MachineModuleInfo.
 								  MIRBuilder.buildInstr(TargetOpcode::EH_LABEL)
-												GlobalISel: store the current MachineFunction as direct state. NFC.

Having to ask the MIRBuilder for the current function is a little awkward, and
I'm intending to improve how that's threaded through anyway.

llvm-svn: 288983

											
										
										
											2016-12-08 05:17:47 +08:00
+								    .addSym(MF->addLandingPad(&MBB));
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
-												Fix additional constructor call missed by r297241.

It was added between my build+test and my commit.

llvm-svn: 297244

											
										
										
											2017-03-08 07:32:10 +08:00
+								  LLT Ty = getLLTForType(*LP.getType(), *DL);
-												GlobalISel: use inserts for landingpad instead of sequences.

llvm-svn: 297237

											
										
										
											2017-03-08 07:04:06 +08:00
+								  unsigned Undef = MRI->createGenericVirtualRegister(Ty);
 								  MIRBuilder.buildUndef(Undef);
-												GlobalISel: Use the correct types when translating landingpad instructions

There was a bug here where we were using p0 instead of s32 for the
selector type in the landingpad. Instead of hardcoding these types we
should get the types from the landingpad instruction directly.

Note that we replicate an assert from SDAG here to only support
two-valued landingpads.

llvm-svn: 292995

											
										
										
											2017-01-25 08:16:53 +08:00
+								  SmallVector<LLT, 2> Tys;
 								  for (Type *Ty : cast<StructType>(LP.getType())->elements())
-												Recommit: [globalisel] Change LLT constructor string into an LLT-based object that knows how to generate it.

Summary:
This will allow future patches to inspect the details of the LLT. The implementation is now split between
the Support and CodeGen libraries to allow TableGen to use this class without introducing layering concerns.

Thanks to Ahmed Bougacha for finding a reasonable way to avoid the layering issue and providing the version of this patch without that problem.

The problem with the previous commit appears to have been that TableGen was including CodeGen/LowLevelType.h instead of Support/LowLevelTypeImpl.h.

Reviewers: t.p.northover, qcolombet, rovka, aditya_nandakumar, ab, javed.absar

Subscribers: arsenm, nhaehnle, mgorny, dberris, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30046

llvm-svn: 297241

											
										
										
											2017-03-08 07:20:35 +08:00
+								    Tys.push_back(getLLTForType(*Ty, *DL));
-												GlobalISel: Use the correct types when translating landingpad instructions

There was a bug here where we were using p0 instead of s32 for the
selector type in the landingpad. Instead of hardcoding these types we
should get the types from the landingpad instruction directly.

Note that we replicate an assert from SDAG here to only support
two-valued landingpads.

llvm-svn: 292995

											
										
										
											2017-01-25 08:16:53 +08:00
+								  assert(Tys.size() == 2 && "Only two-valued landingpads are supported");
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
+								  // Mark exception register as live in.
-												GlobalISel: use inserts for landingpad instead of sequences.

llvm-svn: 297237

											
										
										
											2017-03-08 07:04:06 +08:00
+								  unsigned ExceptionReg = TLI.getExceptionPointerRegister(PersonalityFn);
 								  if (!ExceptionReg)
 								    return false;
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
-												GlobalISel: use inserts for landingpad instead of sequences.

llvm-svn: 297237

											
										
										
											2017-03-08 07:04:06 +08:00
+								  MBB.addLiveIn(ExceptionReg);
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  ArrayRef<unsigned> ResRegs = getOrCreateVRegs(LP);
 								  MIRBuilder.buildCopy(ResRegs[0], ExceptionReg);
-												GlobalISel: use inserts for landingpad instead of sequences.

llvm-svn: 297237

											
										
										
											2017-03-08 07:04:06 +08:00
 								  unsigned SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn);
 								  if (!SelectorReg)
 								    return false;
 								  MBB.addLiveIn(SelectorReg);
 								  unsigned PtrVReg = MRI->createGenericVirtualRegister(Tys[0]);
 								  MIRBuilder.buildCopy(PtrVReg, SelectorReg);
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  MIRBuilder.buildCast(ResRegs[1], PtrVReg);
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
 								  return true;
 								}
-												GlobalISel: translate dynamic alloca instructions.

llvm-svn: 294022

											
										
										
											2017-02-04 02:22:45 +08:00
+								bool IRTranslator::translateAlloca(const User &U,
 								                                   MachineIRBuilder &MIRBuilder) {
 								  auto &AI = cast<AllocaInst>(U);
-												[GlobalISel] Fall back to SDISel for swifterror/swiftself attributes.

We don't currently support these, fall back until we do.

llvm-svn: 337994

											
										
										
											2018-07-26 09:25:58 +08:00
+								  if (AI.isSwiftError())
 								    return false;
-												GlobalISel: translate dynamic alloca instructions.

llvm-svn: 294022

											
										
										
											2017-02-04 02:22:45 +08:00
+								  if (AI.isStaticAlloca()) {
 								    unsigned Res = getOrCreateVReg(AI);
 								    int FI = getOrCreateFrameIndex(AI);
 								    MIRBuilder.buildFrameIndex(Res, FI);
 								    return true;
 								  }
-												[AArch64] Implement dynamic stack probing for windows

This makes sure that alloca() function calls properly probe the
stack as needed.

Differential Revision: https://reviews.llvm.org/D42356

llvm-svn: 325433

											
										
										
											2018-02-17 22:26:32 +08:00
+								  // FIXME: support stack probing for Windows.
 								  if (MF->getTarget().getTargetTriple().isOSWindows())
 								    return false;
-												GlobalISel: translate dynamic alloca instructions.

llvm-svn: 294022

											
										
										
											2017-02-04 02:22:45 +08:00
+								  // Now we're in the harder dynamic case.
 								  Type *Ty = AI.getAllocatedType();
 								  unsigned Align =
 								      std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI.getAlignment());
 								  unsigned NumElts = getOrCreateVReg(*AI.getArraySize());
-												[GlobalISel] Avoid translating synthetic constants to new G_CONSTANTS.

Currently, we create a G_CONSTANT for every "synthetic" integer
constant operand (for instance, for the G_GEP offset).
Instead, share the G_CONSTANTs we might have created by going through
the ValueToVReg machinery.

When we're emitting synthetic constants, we do need to get Constants from
the context.  One could argue that we shouldn't modify the context at
all (for instance, this means that we're going to use a tad more memory
if the constant wasn't used elsewhere), but constants are mostly
harmless.  We currently do this for extractvalue and all.

For constant fcmp, this does mean we'll emit an extra COPY, which is not
necessarily more optimal than an extra materialized constant.
But that preserves the current intended design of uniqued G_CONSTANTs,
and the rematerialization problem exists elsewhere and should be
resolved with a single coherent solution.

llvm-svn: 297875

											
										
										
											2017-03-16 03:21:11 +08:00
+								  Type *IntPtrIRTy = DL->getIntPtrType(AI.getType());
 								  LLT IntPtrTy = getLLTForType(*IntPtrIRTy, *DL);
-												GlobalISel: translate dynamic alloca instructions.

llvm-svn: 294022

											
										
										
											2017-02-04 02:22:45 +08:00
+								  if (MRI->getType(NumElts) != IntPtrTy) {
 								    unsigned ExtElts = MRI->createGenericVirtualRegister(IntPtrTy);
 								    MIRBuilder.buildZExtOrTrunc(ExtElts, NumElts);
 								    NumElts = ExtElts;
 								  }
 								  unsigned AllocSize = MRI->createGenericVirtualRegister(IntPtrTy);
-												[GlobalISel] Avoid translating synthetic constants to new G_CONSTANTS.

Currently, we create a G_CONSTANT for every "synthetic" integer
constant operand (for instance, for the G_GEP offset).
Instead, share the G_CONSTANTs we might have created by going through
the ValueToVReg machinery.

When we're emitting synthetic constants, we do need to get Constants from
the context.  One could argue that we shouldn't modify the context at
all (for instance, this means that we're going to use a tad more memory
if the constant wasn't used elsewhere), but constants are mostly
harmless.  We currently do this for extractvalue and all.

For constant fcmp, this does mean we'll emit an extra COPY, which is not
necessarily more optimal than an extra materialized constant.
But that preserves the current intended design of uniqued G_CONSTANTs,
and the rematerialization problem exists elsewhere and should be
resolved with a single coherent solution.

llvm-svn: 297875

											
										
										
											2017-03-16 03:21:11 +08:00
+								  unsigned TySize =
 								      getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, -DL->getTypeAllocSize(Ty)));
-												GlobalISel: translate dynamic alloca instructions.

llvm-svn: 294022

											
										
										
											2017-02-04 02:22:45 +08:00
+								  MIRBuilder.buildMul(AllocSize, NumElts, TySize);
-												Recommit: [globalisel] Change LLT constructor string into an LLT-based object that knows how to generate it.

Summary:
This will allow future patches to inspect the details of the LLT. The implementation is now split between
the Support and CodeGen libraries to allow TableGen to use this class without introducing layering concerns.

Thanks to Ahmed Bougacha for finding a reasonable way to avoid the layering issue and providing the version of this patch without that problem.

The problem with the previous commit appears to have been that TableGen was including CodeGen/LowLevelType.h instead of Support/LowLevelTypeImpl.h.

Reviewers: t.p.northover, qcolombet, rovka, aditya_nandakumar, ab, javed.absar

Subscribers: arsenm, nhaehnle, mgorny, dberris, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30046

llvm-svn: 297241

											
										
										
											2017-03-08 07:20:35 +08:00
+								  LLT PtrTy = getLLTForType(*AI.getType(), *DL);
-												GlobalISel: translate dynamic alloca instructions.

llvm-svn: 294022

											
										
										
											2017-02-04 02:22:45 +08:00
+								  auto &TLI = *MF->getSubtarget().getTargetLowering();
 								  unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
 								  unsigned SPTmp = MRI->createGenericVirtualRegister(PtrTy);
 								  MIRBuilder.buildCopy(SPTmp, SPReg);
-												GlobalISel: introduce G_PTR_MASK to simplify alloca handling.

This instruction clears the low bits of a pointer without requiring (possibly
dodgy if pointers aren't ints) conversions to and from an integer. Since (as
far as I'm aware) all masks are statically known, the instruction takes an
immediate operand rather than a register to specify the mask.

llvm-svn: 295103

											
										
										
											2017-02-15 04:56:18 +08:00
+								  unsigned AllocTmp = MRI->createGenericVirtualRegister(PtrTy);
 								  MIRBuilder.buildGEP(AllocTmp, SPTmp, AllocSize);
-												GlobalISel: translate dynamic alloca instructions.

llvm-svn: 294022

											
										
										
											2017-02-04 02:22:45 +08:00
 								  // Handle alignment. We have to realign if the allocation granule was smaller
 								  // than stack alignment, or the specific alloca requires more than stack
 								  // alignment.
 								  unsigned StackAlign =
 								      MF->getSubtarget().getFrameLowering()->getStackAlignment();
 								  Align = std::max(Align, StackAlign);
 								  if (Align > StackAlign || DL->getTypeAllocSize(Ty) % StackAlign != 0) {
 								    // Round the size of the allocation up to the stack alignment size
 								    // by adding SA-1 to the size. This doesn't overflow because we're
 								    // computing an address inside an alloca.
-												GlobalISel: introduce G_PTR_MASK to simplify alloca handling.

This instruction clears the low bits of a pointer without requiring (possibly
dodgy if pointers aren't ints) conversions to and from an integer. Since (as
far as I'm aware) all masks are statically known, the instruction takes an
immediate operand rather than a register to specify the mask.

llvm-svn: 295103

											
										
										
											2017-02-15 04:56:18 +08:00
+								    unsigned AlignedAlloc = MRI->createGenericVirtualRegister(PtrTy);
 								    MIRBuilder.buildPtrMask(AlignedAlloc, AllocTmp, Log2_32(Align));
 								    AllocTmp = AlignedAlloc;
-												GlobalISel: translate dynamic alloca instructions.

llvm-svn: 294022

											
										
										
											2017-02-04 02:22:45 +08:00
+								  }
-												GlobalISel: introduce G_PTR_MASK to simplify alloca handling.

This instruction clears the low bits of a pointer without requiring (possibly
dodgy if pointers aren't ints) conversions to and from an integer. Since (as
far as I'm aware) all masks are statically known, the instruction takes an
immediate operand rather than a register to specify the mask.

llvm-svn: 295103

											
										
										
											2017-02-15 04:56:18 +08:00
+								  MIRBuilder.buildCopy(SPReg, AllocTmp);
 								  MIRBuilder.buildCopy(getOrCreateVReg(AI), AllocTmp);
-												[IRTranslator] Do not abort when the target wants to fall back.

Every pass in the GlobalISel pipeline will need to do something similar.

llvm-svn: 279886

											
										
										
											2016-08-27 07:49:05 +08:00
-												GlobalISel: translate dynamic alloca instructions.

llvm-svn: 294022

											
										
										
											2017-02-04 02:22:45 +08:00
+								  MF->getFrameInfo().CreateVariableSizedObject(Align ? Align : 1, &AI);
 								  assert(MF->getFrameInfo().hasVarSizedObjects());
-												GlobalISel: implement alloca instruction

llvm-svn: 276433

											
										
										
											2016-07-23 00:59:52 +08:00
+								  return true;
 								}
-												GlobalISel: support translating va_arg

Since (say) i128 and [16 x i8] map to the same type in generic MIR, we also
need to attach the required alignment info.

llvm-svn: 295254

											
										
										
											2017-02-16 07:22:33 +08:00
+								bool IRTranslator::translateVAArg(const User &U, MachineIRBuilder &MIRBuilder) {
 								  // Emits a single G_VAARG whose def is the vreg for U, whose use is the
 								  // vreg for U's operand 0, and whose immediate is the ABI alignment of
 								  // U's type.
 								  //
 								  // FIXME: More information about the type may be needed. LLT does not
 								  // distinguish, for example, i64 from double, so that distinction (among
 								  // others) is dropped here. The ABIs known to care about it don't use
 								  // va_arg anyway, but nothing guarantees that.
 								  auto VAArg = MIRBuilder.buildInstr(TargetOpcode::G_VAARG);
 								  // Operands are attached one statement at a time: def, use, immediate.
 								  VAArg.addDef(getOrCreateVReg(U));
 								  VAArg.addUse(getOrCreateVReg(*U.getOperand(0)));
 								  VAArg.addImm(DL->getABITypeAlignment(U.getType()));
 								  return true;
 								}
-												[GlobalISel] Translate insertelement and extractelement

Reviewers: qcolombet, aditya_nandakumar, dsanders, ab, t.p.northover, javed.absar

Reviewed By: qcolombet

Subscribers: dberris, rovka, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30761

llvm-svn: 297495

											
										
										
											2017-03-11 03:08:28 +08:00
+								bool IRTranslator::translateInsertElement(const User &U,
 								                                          MachineIRBuilder &MIRBuilder) {
 								  // If it is a <1 x Ty> vector, use the scalar as it is
 								  // not a legal vector type in LLT.
 								  if (U.getType()->getVectorNumElements() == 1) {
 								    unsigned Elt = getOrCreateVReg(*U.getOperand(1));
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								    auto &Regs = *VMap.getVRegs(U);
 								    if (Regs.empty()) {
 								      Regs.push_back(Elt);
 								      VMap.getOffsets(U)->push_back(0);
 								    } else {
 								      MIRBuilder.buildCopy(Regs[0], Elt);
 								    }
-												[GlobalISel] Translate insertelement and extractelement

Reviewers: qcolombet, aditya_nandakumar, dsanders, ab, t.p.northover, javed.absar

Reviewed By: qcolombet

Subscribers: dberris, rovka, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30761

llvm-svn: 297495

											
										
										
											2017-03-11 03:08:28 +08:00
+								    return true;
 								  }
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
-												[GlobalISel] Remove non-determinism from IRTranslator.

This showed up in r300535/r300537, which were reverted in r300538 due to
some of the introduced tests in there failing on some bots, due to the
non-determinism fixed in this commit.

Re-committing r300535/r300537 will add 2 tests for the change in this
commit.

llvm-svn: 300663

											
										
										
											2017-04-19 14:38:37 +08:00
+								  unsigned Res = getOrCreateVReg(U);
 								  unsigned Val = getOrCreateVReg(*U.getOperand(0));
 								  unsigned Elt = getOrCreateVReg(*U.getOperand(1));
 								  unsigned Idx = getOrCreateVReg(*U.getOperand(2));
 								  MIRBuilder.buildInsertVectorElement(Res, Val, Elt, Idx);
-												[GlobalISel] Translate insertelement and extractelement

Reviewers: qcolombet, aditya_nandakumar, dsanders, ab, t.p.northover, javed.absar

Reviewed By: qcolombet

Subscribers: dberris, rovka, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30761

llvm-svn: 297495

											
										
										
											2017-03-11 03:08:28 +08:00
+								  return true;
 								}
 								bool IRTranslator::translateExtractElement(const User &U,
 								                                           MachineIRBuilder &MIRBuilder) {
 								  // If it is a <1 x Ty> vector, use the scalar as it is
 								  // not a legal vector type in LLT.
 								  if (U.getOperand(0)->getType()->getVectorNumElements() == 1) {
 								    unsigned Elt = getOrCreateVReg(*U.getOperand(0));
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								    auto &Regs = *VMap.getVRegs(U);
 								    if (Regs.empty()) {
 								      Regs.push_back(Elt);
 								      VMap.getOffsets(U)->push_back(0);
 								    } else {
 								      MIRBuilder.buildCopy(Regs[0], Elt);
 								    }
-												[GlobalISel] Translate insertelement and extractelement

Reviewers: qcolombet, aditya_nandakumar, dsanders, ab, t.p.northover, javed.absar

Reviewed By: qcolombet

Subscribers: dberris, rovka, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30761

llvm-svn: 297495

											
										
										
											2017-03-11 03:08:28 +08:00
+								    return true;
 								  }
-												[GlobalISel] Remove non-determinism from IRTranslator.

This showed up in r300535/r300537, which were reverted in r300538 due to
some of the introduced tests in there failing on some bots, due to the
non-determinism fixed in this commit.

Re-committing r300535/r300537 will add 2 tests for the change in this
commit.

llvm-svn: 300663

											
										
										
											2017-04-19 14:38:37 +08:00
+								  unsigned Res = getOrCreateVReg(U);
 								  unsigned Val = getOrCreateVReg(*U.getOperand(0));
-												[GlobalISel] Use the target preferred type for G_EXTRACT_VECTOR_ELT index.

Allows for better imported pattern re-use.

llvm-svn: 345265

											
										
										
											2018-10-25 22:04:54 +08:00
+								  const auto &TLI = *MF->getSubtarget().getTargetLowering();
 								  unsigned PreferredVecIdxWidth = TLI.getVectorIdxTy(*DL).getSizeInBits();
 								  unsigned Idx = 0;
 								  if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(1))) {
 								    if (CI->getBitWidth() != PreferredVecIdxWidth) {
 								      APInt NewIdx = CI->getValue().sextOrTrunc(PreferredVecIdxWidth);
 								      auto *NewIdxCI = ConstantInt::get(CI->getContext(), NewIdx);
 								      Idx = getOrCreateVReg(*NewIdxCI);
 								    }
 								  }
 								  if (!Idx)
 								    Idx = getOrCreateVReg(*U.getOperand(1));
 								  if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) {
 								    const LLT &VecIdxTy = LLT::scalar(PreferredVecIdxWidth);
 								    Idx = MIRBuilder.buildSExtOrTrunc(VecIdxTy, Idx)->getOperand(0).getReg();
 								  }
-												[GlobalISel] Remove non-determinism from IRTranslator.

This showed up in r300535/r300537, which were reverted in r300538 due to
some of the introduced tests in there failing on some bots, due to the
non-determinism fixed in this commit.

Re-committing r300535/r300537 will add 2 tests for the change in this
commit.

llvm-svn: 300663

											
										
										
											2017-04-19 14:38:37 +08:00
+								  MIRBuilder.buildExtractVectorElement(Res, Val, Idx);
-												[GlobalISel] Translate insertelement and extractelement

Reviewers: qcolombet, aditya_nandakumar, dsanders, ab, t.p.northover, javed.absar

Reviewed By: qcolombet

Subscribers: dberris, rovka, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30761

llvm-svn: 297495

											
										
										
											2017-03-11 03:08:28 +08:00
+								  return true;
 								}
-												[GlobalISel] Translate shufflevector

Reviewers: qcolombet, aditya_nandakumar, t.p.northover, javed.absar, ab, dsanders

Reviewed By: javed.absar

Subscribers: dberris, rovka, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30962

llvm-svn: 298347

											
										
										
											2017-03-21 16:44:13 +08:00
+								bool IRTranslator::translateShuffleVector(const User &U,
 								                                          MachineIRBuilder &MIRBuilder) {
 								  // Emits a single G_SHUFFLE_VECTOR: the def is the vreg for U and the
 								  // three uses are the vregs of U's operands 0, 1 and 2, in that order.
 								  auto Shuffle = MIRBuilder.buildInstr(TargetOpcode::G_SHUFFLE_VECTOR);
 								  Shuffle.addDef(getOrCreateVReg(U));
 								  for (unsigned OpIdx = 0; OpIdx != 3; ++OpIdx)
 								    Shuffle.addUse(getOrCreateVReg(*U.getOperand(OpIdx)));
 								  return true;
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) {
-												GlobalISel: support same ConstantExprs as Instructions.

It's more than just inttoptr, but the others can't be tested until we have
support for non-trivial constants (they currently get unavoidably folded to a
ConstantInt).

llvm-svn: 278303

											
										
										
											2016-08-11 07:02:41 +08:00
+								  const PHINode &PI = cast<PHINode>(U);
-												GlobalISel: IRTranslate PHI instructions

llvm-svn: 277835

											
										
										
											2016-08-06 01:16:40 +08:00
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  SmallVector<MachineInstr *, 4> Insts;
 								  for (auto Reg : getOrCreateVRegs(PI)) {
 								    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, Reg);
 								    Insts.push_back(MIB.getInstr());
 								  }
 								  PendingPHIs.emplace_back(&PI, std::move(Insts));
-												GlobalISel: IRTranslate PHI instructions

llvm-svn: 277835

											
										
										
											2016-08-06 01:16:40 +08:00
+								  return true;
 								}
-												[globalisel][irtranslator] Add support for atomicrmw and (strong) cmpxchg

Summary:
This patch adds support for the atomicrmw instructions and the strong
cmpxchg instruction to the IRTranslator.

I've left out weak cmpxchg because LangRef.rst isn't entirely clear on what
difference it makes to the backend. As far as I can tell from the code, it
only matters to AtomicExpandPass which is run at the LLVM-IR level.

Reviewers: ab, t.p.northover, qcolombet, rovka, aditya_nandakumar, volkan, javed.absar

Reviewed By: qcolombet

Subscribers: kristof.beyls, javed.absar, igorb, llvm-commits

Differential Revision: https://reviews.llvm.org/D40092

llvm-svn: 336589

											
										
										
											2018-07-10 03:33:40 +08:00
+								bool IRTranslator::translateAtomicCmpXchg(const User &U,
 								                                          MachineIRBuilder &MIRBuilder) {
 								  // Translates a non-weak cmpxchg into one instruction that defines both
 								  // the old value and the success flag. Returns true once the instruction
 								  // has been built.
 								  const AtomicCmpXchgInst &CmpXchg = cast<AtomicCmpXchgInst>(U);
 								  // Weak cmpxchg is not handled here: return false without emitting
 								  // anything.
 								  if (CmpXchg.isWeak())
 								    return false;
 								  // The memory operand is always marked as both a load and a store;
 								  // MOVolatile is added when the IR instruction is volatile.
 								  auto MemFlags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
 								  if (CmpXchg.isVolatile())
 								    MemFlags |= MachineMemOperand::MOVolatile;
 								  // Element 0 of the instruction's struct result type is the value type;
 								  // its store size is used as the size of the memory access below.
 								  Type *ValueTy = CmpXchg.getType()->getStructElementType(0);
 								  // Result vregs: [0] receives the old value, [1] the success flag.
 								  auto ResultRegs = getOrCreateVRegs(CmpXchg);
 								  unsigned OldValReg = ResultRegs[0];
 								  unsigned SuccessReg = ResultRegs[1];
 								  const auto *PtrOp = CmpXchg.getPointerOperand();
 								  unsigned AddrReg = getOrCreateVReg(*PtrOp);
 								  unsigned CmpReg = getOrCreateVReg(*CmpXchg.getCompareOperand());
 								  unsigned NewValReg = getOrCreateVReg(*CmpXchg.getNewValOperand());
 								  // Describe the access: pointer, flags, size, alignment, sync scope and
 								  // the success/failure orderings taken from the IR instruction.
 								  auto *MMO = MF->getMachineMemOperand(
 								      MachinePointerInfo(PtrOp), MemFlags, DL->getTypeStoreSize(ValueTy),
 								      getMemOpAlignment(CmpXchg), AAMDNodes(), nullptr,
 								      CmpXchg.getSyncScopeID(), CmpXchg.getSuccessOrdering(),
 								      CmpXchg.getFailureOrdering());
 								  MIRBuilder.buildAtomicCmpXchgWithSuccess(OldValReg, SuccessReg, AddrReg,
 								                                           CmpReg, NewValReg, *MMO);
 								  return true;
 								}
 								bool IRTranslator::translateAtomicRMW(const User &U,
 								                                      MachineIRBuilder &MIRBuilder) {
 								  const AtomicRMWInst &I = cast<AtomicRMWInst>(U);
 								  auto Flags = I.isVolatile() ? MachineMemOperand::MOVolatile
 								                              : MachineMemOperand::MONone;
 								  Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
 								  Type *ResType = I.getType();
 								  unsigned Res = getOrCreateVReg(I);
 								  unsigned Addr = getOrCreateVReg(*I.getPointerOperand());
 								  unsigned Val = getOrCreateVReg(*I.getValOperand());
 								  unsigned Opcode = 0;
 								  switch (I.getOperation()) {
 								  default:
 								    llvm_unreachable("Unknown atomicrmw op");
 								    return false;
 								  case AtomicRMWInst::Xchg:
 								    Opcode = TargetOpcode::G_ATOMICRMW_XCHG;
 								    break;
 								  case AtomicRMWInst::Add:
 								    Opcode = TargetOpcode::G_ATOMICRMW_ADD;
 								    break;
 								  case AtomicRMWInst::Sub:
 								    Opcode = TargetOpcode::G_ATOMICRMW_SUB;
 								    break;
 								  case AtomicRMWInst::And:
 								    Opcode = TargetOpcode::G_ATOMICRMW_AND;
 								    break;
 								  case AtomicRMWInst::Nand:
 								    Opcode = TargetOpcode::G_ATOMICRMW_NAND;
 								    break;
 								  case AtomicRMWInst::Or:
 								    Opcode = TargetOpcode::G_ATOMICRMW_OR;
 								    break;
 								  case AtomicRMWInst::Xor:
 								    Opcode = TargetOpcode::G_ATOMICRMW_XOR;
 								    break;
 								  case AtomicRMWInst::Max:
 								    Opcode = TargetOpcode::G_ATOMICRMW_MAX;
 								    break;
 								  case AtomicRMWInst::Min:
 								    Opcode = TargetOpcode::G_ATOMICRMW_MIN;
 								    break;
 								  case AtomicRMWInst::UMax:
 								    Opcode = TargetOpcode::G_ATOMICRMW_UMAX;
 								    break;
 								  case AtomicRMWInst::UMin:
 								    Opcode = TargetOpcode::G_ATOMICRMW_UMIN;
 								    break;
 								  }
 								  MIRBuilder.buildAtomicRMW(
 								      Opcode, Res, Addr, Val,
 								      *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
 								                                Flags, DL->getTypeStoreSize(ResType),
 								                                getMemOpAlignment(I), AAMDNodes(), nullptr,
 								                                I.getSyncScopeID(), I.getOrdering()));
 								  return true;
 								}
-												GlobalISel: IRTranslate PHI instructions

llvm-svn: 277835

											
										
										
											2016-08-06 01:16:40 +08:00
+								void IRTranslator::finishPendingPhis() {
-												[globalisel][irtranslator] Verify that DILocations aren't lost in translation

Summary:
Also fix a couple bugs where DILocations are lost. EntryBuilder wasn't passing
on debug locations for PHI's, constants, GLOBAL_VALUE, etc.

Reviewers: aprantl, vsk, bogner, aditya_nandakumar, volkan, rtereshin, aemerson

Reviewed By: aemerson

Subscribers: aemerson, rovka, kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D53740

llvm-svn: 345743

											
										
										
											2018-11-01 01:31:23 +08:00
+								#ifndef NDEBUG
 								  DILocationVerifier Verifier(*MF);
 								#endif // ifndef NDEBUG
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  for (auto &Phi : PendingPHIs) {
-												GlobalISel: IRTranslate PHI instructions

llvm-svn: 277835

											
										
										
											2016-08-06 01:16:40 +08:00
+								    const PHINode *PI = Phi.first;
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								    ArrayRef<MachineInstr *> ComponentPHIs = Phi.second;
-												[globalisel][irtranslator] Verify that DILocations aren't lost in translation

Summary:
Also fix a couple bugs where DILocations are lost. EntryBuilder wasn't passing
on debug locations for PHI's, constants, GLOBAL_VALUE, etc.

Reviewers: aprantl, vsk, bogner, aditya_nandakumar, volkan, rtereshin, aemerson

Reviewed By: aemerson

Subscribers: aemerson, rovka, kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D53740

llvm-svn: 345743

											
										
										
											2018-11-01 01:31:23 +08:00
+								    EntryBuilder.setDebugLoc(PI->getDebugLoc());
 								#ifndef NDEBUG
 								    Verifier.setCurrentInst(PI);
 								#endif // ifndef NDEBUG
-												GlobalISel: IRTranslate PHI instructions

llvm-svn: 277835

											
										
										
											2016-08-06 01:16:40 +08:00
 								    // All MachineBasicBlocks exist, add them to the PHI. We assume IRTranslator
 								    // won't create extra control flow here, otherwise we need to find the
 								    // dominating predecessor here (or perhaps force the weirder IRTranslators
 								    // to provide a simple boundary).
-												[GlobalISel] track predecessor mapping during switch lowering.

Correctly populating Machine PHIs relies on knowing exactly how the IR level
CFG was lowered to MachineIR. This needs to be tracked by any translation
phases that meddle (currently only SwitchInst handling).

This reapplies r291973 which was reverted because of testing failures. Fixes:

 + Don't return an ArrayRef to a local temporary.
 + Incorporate Kristof's suggested comment improvements.

llvm-svn: 292278

											
										
										
											2017-01-18 06:13:50 +08:00
+								    SmallSet<const BasicBlock *, 4> HandledPreds;
-												GlobalISel: IRTranslate PHI instructions

llvm-svn: 277835

											
										
										
											2016-08-06 01:16:40 +08:00
+								    for (unsigned i = 0; i < PI->getNumIncomingValues(); ++i) {
-												[GlobalISel] track predecessor mapping during switch lowering.

Correctly populating Machine PHIs relies on knowing exactly how the IR level
CFG was lowered to MachineIR. This needs to be tracked by any translation
phases that meddle (currently only SwitchInst handling).

This reapplies r291973 which was reverted because of testing failures. Fixes:

 + Don't return an ArrayRef to a local temporary.
 + Incorporate Kristof's suggested comment improvements.

llvm-svn: 292278

											
										
										
											2017-01-18 06:13:50 +08:00
+								      auto IRPred = PI->getIncomingBlock(i);
 								      if (HandledPreds.count(IRPred))
 								        continue;
 								      HandledPreds.insert(IRPred);
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								      ArrayRef<unsigned> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i));
-												[GlobalISel] track predecessor mapping during switch lowering.

Correctly populating Machine PHIs relies on knowing exactly how the IR level
CFG was lowered to MachineIR. This needs to be tracked by any translation
phases that meddle (currently only SwitchInst handling).

This reapplies r291973 which was reverted because of testing failures. Fixes:

 + Don't return an ArrayRef to a local temporary.
 + Incorporate Kristof's suggested comment improvements.

llvm-svn: 292278

											
										
										
											2017-01-18 06:13:50 +08:00
+								      for (auto Pred : getMachinePredBBs({IRPred, PI->getParent()})) {
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								        assert(Pred->isSuccessor(ComponentPHIs[0]->getParent()) &&
-												[GlobalISel] track predecessor mapping during switch lowering.

Correctly populating Machine PHIs relies on knowing exactly how the IR level
CFG was lowered to MachineIR. This needs to be tracked by any translation
phases that meddle (currently only SwitchInst handling).

This reapplies r291973 which was reverted because of testing failures. Fixes:

 + Don't return an ArrayRef to a local temporary.
 + Incorporate Kristof's suggested comment improvements.

llvm-svn: 292278

											
										
										
											2017-01-18 06:13:50 +08:00
+								               "incorrect CFG at MachineBasicBlock level");
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								        for (unsigned j = 0; j < ValRegs.size(); ++j) {
 								          MachineInstrBuilder MIB(*MF, ComponentPHIs[j]);
 								          MIB.addUse(ValRegs[j]);
 								          MIB.addMBB(Pred);
 								        }
-												[GlobalISel] track predecessor mapping during switch lowering.

Correctly populating Machine PHIs relies on knowing exactly how the IR level
CFG was lowered to MachineIR. This needs to be tracked by any translation
phases that meddle (currently only SwitchInst handling).

This reapplies r291973 which was reverted because of testing failures. Fixes:

 + Don't return an ArrayRef to a local temporary.
 + Incorporate Kristof's suggested comment improvements.

llvm-svn: 292278

											
										
										
											2017-01-18 06:13:50 +08:00
+								      }
-												GlobalISel: IRTranslate PHI instructions

llvm-svn: 277835

											
										
										
											2016-08-06 01:16:40 +08:00
+								    }
 								  }
 								}
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								bool IRTranslator::valueIsSplit(const Value &V,
 								                                SmallVectorImpl<uint64_t> *Offsets) {
 								  SmallVector<LLT, 4> SplitTys;
-												[GlobalISel][IRTranslator] Fix a bug in handling repeating struct types during argument lowering.

Differential Revision: https://reviews.llvm.org/D49442

llvm-svn: 339674

											
										
										
											2018-08-14 20:04:25 +08:00
+								  if (Offsets && !Offsets->empty())
 								    Offsets->clear();
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  computeValueLLTs(*DL, *V.getType(), SplitTys, Offsets);
 								  return SplitTys.size() > 1;
 								}
-												[GlobalISel] More detailed skeleton for the IRTranslator.

llvm-svn: 260456

											
										
										
											2016-02-11 06:59:27 +08:00
+								bool IRTranslator::translate(const Instruction &Inst) {
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								  CurBuilder.setDebugLoc(Inst.getDebugLoc());
-												[globalisel][irtranslator] Verify that DILocations aren't lost in translation

Summary:
Also fix a couple bugs where DILocations are lost. EntryBuilder wasn't passing
on debug locations for PHI's, constants, GLOBAL_VALUE, etc.

Reviewers: aprantl, vsk, bogner, aditya_nandakumar, volkan, rtereshin, aemerson

Reviewed By: aemerson

Subscribers: aemerson, rovka, kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D53740

llvm-svn: 345743

											
										
										
											2018-11-01 01:31:23 +08:00
+								  EntryBuilder.setDebugLoc(Inst.getDebugLoc());
-												[GlobalISel] More detailed skeleton for the IRTranslator.

llvm-svn: 260456

											
										
										
											2016-02-11 06:59:27 +08:00
+								  switch(Inst.getOpcode()) {
-												GlobalISel: support same ConstantExprs as Instructions.

It's more than just inttoptr, but the others can't be tested until we have
support for non-trivial constants (they currently get unavoidably folded to a
ConstantInt).

llvm-svn: 278303

											
										
										
											2016-08-11 07:02:41 +08:00
+								#define HANDLE_INST(NUM, OPCODE, CLASS) \
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								    case Instruction::OPCODE: return translate##OPCODE(Inst, CurBuilder);
-												GlobalISel: support same ConstantExprs as Instructions.

It's more than just inttoptr, but the others can't be tested until we have
support for non-trivial constants (they currently get unavoidably folded to a
ConstantInt).

llvm-svn: 278303

											
										
										
											2016-08-11 07:02:41 +08:00
+								#include "llvm/IR/Instruction.def"
-												[GlobalISel] Teach the IRTranslator how to lower returns.

llvm-svn: 260562

											
										
										
											2016-02-12 02:53:28 +08:00
+								  default:
-												[IRTranslator] Simplify error handling for translating constants. NFC.

We don't need to check whether the fallback path is enabled to return
false. Just do that all the time on error cases, the caller knows (or
at least should know!) how to handle the failing case.

llvm-svn: 297535

											
										
										
											2017-03-11 08:28:33 +08:00
+								    return false;
-												[GlobalISel] More detailed skeleton for the IRTranslator.

llvm-svn: 260456

											
										
										
											2016-02-11 06:59:27 +08:00
+								  }
-												[GlobalISel] Add the proper cmake plumbing.

This patch adds the necessary plumbing to cmake to build the sources related to
GlobalISel.

To build the sources related to GlobalISel, we need to add -DBUILD_GLOBAL_ISEL=ON.
By default, this is OFF, thus GlobalISel sources will not impact people that do
not explicitly opt-in.

Differential Revision: http://reviews.llvm.org/D15983

llvm-svn: 258344

											
										
										
											2016-01-21 04:58:56 +08:00
+								}
-												GlobalISel: first translation support for Constants.

For now put them all in the entry block. This should be correct but may give
poor runtime performance. Hopefully MachineSinking combined with
isReMaterializable can solve those issues, but if not the interface is sound
enough to support alternatives.

llvm-svn: 278168

											
										
										
											2016-08-10 05:28:04 +08:00
+								bool IRTranslator::translate(const Constant &C, unsigned Reg) {
-												GlobalISel: support 'undef' constant.

llvm-svn: 278174

											
										
										
											2016-08-10 07:01:30 +08:00
+								  if (auto CI = dyn_cast<ConstantInt>(&C))
-												GlobalISel: translate constants larger than 64 bits.

llvm-svn: 288713

											
										
										
											2016-12-06 05:54:17 +08:00
+								    EntryBuilder.buildConstant(Reg, *CI);
-												GlobalISel: translate floating-point constants

llvm-svn: 279311

											
										
										
											2016-08-20 04:09:15 +08:00
+								  else if (auto CF = dyn_cast<ConstantFP>(&C))
-												GlobalISel: move type information to MachineRegisterInfo.

We want each register to have a canonical type, which means the best place to
store this is in MachineRegisterInfo rather than on every MachineInstr that
happens to use or define that register.

Most changes following from this are pretty simple (you need an MRI anyway if
you're going to be doing any transformations, so just check the type there).
But legalization doesn't really want to check redundant operands (when, for
example, a G_ADD only ever has one type) so I've made use of MCInstrDesc's
operand type field to encode these constraints and limit legalization's work.

As an added bonus, more validation is possible, both in MachineVerifier and
MachineIRBuilder (coming soon).

llvm-svn: 281035

											
										
										
											2016-09-09 19:46:34 +08:00
+								    EntryBuilder.buildFConstant(Reg, *CF);
-												GlobalISel: support 'undef' constant.

llvm-svn: 278174

											
										
										
											2016-08-10 07:01:30 +08:00
+								  else if (isa<UndefValue>(C))
-												GlobalISel: add buildUndef method to MachineIRBuilder. NFC.

llvm-svn: 297044

											
										
										
											2017-03-07 02:36:40 +08:00
+								    EntryBuilder.buildUndef(Reg);
-												[GISel]: Fix incorrect IRTranslation while translating null pointer types

https://reviews.llvm.org/D44762

Currently IRTranslator produces
%vreg17<def>(p0) = G_CONSTANT 0;

instead we should build
%vreg16(s64) = G_CONSTANT 0
%vreg17(p0) = G_INTTOPTR %vreg16

reviewed by @aemerson.

llvm-svn: 328218

											
										
										
											2018-03-23 01:31:38 +08:00
+								  else if (isa<ConstantPointerNull>(C)) {
 								    // As we are trying to build a constant val of 0 into a pointer,
 								    // insert a cast to make them correct with respect to types.
 								    unsigned NullSize = DL->getTypeSizeInBits(C.getType());
 								    auto *ZeroTy = Type::getIntNTy(C.getContext(), NullSize);
 								    auto *ZeroVal = ConstantInt::get(ZeroTy, 0);
 								    unsigned ZeroReg = getOrCreateVReg(*ZeroVal);
 								    EntryBuilder.buildCast(Reg, ZeroReg);
 								  } else if (auto GV = dyn_cast<GlobalValue>(&C))
-												GlobalISel: support translation of global addresses.

llvm-svn: 281207

											
										
										
											2016-09-12 20:10:41 +08:00
+								    EntryBuilder.buildGlobalValue(Reg, GV);
-												GlobalISel: Translate ConstantAggregateZero vectors

Reviewers: qcolombet, aditya_nandakumar, dsanders, ab, t.p.northover, javed.absar

Reviewed By: qcolombet

Subscribers: dberris, rovka, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30259

llvm-svn: 297509

											
										
										
											2017-03-11 05:23:13 +08:00
+								  else if (auto CAZ = dyn_cast<ConstantAggregateZero>(&C)) {
 								    if (!CAZ->getType()->isVectorTy())
 								      return false;
-												[GlobalISel] IRTranslator: Return the scalar for <1 x Ty> constant vectors

Summary:
<1 x Ty> is not a legal vector type in LLT, we shouldn’t build G_MERGE_VALUES
instruction for them.

Reviewers: qcolombet, aditya_nandakumar, dsanders, t.p.northover, ab, javed.absar

Reviewed By: qcolombet

Subscribers: dberris, rovka, kristof.beyls, llvm-commits

Differential Revision: https://reviews.llvm.org/D30948

llvm-svn: 297792

											
										
										
											2017-03-15 07:45:06 +08:00
+								    // Return the scalar if it is a <1 x Ty> vector.
 								    if (CAZ->getNumElements() == 1)
 								      return translate(*CAZ->getElementValue(0u), Reg);
-												GlobalISel: Translate ConstantAggregateZero vectors

Reviewers: qcolombet, aditya_nandakumar, dsanders, ab, t.p.northover, javed.absar

Reviewed By: qcolombet

Subscribers: dberris, rovka, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30259

llvm-svn: 297509

											
										
										
											2017-03-11 05:23:13 +08:00
+								    std::vector<unsigned> Ops;
 								    for (unsigned i = 0; i < CAZ->getNumElements(); ++i) {
 								      Constant &Elt = *CAZ->getElementValue(i);
 								      Ops.push_back(getOrCreateVReg(Elt));
 								    }
 								    EntryBuilder.buildMerge(Reg, Ops);
-												GlobalISel: Translate ConstantDataVector

Reviewers: qcolombet, aditya_nandakumar, dsanders, t.p.northover, javed.absar, ab

Reviewed By: qcolombet, dsanders, ab

Subscribers: dberris, rovka, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30216

llvm-svn: 297670

											
										
										
											2017-03-14 05:36:19 +08:00
+								  } else if (auto CV = dyn_cast<ConstantDataVector>(&C)) {
-												[GlobalISel] IRTranslator: Return the scalar for <1 x Ty> constant vectors

Summary:
<1 x Ty> is not a legal vector type in LLT, we shouldn’t build G_MERGE_VALUES
instruction for them.

Reviewers: qcolombet, aditya_nandakumar, dsanders, t.p.northover, ab, javed.absar

Reviewed By: qcolombet

Subscribers: dberris, rovka, kristof.beyls, llvm-commits

Differential Revision: https://reviews.llvm.org/D30948

llvm-svn: 297792

											
										
										
											2017-03-15 07:45:06 +08:00
+								    // Return the scalar if it is a <1 x Ty> vector.
 								    if (CV->getNumElements() == 1)
 								      return translate(*CV->getElementAsConstant(0), Reg);
-												GlobalISel: Translate ConstantDataVector

Reviewers: qcolombet, aditya_nandakumar, dsanders, t.p.northover, javed.absar, ab

Reviewed By: qcolombet, dsanders, ab

Subscribers: dberris, rovka, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30216

llvm-svn: 297670

											
										
										
											2017-03-14 05:36:19 +08:00
+								    std::vector<unsigned> Ops;
 								    for (unsigned i = 0; i < CV->getNumElements(); ++i) {
 								      Constant &Elt = *CV->getElementAsConstant(i);
 								      Ops.push_back(getOrCreateVReg(Elt));
 								    }
 								    EntryBuilder.buildMerge(Reg, Ops);
-												GlobalISel: Translate ConstantAggregateZero vectors

Reviewers: qcolombet, aditya_nandakumar, dsanders, ab, t.p.northover, javed.absar

Reviewed By: qcolombet

Subscribers: dberris, rovka, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30259

llvm-svn: 297509

											
										
										
											2017-03-11 05:23:13 +08:00
+								  } else if (auto CE = dyn_cast<ConstantExpr>(&C)) {
-												GlobalISel: support same ConstantExprs as Instructions.

It's more than just inttoptr, but the others can't be tested until we have
support for non-trivial constants (they currently get unavoidably folded to a
ConstantInt).

llvm-svn: 278303

											
										
										
											2016-08-11 07:02:41 +08:00
+								    switch(CE->getOpcode()) {
 								#define HANDLE_INST(NUM, OPCODE, CLASS)                         \
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								      case Instruction::OPCODE: return translate##OPCODE(*CE, EntryBuilder);
-												GlobalISel: support same ConstantExprs as Instructions.

It's more than just inttoptr, but the others can't be tested until we have
support for non-trivial constants (they currently get unavoidably folded to a
ConstantInt).

llvm-svn: 278303

											
										
										
											2016-08-11 07:02:41 +08:00
+								#include "llvm/IR/Instruction.def"
 								    default:
-												[IRTranslator] Simplify error handling for translating constants. NFC.

We don't need to check whether the fallback path is enabled to return
false. Just do that all the time on error cases, the caller knows (or
at least should know!) how to handle the failing case.

llvm-svn: 297535

											
										
										
											2017-03-11 08:28:33 +08:00
+								      return false;
-												GlobalISel: support same ConstantExprs as Instructions.

It's more than just inttoptr, but the others can't be tested until we have
support for non-trivial constants (they currently get unavoidably folded to a
ConstantInt).

llvm-svn: 278303

											
										
										
											2016-08-11 07:02:41 +08:00
+								    }
-												[GISel]: Add support to translate ConstantVectors

Reviewed by Quentin
https://reviews.llvm.org/D32814

llvm-svn: 302196

											
										
										
											2017-05-05 05:43:12 +08:00
+								  } else if (auto CV = dyn_cast<ConstantVector>(&C)) {
 								    if (CV->getNumOperands() == 1)
 								      return translate(*CV->getOperand(0), Reg);
 								    SmallVector<unsigned, 4> Ops;
 								    for (unsigned i = 0; i < CV->getNumOperands(); ++i) {
 								      Ops.push_back(getOrCreateVReg(*CV->getOperand(i)));
 								    }
 								    EntryBuilder.buildMerge(Reg, Ops);
-												[GlobalISel] Add a G_BLOCK_ADDR opcode to handle IR blockaddress constants.

Differential Revision: https://reviews.llvm.org/D49900

llvm-svn: 338335

											
										
										
											2018-07-31 08:08:50 +08:00
+								  } else if (auto *BA = dyn_cast<BlockAddress>(&C)) {
 								    EntryBuilder.buildBlockAddress(Reg, BA);
-												[IRTranslator] Simplify error handling for translating constants. NFC.

We don't need to check whether the fallback path is enabled to return
false. Just do that all the time on error cases, the caller knows (or
at least should know!) how to handle the failing case.

llvm-svn: 297535

											
										
										
											2017-03-11 08:28:33 +08:00
+								  } else
-												[IRTranslator] Do not abort when the target wants to fall back.

Every pass in the GlobalISel pipeline will need to do something similar.

llvm-svn: 279886

											
										
										
											2016-08-27 07:49:05 +08:00
+								    return false;
-												GlobalISel: first translation support for Constants.

For now put them all in the entry block. This should be correct but may give
poor runtime performance. Hopefully MachineSinking combined with
isReMaterializable can solve those issues, but if not the interface is sound
enough to support alternatives.

llvm-svn: 278168

											
										
										
											2016-08-10 05:28:04 +08:00
-												GlobalISel: support 'undef' constant.

llvm-svn: 278174

											
										
										
											2016-08-10 07:01:30 +08:00
+								  return true;
-												GlobalISel: first translation support for Constants.

For now put them all in the entry block. This should be correct but may give
poor runtime performance. Hopefully MachineSinking combined with
isReMaterializable can solve those issues, but if not the interface is sound
enough to support alternatives.

llvm-svn: 278168

											
										
										
											2016-08-10 05:28:04 +08:00
+								}
-												GlobalISel: clear vreg mapping after translating each function

Otherwise we only materialize (shared) constants in the first function they
appear in. This doesn't go well.

llvm-svn: 278351

											
										
										
											2016-08-12 00:21:29 +08:00
+								void IRTranslator::finalizeFunction() {
-												[GlobalISel] More detailed skeleton for the IRTranslator.

llvm-svn: 260456

											
										
										
											2016-02-11 06:59:27 +08:00
+								  // Release the memory used by the different maps we
 								  // needed during the translation.
-												GlobalISel: avoid looking too closely at PHIs when we bail.

The function used to finish off PHIs by adding the relevant basic blocks can
fail if we're aborting and still don't actually have the needed
MachineBasicBlocks. So avoid trying in that case.

llvm-svn: 288727

											
										
										
											2016-12-06 07:10:19 +08:00
+								  PendingPHIs.clear();
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  VMap.reset();
-												GlobalISel: translate stack protector intrinsics

llvm-svn: 285614

											
										
										
											2016-11-01 02:30:59 +08:00
+								  FrameIndices.clear();
-												[GlobalISel] track predecessor mapping during switch lowering.

Correctly populating Machine PHIs relies on knowing exactly how the IR level
CFG was lowered to MachineIR. This needs to be tracked by any translation
phases that meddle (currently only SwitchInst handling).

This reapplies r291973 which was reverted because of testing failures. Fixes:

 + Don't return an ArrayRef to a local temporary.
 + Incorporate Kristof's suggested comment improvements.

llvm-svn: 292278

											
										
										
											2017-01-18 06:13:50 +08:00
+								  MachinePreds.clear();
-												[GISel]: Fix undefined behavior in IRTranslator

Make sure IRTranslator->MachineIRBuilder->DebugLoc doesn't
outlive the DILocation. Clear it at the end of
IRTranslator::runOnMachineFunction

llvm-svn: 303277

											
										
										
											2017-05-18 01:41:55 +08:00
+								  // MachineIRBuilder::DebugLoc can outlive the DILocation it holds. Clear it
 								  // to avoid accessing free’d memory (in runOnMachineFunction) and to avoid
 								  // destroying it twice (in ~IRTranslator() and ~LLVMContext())
 								  EntryBuilder = MachineIRBuilder();
 								  CurBuilder = MachineIRBuilder();
-												[GlobalISel] Add the proper cmake plumbing.

This patch adds the necessary plumbing to cmake to build the sources related to
GlobalISel.

To build the sources related to GlobalISel, we need to add -DBUILD_GLOBAL_ISEL=ON.
By default, this is OFF, thus GlobalISel sources will not impact people that do
not explicitly opt-in.

Differential Revision: http://reviews.llvm.org/D15983

llvm-svn: 258344

											
										
										
											2016-01-21 04:58:56 +08:00
+								}
-												GlobalISel: store the current MachineFunction as direct state. NFC.

Having to ask the MIRBuilder for the current function is a little awkward, and
I'm intending to improve how that's threaded through anyway.

llvm-svn: 288983

											
										
										
											2016-12-08 05:17:47 +08:00
+								bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
 								  MF = &CurMF;
-												MachineFunction: Return reference from getFunction(); NFC

The Function can never be nullptr so we can return a reference.

llvm-svn: 320884

											
										
										
											2017-12-16 06:22:58 +08:00
+								  const Function &F = MF->getFunction();
-												[GlobalISel] Add the necessary plumbing to lower formal arguments.

llvm-svn: 260579

											
										
										
											2016-02-12 03:59:41 +08:00
+								  if (F.empty())
 								    return false;
-												GlobalISel: store the current MachineFunction as direct state. NFC.

Having to ask the MIRBuilder for the current function is a little awkward, and
I'm intending to improve how that's threaded through anyway.

llvm-svn: 288983

											
										
										
											2016-12-08 05:17:47 +08:00
+								  CLI = MF->getSubtarget().getCallLowering();
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								  CurBuilder.setMF(*MF);
-												GlobalISel: store the current MachineFunction as direct state. NFC.

Having to ask the MIRBuilder for the current function is a little awkward, and
I'm intending to improve how that's threaded through anyway.

llvm-svn: 288983

											
										
										
											2016-12-08 05:17:47 +08:00
+								  EntryBuilder.setMF(*MF);
 								  MRI = &MF->getRegInfo();
-												GlobalISel: implement alloca instruction

llvm-svn: 276433

											
										
										
											2016-07-23 00:59:52 +08:00
+								  DL = &F.getParent()->getDataLayout();
-												[IRTranslator] Do not abort when the target wants to fall back.

Every pass in the GlobalISel pipeline will need to do something similar.

llvm-svn: 279886

											
										
										
											2016-08-27 07:49:05 +08:00
+								  TPC = &getAnalysis<TargetPassConfig>();
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								  ORE = llvm::make_unique<OptimizationRemarkEmitter>(&F);
-												GlobalISel: implement alloca instruction

llvm-svn: 276433

											
										
										
											2016-07-23 00:59:52 +08:00
-												GlobalISel: clear pending phis after MachineFunction translated

Test is just reordering the existing functions (it would trigger for any
function after one with a phi).

llvm-svn: 277841

											
										
										
											2016-08-06 01:50:36 +08:00
+								  assert(PendingPHIs.empty() && "stale PHIs");
-												[GlobalISel] Disable GISel for big endian.

This is due to PR26161 needing to be resolved before we can fix
big endian bugs like PR35359. The work to split aggregates into smaller LLTs
instead of using one large scalar will take some time, so in the meantime
we'll fall back to SDAG.

Some ARM BE tests xfailed for now as a result.

Differential Revision: https://reviews.llvm.org/D40789

llvm-svn: 320388

											
										
										
											2017-12-12 00:58:29 +08:00
+								  if (!DL->isLittleEndian()) {
 								    // Currently we don't properly handle big endian code.
 								    OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
-												MachineFunction: Return reference from getFunction(); NFC

The Function can never be nullptr so we can return a reference.

llvm-svn: 320884

											
										
										
											2017-12-16 06:22:58 +08:00
+								                               F.getSubprogram(), &F.getEntryBlock());
-												[GlobalISel] Disable GISel for big endian.

This is due to PR26161 needing to be resolved before we can fix
big endian bugs like PR35359. The work to split aggregates into smaller LLTs
instead of using one large scalar will take some time, so in the meantime
we'll fall back to SDAG.

Some ARM BE tests xfailed for now as a result.

Differential Revision: https://reviews.llvm.org/D40789

llvm-svn: 320388

											
										
										
											2017-12-12 00:58:29 +08:00
+								    R << "unable to translate in big endian mode";
 								    reportTranslationError(*MF, *TPC, *ORE, R);
 								  }
-												[GlobalISel] Finalize translated function on scope exit. NFC.

This is the compromise between having a per-function IRTranslator
and manually managing the per-function state.

llvm-svn: 296046

											
										
										
											2017-02-24 07:57:28 +08:00
+								  // Release the per-function state when we return, whether we succeeded or not.
 								  auto FinalizeOnReturn = make_scope_exit([this]() { finalizeFunction(); });
-												[GlobalISel] Preserve IR block layout.

It makes the output function layout more predictable;  the layout has
an effect on performance, we don't want it to be at the mercy of the
translator's visitation order and such.
The predictable output is also easier to digest.

getOrCreateBB isn't appropriately named anymore, as it never needs to
create anything.  Rename it and extract the MBB creation logic out of it.

A couple tests were sensitive to the order. Update them.

llvm-svn: 297868

											
										
										
											2017-03-16 02:22:33 +08:00
+								  // Setup a separate basic-block for the arguments and constants
-												GlobalISel: store the current MachineFunction as direct state. NFC.

Having to ask the MIRBuilder for the current function is a little awkward, and
I'm intending to improve how that's threaded through anyway.

llvm-svn: 288983

											
										
										
											2016-12-08 05:17:47 +08:00
+								  MachineBasicBlock *EntryBB = MF->CreateMachineBasicBlock();
 								  MF->push_back(EntryBB);
-												GlobalISel: simplify MachineIRBuilder interface.

MachineIRBuilder had weird before/after and beginning/end flags for the insert
point. Unfortunately the non-default means that instructions will be inserted
in reverse order which is almost never what anyone wants.

Really, I think we just want (like IRBuilder has) the ability to insert at any
C++ iterator-style point (i.e. before any instruction or before MBB.end()). So
this fixes MIRBuilders to behave like IRBuilders in this respect.

llvm-svn: 288980

											
										
										
											2016-12-08 05:05:38 +08:00
+								  EntryBuilder.setMBB(*EntryBB);
-												[GlobalISel] Preserve IR block layout.

It makes the output function layout more predictable;  the layout has
an effect on performance, we don't want it to be at the mercy of the
translator's visitation order and such.
The predictable output is also easier to digest.

getOrCreateBB isn't appropriately named anymore, as it never needs to
create anything.  Rename it and extract the MBB creation logic out of it.

A couple tests were sensitive to the order. Update them.

llvm-svn: 297868

											
										
										
											2017-03-16 02:22:33 +08:00
+								  // Create all blocks, in IR order, to preserve the layout.
 								  for (const BasicBlock &BB: F) {
 								    auto *&MBB = BBToMBB[&BB];
 								    MBB = MF->CreateMachineBasicBlock(&BB);
 								    MF->push_back(MBB);
 								    if (BB.hasAddressTaken())
 								      MBB->setHasAddressTaken();
 								  }
 								  // Make our arguments/constants entry block fallthrough to the IR entry block.
 								  EntryBB->addSuccessor(&getMBB(F.front()));
-												GlobalISel: simplify MachineIRBuilder interface.

MachineIRBuilder had weird before/after and beginning/end flags for the insert
point. Unfortunately the non-default means that instructions will be inserted
in reverse order which is almost never what anyone wants.

Really, I think we just want (like IRBuilder has) the ability to insert at any
C++ iterator-style point (i.e. before any instruction or before MBB.end()). So
this fixes MIRBuilders to behave like IRBuilders in this respect.

llvm-svn: 288980

											
										
										
											2016-12-08 05:05:38 +08:00
+								  // Lower the actual args into this basic block.
-												[GlobalISel] Add the necessary plumbing to lower formal arguments.

llvm-svn: 260579

											
										
										
											2016-02-12 03:59:41 +08:00
+								  SmallVector<unsigned, 8> VRegArgs;
-												[GlobalISel][IRTranslator] Fix crash during translation of zero sized loads/stores/args/returns.

This fixes PR35358.

rdar://35619533

Differential Revision: https://reviews.llvm.org/D40604

llvm-svn: 319465

											
										
										
											2017-12-01 04:06:02 +08:00
+								  for (const Argument &Arg: F.args()) {
 								    if (DL->getTypeStoreSize(Arg.getType()) == 0)
 								      continue; // Don't handle zero sized types.
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								    VRegArgs.push_back(
 								        MRI->createGenericVirtualRegister(getLLTForType(*Arg.getType(), *DL)));
-												[GlobalISel][IRTranslator] Fix crash during translation of zero sized loads/stores/args/returns.

This fixes PR35358.

rdar://35619533

Differential Revision: https://reviews.llvm.org/D40604

llvm-svn: 319465

											
										
										
											2017-12-01 04:06:02 +08:00
+								  }
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
-												[GlobalISel] Fall back to SDISel for swifterror/swiftself attributes.

We don't currently support these, fall back until we do.

llvm-svn: 337994

											
										
										
											2018-07-26 09:25:58 +08:00
+								  // We don't currently support translating swifterror or swiftself functions.
 								  for (auto &Arg : F.args()) {
 								    if (Arg.hasSwiftErrorAttr() || Arg.hasSwiftSelfAttr()) {
 								      OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
 								                                 F.getSubprogram(), &F.getEntryBlock());
 								      R << "unable to lower arguments due to swifterror/swiftself: "
 								        << ore::NV("Prototype", F.getType());
 								      reportTranslationError(*MF, *TPC, *ORE, R);
 								      return false;
 								    }
 								  }
-												[GlobalISel] Remove now-unnecessary variable. NFC.

Since r296047, we're able to return early on failures.
Don't track whether we succeeded.

llvm-svn: 296057

											
										
										
											2017-02-24 08:34:41 +08:00
+								  if (!CLI->lowerFormalArguments(EntryBuilder, F, VRegArgs)) {
-												[GlobalISel] Use the DISubprogram for translation failure remarks.

Justin added support for DISubprogram locs in r295531 and r296052.
Use that instead of no-loc for constants and arguments.

llvm-svn: 296058

											
										
										
											2017-02-24 08:34:44 +08:00
+								    OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
-												MachineFunction: Return reference from getFunction(); NFC

The Function can never be nullptr so we can return a reference.

llvm-svn: 320884

											
										
										
											2017-12-16 06:22:58 +08:00
+								                               F.getSubprogram(), &F.getEntryBlock());
-												[GlobalISel] Emit opt remarks on isel fallbacks.

Having more fine-grained information on the specific construct that
caused us to fallback is valuable for large-scale data collection.

We still have the fallback warning, that's also used for FastISel.
We still need to remove the fallback warning, and teach FastISel to also
emit remarks (it currently has a combination of the warning, stats, and
debug prints: the remarks could unify all three).

The abort-on-fallback path could also be better handled using remarks:
one could imagine a "-Rpass-error", analogous to "-Werror", which would
promote missed/failed remarks to errors.  It's not clear whether that
would be useful for other remarks though, so we're not there yet.

llvm-svn: 296013

											
										
										
											2017-02-24 05:05:42 +08:00
+								    R << "unable to lower arguments: " << ore::NV("Prototype", F.getType());
 								    reportTranslationError(*MF, *TPC, *ORE, R);
 								    return false;
-												[IRTranslator] Do not abort when the target wants to fall back.

Every pass in the GlobalISel pipeline will need to do something similar.

llvm-svn: 279886

											
										
										
											2016-08-27 07:49:05 +08:00
+								  }
-												[GlobalISel] Add the necessary plumbing to lower formal arguments.

llvm-svn: 260579

											
										
										
											2016-02-12 03:59:41 +08:00
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  auto ArgIt = F.arg_begin();
 								  for (auto &VArg : VRegArgs) {
 								    // If the argument is an unsplit scalar then don't use unpackRegs to avoid
 								    // creating redundant copies.
 								    if (!valueIsSplit(*ArgIt, VMap.getOffsets(*ArgIt))) {
 								      auto &VRegs = *VMap.getVRegs(cast<Value>(*ArgIt));
 								      assert(VRegs.empty() && "VRegs already populated?");
 								      VRegs.push_back(VArg);
 								    } else {
 								      unpackRegs(*ArgIt, VArg, EntryBuilder);
 								    }
 								    ArgIt++;
 								  }
-												[GlobalISel][IRTranslator] Use RPO traversal when visiting blocks to translate.

Previously we were just visiting the blocks in the function in IR order, which
is rather arbitrary. Therefore we wouldn't always visit defs before uses, but
the translation code relies on this assumption in some places.

Only codegen change seen in tests is an elision of a redundant copy.

Fixes PR38396

llvm-svn: 338476

											
										
										
											2018-08-01 10:17:42 +08:00
+								  // Need to visit defs before uses when translating instructions.
-												[globalisel][irtranslator] Verify that DILocations aren't lost in translation

Summary:
Also fix a couple bugs where DILocations are lost. EntryBuilder wasn't passing
on debug locations for PHI's, constants, GLOBAL_VALUE, etc.

Reviewers: aprantl, vsk, bogner, aditya_nandakumar, volkan, rtereshin, aemerson

Reviewed By: aemerson

Subscribers: aemerson, rovka, kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D53740

llvm-svn: 345743

											
										
										
											2018-11-01 01:31:23 +08:00
+								  {
 								    ReversePostOrderTraversal<const Function *> RPOT(&F);
 								#ifndef NDEBUG
 								    DILocationVerifier Verifier(*MF);
 								#endif // ifndef NDEBUG
 								    for (const BasicBlock *BB : RPOT) {
 								      MachineBasicBlock &MBB = getMBB(*BB);
 								      // Set the insertion point of all the following translations to
 								      // the end of this basic block.
 								      CurBuilder.setMBB(MBB);
 								      for (const Instruction &Inst : *BB) {
 								#ifndef NDEBUG
 								        Verifier.setCurrentInst(&Inst);
 								#endif // ifndef NDEBUG
 								        if (translate(Inst))
 								          continue;
 								        OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
 								                                   Inst.getDebugLoc(), BB);
 								        R << "unable to translate instruction: " << ore::NV("Opcode", &Inst);
 								        if (ORE->allowExtraAnalysis("gisel-irtranslator")) {
 								          std::string InstStrStorage;
 								          raw_string_ostream InstStr(InstStrStorage);
 								          InstStr << Inst;
 								          R << ": '" << InstStr.str() << "'";
 								        }
-												[GlobalISel] Only build expensive remarks if they're enabled. NFC.

r313390 taught 'allowExtraAnalysis' to check whether remarks are
enabled at all.  Use that to only do the expensive instruction printing
if they are.

llvm-svn: 313552

											
										
										
											2017-09-19 02:50:09 +08:00
-												[globalisel][irtranslator] Verify that DILocations aren't lost in translation

Summary:
Also fix a couple bugs where DILocations are lost. EntryBuilder wasn't passing
on debug locations for PHI's, constants, GLOBAL_VALUE, etc.

Reviewers: aprantl, vsk, bogner, aditya_nandakumar, volkan, rtereshin, aemerson

Reviewed By: aemerson

Subscribers: aemerson, rovka, kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D53740

llvm-svn: 345743

											
										
										
											2018-11-01 01:31:23 +08:00
+								        reportTranslationError(*MF, *TPC, *ORE, R);
 								        return false;
-												[GlobalISel] Only build expensive remarks if they're enabled. NFC.

r313390 taught 'allowExtraAnalysis' to check whether remarks are
enabled at all.  Use that to only do the expensive instruction printing
if they are.

llvm-svn: 313552

											
										
										
											2017-09-19 02:50:09 +08:00
+								      }
-												[GlobalISel] More detailed skeleton for the IRTranslator.

llvm-svn: 260456

											
										
										
											2016-02-11 06:59:27 +08:00
+								    }
 								  }
-												GlobalISel: freeze reserved regs after IRTranslator.

We can freeze the registers after the MachineFrameInfo has been configured (by
telling it about calls, inline asm, ...). This doesn't happen at all yet, but
will be part of IR translation.

Fixes -verify-machineinstrs assertion.

llvm-svn: 275221

											
										
										
											2016-07-13 06:23:42 +08:00
-												[GlobalISel] Don't translate other blocks when one failed.

We were stopping the translation of the parent block when the
translation of an instruction failed, but we were still trying to
translate the other blocks of the parent function.

Don't do that.

llvm-svn: 296047

											
										
										
											2017-02-24 07:57:36 +08:00
+								  finishPendingPhis();
 								  // Merge the argument lowering and constants block with its single
 								  // successor, the LLVM-IR entry block.  We want the basic block to
 								  // be maximal.
 								  assert(EntryBB->succ_size() == 1 &&
 								         "Custom BB used for lowering should have only one successor");
 								  // Get the successor of the current entry block.
 								  MachineBasicBlock &NewEntryBB = **EntryBB->succ_begin();
 								  assert(NewEntryBB.pred_size() == 1 &&
 								         "LLVM-IR entry block has a predecessor!?");
 								  // Move all the instruction from the current entry block to the
 								  // new entry block.
 								  NewEntryBB.splice(NewEntryBB.begin(), EntryBB, EntryBB->begin(),
 								                    EntryBB->end());
 								  // Update the live-in information for the new entry block.
 								  for (const MachineBasicBlock::RegisterMaskPair &LiveIn : EntryBB->liveins())
 								    NewEntryBB.addLiveIn(LiveIn);
 								  NewEntryBB.sortUniqueLiveIns();
 								  // Get rid of the now empty basic block.
 								  EntryBB->removeSuccessor(&NewEntryBB);
 								  MF->remove(EntryBB);
 								  MF->DeleteMachineBasicBlock(EntryBB);
 								  assert(&MF->front() == &NewEntryBB &&
 								         "New entry wasn't next in the list of basic block!");
-												GlobalISel: IRTranslate PHI instructions

llvm-svn: 277835

											
										
										
											2016-08-06 01:16:40 +08:00
-												CodeGen: Remove pipeline dependencies on StackProtector; NFC

This re-applies r336929 with a fix to accommodate the Mips target
scheduling multiple SelectionDAG instances into the pass pipeline.

PrologEpilogInserter and StackColoring depend on the StackProtector analysis
being alive from the point it is run until PEI, which requires that they are all
scheduled in the same FunctionPassManager. Inserting a (machine) ModulePass
between StackProtector and PEI results in these passes being in separate
FunctionPassManagers and the StackProtector is not available for PEI.

PEI and StackColoring don't use much information from the StackProtector pass,
so transferring the required information to MachineFrameInfo is cleaner than
keeping the StackProtector pass around. This commit moves the SSP layout
information to MFI instead of keeping it in the pass.

This patch set (D37580, D37581, D37582, D37583, D37584, D37585, D37586, D37587)
is a first draft of the pagerando implementation described in
http://lists.llvm.org/pipermail/llvm-dev/2017-June/113794.html.

Patch by Stephen Crane <sjc@immunant.com>

Differential Revision: https://reviews.llvm.org/D49256

llvm-svn: 336964

											
										
										
											2018-07-13 08:08:38 +08:00
+								  // Initialize stack protector information.
 								  StackProtector &SP = getAnalysis<StackProtector>();
 								  SP.copyToMachineFrameInfo(MF->getFrameInfo());
-												[GlobalISel] Add the proper cmake plumbing.

This patch adds the necessary plumbing to cmake to build the sources related to
GlobalISel.

To build the sources related to GlobalISel, we need to add -DBUILD_GLOBAL_ISEL=ON.
By default, this is OFF, thus GlobalISel sources will not impact people that do
not explicitly opt-in.

Differential Revision: http://reviews.llvm.org/D15983

llvm-svn: 258344

											
										
										
											2016-01-21 04:58:56 +08:00
+								  return false;
 								}