// llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

//===- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator ---*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the IRTranslator class.
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/SwitchLoweringUtils.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <string>
#include <utility>
#include <vector>

#define DEBUG_TYPE "irtranslator"

using namespace llvm;

// Command-line flag `-enable-cse-in-irtranslator`; off unless given explicitly.
// NOTE(review): the code that consults this flag is not in this part of the
// file.
static cl::opt<bool>
    EnableCSEInIRTranslator("enable-cse-in-irtranslator",
                            cl::desc("Should enable CSE in irtranslator"),
                            cl::Optional, cl::init(false));
// Pass ID handed to the MachineFunctionPass base class by the constructor.
char IRTranslator::ID = 0;

// Pass registration, using DEBUG_TYPE as the pass argument name, together with
// the passes it declares as dependencies.
// NOTE(review): BlockFrequencyInfoWrapperPass is listed here, whereas
// getAnalysisUsage() requests BranchProbabilityInfoWrapperPass -- confirm this
// difference is intended.
INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
                false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(StackProtector)
INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
                false, false)

/// Flag \p MF as having failed instruction selection and report \p R: as a
/// fatal error when GlobalISel abort is enabled in \p TPC, otherwise as a
/// missed-optimization remark emitted through \p ORE.
static void reportTranslationError(MachineFunction &MF,
                                   const TargetPassConfig &TPC,
                                   OptimizationRemarkEmitter &ORE,
                                   OptimizationRemarkMissed &R) {
  MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);

  const bool AbortOnFailure = TPC.isGlobalISelAbortEnabled();

  // A remark without a valid debug location does not say where the problem
  // is, and a raw fatal error needs the context too, so spell out the function
  // name in both situations.
  if (AbortOnFailure || !R.getLocation().isValid())
    R << (" (in function: " + MF.getName() + ")").str();

  if (AbortOnFailure)
    report_fatal_error(R.getMsg());
  else
    ORE.emit(R);
}

/// Construct the pass with the static pass ID and remember \p optlevel in
/// OptLevel (getAnalysisUsage() consults it).
IRTranslator::IRTranslator(CodeGenOpt::Level optlevel)
    : MachineFunctionPass(ID), OptLevel(optlevel) {}

#ifndef NDEBUG
namespace {
/// Verify that every instruction created has the same DILocation as the
/// instruction being translated.
class DILocationVerifier : public GISelChangeObserver {
  const Instruction *CurrInst = nullptr;

public:
  DILocationVerifier() = default;
  ~DILocationVerifier() = default;

  const Instruction *getCurrentInst() const { return CurrInst; }
  void setCurrentInst(const Instruction *Inst) { CurrInst = Inst; }

  void erasingInstr(MachineInstr &MI) override {}
  void changingInstr(MachineInstr &MI) override {}
  void changedInstr(MachineInstr &MI) override {}

  void createdInstr(MachineInstr &MI) override {
    assert(getCurrentInst() && "Inserted instruction without a current MI");

    // Only print the check message if we're actually checking it.
#ifndef NDEBUG
    LLVM_DEBUG(dbgs() << "Checking DILocation from " << *CurrInst
                      << " was copied to " << MI);
#endif
    // We allow insts in the entry block to have a debug loc line of 0 because
    // they could have originated from constants, and we don't want a jumpy
    // debug experience.
    assert((CurrInst->getDebugLoc() == MI.getDebugLoc() ||
            MI.getDebugLoc().getLine() == 0) &&
           "Line info was not transferred to all instructions");
  }
};
} // namespace
#endif // ifndef NDEBUG


/// Declare the analyses this pass requires: StackProtector, TargetPassConfig
/// and the GlobalISel CSE analysis always, plus branch probability info when
/// the optimization level is not None. The SelectionDAG-fallback and base
/// class requirements are appended afterwards.
void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<StackProtector>();
  AU.addRequired<TargetPassConfig>();
  AU.addRequired<GISelCSEAnalysisWrapperPass>();
  // Branch probabilities are only requested when optimizing.
  if (OptLevel != CodeGenOpt::None)
    AU.addRequired<BranchProbabilityInfoWrapperPass>();
  getSelectionDAGFallbackAnalysisUsage(AU);
  MachineFunctionPass::getAnalysisUsage(AU);
}

/// Return the register list associated with \p Val, creating it if needed.
/// A freshly created list holds one placeholder register (number 0) per leaf
/// type of \p Val; no virtual registers are created here.
IRTranslator::ValueToVRegInfo::VRegListT &
IRTranslator::allocateVRegs(const Value &Val) {
  // Hand back the existing list when the value has been seen before.
  auto Existing = VMap.findVRegs(Val);
  if (Existing != VMap.vregs_end())
    return *Existing->second;

  auto *NewRegs = VMap.getVRegs(Val);
  auto *Offsets = VMap.getOffsets(Val);

  // Only have the offsets filled in if none have been recorded so far.
  auto *OffsetsToFill = Offsets->empty() ? Offsets : nullptr;
  SmallVector<LLT, 4> SplitTys;
  computeValueLLTs(*DL, *Val.getType(), SplitTys, OffsetsToFill);

  // One placeholder slot per split type.
  NewRegs->append(SplitTys.size(), Register());
  return *NewRegs;
}

/// Return the virtual registers holding \p Val, creating them on first use.
///
/// Void values get an empty list. Non-constants get one fresh generic virtual
/// register per leaf type. Aggregate constants are assembled from the
/// registers of their elements. Any other constant gets a single register and
/// is translated immediately; if that translation fails, a translation error
/// is reported and the (already created) register list is still returned.
ArrayRef<Register> IRTranslator::getOrCreateVRegs(const Value &Val) {
  auto Known = VMap.findVRegs(Val);
  if (Known != VMap.vregs_end())
    return *Known->second;

  Type *ValTy = Val.getType();
  if (ValTy->isVoidTy())
    return *VMap.getVRegs(Val);

  // Create entry for this type.
  auto *RegList = VMap.getVRegs(Val);
  auto *Offsets = VMap.getOffsets(Val);

  assert(ValTy->isSized() && "Don't know how to create an empty vreg");

  SmallVector<LLT, 4> SplitTys;
  computeValueLLTs(*DL, *ValTy, SplitTys,
                   Offsets->empty() ? Offsets : nullptr);

  const auto *C = dyn_cast<Constant>(&Val);
  if (!C) {
    // Plain values: one new generic vreg for every leaf type.
    for (auto Ty : SplitTys)
      RegList->push_back(MRI->createGenericVirtualRegister(Ty));
    return *RegList;
  }

  if (ValTy->isAggregateType()) {
    // UndefValue, ConstantAggregateZero: concatenate the registers of each
    // element, stopping at the first index that has no element.
    for (unsigned Idx = 0; Constant *Elt = C->getAggregateElement(Idx); ++Idx) {
      ArrayRef<Register> EltRegs = getOrCreateVRegs(*Elt);
      llvm::copy(EltRegs, std::back_inserter(*RegList));
    }
    return *RegList;
  }

  assert(SplitTys.size() == 1 && "unexpectedly split LLT");
  RegList->push_back(MRI->createGenericVirtualRegister(SplitTys[0]));
  if (!translate(*C, RegList->front())) {
    OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                               MF->getFunction().getSubprogram(),
                               &MF->getFunction().getEntryBlock());
    R << "unable to translate constant: " << ore::NV("Type", Val.getType());
    reportTranslationError(*MF, *TPC, *ORE, R);
  }

  return *RegList;
}

/// Return the frame index of the stack object backing \p AI, creating the
/// stack object (and caching its index in FrameIndices) on first request.
/// The array size of \p AI must be a ConstantInt.
int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
  auto Cached = FrameIndices.find(&AI);
  if (Cached != FrameIndices.end())
    return Cached->second;

  const uint64_t ElementSize = DL->getTypeAllocSize(AI.getAllocatedType());
  const uint64_t NumElements =
      cast<ConstantInt>(AI.getArraySize())->getZExtValue();

  // Always allocate at least one byte.
  const uint64_t Size = std::max<uint64_t>(ElementSize * NumElements, 1u);

  int &Slot = FrameIndices[&AI];
  Slot = MF->getFrameInfo().CreateStackObject(Size, AI.getAlign(), false, &AI);
  return Slot;
}

/// Return the alignment recorded on the memory instruction \p I (store, load,
/// cmpxchg or atomicrmw). For any other instruction a translation error is
/// reported and an alignment of 1 is returned.
Align IRTranslator::getMemOpAlign(const Instruction &I) {
  if (const auto *Store = dyn_cast<StoreInst>(&I))
    return Store->getAlign();
  if (const auto *Load = dyn_cast<LoadInst>(&I))
    return Load->getAlign();
  if (const auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(&I))
    return CmpXchg->getAlign();
  if (const auto *RMW = dyn_cast<AtomicRMWInst>(&I))
    return RMW->getAlign();

  // Not one of the memory operations handled above.
  OptimizationRemarkMissed R("gisel-irtranslator", "", &I);
  R << "unable to translate memop: " << ore::NV("Opcode", &I);
  reportTranslationError(*MF, *TPC, *ORE, R);
  return Align(1);
}

/// Return the MachineBasicBlock recorded for \p BB in BBToMBB. The block must
/// already have an entry; this is asserted.
MachineBasicBlock &IRTranslator::getMBB(const BasicBlock &BB) {
  MachineBasicBlock *Mapped = BBToMBB[&BB];
  assert(Mapped && "BasicBlock was not encountered before");
  return *Mapped;
}

/// Record \p NewPred as an additional machine-level predecessor for the IR CFG
/// edge \p Edge by appending it to that edge's list in MachinePreds.
/// \p NewPred must not be null.
void IRTranslator::addMachineCFGPred(CFGEdge Edge, MachineBasicBlock *NewPred) {
  assert(NewPred && "new predecessor must be a real MachineBasicBlock");
  MachinePreds[Edge].push_back(NewPred);
}

bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
                                     MachineIRBuilder &MIRBuilder) {
  // Get or create a virtual register for each value.
  // Unless the value is a Constant => loadimm cst?
  // or inline constant each time?
  // Creation of a virtual register needs to have a size.
  Register Op0 = getOrCreateVReg(*U.getOperand(0));
  Register Op1 = getOrCreateVReg(*U.getOperand(1));
  Register Res = getOrCreateVReg(U);
  uint16_t Flags = 0;
  if (isa<Instruction>(U)) {
    const Instruction &I = cast<Instruction>(U);
    Flags = MachineInstr::copyFlagsFromInstruction(I);
  }

  MIRBuilder.buildInstr(Opcode, {Res}, {Op0, Op1}, Flags);
  return true;
}

bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U,
                                    MachineIRBuilder &MIRBuilder) {
  Register Op0 = getOrCreateVReg(*U.getOperand(0));
  Register Res = getOrCreateVReg(U);
  uint16_t Flags = 0;
  if (isa<Instruction>(U)) {
    const Instruction &I = cast<Instruction>(U);
    Flags = MachineInstr::copyFlagsFromInstruction(I);
  }
  MIRBuilder.buildInstr(Opcode, {Res}, {Op0}, Flags);
  return true;
}

/// Translate a floating-point negation by emitting G_FNEG through the generic
/// unary-op path.
bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
  return translateUnaryOp(TargetOpcode::G_FNEG, U, MIRBuilder);
}

bool IRTranslator::translateCompare(const User &U,
                                    MachineIRBuilder &MIRBuilder) {
  auto *CI = dyn_cast<CmpInst>(&U);
  Register Op0 = getOrCreateVReg(*U.getOperand(0));
  Register Op1 = getOrCreateVReg(*U.getOperand(1));
  Register Res = getOrCreateVReg(U);
  CmpInst::Predicate Pred =
      CI ? CI->getPredicate() : static_cast<CmpInst::Predicate>(
                                    cast<ConstantExpr>(U).getPredicate());
  if (CmpInst::isIntPredicate(Pred))
    MIRBuilder.buildICmp(Pred, Res, Op0, Op1);
  else if (Pred == CmpInst::FCMP_FALSE)
    MIRBuilder.buildCopy(
        Res, getOrCreateVReg(*Constant::getNullValue(U.getType())));
  else if (Pred == CmpInst::FCMP_TRUE)
    MIRBuilder.buildCopy(
        Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType())));
  else {
    assert(CI && "Instruction should be CmpInst");
    MIRBuilder.buildFCmp(Pred, Res, Op0, Op1,
                         MachineInstr::copyFlagsFromInstruction(*CI));
  }

  return true;
}

bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
  const ReturnInst &RI = cast<ReturnInst>(U);
  const Value *Ret = RI.getReturnValue();
  if (Ret && DL->getTypeStoreSize(Ret->getType()) == 0)
    Ret = nullptr;

  ArrayRef<Register> VRegs;
  if (Ret)
    VRegs = getOrCreateVRegs(*Ret);

  Register SwiftErrorVReg = 0;
  if (CLI->supportSwiftError() && SwiftError.getFunctionArg()) {
    SwiftErrorVReg = SwiftError.getOrCreateVRegUseAt(
        &RI, &MIRBuilder.getMBB(), SwiftError.getFunctionArg());
  }

  // The target may mess up with the insertion point, but
  // this is not important as a return is the last instruction
  // of the block anyway.
  return CLI->lowerReturn(MIRBuilder, Ret, VRegs, FuncInfo, SwiftErrorVReg);
}

void IRTranslator::emitBranchForMergedCondition(
    const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
    BranchProbability TProb, BranchProbability FProb, bool InvertCond) {
  // If the leaf of the tree is a comparison, merge the condition into
  // the caseblock.
  if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
    CmpInst::Predicate Condition;
    if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
      Condition = InvertCond ? IC->getInversePredicate() : IC->getPredicate();
    } else {
      const FCmpInst *FC = cast<FCmpInst>(Cond);
      Condition = InvertCond ? FC->getInversePredicate() : FC->getPredicate();
    }

    SwitchCG::CaseBlock CB(Condition, false, BOp->getOperand(0),
                           BOp->getOperand(1), nullptr, TBB, FBB, CurBB,
                           CurBuilder->getDebugLoc(), TProb, FProb);
    SL->SwitchCases.push_back(CB);
    return;
  }

  // Create a CaseBlock record representing this branch.
  CmpInst::Predicate Pred = InvertCond ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
  SwitchCG::CaseBlock CB(
      Pred, false, Cond, ConstantInt::getTrue(MF->getFunction().getContext()),
      nullptr, TBB, FBB, CurBB, CurBuilder->getDebugLoc(), TProb, FProb);
  SL->SwitchCases.push_back(CB);
}

static bool isValInBlock(const Value *V, const BasicBlock *BB) {
  if (const Instruction *I = dyn_cast<Instruction>(V))
    return I->getParent() == BB;
  return true;
}

/// Recursively split the and/or condition tree rooted at \p Cond into a chain
/// of conditional branches, appending one CaseBlock per leaf to
/// SL->SwitchCases (via emitBranchForMergedCondition).
///
/// \p TBB / \p FBB are the destinations for a true / false outcome, \p CurBB
/// is the block the current sub-condition is emitted into, and \p Opc is the
/// opcode (And or Or) every interior node of the tree must have. \p TProb and
/// \p FProb are the probabilities of the true and false outcomes and are
/// redistributed over the blocks created here. \p InvertCond requests that the
/// sub-tree be lowered negated. A new MachineBasicBlock is created and inserted
/// after \p CurBB for each interior node that is split.
void IRTranslator::findMergedConditions(
    const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
    Instruction::BinaryOps Opc, BranchProbability TProb,
    BranchProbability FProb, bool InvertCond) {
  using namespace PatternMatch;
  assert((Opc == Instruction::And || Opc == Instruction::Or) &&
         "Expected Opc to be AND/OR");
  // Skip over a single-use 'not' that is part of the tree and remember to
  // invert op and operands at the next level.
  Value *NotCond;
  if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
      isValInBlock(NotCond, CurBB->getBasicBlock())) {
    findMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
                         !InvertCond);
    return;
  }

  const Instruction *BOp = dyn_cast<Instruction>(Cond);
  // Only bound when one of the logical and/or matches below succeeds.
  const Value *BOpOp0, *BOpOp1;
  // Compute the effective opcode for Cond, taking into account whether it needs
  // to be inverted, e.g.
  //   and (not (or A, B)), C
  // gets lowered as
  //   and (and (not A, not B), C)
  Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0;
  if (BOp) {
    BOpc = match(BOp, m_LogicalAnd(m_Value(BOpOp0), m_Value(BOpOp1)))
               ? Instruction::And
               : (match(BOp, m_LogicalOr(m_Value(BOpOp0), m_Value(BOpOp1)))
                      ? Instruction::Or
                      : (Instruction::BinaryOps)0);
    if (InvertCond) {
      if (BOpc == Instruction::And)
        BOpc = Instruction::Or;
      else if (BOpc == Instruction::Or)
        BOpc = Instruction::And;
    }
  }

  // If this node is not part of the or/and tree, emit it as a branch.
  // Note that all nodes in the tree should have same opcode.
  // The short-circuit on !BOpIsInOrAndTree ensures BOp, BOpOp0 and BOpOp1 are
  // only inspected when a match above succeeded.
  bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse();
  if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() ||
      !isValInBlock(BOpOp0, CurBB->getBasicBlock()) ||
      !isValInBlock(BOpOp1, CurBB->getBasicBlock())) {
    emitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, TProb, FProb,
                                 InvertCond);
    return;
  }

  //  Create TmpBB after CurBB.
  MachineFunction::iterator BBI(CurBB);
  MachineBasicBlock *TmpBB =
      MF->CreateMachineBasicBlock(CurBB->getBasicBlock());
  CurBB->getParent()->insert(++BBI, TmpBB);

  if (Opc == Instruction::Or) {
    // Codegen X | Y as:
    // BB1:
    //   jmp_if_X TBB
    //   jmp TmpBB
    // TmpBB:
    //   jmp_if_Y TBB
    //   jmp FBB
    //

    // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
    // The requirement is that
    //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
    //     = TrueProb for original BB.
    // Assuming the original probabilities are A and B, one choice is to set
    // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
    // A/(1+B) and 2B/(1+B). This choice assumes that
    //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
    // Another choice is to assume TrueProb for BB1 equals to TrueProb for
    // TmpBB, but the math is more complicated.

    auto NewTrueProb = TProb / 2;
    auto NewFalseProb = TProb / 2 + FProb;
    // Emit the LHS condition.
    findMergedConditions(BOpOp0, TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb,
                         NewFalseProb, InvertCond);

    // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
    SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
    BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
    // Emit the RHS condition into TmpBB.
    findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
                         Probs[1], InvertCond);
  } else {
    assert(Opc == Instruction::And && "Unknown merge op!");
    // Codegen X & Y as:
    // BB1:
    //   jmp_if_X TmpBB
    //   jmp FBB
    // TmpBB:
    //   jmp_if_Y TBB
    //   jmp FBB
    //
    //  This requires creation of TmpBB after CurBB.

    // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
    // The requirement is that
    //   FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
    //     = FalseProb for original BB.
    // Assuming the original probabilities are A and B, one choice is to set
    // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
    // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
    // TrueProb for BB1 * FalseProb for TmpBB.

    auto NewTrueProb = TProb + FProb / 2;
    auto NewFalseProb = FProb / 2;
    // Emit the LHS condition.
    findMergedConditions(BOpOp0, TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb,
                         NewFalseProb, InvertCond);

    // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
    SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
    BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
    // Emit the RHS condition into TmpBB.
    findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
                         Probs[1], InvertCond);
  }
}

/// Decide whether the case blocks produced for a merged and/or condition
/// should really be emitted as separate branches.
///
/// Returns true unless there are exactly two cases that either compare the
/// same pair of values (in either operand order), or are both equality /
/// inequality tests against the same null constant chained in the matching
/// direction.
bool IRTranslator::shouldEmitAsBranches(
    const std::vector<SwitchCG::CaseBlock> &Cases) {
  // For multiple cases, it's better to emit as branches.
  if (Cases.size() != 2)
    return true;

  const SwitchCG::CaseBlock &First = Cases[0];
  const SwitchCG::CaseBlock &Second = Cases[1];

  // If this is two comparisons of the same values or'd or and'd together, they
  // will get folded into a single comparison, so don't emit two blocks.
  const bool SameOperands =
      First.CmpLHS == Second.CmpLHS && First.CmpRHS == Second.CmpRHS;
  const bool SwappedOperands =
      First.CmpRHS == Second.CmpLHS && First.CmpLHS == Second.CmpRHS;
  if (SameOperands || SwappedOperands)
    return false;

  // Handle: (X != null) | (Y != null) --> (X|Y) != 0
  // Handle: (X == null) & (Y == null) --> (X|Y) == 0
  if (First.CmpRHS == Second.CmpRHS &&
      First.PredInfo.Pred == Second.PredInfo.Pred &&
      isa<Constant>(First.CmpRHS) &&
      cast<Constant>(First.CmpRHS)->isNullValue()) {
    const auto Pred = First.PredInfo.Pred;
    if (Pred == CmpInst::ICMP_EQ && First.TrueBB == Second.ThisBB)
      return false;
    if (Pred == CmpInst::ICMP_NE && First.FalseBB == Second.ThisBB)
      return false;
  }

  return true;
}

/// Translate an IR branch instruction. Always returns true.
///
/// An unconditional branch emits a branch to its target unless the target is
/// the layout successor, and records the successor edge. A conditional branch
/// whose condition is a single-use logical and/or tree is, where profitable,
/// split into a sequence of conditional branches through
/// findMergedConditions(); in every other case it is lowered as one CaseBlock
/// that compares the condition against true.
bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
  const BranchInst &BrInst = cast<BranchInst>(U);
  auto &CurMBB = MIRBuilder.getMBB();
  auto *Succ0MBB = &getMBB(*BrInst.getSuccessor(0));

  if (BrInst.isUnconditional()) {
    // If the unconditional target is the layout successor, fallthrough.
    if (!CurMBB.isLayoutSuccessor(Succ0MBB))
      MIRBuilder.buildBr(*Succ0MBB);

    // Link successors.
    for (const BasicBlock *Succ : successors(&BrInst))
      CurMBB.addSuccessor(&getMBB(*Succ));
    return true;
  }

  // If this condition is one of the special cases we handle, do special stuff
  // now.
  const Value *CondVal = BrInst.getCondition();
  MachineBasicBlock *Succ1MBB = &getMBB(*BrInst.getSuccessor(1));

  const auto &TLI = *MF->getSubtarget().getTargetLowering();

  // If this is a series of conditions that are or'd or and'd together, emit
  // this as a sequence of branches instead of setcc's with and/or operations.
  // As long as jumps are not expensive (exceptions for multi-use logic ops,
  // unpredictable branches, and vector extracts because those jumps are likely
  // expensive for any target), this should improve performance.
  // For example, instead of something like:
  //     cmp A, B
  //     C = seteq
  //     cmp D, E
  //     F = setle
  //     or C, F
  //     jnz foo
  // Emit:
  //     cmp A, B
  //     je foo
  //     cmp D, E
  //     jle foo
  using namespace PatternMatch;
  const Instruction *CondI = dyn_cast<Instruction>(CondVal);
  if (!TLI.isJumpExpensive() && CondI && CondI->hasOneUse() &&
      !BrInst.hasMetadata(LLVMContext::MD_unpredictable)) {
    Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0;
    Value *Vec;
    const Value *BOp0, *BOp1;
    if (match(CondI, m_LogicalAnd(m_Value(BOp0), m_Value(BOp1))))
      Opcode = Instruction::And;
    else if (match(CondI, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
      Opcode = Instruction::Or;

    // Skip the split when both operands are extracts from the same vector.
    if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
                    match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
      findMergedConditions(CondI, Succ0MBB, Succ1MBB, &CurMBB, &CurMBB, Opcode,
                           getEdgeProbability(&CurMBB, Succ0MBB),
                           getEdgeProbability(&CurMBB, Succ1MBB),
                           /*InvertCond=*/false);
      assert(SL->SwitchCases[0].ThisBB == &CurMBB && "Unexpected lowering!");

      // Allow some cases to be rejected.
      if (shouldEmitAsBranches(SL->SwitchCases)) {
        // Emit the branch for this block.
        // Only the first case is emitted here; the remaining entries stay in
        // SL->SwitchCases (presumably emitted later, outside this function).
        emitSwitchCase(SL->SwitchCases[0], &CurMBB, *CurBuilder);
        SL->SwitchCases.erase(SL->SwitchCases.begin());
        return true;
      }

      // Okay, we decided not to do this, remove any inserted MBB's and clear
      // SwitchCases.
      // Index 0 is CurMBB itself (asserted above), so start erasing at 1.
      for (unsigned I = 1, E = SL->SwitchCases.size(); I != E; ++I)
        MF->erase(SL->SwitchCases[I].ThisBB);

      SL->SwitchCases.clear();
    }
  }

  // Create a CaseBlock record representing this branch.
  SwitchCG::CaseBlock CB(CmpInst::ICMP_EQ, false, CondVal,
                         ConstantInt::getTrue(MF->getFunction().getContext()),
                         nullptr, Succ0MBB, Succ1MBB, &CurMBB,
                         CurBuilder->getDebugLoc());

  // Use emitSwitchCase to actually insert the fast branch sequence for this
  // cond branch.
  emitSwitchCase(CB, &CurMBB, *CurBuilder);
  return true;
}

/// Add \p Dst as a successor of \p Src. Without branch probability info the
/// edge is added with no probability; otherwise \p Prob is used, falling back
/// to getEdgeProbability() when \p Prob is unknown.
void IRTranslator::addSuccessorWithProb(MachineBasicBlock *Src,
                                        MachineBasicBlock *Dst,
                                        BranchProbability Prob) {
  if (!FuncInfo.BPI) {
    Src->addSuccessorWithoutProb(Dst);
    return;
  }

  const BranchProbability EdgeProb =
      Prob.isUnknown() ? getEdgeProbability(Src, Dst) : Prob;
  Src->addSuccessor(Dst, EdgeProb);
}

/// Return the probability of the edge from \p Src to \p Dst, looked up on the
/// IR blocks they correspond to. Without branch probability info every
/// successor is considered equally likely.
BranchProbability
IRTranslator::getEdgeProbability(const MachineBasicBlock *Src,
                                 const MachineBasicBlock *Dst) const {
  const BasicBlock *SrcBB = Src->getBasicBlock();
  const BasicBlock *DstBB = Dst->getBasicBlock();

  if (FuncInfo.BPI)
    return FuncInfo.BPI->getEdgeProbability(SrcBB, DstBB);

  // If BPI is not available, set the default probability as 1 / N, where N is
  // the number of successors (at least 1).
  const auto NumSuccs = std::max<uint32_t>(succ_size(SrcBB), 1);
  return BranchProbability(1, NumSuccs);
}

/// Translate an IR switch instruction.
///
/// Each case becomes a single-value cluster; adjacent clusters with the same
/// destination are merged, then jump-table and bit-test clusters are
/// identified. A switch with no cases branches straight to the default
/// destination. Otherwise one work item covering all clusters is lowered with
/// lowerSwitchWorkItem(). Returns false if that lowering fails, true
/// otherwise.
bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) {
  using namespace SwitchCG;
  // Extract cases from the switch.
  const SwitchInst &SI = cast<SwitchInst>(U);
  BranchProbabilityInfo *BPI = FuncInfo.BPI;
  CaseClusterVector Clusters;
  Clusters.reserve(SI.getNumCases());
  for (auto &I : SI.cases()) {
    MachineBasicBlock *Succ = &getMBB(*I.getCaseSuccessor());
    assert(Succ && "Could not find successor mbb in mapping");
    const ConstantInt *CaseVal = I.getCaseValue();
    // Without BPI, spread the probability evenly over the cases plus the
    // default destination.
    BranchProbability Prob =
        BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())
            : BranchProbability(1, SI.getNumCases() + 1);
    Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob));
  }

  MachineBasicBlock *DefaultMBB = &getMBB(*SI.getDefaultDest());

  // Cluster adjacent cases with the same destination. We do this at all
  // optimization levels because it's cheap to do and will make codegen faster
  // if there are many clusters.
  sortAndRangeify(Clusters);

  MachineBasicBlock *SwitchMBB = &getMBB(*SI.getParent());

  // If there is only the default destination, jump there directly.
  if (Clusters.empty()) {
    SwitchMBB->addSuccessor(DefaultMBB);
    if (DefaultMBB != SwitchMBB->getNextNode())
      MIB.buildBr(*DefaultMBB);
    return true;
  }

  SL->findJumpTables(Clusters, &SI, DefaultMBB, nullptr, nullptr);
  SL->findBitTestClusters(Clusters, &SI);

  // Debug-only dump of the clusters, with a JT:/BT: prefix on jump-table and
  // bit-test clusters.
  LLVM_DEBUG({
    dbgs() << "Case clusters: ";
    for (const CaseCluster &C : Clusters) {
      if (C.Kind == CC_JumpTable)
        dbgs() << "JT:";
      if (C.Kind == CC_BitTests)
        dbgs() << "BT:";

      C.Low->getValue().print(dbgs(), true);
      if (C.Low != C.High) {
        dbgs() << '-';
        C.High->getValue().print(dbgs(), true);
      }
      dbgs() << ' ';
    }
    dbgs() << '\n';
  });

  assert(!Clusters.empty());
  SwitchWorkList WorkList;
  CaseClusterIt First = Clusters.begin();
  CaseClusterIt Last = Clusters.end() - 1;
  auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB);
  WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb});

  // FIXME: At the moment we don't do any splitting optimizations here like
  // SelectionDAG does, so this worklist only has one entry.
  while (!WorkList.empty()) {
    SwitchWorkListItem W = WorkList.back();
    WorkList.pop_back();
    if (!lowerSwitchWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB, MIB))
      return false;
  }
  return true;
}

/// Emit into \p MBB the jump-table address computation and the indirect
/// branch through it for \p JT. The jump table header must have been lowered
/// first so that JT.Reg holds the table index.
void IRTranslator::emitJumpTable(SwitchCG::JumpTable &JT,
                                 MachineBasicBlock *MBB) {
  // Emit the code for the jump table
  assert(JT.Reg != -1U && "Should lower JT Header first!");

  MachineIRBuilder Builder(*MBB->getParent());
  Builder.setMBB(*MBB);
  Builder.setDebugLoc(CurBuilder->getDebugLoc());

  // The table address is typed as an i8 pointer.
  LLVMContext &Ctx = MF->getFunction().getContext();
  Type *PtrIRTy = Type::getInt8PtrTy(Ctx);
  const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);

  Register TableAddr = Builder.buildJumpTable(PtrTy, JT.JTI).getReg(0);
  Builder.buildBrJT(TableAddr, JT.JTI, JT.Reg);
}

/// Emit the jump table header into \p HeaderBB: compute the table index
/// (switch value minus the lowest case value, resized to pointer width), store
/// it in JT.Reg and, unless the range check is omitted, branch to the default
/// block when the index exceeds the table range. Always returns true.
bool IRTranslator::emitJumpTableHeader(SwitchCG::JumpTable &JT,
                                       SwitchCG::JumpTableHeader &JTH,
                                       MachineBasicBlock *HeaderBB) {
  MachineIRBuilder MIB(*HeaderBB->getParent());
  MIB.setMBB(*HeaderBB);
  MIB.setDebugLoc(CurBuilder->getDebugLoc());

  const Value &SValue = *JTH.SValue;
  // Subtract the lowest switch case value from the value being switched on.
  const LLT SwitchTy = getLLTForType(*SValue.getType(), *DL);
  Register SwitchOpReg = getOrCreateVReg(SValue);
  auto FirstCst = MIB.buildConstant(SwitchTy, JTH.First);
  auto Sub = MIB.buildSub({SwitchTy}, SwitchOpReg, FirstCst);

  // This value may be smaller or larger than the target's pointer type, and
  // therefore require extension or truncating.
  Type *PtrIRTy = SValue.getType()->getPointerTo();
  const LLT PtrScalarTy = LLT::scalar(DL->getTypeSizeInBits(PtrIRTy));
  Sub = MIB.buildZExtOrTrunc(PtrScalarTy, Sub);

  JT.Reg = Sub.getReg(0);

  // With the range check omitted, only a branch to the table block is needed,
  // and only when it is not the next block.
  if (JTH.OmitRangeCheck) {
    if (JT.MBB != HeaderBB->getNextNode())
      MIB.buildBr(*JT.MBB);
    return true;
  }

  // Emit the range check for the jump table, and branch to the default block
  // for the switch statement if the value being switched on exceeds the
  // largest case in the switch.
  auto Cst = getOrCreateVReg(
      *ConstantInt::get(SValue.getType(), JTH.Last - JTH.First));
  Cst = MIB.buildZExtOrTrunc(PtrScalarTy, Cst).getReg(0);
  auto Cmp = MIB.buildICmp(CmpInst::ICMP_UGT, LLT::scalar(1), Sub, Cst);

  auto BrCond = MIB.buildBrCond(Cmp.getReg(0), *JT.Default);

  // Avoid emitting unnecessary branches to the next block.
  // NOTE(review): the value assigned to BrCond here is never read afterwards;
  // only the side effect of building the branch matters.
  if (JT.MBB != HeaderBB->getNextNode())
    BrCond = MIB.buildBr(*JT.MBB);
  return true;
}

/// Emit the compare-and-branch sequence described by \p CB into CB.ThisBB,
/// update its successor list and probabilities, and record CB.ThisBB as a
/// machine predecessor for the IR edges leaving \p SwitchBB's basic block.
///
/// Three shapes are handled: no comparison at all (plain branch or
/// fallthrough to TrueBB), a two-operand comparison LHS <pred> RHS, and a
/// range check LHS <= MHS <= RHS with constant bounds. \p MIB's block is set
/// to CB.ThisBB; its debug location is temporarily set to CB.DbgLoc and
/// restored before returning.
void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
                                  MachineBasicBlock *SwitchBB,
                                  MachineIRBuilder &MIB) {
  Register CondLHS = getOrCreateVReg(*CB.CmpLHS);
  Register Cond;
  DebugLoc OldDbgLoc = MIB.getDebugLoc();
  MIB.setDebugLoc(CB.DbgLoc);
  MIB.setMBB(*CB.ThisBB);

  if (CB.PredInfo.NoCmp) {
    // Branch or fall through to TrueBB.
    addSuccessorWithProb(CB.ThisBB, CB.TrueBB, CB.TrueProb);
    addMachineCFGPred({SwitchBB->getBasicBlock(), CB.TrueBB->getBasicBlock()},
                      CB.ThisBB);
    CB.ThisBB->normalizeSuccProbs();
    if (CB.TrueBB != CB.ThisBB->getNextNode())
      MIB.buildBr(*CB.TrueBB);
    MIB.setDebugLoc(OldDbgLoc);
    return;
  }

  const LLT i1Ty = LLT::scalar(1);
  // Build the compare.
  if (!CB.CmpMHS) {
    const auto *CI = dyn_cast<ConstantInt>(CB.CmpRHS);
    // For conditional branch lowering, we might try to do something silly like
    // emit an G_ICMP to compare an existing G_ICMP i1 result with true. If so,
    // just re-use the existing condition vreg.
    if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI &&
        CI->getZExtValue() == 1 && CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
      Cond = CondLHS;
    } else {
      Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
      if (CmpInst::isFPPredicate(CB.PredInfo.Pred))
        Cond =
            MIB.buildFCmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
      else
        Cond =
            MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
    }
  } else {
    assert(CB.PredInfo.Pred == CmpInst::ICMP_SLE &&
           "Can only handle SLE ranges");

    const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
    const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();

    Register CmpOpReg = getOrCreateVReg(*CB.CmpMHS);
    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
      // The lower bound is the minimum signed value, so only the upper bound
      // needs to be checked.
      Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
      Cond =
          MIB.buildICmp(CmpInst::ICMP_SLE, i1Ty, CmpOpReg, CondRHS).getReg(0);
    } else {
      // Check the range with one unsigned compare: (MHS - Low) <=u (High - Low).
      const LLT CmpTy = MRI->getType(CmpOpReg);
      auto Sub = MIB.buildSub({CmpTy}, CmpOpReg, CondLHS);
      auto Diff = MIB.buildConstant(CmpTy, High - Low);
      Cond = MIB.buildICmp(CmpInst::ICMP_ULE, i1Ty, Sub, Diff).getReg(0);
    }
  }

  // Update successor info
  addSuccessorWithProb(CB.ThisBB, CB.TrueBB, CB.TrueProb);

  addMachineCFGPred({SwitchBB->getBasicBlock(), CB.TrueBB->getBasicBlock()},
                    CB.ThisBB);

  // TrueBB and FalseBB are always different unless the incoming IR is
  // degenerate. This only happens when running llc on weird IR.
  if (CB.TrueBB != CB.FalseBB)
    addSuccessorWithProb(CB.ThisBB, CB.FalseBB, CB.FalseProb);
  CB.ThisBB->normalizeSuccProbs();

  addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()},
                    CB.ThisBB);

  MIB.buildBrCond(Cond, *CB.TrueBB);
  MIB.buildBr(*CB.FalseBB);
  MIB.setDebugLoc(OldDbgLoc);
}

/// Lower the jump-table cluster \p I belonging to work item \p W.
///
/// Inserts the cluster's jump block into the function before \p BBI, records
/// the additional machine CFG predecessors for the affected IR edges, splits
/// the branch probabilities between the jump block and \p Fallthrough, wires
/// up the successors of \p CurMBB, and, when \p CurMBB is the switch block
/// itself, emits the jump table header immediately. \p MIB is not referenced
/// by this function.
///
/// \returns false only if emitJumpTableHeader fails; true otherwise.
bool IRTranslator::lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W,
                                          MachineBasicBlock *SwitchMBB,
                                          MachineBasicBlock *CurMBB,
                                          MachineBasicBlock *DefaultMBB,
                                          MachineIRBuilder &MIB,
                                          MachineFunction::iterator BBI,
                                          BranchProbability UnhandledProbs,
                                          SwitchCG::CaseClusterIt I,
                                          MachineBasicBlock *Fallthrough,
                                          bool FallthroughUnreachable) {
  using namespace SwitchCG;
  MachineFunction *CurMF = SwitchMBB->getParent();
  // FIXME: Optimize away range check based on pivot comparisons.
  // The header/table pair for this cluster lives in SL->JTCases.
  JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first;
  SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second;
  BranchProbability DefaultProb = W.DefaultProb;

  // The jump block hasn't been inserted yet; insert it here.
  MachineBasicBlock *JumpMBB = JT->MBB;
  CurMF->insert(BBI, JumpMBB);

  // Since the jump table block is separate from the switch block, we need
  // to keep track of it as a machine predecessor to the default block,
  // otherwise we lose the phi edges.
  addMachineCFGPred({SwitchMBB->getBasicBlock(), DefaultMBB->getBasicBlock()},
                    CurMBB);
  addMachineCFGPred({SwitchMBB->getBasicBlock(), DefaultMBB->getBasicBlock()},
                    JumpMBB);

  // Probabilities of the two edges leaving CurMBB; adjusted below if the
  // default block is itself a jump table destination.
  auto JumpProb = I->Prob;
  auto FallthroughProb = UnhandledProbs;

  // If the default statement is a target of the jump table, we evenly
  // distribute the default probability to successors of CurMBB. Also
  // update the probability on the edge from JumpMBB to Fallthrough.
  // An explicit iterator is used because setSuccProbability takes one.
  for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
                                        SE = JumpMBB->succ_end();
       SI != SE; ++SI) {
    if (*SI == DefaultMBB) {
      JumpProb += DefaultProb / 2;
      FallthroughProb -= DefaultProb / 2;
      JumpMBB->setSuccProbability(SI, DefaultProb / 2);
      JumpMBB->normalizeSuccProbs();
    } else {
      // Also record edges from the jump table block to its successors.
      addMachineCFGPred({SwitchMBB->getBasicBlock(), (*SI)->getBasicBlock()},
                        JumpMBB);
    }
  }

  // Skip the range check if the fallthrough block is unreachable.
  if (FallthroughUnreachable)
    JTH->OmitRangeCheck = true;

  // Without a range check there is no edge from CurMBB to Fallthrough.
  if (!JTH->OmitRangeCheck)
    addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
  addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
  CurMBB->normalizeSuccProbs();

  // The jump table header will be inserted in our current block, do the
  // range check, and fall through to our fallthrough block.
  JTH->HeaderBB = CurMBB;
  JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.

  // If we're in the right place, emit the jump table header right now.
  // Otherwise JTH->Emitted is left untouched by this function.
  if (CurMBB == SwitchMBB) {
    if (!emitJumpTableHeader(*JT, *JTH, CurMBB))
      return false;
    JTH->Emitted = true;
  }
  return true;
}
/// Lower the range cluster \p I into a single compare-and-branch case block.
///
/// A cluster whose Low and High coincide becomes an equality test of \p Cond
/// against that value; any other cluster becomes a "Low <= Cond <= High" test
/// expressed with the SLE predicate and a middle operand. Always returns true.
bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I,
                                            Value *Cond,
                                            MachineBasicBlock *Fallthrough,
                                            bool FallthroughUnreachable,
                                            BranchProbability UnhandledProbs,
                                            MachineBasicBlock *CurMBB,
                                            MachineIRBuilder &MIB,
                                            MachineBasicBlock *SwitchMBB) {
  using namespace SwitchCG;

  // Start from the single-value form: Cond == I->Low.
  CmpInst::Predicate Predicate = CmpInst::ICMP_EQ;
  const Value *LeftOp = Cond;
  const Value *MiddleOp = nullptr;
  const Value *RightOp = I->Low;

  if (I->Low != I->High) {
    // Genuine range: I->Low <= Cond <= I->High.
    Predicate = CmpInst::ICMP_SLE;
    LeftOp = I->Low;
    MiddleOp = Cond;
    RightOp = I->High;
  }

  // If Fallthrough is unreachable, fold away the comparison.
  // The false probability is the sum of all unhandled cases.
  CaseBlock Block(Predicate, FallthroughUnreachable, LeftOp, RightOp, MiddleOp,
                  I->MBB, Fallthrough, CurMBB, MIB.getDebugLoc(), I->Prob,
                  UnhandledProbs);

  emitSwitchCase(Block, SwitchMBB, MIB);
  return true;
}

/// Emit the header of the bit-test sequence \p B into \p SwitchBB.
///
/// The header subtracts the smallest case value (B.First) from the switch
/// operand, converts the result to a type able to hold every case mask, and
/// stores that register and its value type in \p B for the per-case tests.
/// Unless B.OmitRangeCheck is set, it also emits a conditional branch to
/// B.Default taken when the adjusted value is unsigned-greater than B.Range.
void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,
                                     MachineBasicBlock *SwitchBB) {
  MachineIRBuilder &MIB = *CurBuilder;
  MIB.setMBB(*SwitchBB);

  // Subtract the minimum value.
  Register SwitchOpReg = getOrCreateVReg(*B.SValue);

  LLT SwitchOpTy = MRI->getType(SwitchOpReg);
  Register MinValReg = MIB.buildConstant(SwitchOpTy, B.First).getReg(0);
  auto RangeSub = MIB.buildSub(SwitchOpTy, SwitchOpReg, MinValReg);

  // Ensure that the type will fit the mask value.
  LLT MaskTy = SwitchOpTy;
  const unsigned SwitchOpBits = SwitchOpTy.getSizeInBits();
  const bool MaskOverflowsSwitchTy =
      llvm::any_of(B.Cases, [SwitchOpBits](const SwitchCG::BitTestCase &C) {
        return !isUIntN(SwitchOpBits, C.Mask);
      });
  if (MaskOverflowsSwitchTy) {
    // Switch table case ranges are encoded into a series of masks. Use a
    // pointer-sized integer rather than a hard-coded 64-bit one.
    // NOTE(review): this assumes bit-test clusters are only formed with masks
    // no wider than a pointer - confirm against the cluster formation code.
    MaskTy = LLT::scalar(DL->getPointerSizeInBits());
  }

  Register SubReg = RangeSub.getReg(0);
  if (SwitchOpTy != MaskTy)
    SubReg = MIB.buildZExtOrTrunc(MaskTy, SubReg).getReg(0);

  // Publish the adjusted value for the bit-test case blocks.
  B.RegVT = getMVTForLLT(MaskTy);
  B.Reg = SubReg;

  MachineBasicBlock *MBB = B.Cases[0].ThisBB;

  // The edge to the default block only exists when the range is checked.
  if (!B.OmitRangeCheck)
    addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
  addSuccessorWithProb(SwitchBB, MBB, B.Prob);

  SwitchBB->normalizeSuccProbs();

  if (!B.OmitRangeCheck) {
    // Conditional branch to the default block. The comparison is done in the
    // original switch operand type, on the un-converted subtraction result.
    auto RangeCst = MIB.buildConstant(SwitchOpTy, B.Range);
    auto RangeCmp = MIB.buildICmp(CmpInst::Predicate::ICMP_UGT, LLT::scalar(1),
                                  RangeSub, RangeCst);
    MIB.buildBrCond(RangeCmp, *B.Default);
  }

  // Avoid emitting unnecessary branches to the next block.
  if (MBB != SwitchBB->getNextNode())
    MIB.buildBr(*MBB);
}

/// Emit one bit-test case \p B of the bit-test block \p BB into \p SwitchBB.
///
/// Builds a 1-bit condition from \p Reg and the case mask, branches to
/// B.TargetBB when it holds, and otherwise continues to \p NextMBB (with an
/// explicit branch only if \p NextMBB is not the layout successor).
void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB,
                                   MachineBasicBlock *NextMBB,
                                   BranchProbability BranchProbToNext,
                                   Register Reg, SwitchCG::BitTestCase &B,
                                   MachineBasicBlock *SwitchBB) {
  MachineIRBuilder &Builder = *CurBuilder;
  Builder.setMBB(*SwitchBB);

  const LLT TestTy = getLLTForMVT(BB.RegVT);
  const unsigned NumMaskBits = countPopulation(B.Mask);

  Register Hit;
  if (NumMaskBits == 1) {
    // Testing for a single bit; just compare the shift count with what it
    // would need to be to shift a 1 bit in that position.
    auto BitPosition = Builder.buildConstant(TestTy, countTrailingZeros(B.Mask));
    auto IsThatBit =
        Builder.buildICmp(ICmpInst::ICMP_EQ, LLT::scalar(1), Reg, BitPosition);
    Hit = IsThatBit.getReg(0);
  } else if (NumMaskBits == BB.Range) {
    // There is only one zero bit in the range, test for it directly.
    auto ZeroPosition =
        Builder.buildConstant(TestTy, countTrailingOnes(B.Mask));
    auto IsNotZeroBit =
        Builder.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Reg, ZeroPosition);
    Hit = IsNotZeroBit.getReg(0);
  } else {
    // General case: shift a one into position, mask it, and test for a
    // non-zero result.
    auto One = Builder.buildConstant(TestTy, 1);
    auto Shifted = Builder.buildShl(TestTy, One, Reg);
    auto Mask = Builder.buildConstant(TestTy, B.Mask);
    auto Masked = Builder.buildAnd(TestTy, Shifted, Mask);
    auto Zero = Builder.buildConstant(TestTy, 0);
    auto IsSet =
        Builder.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Masked, Zero);
    Hit = IsSet.getReg(0);
  }

  // SwitchBB -> B.TargetBB carries B.ExtraProb and SwitchBB -> NextMBB carries
  // BranchProbToNext. These are relative weights that need not sum to one, so
  // normalize after both edges have been added.
  addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
  addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
  SwitchBB->normalizeSuccProbs();

  // The IR edge from the header to the bit test target now goes through our
  // new block; record that so PHIs get their incoming values added.
  addMachineCFGPred({BB.Parent->getBasicBlock(), B.TargetBB->getBasicBlock()},
                    SwitchBB);

  Builder.buildBrCond(Hit, *B.TargetBB);

  // Only branch explicitly when NextMBB does not immediately follow.
  if (SwitchBB->getNextNode() != NextMBB)
    Builder.buildBr(*NextMBB);
}

/// Lower the bit-test cluster \p I: insert its case blocks before \p BBI, fill
/// in the corresponding BitTestBlock, and emit the bit-test header right away
/// when \p CurMBB is the switch block. Always returns true.
bool IRTranslator::lowerBitTestWorkItem(
    SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB,
    MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB,
    MachineIRBuilder &MIB, MachineFunction::iterator BBI,
    BranchProbability DefaultProb, BranchProbability UnhandledProbs,
    SwitchCG::CaseClusterIt I, MachineBasicBlock *Fallthrough,
    bool FallthroughUnreachable) {
  using namespace SwitchCG;
  MachineFunction *Fn = SwitchMBB->getParent();
  // FIXME: Optimize away range check based on pivot comparisons.
  BitTestBlock &Block = SL->BitTestCases[I->BTCasesIndex];

  // The bit test blocks haven't been inserted yet; insert them here.
  for (BitTestCase &Case : Block.Cases)
    Fn->insert(BBI, Case.ThisBB);

  // Fill in fields of the BitTestBlock.
  Block.Parent = CurMBB;
  Block.Default = Fallthrough;
  Block.DefaultProb = UnhandledProbs;

  // When the cases do not form a contiguous range, the probability of the
  // edge to Fallthrough is split evenly between the two successors of CurMBB.
  if (!Block.ContiguousRange) {
    Block.Prob += DefaultProb / 2;
    Block.DefaultProb -= DefaultProb / 2;
  }

  // Skip the range check if the fallthrough block is unreachable.
  if (FallthroughUnreachable)
    Block.OmitRangeCheck = true;

  // If we're in the right place, emit the bit test header right now.
  if (SwitchMBB == CurMBB) {
    emitBitTestHeader(Block, SwitchMBB);
    Block.Emitted = true;
  }
  return true;
}

/// Lower every case cluster of the switch work item \p W.
///
/// With optimizations enabled the clusters are first reordered by
/// probability. Each cluster is then lowered, in order, by the helper that
/// matches its kind (bit tests, jump table, or range); every cluster but the
/// last gets a freshly created fallthrough block, while the last one falls
/// through to \p DefaultMBB.
///
/// \returns false as soon as one of the per-cluster helpers fails.
bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W,
                                       Value *Cond,
                                       MachineBasicBlock *SwitchMBB,
                                       MachineBasicBlock *DefaultMBB,
                                       MachineIRBuilder &MIB) {
  using namespace SwitchCG;
  MachineFunction *CurMF = FuncInfo.MF;
  // NextMBB is the block laid out right after W.MBB, if any. BBI is left
  // pointing at that position and is used as the insertion point below.
  MachineBasicBlock *NextMBB = nullptr;
  MachineFunction::iterator BBI(W.MBB);
  if (++BBI != FuncInfo.MF->end())
    NextMBB = &*BBI;

  if (EnableOpts) {
    // Here, we order cases by probability so the most likely case will be
    // checked first. However, two clusters can have the same probability in
    // which case their relative ordering is non-deterministic. So we use Low
    // as a tie-breaker as clusters are guaranteed to never overlap.
    llvm::sort(W.FirstCluster, W.LastCluster + 1,
               [](const CaseCluster &a, const CaseCluster &b) {
                 return a.Prob != b.Prob
                            ? a.Prob > b.Prob
                            : a.Low->getValue().slt(b.Low->getValue());
               });

    // Rearrange the case blocks so that the last one falls through if possible
    // without changing the order of probabilities.
    // Walk backwards from the cluster before the last one; stop at the first
    // cluster that is strictly more probable than the last.
    for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster;) {
      --I;
      if (I->Prob > W.LastCluster->Prob)
        break;
      if (I->Kind == CC_Range && I->MBB == NextMBB) {
        std::swap(*I, *W.LastCluster);
        break;
      }
    }
  }

  // Compute total probability.
  // UnhandledProbs starts as the default probability plus every cluster's
  // probability; each cluster's share is subtracted just before it is lowered.
  BranchProbability DefaultProb = W.DefaultProb;
  BranchProbability UnhandledProbs = DefaultProb;
  for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
    UnhandledProbs += I->Prob;

  MachineBasicBlock *CurMBB = W.MBB;
  for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
    bool FallthroughUnreachable = false;
    MachineBasicBlock *Fallthrough;
    if (I == W.LastCluster) {
      // For the last cluster, fall through to the default destination.
      Fallthrough = DefaultMBB;
      // The default counts as unreachable when its first non-PHI, non-debug
      // IR instruction is an 'unreachable'.
      FallthroughUnreachable = isa<UnreachableInst>(
          DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg());
    } else {
      // Any other cluster falls through to a new block that shares CurMBB's
      // IR basic block and is inserted at BBI.
      Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
      CurMF->insert(BBI, Fallthrough);
    }
    UnhandledProbs -= I->Prob;

    // Dispatch on the cluster kind; each helper reports failure with false.
    switch (I->Kind) {
    case CC_BitTests: {
      if (!lowerBitTestWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
                                DefaultProb, UnhandledProbs, I, Fallthrough,
                                FallthroughUnreachable)) {
        LLVM_DEBUG(dbgs() << "Failed to lower bit test for switch");
        return false;
      }
      break;
    }

    case CC_JumpTable: {
      if (!lowerJumpTableWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
                                  UnhandledProbs, I, Fallthrough,
                                  FallthroughUnreachable)) {
        LLVM_DEBUG(dbgs() << "Failed to lower jump table");
        return false;
      }
      break;
    }
    case CC_Range: {
      if (!lowerSwitchRangeWorkItem(I, Cond, Fallthrough,
                                    FallthroughUnreachable, UnhandledProbs,
                                    CurMBB, MIB, SwitchMBB)) {
        LLVM_DEBUG(dbgs() << "Failed to lower switch range");
        return false;
      }
      break;
    }
    }
    // The next cluster is lowered starting from this cluster's fallthrough.
    CurMBB = Fallthrough;
  }

  return true;
}

bool IRTranslator::translateIndirectBr(const User &U,
                                       MachineIRBuilder &MIRBuilder) {
  const IndirectBrInst &BrInst = cast<IndirectBrInst>(U);

  const Register Tgt = getOrCreateVReg(*BrInst.getAddress());
  MIRBuilder.buildBrIndirect(Tgt);

  // Link successors.
  SmallPtrSet<const BasicBlock *, 32> AddedSuccessors;
  MachineBasicBlock &CurBB = MIRBuilder.getMBB();
  for (const BasicBlock *Succ : successors(&BrInst)) {
    // It's legal for indirectbr instructions to have duplicate blocks in the
    // destination list. We don't allow this in MIR. Skip anything that's
    // already a successor.
    if (!AddedSuccessors.insert(Succ).second)
      continue;
    CurBB.addSuccessor(&getMBB(*Succ));
  }

  return true;
}

static bool isSwiftError(const Value *V) {
  if (auto Arg = dyn_cast<Argument>(V))
    return Arg->hasSwiftErrorAttr();
  if (auto AI = dyn_cast<AllocaInst>(V))
    return AI->isSwiftError();
  return false;
}

bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
  const LoadInst &LI = cast<LoadInst>(U);
  if (DL->getTypeStoreSize(LI.getType()) == 0)
    return true;

  ArrayRef<Register> Regs = getOrCreateVRegs(LI);
  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(LI);
  Register Base = getOrCreateVReg(*LI.getPointerOperand());

  Type *OffsetIRTy = DL->getIntPtrType(LI.getPointerOperandType());
  LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);

  if (CLI->supportSwiftError() && isSwiftError(LI.getPointerOperand())) {
    assert(Regs.size() == 1 && "swifterror should be single pointer");
    Register VReg = SwiftError.getOrCreateVRegUseAt(&LI, &MIRBuilder.getMBB(),
                                                    LI.getPointerOperand());
    MIRBuilder.buildCopy(Regs[0], VReg);
    return true;
  }

  auto &TLI = *MF->getSubtarget().getTargetLowering();
  MachineMemOperand::Flags Flags = TLI.getLoadMemOperandFlags(LI, *DL);

  const MDNode *Ranges =
      Regs.size() == 1 ? LI.getMetadata(LLVMContext::MD_range) : nullptr;
  for (unsigned i = 0; i < Regs.size(); ++i) {
    Register Addr;
    MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8);

    MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8);
    Align BaseAlign = getMemOpAlign(LI);
    AAMDNodes AAMetadata;
    LI.getAAMetadata(AAMetadata);
    auto MMO = MF->getMachineMemOperand(
        Ptr, Flags, MRI->getType(Regs[i]).getSizeInBytes(),
        commonAlignment(BaseAlign, Offsets[i] / 8), AAMetadata, Ranges,
        LI.getSyncScopeID(), LI.getOrdering());
    MIRBuilder.buildLoad(Regs[i], Addr, *MMO);
  }

  return true;
}

bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
  const StoreInst &SI = cast<StoreInst>(U);
  if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0)
    return true;

  ArrayRef<Register> Vals = getOrCreateVRegs(*SI.getValueOperand());
  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*SI.getValueOperand());
  Register Base = getOrCreateVReg(*SI.getPointerOperand());

  Type *OffsetIRTy = DL->getIntPtrType(SI.getPointerOperandType());
  LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);

  if (CLI->supportSwiftError() && isSwiftError(SI.getPointerOperand())) {
    assert(Vals.size() == 1 && "swifterror should be single pointer");

    Register VReg = SwiftError.getOrCreateVRegDefAt(&SI, &MIRBuilder.getMBB(),
                                                    SI.getPointerOperand());
    MIRBuilder.buildCopy(VReg, Vals[0]);
    return true;
  }

  auto &TLI = *MF->getSubtarget().getTargetLowering();
  MachineMemOperand::Flags Flags = TLI.getStoreMemOperandFlags(SI, *DL);

  for (unsigned i = 0; i < Vals.size(); ++i) {
    Register Addr;
    MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8);

    MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8);
    Align BaseAlign = getMemOpAlign(SI);
    AAMDNodes AAMetadata;
    SI.getAAMetadata(AAMetadata);
    auto MMO = MF->getMachineMemOperand(
        Ptr, Flags, MRI->getType(Vals[i]).getSizeInBytes(),
        commonAlignment(BaseAlign, Offsets[i] / 8), AAMetadata, nullptr,
        SI.getSyncScopeID(), SI.getOrdering());
    MIRBuilder.buildStore(Vals[i], Addr, *MMO);
  }
  return true;
}

/// Compute the offset, in bits, of the element addressed by the indices of
/// \p U within the type of its first operand. Index lists are taken from
/// extractvalue/insertvalue instructions directly; for any other user,
/// operands 1..N are used as the indices.
static uint64_t getOffsetFromIndices(const User &U, const DataLayout &DL) {
  Type *IndexTy = Type::getInt32Ty(U.getContext());

  // getIndexedOffsetInType is designed for GEPs, so the first index is the
  // usual array element rather than looking into the actual aggregate.
  SmallVector<Value *, 1> GEPIndices;
  GEPIndices.push_back(ConstantInt::get(IndexTy, 0));

  if (const auto *Extract = dyn_cast<ExtractValueInst>(&U)) {
    for (auto Index : Extract->indices())
      GEPIndices.push_back(ConstantInt::get(IndexTy, Index));
  } else if (const auto *Insert = dyn_cast<InsertValueInst>(&U)) {
    for (auto Index : Insert->indices())
      GEPIndices.push_back(ConstantInt::get(IndexTy, Index));
  } else {
    for (unsigned OpIdx = 1; OpIdx < U.getNumOperands(); ++OpIdx)
      GEPIndices.push_back(U.getOperand(OpIdx));
  }

  // The data layout reports bytes; callers work in bits.
  const Value *Aggregate = U.getOperand(0);
  const auto ByteOffset = static_cast<uint64_t>(
      DL.getIndexedOffsetInType(Aggregate->getType(), GEPIndices));
  return 8 * ByteOffset;
}

bool IRTranslator::translateExtractValue(const User &U,
                                         MachineIRBuilder &MIRBuilder) {
  const Value *Src = U.getOperand(0);
  uint64_t Offset = getOffsetFromIndices(U, *DL);
  ArrayRef<Register> SrcRegs = getOrCreateVRegs(*Src);
  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*Src);
  unsigned Idx = llvm::lower_bound(Offsets, Offset) - Offsets.begin();
  auto &DstRegs = allocateVRegs(U);

  for (unsigned i = 0; i < DstRegs.size(); ++i)
    DstRegs[i] = SrcRegs[Idx++];

  return true;
}

bool IRTranslator::translateInsertValue(const User &U,
                                        MachineIRBuilder &MIRBuilder) {
  const Value *Src = U.getOperand(0);
  uint64_t Offset = getOffsetFromIndices(U, *DL);
  auto &DstRegs = allocateVRegs(U);
  ArrayRef<uint64_t> DstOffsets = *VMap.getOffsets(U);
  ArrayRef<Register> SrcRegs = getOrCreateVRegs(*Src);
  ArrayRef<Register> InsertedRegs = getOrCreateVRegs(*U.getOperand(1));
  auto InsertedIt = InsertedRegs.begin();

  for (unsigned i = 0; i < DstRegs.size(); ++i) {
    if (DstOffsets[i] >= Offset && InsertedIt != InsertedRegs.end())
      DstRegs[i] = *InsertedIt++;
    else
      DstRegs[i] = SrcRegs[i];
  }

  return true;
}

bool IRTranslator::translateSelect(const User &U,
                                   MachineIRBuilder &MIRBuilder) {
  Register Tst = getOrCreateVReg(*U.getOperand(0));
  ArrayRef<Register> ResRegs = getOrCreateVRegs(U);
  ArrayRef<Register> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
  ArrayRef<Register> Op1Regs = getOrCreateVRegs(*U.getOperand(2));

  uint16_t Flags = 0;
  if (const SelectInst *SI = dyn_cast<SelectInst>(&U))
    Flags = MachineInstr::copyFlagsFromInstruction(*SI);

  for (unsigned i = 0; i < ResRegs.size(); ++i) {
    MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i], Flags);
  }

  return true;
}

bool IRTranslator::translateCopy(const User &U, const Value &V,
                                 MachineIRBuilder &MIRBuilder) {
  Register Src = getOrCreateVReg(V);
  auto &Regs = *VMap.getVRegs(U);
  if (Regs.empty()) {
    Regs.push_back(Src);
    VMap.getOffsets(U)->push_back(0);
  } else {
    // If we already assigned a vreg for this instruction, we can't change that.
    // Emit a copy to satisfy the users we already emitted.
    MIRBuilder.buildCopy(Regs[0], Src);
  }
  return true;
}

bool IRTranslator::translateBitCast(const User &U,
                                    MachineIRBuilder &MIRBuilder) {
  // If we're bitcasting to the source type, we can reuse the source vreg.
  if (getLLTForType(*U.getOperand(0)->getType(), *DL) ==
      getLLTForType(*U.getType(), *DL))
    return translateCopy(U, *U.getOperand(0), MIRBuilder);

  return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder);
}

bool IRTranslator::translateCast(unsigned Opcode, const User &U,
                                 MachineIRBuilder &MIRBuilder) {
  Register Op = getOrCreateVReg(*U.getOperand(0));
  Register Res = getOrCreateVReg(U);
  MIRBuilder.buildInstr(Opcode, {Res}, {Op});
  return true;
}

/// Translate a getelementptr into G_PTR_ADD arithmetic on the base pointer.
///
/// Struct field offsets and scalar constant indices are folded into a running
/// byte offset that is only materialized when a non-constant index is reached
/// or at the end. Non-constant indices are sign-extended or truncated to the
/// pointer-sized offset type and scaled by the element's alloc size. If no
/// trailing constant offset remains, the result is a COPY of the last
/// computed pointer. Always returns true.
bool IRTranslator::translateGetElementPtr(const User &U,
                                          MachineIRBuilder &MIRBuilder) {
  Value &Op0 = *U.getOperand(0);
  Register BaseReg = getOrCreateVReg(Op0);
  Type *PtrIRTy = Op0.getType();
  LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
  Type *OffsetIRTy = DL->getIntPtrType(PtrIRTy);
  LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);

  // Normalize Vector GEP - all scalar operands should be converted to the
  // splat vector.
  // VectorWidth stays 0 for a scalar GEP; the cast below assumes any vector
  // result type is a fixed-width vector.
  unsigned VectorWidth = 0;
  if (auto *VT = dyn_cast<VectorType>(U.getType()))
    VectorWidth = cast<FixedVectorType>(VT)->getNumElements();

  // We might need to splat the base pointer into a vector if the offsets
  // are vectors.
  if (VectorWidth && !PtrTy.isVector()) {
    BaseReg =
        MIRBuilder.buildSplatVector(LLT::vector(VectorWidth, PtrTy), BaseReg)
            .getReg(0);
    // Recompute the pointer and offset types for the vector form.
    PtrIRTy = FixedVectorType::get(PtrIRTy, VectorWidth);
    PtrTy = getLLTForType(*PtrIRTy, *DL);
    OffsetIRTy = DL->getIntPtrType(PtrIRTy);
    OffsetTy = getLLTForType(*OffsetIRTy, *DL);
  }

  // Accumulated constant byte offset not yet applied to BaseReg.
  int64_t Offset = 0;
  for (gep_type_iterator GTI = gep_type_begin(&U), E = gep_type_end(&U);
       GTI != E; ++GTI) {
    const Value *Idx = GTI.getOperand();
    if (StructType *StTy = GTI.getStructTypeOrNull()) {
      // Struct index: add the field's offset from the struct layout.
      unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
      Offset += DL->getStructLayout(StTy)->getElementOffset(Field);
      continue;
    } else {
      uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());

      // If this is a scalar constant index, fold it into the running offset.
      // Only ConstantInt is recognized here; any other index, including a
      // vector of constants, takes the general path below.
      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
        Offset += ElementSize * CI->getSExtValue();
        continue;
      }

      // Flush the pending constant offset before applying a variable index.
      if (Offset != 0) {
        auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset);
        BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, OffsetMIB.getReg(0))
                      .getReg(0);
        Offset = 0;
      }

      Register IdxReg = getOrCreateVReg(*Idx);
      LLT IdxTy = MRI->getType(IdxReg);
      if (IdxTy != OffsetTy) {
        // A scalar index in a vector GEP is splatted first, keeping its
        // element type, and then extended or truncated to OffsetTy.
        if (!IdxTy.isVector() && VectorWidth) {
          IdxReg = MIRBuilder.buildSplatVector(
            OffsetTy.changeElementType(IdxTy), IdxReg).getReg(0);
        }

        IdxReg = MIRBuilder.buildSExtOrTrunc(OffsetTy, IdxReg).getReg(0);
      }

      // N = N + Idx * ElementSize;
      // Avoid doing it for ElementSize of 1.
      Register GepOffsetReg;
      if (ElementSize != 1) {
        auto ElementSizeMIB = MIRBuilder.buildConstant(
            getLLTForType(*OffsetIRTy, *DL), ElementSize);
        GepOffsetReg =
            MIRBuilder.buildMul(OffsetTy, IdxReg, ElementSizeMIB).getReg(0);
      } else
        GepOffsetReg = IdxReg;

      BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, GepOffsetReg).getReg(0);
    }
  }

  // Apply any remaining constant offset directly into the result vreg.
  if (Offset != 0) {
    auto OffsetMIB =
        MIRBuilder.buildConstant(OffsetTy, Offset);
    MIRBuilder.buildPtrAdd(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0));
    return true;
  }

  // Nothing left to add: the result is just the current pointer.
  MIRBuilder.buildCopy(getOrCreateVReg(U), BaseReg);
  return true;
}

bool IRTranslator::translateMemFunc(const CallInst &CI,
                                    MachineIRBuilder &MIRBuilder,
                                    unsigned Opcode) {

  // If the source is undef, then just emit a nop.
  if (isa<UndefValue>(CI.getArgOperand(1)))
    return true;

  SmallVector<Register, 3> SrcRegs;

  unsigned MinPtrSize = UINT_MAX;
  for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI) {
    Register SrcReg = getOrCreateVReg(**AI);
    LLT SrcTy = MRI->getType(SrcReg);
    if (SrcTy.isPointer())
      MinPtrSize = std::min(SrcTy.getSizeInBits(), MinPtrSize);
    SrcRegs.push_back(SrcReg);
  }

  LLT SizeTy = LLT::scalar(MinPtrSize);

  // The size operand should be the minimum of the pointer sizes.
  Register &SizeOpReg = SrcRegs[SrcRegs.size() - 1];
  if (MRI->getType(SizeOpReg) != SizeTy)
    SizeOpReg = MIRBuilder.buildZExtOrTrunc(SizeTy, SizeOpReg).getReg(0);

  auto ICall = MIRBuilder.buildInstr(Opcode);
  for (Register SrcReg : SrcRegs)
    ICall.addUse(SrcReg);

  Align DstAlign;
  Align SrcAlign;
  unsigned IsVol =
      cast<ConstantInt>(CI.getArgOperand(CI.getNumArgOperands() - 1))
          ->getZExtValue();

  if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) {
    DstAlign = MCI->getDestAlign().valueOrOne();
    SrcAlign = MCI->getSourceAlign().valueOrOne();
  } else if (auto *MMI = dyn_cast<MemMoveInst>(&CI)) {
    DstAlign = MMI->getDestAlign().valueOrOne();
    SrcAlign = MMI->getSourceAlign().valueOrOne();
  } else {
    auto *MSI = cast<MemSetInst>(&CI);
    DstAlign = MSI->getDestAlign().valueOrOne();
  }

  // We need to propagate the tail call flag from the IR inst as an argument.
  // Otherwise, we have to pessimize and assume later that we cannot tail call
  // any memory intrinsics.
  ICall.addImm(CI.isTailCall() ? 1 : 0);

  // Create mem operands to store the alignment and volatile info.
  auto VolFlag = IsVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
  ICall.addMemOperand(MF->getMachineMemOperand(
      MachinePointerInfo(CI.getArgOperand(0)),
      MachineMemOperand::MOStore | VolFlag, 1, DstAlign));
  if (Opcode != TargetOpcode::G_MEMSET)
    ICall.addMemOperand(MF->getMachineMemOperand(
        MachinePointerInfo(CI.getArgOperand(1)),
        MachineMemOperand::MOLoad | VolFlag, 1, SrcAlign));

  return true;
}

/// Emit a LOAD_STACK_GUARD defining \p DstReg, after constraining \p DstReg to
/// the target's pointer register class. If the target lowering provides a
/// stack guard global, a matching memory operand is attached to the load.
void IRTranslator::getStackGuard(Register DstReg,
                                 MachineIRBuilder &MIRBuilder) {
  const TargetRegisterInfo *RegInfo = MF->getSubtarget().getRegisterInfo();
  MRI->setRegClass(DstReg, RegInfo->getPointerRegClass(*MF));
  auto GuardLoad =
      MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD, {DstReg}, {});

  auto &TLI = *MF->getSubtarget().getTargetLowering();
  if (Value *GuardGlobal =
          TLI.getSDagStackGuard(*MF->getFunction().getParent())) {
    MachinePointerInfo GuardPtrInfo(GuardGlobal);
    auto MemFlags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
                    MachineMemOperand::MODereferenceable;
    MachineMemOperand *GuardMemRef = MF->getMachineMemOperand(
        GuardPtrInfo, MemFlags, DL->getPointerSizeInBits() / 8,
        DL->getPointerABIAlignment(0));
    GuardLoad.setMemRefs({GuardMemRef});
  }
}

bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
                                              MachineIRBuilder &MIRBuilder) {
  ArrayRef<Register> ResRegs = getOrCreateVRegs(CI);
  MIRBuilder.buildInstr(
      Op, {ResRegs[0], ResRegs[1]},
      {getOrCreateVReg(*CI.getOperand(0)), getOrCreateVReg(*CI.getOperand(1))});

  return true;
}

bool IRTranslator::translateFixedPointIntrinsic(unsigned Op, const CallInst &CI,
                                                MachineIRBuilder &MIRBuilder) {
  Register Dst = getOrCreateVReg(CI);
  Register Src0 = getOrCreateVReg(*CI.getOperand(0));
  Register Src1 = getOrCreateVReg(*CI.getOperand(1));
  uint64_t Scale = cast<ConstantInt>(CI.getOperand(2))->getZExtValue();
  MIRBuilder.buildInstr(Op, {Dst}, { Src0, Src1, Scale });
  return true;
}

/// Map an intrinsic that translates one-to-one onto a generic opcode to that
/// opcode. Returns Intrinsic::not_intrinsic for every other intrinsic ID.
unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
  switch (ID) {
  case Intrinsic::bswap:
    return TargetOpcode::G_BSWAP;
  case Intrinsic::bitreverse:
    return TargetOpcode::G_BITREVERSE;
  case Intrinsic::fshl:
    return TargetOpcode::G_FSHL;
  case Intrinsic::fshr:
    return TargetOpcode::G_FSHR;
  case Intrinsic::ceil:
    return TargetOpcode::G_FCEIL;
  case Intrinsic::cos:
    return TargetOpcode::G_FCOS;
  case Intrinsic::ctpop:
    return TargetOpcode::G_CTPOP;
  case Intrinsic::exp:
    return TargetOpcode::G_FEXP;
  case Intrinsic::exp2:
    return TargetOpcode::G_FEXP2;
  case Intrinsic::fabs:
    return TargetOpcode::G_FABS;
  case Intrinsic::copysign:
    return TargetOpcode::G_FCOPYSIGN;
  case Intrinsic::minnum:
    return TargetOpcode::G_FMINNUM;
  case Intrinsic::maxnum:
    return TargetOpcode::G_FMAXNUM;
  case Intrinsic::minimum:
    return TargetOpcode::G_FMINIMUM;
  case Intrinsic::maximum:
    return TargetOpcode::G_FMAXIMUM;
  case Intrinsic::canonicalize:
    return TargetOpcode::G_FCANONICALIZE;
  case Intrinsic::floor:
    return TargetOpcode::G_FFLOOR;
  case Intrinsic::fma:
    return TargetOpcode::G_FMA;
  case Intrinsic::log:
    return TargetOpcode::G_FLOG;
  case Intrinsic::log2:
    return TargetOpcode::G_FLOG2;
  case Intrinsic::log10:
    return TargetOpcode::G_FLOG10;
  case Intrinsic::nearbyint:
    return TargetOpcode::G_FNEARBYINT;
  case Intrinsic::pow:
    return TargetOpcode::G_FPOW;
  case Intrinsic::powi:
    return TargetOpcode::G_FPOWI;
  case Intrinsic::rint:
    return TargetOpcode::G_FRINT;
  case Intrinsic::round:
    return TargetOpcode::G_INTRINSIC_ROUND;
  case Intrinsic::roundeven:
    return TargetOpcode::G_INTRINSIC_ROUNDEVEN;
  case Intrinsic::sin:
    return TargetOpcode::G_FSIN;
  case Intrinsic::sqrt:
    return TargetOpcode::G_FSQRT;
  case Intrinsic::trunc:
    return TargetOpcode::G_INTRINSIC_TRUNC;
  case Intrinsic::readcyclecounter:
    return TargetOpcode::G_READCYCLECOUNTER;
  case Intrinsic::ptrmask:
    return TargetOpcode::G_PTRMASK;
  case Intrinsic::lrint:
    return TargetOpcode::G_INTRINSIC_LRINT;
  // FADD/FMUL require checking the FMF, so are handled elsewhere.
  case Intrinsic::vector_reduce_fmin:
    return TargetOpcode::G_VECREDUCE_FMIN;
  case Intrinsic::vector_reduce_fmax:
    return TargetOpcode::G_VECREDUCE_FMAX;
  case Intrinsic::vector_reduce_add:
    return TargetOpcode::G_VECREDUCE_ADD;
  case Intrinsic::vector_reduce_mul:
    return TargetOpcode::G_VECREDUCE_MUL;
  case Intrinsic::vector_reduce_and:
    return TargetOpcode::G_VECREDUCE_AND;
  case Intrinsic::vector_reduce_or:
    return TargetOpcode::G_VECREDUCE_OR;
  case Intrinsic::vector_reduce_xor:
    return TargetOpcode::G_VECREDUCE_XOR;
  case Intrinsic::vector_reduce_smax:
    return TargetOpcode::G_VECREDUCE_SMAX;
  case Intrinsic::vector_reduce_smin:
    return TargetOpcode::G_VECREDUCE_SMIN;
  case Intrinsic::vector_reduce_umax:
    return TargetOpcode::G_VECREDUCE_UMAX;
  case Intrinsic::vector_reduce_umin:
    return TargetOpcode::G_VECREDUCE_UMIN;
  default:
    // Not a simple intrinsic.
    return Intrinsic::not_intrinsic;
  }
}

bool IRTranslator::translateSimpleIntrinsic(const CallInst &CI,
                                            Intrinsic::ID ID,
                                            MachineIRBuilder &MIRBuilder) {

  unsigned Op = getSimpleIntrinsicOpcode(ID);

  // Is this a simple intrinsic?
  if (Op == Intrinsic::not_intrinsic)
    return false;

  // Yes. Let's translate it.
  SmallVector<llvm::SrcOp, 4> VRegs;
  for (auto &Arg : CI.arg_operands())
    VRegs.push_back(getOrCreateVReg(*Arg));

  MIRBuilder.buildInstr(Op, {getOrCreateVReg(CI)}, VRegs,
                        MachineInstr::copyFlagsFromInstruction(CI));
  return true;
}

/// Map a constrained floating-point intrinsic onto its G_STRICT_* opcode.
/// \returns 0 for any intrinsic that has no entry in the table below.
// TODO: Include ConstrainedOps.def when all strict instructions are defined.
static unsigned getConstrainedOpcode(Intrinsic::ID ID) {
  unsigned StrictOpc = 0;
  switch (ID) {
  case Intrinsic::experimental_constrained_fadd:
    StrictOpc = TargetOpcode::G_STRICT_FADD;
    break;
  case Intrinsic::experimental_constrained_fsub:
    StrictOpc = TargetOpcode::G_STRICT_FSUB;
    break;
  case Intrinsic::experimental_constrained_fmul:
    StrictOpc = TargetOpcode::G_STRICT_FMUL;
    break;
  case Intrinsic::experimental_constrained_fdiv:
    StrictOpc = TargetOpcode::G_STRICT_FDIV;
    break;
  case Intrinsic::experimental_constrained_frem:
    StrictOpc = TargetOpcode::G_STRICT_FREM;
    break;
  case Intrinsic::experimental_constrained_fma:
    StrictOpc = TargetOpcode::G_STRICT_FMA;
    break;
  case Intrinsic::experimental_constrained_sqrt:
    StrictOpc = TargetOpcode::G_STRICT_FSQRT;
    break;
  default:
    // Not (yet) representable as a strict generic opcode.
    break;
  }
  return StrictOpc;
}

bool IRTranslator::translateConstrainedFPIntrinsic(
  const ConstrainedFPIntrinsic &FPI, MachineIRBuilder &MIRBuilder) {
  fp::ExceptionBehavior EB = FPI.getExceptionBehavior().getValue();

  unsigned Opcode = getConstrainedOpcode(FPI.getIntrinsicID());
  if (!Opcode)
    return false;

  unsigned Flags = MachineInstr::copyFlagsFromInstruction(FPI);
  if (EB == fp::ExceptionBehavior::ebIgnore)
    Flags |= MachineInstr::NoFPExcept;

  SmallVector<llvm::SrcOp, 4> VRegs;
  VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(0)));
  if (!FPI.isUnaryOp())
    VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(1)));
  if (FPI.isTernaryOp())
    VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(2)));

  MIRBuilder.buildInstr(Opcode, {getOrCreateVReg(FPI)}, VRegs, Flags);
  return true;
}

/// Translate a call to intrinsic \p ID for which this translator has dedicated
/// handling. Intrinsics with a direct generic opcode are tried first through
/// translateSimpleIntrinsic(); everything else is dispatched by the switch
/// below.
///
/// \returns true if the call was handled here (this includes intrinsics that
/// are deliberately dropped without emitting anything), false otherwise.
bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
                                           MachineIRBuilder &MIRBuilder) {

  // If this is a simple intrinsic (that is, we just need to add a def of
  // a vreg, and uses for each arg operand), then translate it.
  if (translateSimpleIntrinsic(CI, ID, MIRBuilder))
    return true;

  switch (ID) {
  default:
    break;
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end: {
    // No stack colouring in O0, discard region information.
    if (MF->getTarget().getOptLevel() == CodeGenOpt::None)
      return true;

    unsigned Op = ID == Intrinsic::lifetime_start ? TargetOpcode::LIFETIME_START
                                                  : TargetOpcode::LIFETIME_END;

    // Get the underlying objects for the location passed on the lifetime
    // marker.
    SmallVector<const Value *, 4> Allocas;
    getUnderlyingObjects(CI.getArgOperand(1), Allocas);

    // Iterate over each underlying object, creating lifetime markers for each
    // static alloca. Quit if we find a non-static alloca.
    for (const Value *V : Allocas) {
      const AllocaInst *AI = dyn_cast<AllocaInst>(V);
      if (!AI)
        continue;

      if (!AI->isStaticAlloca())
        return true;

      MIRBuilder.buildInstr(Op).addFrameIndex(getOrCreateFrameIndex(*AI));
    }
    return true;
  }
  case Intrinsic::dbg_declare: {
    const DbgDeclareInst &DI = cast<DbgDeclareInst>(CI);
    assert(DI.getVariable() && "Missing variable");

    // Without a usable address there is nothing to describe; drop the
    // intrinsic but still report it as handled.
    const Value *Address = DI.getAddress();
    if (!Address || isa<UndefValue>(Address)) {
      LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
      return true;
    }

    assert(DI.getVariable()->isValidLocationForIntrinsic(
               MIRBuilder.getDebugLoc()) &&
           "Expected inlined-at fields to agree");
    auto AI = dyn_cast<AllocaInst>(Address);
    if (AI && AI->isStaticAlloca()) {
      // Static allocas are tracked at the MF level, no need for DBG_VALUE
      // instructions (in fact, they get ignored if they *do* exist).
      MF->setVariableDbgInfo(DI.getVariable(), DI.getExpression(),
                             getOrCreateFrameIndex(*AI), DI.getDebugLoc());
    } else {
      // A dbg.declare describes the address of a source variable, so lower it
      // into an indirect DBG_VALUE.
      MIRBuilder.buildIndirectDbgValue(getOrCreateVReg(*Address),
                                       DI.getVariable(), DI.getExpression());
    }
    return true;
  }
  case Intrinsic::dbg_label: {
    const DbgLabelInst &DI = cast<DbgLabelInst>(CI);
    assert(DI.getLabel() && "Missing label");

    assert(DI.getLabel()->isValidLocationForIntrinsic(
               MIRBuilder.getDebugLoc()) &&
           "Expected inlined-at fields to agree");

    MIRBuilder.buildDbgLabel(DI.getLabel());
    return true;
  }
  case Intrinsic::vaend:
    // No target I know of cares about va_end. Certainly no in-tree target
    // does. Simplest intrinsic ever!
    return true;
  case Intrinsic::vastart: {
    // G_VASTART takes the va_list pointer and carries a store memory operand
    // sized to the target's va_list (converted from bits to bytes).
    auto &TLI = *MF->getSubtarget().getTargetLowering();
    Value *Ptr = CI.getArgOperand(0);
    unsigned ListSize = TLI.getVaListSizeInBits(*DL) / 8;

    // FIXME: Get alignment
    MIRBuilder.buildInstr(TargetOpcode::G_VASTART, {}, {getOrCreateVReg(*Ptr)})
        .addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Ptr),
                                                MachineMemOperand::MOStore,
                                                ListSize, Align(1)));
    return true;
  }
  case Intrinsic::dbg_value: {
    // This form of DBG_VALUE is target-independent.
    const DbgValueInst &DI = cast<DbgValueInst>(CI);
    const Value *V = DI.getValue();
    assert(DI.getVariable()->isValidLocationForIntrinsic(
               MIRBuilder.getDebugLoc()) &&
           "Expected inlined-at fields to agree");
    if (!V) {
      // Currently the optimizer can produce this; insert an undef to
      // help debugging.  Probably the optimizer should not do this.
      MIRBuilder.buildIndirectDbgValue(0, DI.getVariable(), DI.getExpression());
    } else if (const auto *CI = dyn_cast<Constant>(V)) {
      // Note: this CI (the constant) shadows the CallInst parameter.
      MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression());
    } else {
      for (Register Reg : getOrCreateVRegs(*V)) {
        // FIXME: This does not handle register-indirect values at offset 0. The
        // direct/indirect thing shouldn't really be handled by something as
        // implicit as reg+noreg vs reg+imm in the first place, but it seems
        // pretty baked in right now.
        MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression());
      }
    }
    return true;
  }
  // Arithmetic-with-overflow, saturating, min/max and fixed-point intrinsics
  // are forwarded to shared helpers together with the generic opcode to emit.
  case Intrinsic::uadd_with_overflow:
    return translateOverflowIntrinsic(CI, TargetOpcode::G_UADDO, MIRBuilder);
  case Intrinsic::sadd_with_overflow:
    return translateOverflowIntrinsic(CI, TargetOpcode::G_SADDO, MIRBuilder);
  case Intrinsic::usub_with_overflow:
    return translateOverflowIntrinsic(CI, TargetOpcode::G_USUBO, MIRBuilder);
  case Intrinsic::ssub_with_overflow:
    return translateOverflowIntrinsic(CI, TargetOpcode::G_SSUBO, MIRBuilder);
  case Intrinsic::umul_with_overflow:
    return translateOverflowIntrinsic(CI, TargetOpcode::G_UMULO, MIRBuilder);
  case Intrinsic::smul_with_overflow:
    return translateOverflowIntrinsic(CI, TargetOpcode::G_SMULO, MIRBuilder);
  case Intrinsic::uadd_sat:
    return translateBinaryOp(TargetOpcode::G_UADDSAT, CI, MIRBuilder);
  case Intrinsic::sadd_sat:
    return translateBinaryOp(TargetOpcode::G_SADDSAT, CI, MIRBuilder);
  case Intrinsic::usub_sat:
    return translateBinaryOp(TargetOpcode::G_USUBSAT, CI, MIRBuilder);
  case Intrinsic::ssub_sat:
    return translateBinaryOp(TargetOpcode::G_SSUBSAT, CI, MIRBuilder);
  case Intrinsic::ushl_sat:
    return translateBinaryOp(TargetOpcode::G_USHLSAT, CI, MIRBuilder);
  case Intrinsic::sshl_sat:
    return translateBinaryOp(TargetOpcode::G_SSHLSAT, CI, MIRBuilder);
  case Intrinsic::umin:
    return translateBinaryOp(TargetOpcode::G_UMIN, CI, MIRBuilder);
  case Intrinsic::umax:
    return translateBinaryOp(TargetOpcode::G_UMAX, CI, MIRBuilder);
  case Intrinsic::smin:
    return translateBinaryOp(TargetOpcode::G_SMIN, CI, MIRBuilder);
  case Intrinsic::smax:
    return translateBinaryOp(TargetOpcode::G_SMAX, CI, MIRBuilder);
  case Intrinsic::abs:
    // TODO: Preserve "int min is poison" arg in GMIR?
    return translateUnaryOp(TargetOpcode::G_ABS, CI, MIRBuilder);
  case Intrinsic::smul_fix:
    return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIX, CI, MIRBuilder);
  case Intrinsic::umul_fix:
    return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIX, CI, MIRBuilder);
  case Intrinsic::smul_fix_sat:
    return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIXSAT, CI, MIRBuilder);
  case Intrinsic::umul_fix_sat:
    return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIXSAT, CI, MIRBuilder);
  case Intrinsic::sdiv_fix:
    return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIX, CI, MIRBuilder);
  case Intrinsic::udiv_fix:
    return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIX, CI, MIRBuilder);
  case Intrinsic::sdiv_fix_sat:
    return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIXSAT, CI, MIRBuilder);
  case Intrinsic::udiv_fix_sat:
    return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIXSAT, CI, MIRBuilder);
  case Intrinsic::fmuladd: {
    // Emit a fused multiply-add only when FP op fusion is not in strict mode
    // and the target reports FMA as faster for this type; otherwise emit a
    // separate multiply followed by an add. IR flags are copied either way.
    const TargetMachine &TM = MF->getTarget();
    const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
    Register Dst = getOrCreateVReg(CI);
    Register Op0 = getOrCreateVReg(*CI.getArgOperand(0));
    Register Op1 = getOrCreateVReg(*CI.getArgOperand(1));
    Register Op2 = getOrCreateVReg(*CI.getArgOperand(2));
    if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
        TLI.isFMAFasterThanFMulAndFAdd(*MF,
                                       TLI.getValueType(*DL, CI.getType()))) {
      // TODO: Revisit this to see if we should move this part of the
      // lowering to the combiner.
      MIRBuilder.buildFMA(Dst, Op0, Op1, Op2,
                          MachineInstr::copyFlagsFromInstruction(CI));
    } else {
      LLT Ty = getLLTForType(*CI.getType(), *DL);
      auto FMul = MIRBuilder.buildFMul(
          Ty, Op0, Op1, MachineInstr::copyFlagsFromInstruction(CI));
      MIRBuilder.buildFAdd(Dst, FMul, Op2,
                           MachineInstr::copyFlagsFromInstruction(CI));
    }
    return true;
  }
  case Intrinsic::convert_from_fp16:
    // FIXME: This intrinsic should probably be removed from the IR.
    MIRBuilder.buildFPExt(getOrCreateVReg(CI),
                          getOrCreateVReg(*CI.getArgOperand(0)),
                          MachineInstr::copyFlagsFromInstruction(CI));
    return true;
  case Intrinsic::convert_to_fp16:
    // FIXME: This intrinsic should probably be removed from the IR.
    MIRBuilder.buildFPTrunc(getOrCreateVReg(CI),
                            getOrCreateVReg(*CI.getArgOperand(0)),
                            MachineInstr::copyFlagsFromInstruction(CI));
    return true;
  case Intrinsic::memcpy:
    return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY);
  case Intrinsic::memmove:
    return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMMOVE);
  case Intrinsic::memset:
    return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMSET);
  case Intrinsic::eh_typeid_for: {
    // The result is the constant type id that the MachineFunction assigns to
    // the referenced type info global.
    GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0));
    Register Reg = getOrCreateVReg(CI);
    unsigned TypeID = MF->getTypeIDFor(GV);
    MIRBuilder.buildConstant(Reg, TypeID);
    return true;
  }
  case Intrinsic::objectsize:
    llvm_unreachable("llvm.objectsize.* should have been lowered already");

  case Intrinsic::is_constant:
    llvm_unreachable("llvm.is.constant.* should have been lowered already");

  case Intrinsic::stackguard:
    getStackGuard(getOrCreateVReg(CI), MIRBuilder);
    return true;
  case Intrinsic::stackprotector: {
    // Materialize the guard value via getStackGuard() into a fresh register,
    // record the slot's frame index as the stack protector index, and store
    // the guard into the slot with a volatile store.
    LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
    Register GuardVal = MRI->createGenericVirtualRegister(PtrTy);
    getStackGuard(GuardVal, MIRBuilder);

    AllocaInst *Slot = cast<AllocaInst>(CI.getArgOperand(1));
    int FI = getOrCreateFrameIndex(*Slot);
    MF->getFrameInfo().setStackProtectorIndex(FI);

    MIRBuilder.buildStore(
        GuardVal, getOrCreateVReg(*Slot),
        *MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
                                  MachineMemOperand::MOStore |
                                      MachineMemOperand::MOVolatile,
                                  PtrTy.getSizeInBits() / 8, Align(8)));
    return true;
  }
  case Intrinsic::stacksave: {
    // Save the stack pointer to the location provided by the intrinsic.
    Register Reg = getOrCreateVReg(CI);
    Register StackPtr = MF->getSubtarget()
                            .getTargetLowering()
                            ->getStackPointerRegisterToSaveRestore();

    // If the target doesn't specify a stack pointer, then fall back.
    if (!StackPtr)
      return false;

    MIRBuilder.buildCopy(Reg, StackPtr);
    return true;
  }
  case Intrinsic::stackrestore: {
    // Restore the stack pointer from the location provided by the intrinsic.
    Register Reg = getOrCreateVReg(*CI.getArgOperand(0));
    Register StackPtr = MF->getSubtarget()
                            .getTargetLowering()
                            ->getStackPointerRegisterToSaveRestore();

    // If the target doesn't specify a stack pointer, then fall back.
    if (!StackPtr)
      return false;

    MIRBuilder.buildCopy(StackPtr, Reg);
    return true;
  }
  case Intrinsic::cttz:
  case Intrinsic::ctlz: {
    // The second argument is a constant flag: zero selects the plain opcode,
    // non-zero selects the *_ZERO_UNDEF variant.
    ConstantInt *Cst = cast<ConstantInt>(CI.getArgOperand(1));
    bool isTrailing = ID == Intrinsic::cttz;
    unsigned Opcode = isTrailing
                          ? Cst->isZero() ? TargetOpcode::G_CTTZ
                                          : TargetOpcode::G_CTTZ_ZERO_UNDEF
                          : Cst->isZero() ? TargetOpcode::G_CTLZ
                                          : TargetOpcode::G_CTLZ_ZERO_UNDEF;
    MIRBuilder.buildInstr(Opcode, {getOrCreateVReg(CI)},
                          {getOrCreateVReg(*CI.getArgOperand(0))});
    return true;
  }
  case Intrinsic::invariant_start: {
    // Only an undef of the pointer operand's type is emitted.
    // NOTE(review): the undef is defined in a freshly created register rather
    // than in CI's own vreg -- confirm that this is intended.
    LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
    Register Undef = MRI->createGenericVirtualRegister(PtrTy);
    MIRBuilder.buildUndef(Undef);
    return true;
  }
  case Intrinsic::invariant_end:
    // Nothing is emitted for invariant.end.
    return true;
  case Intrinsic::expect:
  case Intrinsic::annotation:
  case Intrinsic::ptr_annotation:
  case Intrinsic::launder_invariant_group:
  case Intrinsic::strip_invariant_group: {
    // Drop the intrinsic, but forward the value.
    MIRBuilder.buildCopy(getOrCreateVReg(CI),
                         getOrCreateVReg(*CI.getArgOperand(0)));
    return true;
  }
  case Intrinsic::assume:
  case Intrinsic::experimental_noalias_scope_decl:
  case Intrinsic::var_annotation:
  case Intrinsic::sideeffect:
    // Discard annotate attributes, assumptions, and artificial side-effects.
    return true;
  case Intrinsic::read_volatile_register:
  case Intrinsic::read_register: {
    // The register to read is identified by the metadata node passed as the
    // first argument; it is attached as a metadata operand.
    Value *Arg = CI.getArgOperand(0);
    MIRBuilder
        .buildInstr(TargetOpcode::G_READ_REGISTER, {getOrCreateVReg(CI)}, {})
        .addMetadata(cast<MDNode>(cast<MetadataAsValue>(Arg)->getMetadata()));
    return true;
  }
  case Intrinsic::write_register: {
    // Operands: the metadata node naming the register, then the value to
    // write.
    Value *Arg = CI.getArgOperand(0);
    MIRBuilder.buildInstr(TargetOpcode::G_WRITE_REGISTER)
      .addMetadata(cast<MDNode>(cast<MetadataAsValue>(Arg)->getMetadata()))
      .addUse(getOrCreateVReg(*CI.getArgOperand(1)));
    return true;
  }
  case Intrinsic::localescape: {
    MachineBasicBlock &EntryMBB = MF->front();
    StringRef EscapedName = GlobalValue::dropLLVMManglingEscape(MF->getName());

    // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
    // is the same on all targets.
    for (unsigned Idx = 0, E = CI.getNumArgOperands(); Idx < E; ++Idx) {
      Value *Arg = CI.getArgOperand(Idx)->stripPointerCasts();
      if (isa<ConstantPointerNull>(Arg))
        continue; // Skip null pointers. They represent a hole in index space.

      int FI = getOrCreateFrameIndex(*cast<AllocaInst>(Arg));
      MCSymbol *FrameAllocSym =
          MF->getMMI().getContext().getOrCreateFrameAllocSymbol(EscapedName,
                                                                Idx);

      // This should be inserted at the start of the entry block.
      auto LocalEscape =
          MIRBuilder.buildInstrNoInsert(TargetOpcode::LOCAL_ESCAPE)
              .addSym(FrameAllocSym)
              .addFrameIndex(FI);

      EntryMBB.insert(EntryMBB.begin(), LocalEscape);
    }

    return true;
  }
  case Intrinsic::vector_reduce_fadd:
  case Intrinsic::vector_reduce_fmul: {
    // Need to check for the reassoc flag to decide whether we want a
    // sequential reduction opcode or not.
    Register Dst = getOrCreateVReg(CI);
    Register ScalarSrc = getOrCreateVReg(*CI.getArgOperand(0));
    Register VecSrc = getOrCreateVReg(*CI.getArgOperand(1));
    unsigned Opc = 0;
    if (!CI.hasAllowReassoc()) {
      // The sequential ordering case.
      Opc = ID == Intrinsic::vector_reduce_fadd
                ? TargetOpcode::G_VECREDUCE_SEQ_FADD
                : TargetOpcode::G_VECREDUCE_SEQ_FMUL;
      MIRBuilder.buildInstr(Opc, {Dst}, {ScalarSrc, VecSrc},
                            MachineInstr::copyFlagsFromInstruction(CI));
      return true;
    }
    // We split the operation into a separate G_FADD/G_FMUL + the reduce,
    // since the associativity doesn't matter.
    unsigned ScalarOpc;
    if (ID == Intrinsic::vector_reduce_fadd) {
      Opc = TargetOpcode::G_VECREDUCE_FADD;
      ScalarOpc = TargetOpcode::G_FADD;
    } else {
      Opc = TargetOpcode::G_VECREDUCE_FMUL;
      ScalarOpc = TargetOpcode::G_FMUL;
    }
    LLT DstTy = MRI->getType(Dst);
    auto Rdx = MIRBuilder.buildInstr(
        Opc, {DstTy}, {VecSrc}, MachineInstr::copyFlagsFromInstruction(CI));
    MIRBuilder.buildInstr(ScalarOpc, {Dst}, {ScalarSrc, Rdx},
                          MachineInstr::copyFlagsFromInstruction(CI));

    return true;
  }
  // The macro below expands to one case label per entry in ConstrainedOps.def,
  // so every constrained FP intrinsic listed there shares the return that
  // follows the include.
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC)  \
  case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
    return translateConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(CI),
                                           MIRBuilder);

  }
  // Not an intrinsic with dedicated handling here.
  return false;
}

bool IRTranslator::translateInlineAsm(const CallBase &CB,
                                      MachineIRBuilder &MIRBuilder) {

  const InlineAsmLowering *ALI = MF->getSubtarget().getInlineAsmLowering();

  if (!ALI) {
    LLVM_DEBUG(
        dbgs() << "Inline asm lowering is not supported for this target yet\n");
    return false;
  }

  return ALI->lowerInlineAsm(
      MIRBuilder, CB, [&](const Value &Val) { return getOrCreateVRegs(Val); });
}

bool IRTranslator::translateCallBase(const CallBase &CB,
                                     MachineIRBuilder &MIRBuilder) {
  ArrayRef<Register> Res = getOrCreateVRegs(CB);

  SmallVector<ArrayRef<Register>, 8> Args;
  Register SwiftInVReg = 0;
  Register SwiftErrorVReg = 0;
  for (auto &Arg : CB.args()) {
    if (CLI->supportSwiftError() && isSwiftError(Arg)) {
      assert(SwiftInVReg == 0 && "Expected only one swift error argument");
      LLT Ty = getLLTForType(*Arg->getType(), *DL);
      SwiftInVReg = MRI->createGenericVirtualRegister(Ty);
      MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt(
                                            &CB, &MIRBuilder.getMBB(), Arg));
      Args.emplace_back(makeArrayRef(SwiftInVReg));
      SwiftErrorVReg =
          SwiftError.getOrCreateVRegDefAt(&CB, &MIRBuilder.getMBB(), Arg);
      continue;
    }
    Args.push_back(getOrCreateVRegs(*Arg));
  }

  // We don't set HasCalls on MFI here yet because call lowering may decide to
  // optimize into tail calls. Instead, we defer that to selection where a final
  // scan is done to check if any instructions are calls.
  bool Success =
      CLI->lowerCall(MIRBuilder, CB, Res, Args, SwiftErrorVReg,
                     [&]() { return getOrCreateVReg(*CB.getCalledOperand()); });

  // Check if we just inserted a tail call.
  if (Success) {
    assert(!HasTailCall && "Can't tail call return twice from block?");
    const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
    HasTailCall = TII->isTailCall(*std::prev(MIRBuilder.getInsertPt()));
  }

  return Success;
}

bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
  const CallInst &CI = cast<CallInst>(U);
  auto TII = MF->getTarget().getIntrinsicInfo();
  const Function *F = CI.getCalledFunction();

  // FIXME: support Windows dllimport function calls.
  if (F && (F->hasDLLImportStorageClass() ||
            (MF->getTarget().getTargetTriple().isOSWindows() &&
             F->hasExternalWeakLinkage())))
    return false;

  // FIXME: support control flow guard targets.
  if (CI.countOperandBundlesOfType(LLVMContext::OB_cfguardtarget))
    return false;

  if (CI.isInlineAsm())
    return translateInlineAsm(CI, MIRBuilder);

  Intrinsic::ID ID = Intrinsic::not_intrinsic;
  if (F && F->isIntrinsic()) {
    ID = F->getIntrinsicID();
    if (TII && ID == Intrinsic::not_intrinsic)
      ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));
  }

  if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic)
    return translateCallBase(CI, MIRBuilder);

  assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic");

  if (translateKnownIntrinsic(CI, ID, MIRBuilder))
    return true;

  ArrayRef<Register> ResultRegs;
  if (!CI.getType()->isVoidTy())
    ResultRegs = getOrCreateVRegs(CI);

  // Ignore the callsite attributes. Backend code is most likely not expecting
  // an intrinsic to sometimes have side effects and sometimes not.
  MachineInstrBuilder MIB =
      MIRBuilder.buildIntrinsic(ID, ResultRegs, !F->doesNotAccessMemory());
  if (isa<FPMathOperator>(CI))
    MIB->copyIRFlags(CI);

  for (auto &Arg : enumerate(CI.arg_operands())) {
    // If this is required to be an immediate, don't materialize it in a
    // register.
    if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
      if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg.value())) {
        // imm arguments are more convenient than cimm (and realistically
        // probably sufficient), so use them.
        assert(CI->getBitWidth() <= 64 &&
               "large intrinsic immediates not handled");
        MIB.addImm(CI->getSExtValue());
      } else {
        MIB.addFPImm(cast<ConstantFP>(Arg.value()));
      }
    } else if (auto MD = dyn_cast<MetadataAsValue>(Arg.value())) {
      auto *MDN = dyn_cast<MDNode>(MD->getMetadata());
      if (!MDN) // This was probably an MDString.
        return false;
      MIB.addMetadata(MDN);
    } else {
      ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg.value());
      if (VRegs.size() > 1)
        return false;
      MIB.addUse(VRegs[0]);
    }
  }

  // Add a MachineMemOperand if it is a target mem intrinsic.
  const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
  TargetLowering::IntrinsicInfo Info;
  // TODO: Add a GlobalISel version of getTgtMemIntrinsic.
  if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) {
    Align Alignment = Info.align.getValueOr(
        DL->getABITypeAlign(Info.memVT.getTypeForEVT(F->getContext())));

    uint64_t Size = Info.memVT.getStoreSize();
    MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal),
                                               Info.flags, Size, Alignment));
  }

  return true;
}

/// Collect the machine basic blocks an exception may unwind to, starting at
/// \p EHPadBB, together with the probability of reaching each of them.
///
/// The walk stops at the first landingpad or cleanuppad. For a catchswitch,
/// every handler is recorded and the walk continues at the catchswitch's
/// unwind destination (if any), scaling \p Prob by the edge probability when
/// branch probability info is available.
///
/// \param EHPadBB     First EH pad block to inspect.
/// \param Prob        Probability of reaching \p EHPadBB.
/// \param UnwindDests Output list of (block, probability) pairs; entries are
///                    appended.
/// \returns false if the destinations could not be determined (Wasm C++
/// personality, or a pad instruction this function does not recognise), true
/// otherwise.
bool IRTranslator::findUnwindDestinations(
    const BasicBlock *EHPadBB,
    BranchProbability Prob,
    SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
        &UnwindDests) {
  EHPersonality Personality = classifyEHPersonality(
      EHPadBB->getParent()->getFunction().getPersonalityFn());
  bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
  bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
  bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
  bool IsSEH = isAsynchronousEHPersonality(Personality);

  if (IsWasmCXX) {
    // Ignore this for now.
    return false;
  }

  while (EHPadBB) {
    const Instruction *Pad = EHPadBB->getFirstNonPHI();
    BasicBlock *NewEHPadBB = nullptr;
    if (isa<LandingPadInst>(Pad)) {
      // Stop on landingpads. They are not funclets.
      UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob);
      break;
    }
    if (isa<CleanupPadInst>(Pad)) {
      // Stop on cleanup pads. Cleanups are always funclet entries for all known
      // personalities.
      UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob);
      UnwindDests.back().first->setIsEHScopeEntry();
      UnwindDests.back().first->setIsEHFuncletEntry();
      break;
    }
    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
      // Add the catchpad handlers to the possible destinations.
      for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
        UnwindDests.emplace_back(&getMBB(*CatchPadBB), Prob);
        // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
        if (IsMSVCCXX || IsCoreCLR)
          UnwindDests.back().first->setIsEHFuncletEntry();
        if (!IsSEH)
          UnwindDests.back().first->setIsEHScopeEntry();
      }
      NewEHPadBB = CatchSwitch->getUnwindDest();
    } else {
      // Unrecognised pad instruction. EHPadBB is not advanced on this path, so
      // looping again would inspect the same block forever; report failure so
      // the caller can bail out instead.
      return false;
    }

    BranchProbabilityInfo *BPI = FuncInfo.BPI;
    if (BPI && NewEHPadBB)
      Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
    EHPadBB = NewEHPadBB;
  }
  return true;
}

bool IRTranslator::translateInvoke(const User &U,
                                   MachineIRBuilder &MIRBuilder) {
  const InvokeInst &I = cast<InvokeInst>(U);
  MCContext &Context = MF->getContext();

  const BasicBlock *ReturnBB = I.getSuccessor(0);
  const BasicBlock *EHPadBB = I.getSuccessor(1);

  const Function *Fn = I.getCalledFunction();
  if (I.isInlineAsm())
    return false;

  // FIXME: support invoking patchpoint and statepoint intrinsics.
  if (Fn && Fn->isIntrinsic())
    return false;

  // FIXME: support whatever these are.
  if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
    return false;

  // FIXME: support control flow guard targets.
  if (I.countOperandBundlesOfType(LLVMContext::OB_cfguardtarget))
    return false;

  // FIXME: support Windows exception handling.
  if (!isa<LandingPadInst>(EHPadBB->getFirstNonPHI()))
    return false;

  // Emit the actual call, bracketed by EH_LABELs so that the MF knows about
  // the region covered by the try.
  MCSymbol *BeginSymbol = Context.createTempSymbol();
  MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);

  if (!translateCallBase(I, MIRBuilder))
    return false;

  MCSymbol *EndSymbol = Context.createTempSymbol();
  MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);

  SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
  BranchProbabilityInfo *BPI = FuncInfo.BPI;
  MachineBasicBlock *InvokeMBB = &MIRBuilder.getMBB();
  BranchProbability EHPadBBProb =
      BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
          : BranchProbability::getZero();

  if (!findUnwindDestinations(EHPadBB, EHPadBBProb, UnwindDests))
    return false;

  MachineBasicBlock &EHPadMBB = getMBB(*EHPadBB),
                    &ReturnMBB = getMBB(*ReturnBB);
  // Update successor info.
  addSuccessorWithProb(InvokeMBB, &ReturnMBB);
  for (auto &UnwindDest : UnwindDests) {
    UnwindDest.first->setIsEHPad();
    addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
  }
  InvokeMBB->normalizeSuccProbs();

  MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol);
  MIRBuilder.buildBr(ReturnMBB);
  return true;
}

/// Placeholder for callbr translation. Nothing is emitted; the function
/// unconditionally reports failure and ignores both parameters.
bool IRTranslator::translateCallBr(const User &U,
                                   MachineIRBuilder &MIRBuilder) {
  // FIXME: Implement this.
  return false;
}

bool IRTranslator::translateLandingPad(const User &U,
                                       MachineIRBuilder &MIRBuilder) {
  const LandingPadInst &LP = cast<LandingPadInst>(U);

  MachineBasicBlock &MBB = MIRBuilder.getMBB();

  MBB.setIsEHPad();

  // If there aren't registers to copy the values into (e.g., during SjLj
  // exceptions), then don't bother.
  auto &TLI = *MF->getSubtarget().getTargetLowering();
  const Constant *PersonalityFn = MF->getFunction().getPersonalityFn();
  if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
      TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
    return true;

  // If landingpad's return type is token type, we don't create DAG nodes
  // for its exception pointer and selector value. The extraction of exception
  // pointer or selector value from token type landingpads is not currently
  // supported.
  if (LP.getType()->isTokenTy())
    return true;

  // Add a label to mark the beginning of the landing pad.  Deletion of the
  // landing pad can thus be detected via the MachineModuleInfo.
  MIRBuilder.buildInstr(TargetOpcode::EH_LABEL)
    .addSym(MF->addLandingPad(&MBB));

  // If the unwinder does not preserve all registers, ensure that the
  // function marks the clobbered registers as used.
  const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
  if (auto *RegMask = TRI.getCustomEHPadPreservedMask(*MF))
    MF->getRegInfo().addPhysRegsUsedFromRegMask(RegMask);

  LLT Ty = getLLTForType(*LP.getType(), *DL);
  Register Undef = MRI->createGenericVirtualRegister(Ty);
  MIRBuilder.buildUndef(Undef);

  SmallVector<LLT, 2> Tys;
  for (Type *Ty : cast<StructType>(LP.getType())->elements())
    Tys.push_back(getLLTForType(*Ty, *DL));
  assert(Tys.size() == 2 && "Only two-valued landingpads are supported");

  // Mark exception register as live in.
  Register ExceptionReg = TLI.getExceptionPointerRegister(PersonalityFn);
  if (!ExceptionReg)
    return false;

  MBB.addLiveIn(ExceptionReg);
  ArrayRef<Register> ResRegs = getOrCreateVRegs(LP);
  MIRBuilder.buildCopy(ResRegs[0], ExceptionReg);

  Register SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn);
  if (!SelectorReg)
    return false;

  MBB.addLiveIn(SelectorReg);
  Register PtrVReg = MRI->createGenericVirtualRegister(Tys[0]);
  MIRBuilder.buildCopy(PtrVReg, SelectorReg);
  MIRBuilder.buildCast(ResRegs[1], PtrVReg);

  return true;
}

bool IRTranslator::translateAlloca(const User &U,
                                   MachineIRBuilder &MIRBuilder) {
  auto &AI = cast<AllocaInst>(U);

  if (AI.isSwiftError())
    return true;

  if (AI.isStaticAlloca()) {
    Register Res = getOrCreateVReg(AI);
    int FI = getOrCreateFrameIndex(AI);
    MIRBuilder.buildFrameIndex(Res, FI);
    return true;
  }

  // FIXME: support stack probing for Windows.
  if (MF->getTarget().getTargetTriple().isOSWindows())
    return false;

  // Now we're in the harder dynamic case.
  Register NumElts = getOrCreateVReg(*AI.getArraySize());
  Type *IntPtrIRTy = DL->getIntPtrType(AI.getType());
  LLT IntPtrTy = getLLTForType(*IntPtrIRTy, *DL);
  if (MRI->getType(NumElts) != IntPtrTy) {
    Register ExtElts = MRI->createGenericVirtualRegister(IntPtrTy);
    MIRBuilder.buildZExtOrTrunc(ExtElts, NumElts);
    NumElts = ExtElts;
  }

  Type *Ty = AI.getAllocatedType();

  Register AllocSize = MRI->createGenericVirtualRegister(IntPtrTy);
  Register TySize =
      getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, DL->getTypeAllocSize(Ty)));
  MIRBuilder.buildMul(AllocSize, NumElts, TySize);

  // Round the size of the allocation up to the stack alignment size
  // by add SA-1 to the size. This doesn't overflow because we're computing
  // an address inside an alloca.
  Align StackAlign = MF->getSubtarget().getFrameLowering()->getStackAlign();
  auto SAMinusOne = MIRBuilder.buildConstant(IntPtrTy, StackAlign.value() - 1);
  auto AllocAdd = MIRBuilder.buildAdd(IntPtrTy, AllocSize, SAMinusOne,
                                      MachineInstr::NoUWrap);
  auto AlignCst =
      MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign.value() - 1));
  auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst);

  Align Alignment = std::max(AI.getAlign(), DL->getPrefTypeAlign(Ty));
  if (Alignment <= StackAlign)
    Alignment = Align(1);
  MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Alignment);

  MF->getFrameInfo().CreateVariableSizedObject(Alignment, &AI);
  assert(MF->getFrameInfo().hasVarSizedObjects());
  return true;
}

bool IRTranslator::translateVAArg(const User &U, MachineIRBuilder &MIRBuilder) {
  // FIXME: We may need more info about the type. Because of how LLT works,
  // we're completely discarding the i64/double distinction here (amongst
  // others). Fortunately the ABIs I know of where that matters don't use va_arg
  // anyway but that's not guaranteed.
  MIRBuilder.buildInstr(TargetOpcode::G_VAARG, {getOrCreateVReg(U)},
                        {getOrCreateVReg(*U.getOperand(0)),
                         DL->getABITypeAlign(U.getType()).value()});
  return true;
}

bool IRTranslator::translateInsertElement(const User &U,
                                          MachineIRBuilder &MIRBuilder) {
  // If it is a <1 x Ty> vector, use the scalar as it is
  // not a legal vector type in LLT.
  if (cast<FixedVectorType>(U.getType())->getNumElements() == 1)
    return translateCopy(U, *U.getOperand(1), MIRBuilder);

  Register Res = getOrCreateVReg(U);
  Register Val = getOrCreateVReg(*U.getOperand(0));
  Register Elt = getOrCreateVReg(*U.getOperand(1));
  Register Idx = getOrCreateVReg(*U.getOperand(2));
  MIRBuilder.buildInsertVectorElement(Res, Val, Elt, Idx);
  return true;
}

bool IRTranslator::translateExtractElement(const User &U,
                                           MachineIRBuilder &MIRBuilder) {
  // If it is a <1 x Ty> vector, use the scalar as it is
  // not a legal vector type in LLT.
  if (cast<FixedVectorType>(U.getOperand(0)->getType())->getNumElements() == 1)
    return translateCopy(U, *U.getOperand(0), MIRBuilder);

  Register Res = getOrCreateVReg(U);
  Register Val = getOrCreateVReg(*U.getOperand(0));
  const auto &TLI = *MF->getSubtarget().getTargetLowering();
  unsigned PreferredVecIdxWidth = TLI.getVectorIdxTy(*DL).getSizeInBits();
  Register Idx;
  if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(1))) {
    if (CI->getBitWidth() != PreferredVecIdxWidth) {
      APInt NewIdx = CI->getValue().sextOrTrunc(PreferredVecIdxWidth);
      auto *NewIdxCI = ConstantInt::get(CI->getContext(), NewIdx);
      Idx = getOrCreateVReg(*NewIdxCI);
    }
  }
  if (!Idx)
    Idx = getOrCreateVReg(*U.getOperand(1));
  if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) {
    const LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth);
    Idx = MIRBuilder.buildSExtOrTrunc(VecIdxTy, Idx).getReg(0);
  }
  MIRBuilder.buildExtractVectorElement(Res, Val, Idx);
  return true;
}

bool IRTranslator::translateShuffleVector(const User &U,
                                          MachineIRBuilder &MIRBuilder) {
  ArrayRef<int> Mask;
  if (auto *SVI = dyn_cast<ShuffleVectorInst>(&U))
    Mask = SVI->getShuffleMask();
  else
    Mask = cast<ConstantExpr>(U).getShuffleMask();
  ArrayRef<int> MaskAlloc = MF->allocateShuffleMask(Mask);
  MIRBuilder
      .buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {getOrCreateVReg(U)},
                  {getOrCreateVReg(*U.getOperand(0)),
                   getOrCreateVReg(*U.getOperand(1))})
      .addShuffleMask(MaskAlloc);
  return true;
}

bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) {
  const PHINode &PI = cast<PHINode>(U);

  SmallVector<MachineInstr *, 4> Insts;
  for (auto Reg : getOrCreateVRegs(PI)) {
    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, {Reg}, {});
    Insts.push_back(MIB.getInstr());
  }

  PendingPHIs.emplace_back(&PI, std::move(Insts));
  return true;
}

bool IRTranslator::translateAtomicCmpXchg(const User &U,
                                          MachineIRBuilder &MIRBuilder) {
  const AtomicCmpXchgInst &I = cast<AtomicCmpXchgInst>(U);

  auto &TLI = *MF->getSubtarget().getTargetLowering();
  auto Flags = TLI.getAtomicMemOperandFlags(I, *DL);

  Type *ResType = I.getType();
  Type *ValType = ResType->Type::getStructElementType(0);

  auto Res = getOrCreateVRegs(I);
  Register OldValRes = Res[0];
  Register SuccessRes = Res[1];
  Register Addr = getOrCreateVReg(*I.getPointerOperand());
  Register Cmp = getOrCreateVReg(*I.getCompareOperand());
  Register NewVal = getOrCreateVReg(*I.getNewValOperand());

  AAMDNodes AAMetadata;
  I.getAAMetadata(AAMetadata);

  MIRBuilder.buildAtomicCmpXchgWithSuccess(
      OldValRes, SuccessRes, Addr, Cmp, NewVal,
      *MF->getMachineMemOperand(
          MachinePointerInfo(I.getPointerOperand()), Flags,
          DL->getTypeStoreSize(ValType), getMemOpAlign(I), AAMetadata, nullptr,
          I.getSyncScopeID(), I.getSuccessOrdering(), I.getFailureOrdering()));
  return true;
}

bool IRTranslator::translateAtomicRMW(const User &U,
                                      MachineIRBuilder &MIRBuilder) {
  const AtomicRMWInst &I = cast<AtomicRMWInst>(U);
  auto &TLI = *MF->getSubtarget().getTargetLowering();
  auto Flags = TLI.getAtomicMemOperandFlags(I, *DL);

  Type *ResType = I.getType();

  Register Res = getOrCreateVReg(I);
  Register Addr = getOrCreateVReg(*I.getPointerOperand());
  Register Val = getOrCreateVReg(*I.getValOperand());

  unsigned Opcode = 0;
  switch (I.getOperation()) {
  default:
    return false;
  case AtomicRMWInst::Xchg:
    Opcode = TargetOpcode::G_ATOMICRMW_XCHG;
    break;
  case AtomicRMWInst::Add:
    Opcode = TargetOpcode::G_ATOMICRMW_ADD;
    break;
  case AtomicRMWInst::Sub:
    Opcode = TargetOpcode::G_ATOMICRMW_SUB;
    break;
  case AtomicRMWInst::And:
    Opcode = TargetOpcode::G_ATOMICRMW_AND;
    break;
  case AtomicRMWInst::Nand:
    Opcode = TargetOpcode::G_ATOMICRMW_NAND;
    break;
  case AtomicRMWInst::Or:
    Opcode = TargetOpcode::G_ATOMICRMW_OR;
    break;
  case AtomicRMWInst::Xor:
    Opcode = TargetOpcode::G_ATOMICRMW_XOR;
    break;
  case AtomicRMWInst::Max:
    Opcode = TargetOpcode::G_ATOMICRMW_MAX;
    break;
  case AtomicRMWInst::Min:
    Opcode = TargetOpcode::G_ATOMICRMW_MIN;
    break;
  case AtomicRMWInst::UMax:
    Opcode = TargetOpcode::G_ATOMICRMW_UMAX;
    break;
  case AtomicRMWInst::UMin:
    Opcode = TargetOpcode::G_ATOMICRMW_UMIN;
    break;
  case AtomicRMWInst::FAdd:
    Opcode = TargetOpcode::G_ATOMICRMW_FADD;
    break;
  case AtomicRMWInst::FSub:
    Opcode = TargetOpcode::G_ATOMICRMW_FSUB;
    break;
  }

  AAMDNodes AAMetadata;
  I.getAAMetadata(AAMetadata);

  MIRBuilder.buildAtomicRMW(
      Opcode, Res, Addr, Val,
      *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
                                Flags, DL->getTypeStoreSize(ResType),
                                getMemOpAlign(I), AAMetadata, nullptr,
                                I.getSyncScopeID(), I.getOrdering()));
  return true;
}

bool IRTranslator::translateFence(const User &U,
                                  MachineIRBuilder &MIRBuilder) {
  const FenceInst &Fence = cast<FenceInst>(U);
  MIRBuilder.buildFence(static_cast<unsigned>(Fence.getOrdering()),
                        Fence.getSyncScopeID());
  return true;
}

bool IRTranslator::translateFreeze(const User &U,
                                   MachineIRBuilder &MIRBuilder) {
  const ArrayRef<Register> DstRegs = getOrCreateVRegs(U);
  const ArrayRef<Register> SrcRegs = getOrCreateVRegs(*U.getOperand(0));

  assert(DstRegs.size() == SrcRegs.size() &&
         "Freeze with different source and destination type?");

  for (unsigned I = 0; I < DstRegs.size(); ++I) {
    MIRBuilder.buildFreeze(DstRegs[I], SrcRegs[I]);
  }

  return true;
}

void IRTranslator::finishPendingPhis() {
#ifndef NDEBUG
  DILocationVerifier Verifier;
  GISelObserverWrapper WrapperObserver(&Verifier);
  RAIIDelegateInstaller DelInstall(*MF, &WrapperObserver);
#endif // ifndef NDEBUG
  for (auto &Phi : PendingPHIs) {
    const PHINode *PI = Phi.first;
    ArrayRef<MachineInstr *> ComponentPHIs = Phi.second;
    MachineBasicBlock *PhiMBB = ComponentPHIs[0]->getParent();
    EntryBuilder->setDebugLoc(PI->getDebugLoc());
#ifndef NDEBUG
    Verifier.setCurrentInst(PI);
#endif // ifndef NDEBUG

    SmallSet<const MachineBasicBlock *, 16> SeenPreds;
    for (unsigned i = 0; i < PI->getNumIncomingValues(); ++i) {
      auto IRPred = PI->getIncomingBlock(i);
      ArrayRef<Register> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i));
      for (auto Pred : getMachinePredBBs({IRPred, PI->getParent()})) {
        if (SeenPreds.count(Pred) || !PhiMBB->isPredecessor(Pred))
          continue;
        SeenPreds.insert(Pred);
        for (unsigned j = 0; j < ValRegs.size(); ++j) {
          MachineInstrBuilder MIB(*MF, ComponentPHIs[j]);
          MIB.addUse(ValRegs[j]);
          MIB.addMBB(Pred);
        }
      }
    }
  }
}

bool IRTranslator::valueIsSplit(const Value &V,
                                SmallVectorImpl<uint64_t> *Offsets) {
  SmallVector<LLT, 4> SplitTys;
  if (Offsets && !Offsets->empty())
    Offsets->clear();
  computeValueLLTs(*DL, *V.getType(), SplitTys, Offsets);
  return SplitTys.size() > 1;
}

/// Translate a single IR instruction using CurBuilder.
///
/// Returns false when the target lowering asks to fall back to DAG ISel for
/// \p Inst or when the opcode has no case generated from Instruction.def;
/// otherwise returns whatever the matching translate<Opcode> handler returns.
bool IRTranslator::translate(const Instruction &Inst) {
  // Instructions built from here on carry the IR instruction's debug location.
  CurBuilder->setDebugLoc(Inst.getDebugLoc());

  auto &TLI = *MF->getSubtarget().getTargetLowering();
  if (TLI.fallBackToDAGISel(Inst))
    return false;

  // Instruction.def expands HANDLE_INST once per opcode, producing one case
  // per instruction kind that forwards to translate<Opcode>().
  switch (Inst.getOpcode()) {
#define HANDLE_INST(NUM, OPCODE, CLASS)                                        \
  case Instruction::OPCODE:                                                    \
    return translate##OPCODE(Inst, *CurBuilder.get());
#include "llvm/IR/Instruction.def"
  default:
    return false;
  }
}

/// Materialize the constant \p C into \p Reg, emitting through EntryBuilder.
///
/// Returns false for constants this function does not handle: a
/// ConstantAggregateZero that is not a vector, a ConstantExpr whose opcode has
/// no case generated from Instruction.def, and any constant class not listed
/// in the chain below. For single-element vectors and constant expressions
/// the result of the delegated translate call is returned.
bool IRTranslator::translate(const Constant &C, Register Reg) {
  // We only emit constants into the entry block from here. To prevent jumpy
  // debug behaviour set the line to 0.
  if (auto CurrInstDL = CurBuilder->getDL())
    EntryBuilder->setDebugLoc(DILocation::get(C.getContext(), 0, 0,
                                              CurrInstDL.getScope(),
                                              CurrInstDL.getInlinedAt()));

  if (auto CI = dyn_cast<ConstantInt>(&C))
    EntryBuilder->buildConstant(Reg, *CI);
  else if (auto CF = dyn_cast<ConstantFP>(&C))
    EntryBuilder->buildFConstant(Reg, *CF);
  else if (isa<UndefValue>(C))
    EntryBuilder->buildUndef(Reg);
  else if (isa<ConstantPointerNull>(C))
    // A null pointer is emitted as the constant 0.
    EntryBuilder->buildConstant(Reg, 0);
  else if (auto GV = dyn_cast<GlobalValue>(&C))
    EntryBuilder->buildGlobalValue(Reg, GV);
  else if (auto CAZ = dyn_cast<ConstantAggregateZero>(&C)) {
    // Only zero-initialized vectors are supported here.
    if (!CAZ->getType()->isVectorTy())
      return false;
    // Return the scalar if it is a <1 x Ty> vector.
    if (CAZ->getNumElements() == 1)
      return translateCopy(C, *CAZ->getElementValue(0u), *EntryBuilder.get());
    // Otherwise build the vector from one register per element.
    SmallVector<Register, 4> Ops;
    for (unsigned i = 0; i < CAZ->getNumElements(); ++i) {
      Constant &Elt = *CAZ->getElementValue(i);
      Ops.push_back(getOrCreateVReg(Elt));
    }
    EntryBuilder->buildBuildVector(Reg, Ops);
  } else if (auto CV = dyn_cast<ConstantDataVector>(&C)) {
    // Return the scalar if it is a <1 x Ty> vector.
    if (CV->getNumElements() == 1)
      return translateCopy(C, *CV->getElementAsConstant(0),
                           *EntryBuilder.get());
    SmallVector<Register, 4> Ops;
    for (unsigned i = 0; i < CV->getNumElements(); ++i) {
      Constant &Elt = *CV->getElementAsConstant(i);
      Ops.push_back(getOrCreateVReg(Elt));
    }
    EntryBuilder->buildBuildVector(Reg, Ops);
  } else if (auto CE = dyn_cast<ConstantExpr>(&C)) {
    // Constant expressions reuse the per-opcode instruction translators, but
    // emit through EntryBuilder instead of CurBuilder.
    switch(CE->getOpcode()) {
#define HANDLE_INST(NUM, OPCODE, CLASS)                                        \
  case Instruction::OPCODE:                                                    \
    return translate##OPCODE(*CE, *EntryBuilder.get());
#include "llvm/IR/Instruction.def"
    default:
      return false;
    }
  } else if (auto CV = dyn_cast<ConstantVector>(&C)) {
    // A single-operand vector is handled as a copy of that operand.
    if (CV->getNumOperands() == 1)
      return translateCopy(C, *CV->getOperand(0), *EntryBuilder.get());
    SmallVector<Register, 4> Ops;
    for (unsigned i = 0; i < CV->getNumOperands(); ++i) {
      Ops.push_back(getOrCreateVReg(*CV->getOperand(i)));
    }
    EntryBuilder->buildBuildVector(Reg, Ops);
  } else if (auto *BA = dyn_cast<BlockAddress>(&C)) {
    EntryBuilder->buildBlockAddress(Reg, BA);
  } else
    return false;

  return true;
}

/// Emit the switch-lowering constructs recorded in SL: bit-test blocks, then
/// jump tables, then plain switch cases. Each list is cleared once it has
/// been emitted.
void IRTranslator::finalizeBasicBlock() {
  for (auto &BTB : SL->BitTestCases) {
    // Emit header first, if it wasn't already emitted.
    if (!BTB.Emitted)
      emitBitTestHeader(BTB, BTB.Parent);

    // Starts at the whole block's probability; each case's share is
    // subtracted before that case is emitted.
    BranchProbability UnhandledProb = BTB.Prob;
    // Note: ej is captured once, before the loop body may pop the last case.
    for (unsigned j = 0, ej = BTB.Cases.size(); j != ej; ++j) {
      UnhandledProb -= BTB.Cases[j].ExtraProb;
      // Set the current basic block to the mbb we wish to insert the code into
      MachineBasicBlock *MBB = BTB.Cases[j].ThisBB;
      // If all cases cover a contiguous range, it is not necessary to jump to
      // the default block after the last bit test fails. This is because the
      // range check during bit test header creation has guaranteed that every
      // case here doesn't go outside the range. In this case, there is no need
      // to perform the last bit test, as it will always be true. Instead, make
      // the second-to-last bit-test fall through to the target of the last bit
      // test, and delete the last bit test.

      MachineBasicBlock *NextMBB;
      if (BTB.ContiguousRange && j + 2 == ej) {
        // Second-to-last bit-test with contiguous range: fall through to the
        // target of the final bit test.
        NextMBB = BTB.Cases[j + 1].TargetBB;
      } else if (j + 1 == ej) {
        // For the last bit test, fall through to Default.
        NextMBB = BTB.Default;
      } else {
        // Otherwise, fall through to the next bit test.
        NextMBB = BTB.Cases[j + 1].ThisBB;
      }

      emitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j], MBB);

      // FIXME delete this block below?
      if (BTB.ContiguousRange && j + 2 == ej) {
        // Since we're not going to use the final bit test, remove it.
        BTB.Cases.pop_back();
        break;
      }
    }
    // This is "default" BB. We have two jumps to it. From "header" BB and from
    // last "case" BB, unless the latter was skipped.
    CFGEdge HeaderToDefaultEdge = {BTB.Parent->getBasicBlock(),
                                   BTB.Default->getBasicBlock()};
    addMachineCFGPred(HeaderToDefaultEdge, BTB.Parent);
    if (!BTB.ContiguousRange) {
      addMachineCFGPred(HeaderToDefaultEdge, BTB.Cases.back().ThisBB);
    }
  }
  SL->BitTestCases.clear();

  for (auto &JTCase : SL->JTCases) {
    // Emit header first, if it wasn't already emitted.
    if (!JTCase.first.Emitted)
      emitJumpTableHeader(JTCase.second, JTCase.first, JTCase.first.HeaderBB);

    emitJumpTable(JTCase.second, JTCase.second.MBB);
  }
  SL->JTCases.clear();

  // Remaining switch cases are emitted into the block CurBuilder currently
  // points at.
  for (auto &SwCase : SL->SwitchCases)
    emitSwitchCase(SwCase, &CurBuilder->getMBB(), *CurBuilder);
  SL->SwitchCases.clear();
}

/// Drop the per-function translation state: pending PHIs, the value map,
/// frame indices, machine predecessor info, both builders and FuncInfo.
void IRTranslator::finalizeFunction() {
  // Release the memory used by the different maps we
  // needed during the translation.
  PendingPHIs.clear();
  VMap.reset();
  FrameIndices.clear();
  MachinePreds.clear();
  // MachineIRBuilder::DebugLoc can outlive the DILocation it holds. Clear it
  // to avoid accessing freed memory (in runOnMachineFunction) and to avoid
  // destroying it twice (in ~IRTranslator() and ~LLVMContext())
  EntryBuilder.reset();
  CurBuilder.reset();
  FuncInfo.clear();
}

/// Returns true if a BasicBlock \p BB within a variadic function contains a
/// variadic musttail call.
static bool checkForMustTailInVarArgFn(bool IsVarArg, const BasicBlock &BB) {
  if (!IsVarArg)
    return false;

  // Walk the block backwards, because tail calls usually only appear at the end
  // of a block.
  return std::any_of(BB.rbegin(), BB.rend(), [](const Instruction &I) {
    const auto *CI = dyn_cast<CallInst>(&I);
    return CI && CI->isMustTailCall();
  });
}

/// Translate the IR of the function behind \p CurMF into generic machine
/// instructions.
///
/// Steps, in order: set up the builders (optionally with CSE) and per-function
/// state, create one MachineBasicBlock per IR block plus a temporary entry
/// block for arguments and constants, lower the formal arguments, translate
/// every instruction in reverse post-order, fill in the pending PHIs, and
/// finally merge the temporary entry block into the IR entry block.
///
/// Every return path returns false; translation failures are signalled
/// through reportTranslationError rather than through the return value.
bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
  MF = &CurMF;
  const Function &F = MF->getFunction();
  // Nothing to translate for a function without a body.
  if (F.empty())
    return false;
  GISelCSEAnalysisWrapper &Wrapper =
      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
  // Set the CSEConfig and run the analysis.
  GISelCSEInfo *CSEInfo = nullptr;
  TPC = &getAnalysis<TargetPassConfig>();
  // An explicit command-line setting wins over the target's default.
  bool EnableCSE = EnableCSEInIRTranslator.getNumOccurrences()
                       ? EnableCSEInIRTranslator
                       : TPC->isGISelCSEEnabled();

  if (EnableCSE) {
    EntryBuilder = std::make_unique<CSEMIRBuilder>(CurMF);
    CSEInfo = &Wrapper.get(TPC->getCSEConfig());
    EntryBuilder->setCSEInfo(CSEInfo);
    CurBuilder = std::make_unique<CSEMIRBuilder>(CurMF);
    CurBuilder->setCSEInfo(CSEInfo);
  } else {
    EntryBuilder = std::make_unique<MachineIRBuilder>();
    CurBuilder = std::make_unique<MachineIRBuilder>();
  }
  CLI = MF->getSubtarget().getCallLowering();
  CurBuilder->setMF(*MF);
  EntryBuilder->setMF(*MF);
  MRI = &MF->getRegInfo();
  DL = &F.getParent()->getDataLayout();
  ORE = std::make_unique<OptimizationRemarkEmitter>(&F);
  const TargetMachine &TM = MF->getTarget();
  TM.resetTargetOptions(F);
  EnableOpts = OptLevel != CodeGenOpt::None && !skipFunction(F);
  FuncInfo.MF = MF;
  // Branch probabilities are only requested when optimizations are enabled.
  if (EnableOpts)
    FuncInfo.BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
  else
    FuncInfo.BPI = nullptr;

  FuncInfo.CanLowerReturn = CLI->checkReturnTypeForCallConv(*MF);

  const auto &TLI = *MF->getSubtarget().getTargetLowering();

  SL = std::make_unique<GISelSwitchLowering>(this, FuncInfo);
  SL->init(TLI, TM, *DL);


  assert(PendingPHIs.empty() && "stale PHIs");

  // NOTE(review): the error is reported here but there is no early return, so
  // translation continues for big-endian targets -- confirm this is intended.
  if (!DL->isLittleEndian()) {
    // Currently we don't properly handle big endian code.
    OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                               F.getSubprogram(), &F.getEntryBlock());
    R << "unable to translate in big endian mode";
    reportTranslationError(*MF, *TPC, *ORE, R);
  }

  // Release the per-function state when we return, whether we succeeded or not.
  auto FinalizeOnReturn = make_scope_exit([this]() { finalizeFunction(); });

  // Setup a separate basic-block for the arguments and constants
  MachineBasicBlock *EntryBB = MF->CreateMachineBasicBlock();
  MF->push_back(EntryBB);
  EntryBuilder->setMBB(*EntryBB);

  DebugLoc DbgLoc = F.getEntryBlock().getFirstNonPHI()->getDebugLoc();
  SwiftError.setFunction(CurMF);
  SwiftError.createEntriesInEntryBlock(DbgLoc);

  bool IsVarArg = F.isVarArg();
  bool HasMustTailInVarArgFn = false;

  // Create all blocks, in IR order, to preserve the layout.
  for (const BasicBlock &BB: F) {
    auto *&MBB = BBToMBB[&BB];

    MBB = MF->CreateMachineBasicBlock(&BB);
    MF->push_back(MBB);

    if (BB.hasAddressTaken())
      MBB->setHasAddressTaken();

    // Once one block has a musttail call there is no need to check the rest.
    if (!HasMustTailInVarArgFn)
      HasMustTailInVarArgFn = checkForMustTailInVarArgFn(IsVarArg, BB);
  }

  MF->getFrameInfo().setHasMustTailInVarArgFunc(HasMustTailInVarArgFn);

  // Make our arguments/constants entry block fallthrough to the IR entry block.
  EntryBB->addSuccessor(&getMBB(F.front()));

  if (CLI->fallBackToDAGISel(*MF)) {
    OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                               F.getSubprogram(), &F.getEntryBlock());
    R << "unable to lower function: " << ore::NV("Prototype", F.getType());
    reportTranslationError(*MF, *TPC, *ORE, R);
    return false;
  }

  // Lower the actual args into this basic block.
  SmallVector<ArrayRef<Register>, 8> VRegArgs;
  for (const Argument &Arg: F.args()) {
    if (DL->getTypeStoreSize(Arg.getType()).isZero())
      continue; // Don't handle zero sized types.
    ArrayRef<Register> VRegs = getOrCreateVRegs(Arg);
    VRegArgs.push_back(VRegs);

    if (Arg.hasSwiftErrorAttr()) {
      assert(VRegs.size() == 1 && "Too many vregs for Swift error");
      SwiftError.setCurrentVReg(EntryBB, SwiftError.getFunctionArg(), VRegs[0]);
    }
  }

  if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs, FuncInfo)) {
    OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                               F.getSubprogram(), &F.getEntryBlock());
    R << "unable to lower arguments: " << ore::NV("Prototype", F.getType());
    reportTranslationError(*MF, *TPC, *ORE, R);
    return false;
  }

  // Need to visit defs before uses when translating instructions.
  GISelObserverWrapper WrapperObserver;
  if (EnableCSE && CSEInfo)
    WrapperObserver.addObserver(CSEInfo);
  {
    ReversePostOrderTraversal<const Function *> RPOT(&F);
#ifndef NDEBUG
    DILocationVerifier Verifier;
    WrapperObserver.addObserver(&Verifier);
#endif // ifndef NDEBUG
    RAIIDelegateInstaller DelInstall(*MF, &WrapperObserver);
    RAIIMFObserverInstaller ObsInstall(*MF, WrapperObserver);
    for (const BasicBlock *BB : RPOT) {
      MachineBasicBlock &MBB = getMBB(*BB);
      // Set the insertion point of all the following translations to
      // the end of this basic block.
      CurBuilder->setMBB(MBB);
      HasTailCall = false;
      for (const Instruction &Inst : *BB) {
        // If we translated a tail call in the last step, then we know
        // everything after the call is either a return, or something that is
        // handled by the call itself. (E.g. a lifetime marker or assume
        // intrinsic.) In this case, we should stop translating the block and
        // move on.
        if (HasTailCall)
          break;
#ifndef NDEBUG
        Verifier.setCurrentInst(&Inst);
#endif // ifndef NDEBUG
        if (translate(Inst))
          continue;

        // Translation of this instruction failed: build a remark naming it,
        // report the failure and give up on the function.
        OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                                   Inst.getDebugLoc(), BB);
        R << "unable to translate instruction: " << ore::NV("Opcode", &Inst);

        // The full textual form of the instruction is only added when extra
        // analysis output is allowed for this pass.
        if (ORE->allowExtraAnalysis("gisel-irtranslator")) {
          std::string InstStrStorage;
          raw_string_ostream InstStr(InstStrStorage);
          InstStr << Inst;

          R << ": '" << InstStr.str() << "'";
        }

        reportTranslationError(*MF, *TPC, *ORE, R);
        return false;
      }

      finalizeBasicBlock();
    }
#ifndef NDEBUG
    // Verifier is local to this scope; unregister it before it goes away.
    WrapperObserver.removeObserver(&Verifier);
#endif
  }

  finishPendingPhis();

  SwiftError.propagateVRegs();

  // Merge the argument lowering and constants block with its single
  // successor, the LLVM-IR entry block.  We want the basic block to
  // be maximal.
  assert(EntryBB->succ_size() == 1 &&
         "Custom BB used for lowering should have only one successor");
  // Get the successor of the current entry block.
  MachineBasicBlock &NewEntryBB = **EntryBB->succ_begin();
  assert(NewEntryBB.pred_size() == 1 &&
         "LLVM-IR entry block has a predecessor!?");
  // Move all the instruction from the current entry block to the
  // new entry block.
  NewEntryBB.splice(NewEntryBB.begin(), EntryBB, EntryBB->begin(),
                    EntryBB->end());

  // Update the live-in information for the new entry block.
  for (const MachineBasicBlock::RegisterMaskPair &LiveIn : EntryBB->liveins())
    NewEntryBB.addLiveIn(LiveIn);
  NewEntryBB.sortUniqueLiveIns();

  // Get rid of the now empty basic block.
  EntryBB->removeSuccessor(&NewEntryBB);
  MF->remove(EntryBB);
  MF->DeleteMachineBasicBlock(EntryBB);

  assert(&MF->front() == &NewEntryBB &&
         "New entry wasn't next in the list of basic block!");

  // Initialize stack protector information.
  StackProtector &SP = getAnalysis<StackProtector>();
  SP.copyToMachineFrameInfo(MF->getFrameInfo());

  return false;
}
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								//===- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator ---*- C++ -*-==//
-												[GlobalISel] Add the proper cmake plumbing.

This patch adds the necessary plumbing to cmake to build the sources related to
GlobalISel.

To build the sources related to GlobalISel, we need to add -DBUILD_GLOBAL_ISEL=ON.
By default, this is OFF, thus GlobalISel sources will not impact people that do
not explicitly opt-in.

Differential Revision: http://reviews.llvm.org/D15983

llvm-svn: 258344

											
										
										
											2016-01-21 04:58:56 +08:00
+								//
-												Update the file headers across all of the LLVM projects in the monorepo
to reflect the new license.

We understand that people may be surprised that we're moving the header
entirely to discuss the new license. We checked this carefully with the
Foundation's lawyer and we believe this is the correct approach.

Essentially, all code in the project is now made available by the LLVM
project under our new license, so you will see that the license headers
include that license only. Some of our contributors have contributed
code under our old license, and accordingly, we have retained a copy of
our old license notice in the top-level files in each project and
repository.

llvm-svn: 351636

											
										
										
											2019-01-19 16:50:56 +08:00
+								// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 								// See https://llvm.org/LICENSE.txt for license information.
 								// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-												[GlobalISel] Add the proper cmake plumbing.

This patch adds the necessary plumbing to cmake to build the sources related to
GlobalISel.

To build the sources related to GlobalISel, we need to add -DBUILD_GLOBAL_ISEL=ON.
By default, this is OFF, thus GlobalISel sources will not impact people that do
not explicitly opt-in.

Differential Revision: http://reviews.llvm.org/D15983

llvm-svn: 258344

											
										
										
											2016-01-21 04:58:56 +08:00
+								//
 								//===----------------------------------------------------------------------===//
 								/// \file
 								/// This file implements the IRTranslator class.
 								//===----------------------------------------------------------------------===//
 								#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
-												[GlobalISel][IRTranslator] Use RPO traversal when visiting blocks to translate.

Previously we were just visiting the blocks in the function in IR order, which
is rather arbitrary. Therefore we wouldn't always visit defs before uses, but
the translation code relies on this assumption in some places.

Only codegen change seen in tests is an elision of a redundant copy.

Fixes PR38396

llvm-svn: 338476

											
										
										
											2018-08-01 10:17:42 +08:00
+								#include "llvm/ADT/PostOrderIterator.h"
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								#include "llvm/ADT/STLExtras.h"
-												[GlobalISel] Finalize translated function on scope exit. NFC.

This is the compromise between having a per-function IRTranslator
and manually managing the per-function state.

llvm-svn: 296046

											
										
										
											2017-02-24 07:57:28 +08:00
+								#include "llvm/ADT/ScopeExit.h"
-												[GlobalISel] track predecessor mapping during switch lowering.

Correctly populating Machine PHIs relies on knowing exactly how the IR level
CFG was lowered to MachineIR. This needs to be tracked by any translation
phases that meddle (currently only SwitchInst handling).

This reapplies r291973 which was reverted because of testing failures. Fixes:

 + Don't return an ArrayRef to a local temporary.
 + Incorporate Kristof's suggested comment improvements.

llvm-svn: 292278

											
										
										
											2017-01-18 06:13:50 +08:00
+								#include "llvm/ADT/SmallSet.h"
-												[GlobalISel] Add the necessary plumbing to lower formal arguments.

llvm-svn: 260579

											
										
										
											2016-02-12 03:59:41 +08:00
+								#include "llvm/ADT/SmallVector.h"
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								#include "llvm/Analysis/BranchProbabilityInfo.h"
-												GlobalISel: Preserve load/store metadata in IRTranslator

This was dropping the invariant metadata on dead argument loads, so
they weren't deleted.

Atomics still need to be fixed the same way. Also, apparently store
was never preserving dereferencable which should also be fixed.

											
										
										
											2020-01-13 03:10:42 +08:00
+								#include "llvm/Analysis/Loads.h"
-												Rename OptimizationDiagnosticInfo.* to OptimizationRemarkEmitter.*

Sync it up with the name of the class actually defined here.  This has been
bothering me for a while...

llvm-svn: 315249

											
										
										
											2017-10-10 07:19:02 +08:00
+								#include "llvm/Analysis/OptimizationRemarkEmitter.h"
-												[GlobalISel] Add ISel support for @llvm.lifetime.start and @llvm.lifetime.end

This adds ISel support for lifetime markers in opt levels above O0.

It also updates the arm64-irtranslator test, and updates some AArch64 tests that
use them for added coverage.

It also adds a testcase taken from the X86 codegen tests which verified a bug
caused by lifetime markers + stack colouring in the past. This is intended to
make sure that GISel doesn't re-introduce the bug.

(This is basically a straight copy from what SelectionDAG does in
SelectionDAGBuilder.cpp)

https://reviews.llvm.org/D57187

llvm-svn: 352410

											
										
										
											2019-01-29 03:22:29 +08:00
+								#include "llvm/Analysis/ValueTracking.h"
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
+								#include "llvm/CodeGen/Analysis.h"
-												Sort the remaining #include lines in include/... and lib/....

I did this a long time ago with a janky python script, but now
clang-format has built-in support for this. I fed clang-format every
line with a #include and let it re-sort things according to the precise
LLVM rules for include ordering baked into clang-format these days.

I've reverted a number of files where the results of sorting includes
aren't healthy. Either places where we have legacy code relying on
particular include ordering (where possible, I'll fix these separately)
or where we have particular formatting around #include lines that
I didn't want to disturb in this patch.

This patch is *entirely* mechanical. If you get merge conflicts or
anything, just ignore the changes in this patch and run clang-format
over your #include lines in the files.

Sorry for any noise here, but it is important to keep these things
stable. I was seeing an increasing number of patches with irrelevant
re-ordering of #include lines because clang-format was used. This patch
at least isolates that churn, makes it easy to skip when resolving
conflicts, and gets us to a clean baseline (again).

llvm-svn: 304787

											
										
										
											2017-06-06 19:49:48 +08:00
+								#include "llvm/CodeGen/GlobalISel/CallLowering.h"
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
-												[GlobalISel] Introduce InlineAsmLowering class

Summary:
Similar to the CallLowering class used for lowering LLVM IR calls to MIR calls,
we introduce a separate class for lowering LLVM IR inline asm to MIR INLINEASM.

There is no functional change yet, all existing tests should pass.

Reviewers: arsenm, dsanders, aemerson, volkan, t.p.northover, paquette

Reviewed By: aemerson

Subscribers: gargaroff, wdng, mgorny, rovka, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D78316

											
										
										
											2020-04-08 20:40:43 +08:00
+								#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								#include "llvm/CodeGen/LowLevelType.h"
 								#include "llvm/CodeGen/MachineBasicBlock.h"
-												GlobalISel: implement alloca instruction

llvm-svn: 276433

											
										
										
											2016-07-23 00:59:52 +08:00
+								#include "llvm/CodeGen/MachineFrameInfo.h"
-												Sort the remaining #include lines in include/... and lib/....

I did this a long time ago with a janky python script, but now
clang-format has built-in support for this. I fed clang-format every
line with a #include and let it re-sort things according to the precise
LLVM rules for include ordering baked into clang-format these days.

I've reverted a number of files where the results of sorting includes
aren't healthy. Either places where we have legacy code relying on
particular include ordering (where possible, I'll fix these separately)
or where we have particular formatting around #include lines that
I didn't want to disturb in this patch.

This patch is *entirely* mechanical. If you get merge conflicts or
anything, just ignore the changes in this patch and run clang-format
over your #include lines in the files.

Sorry for any noise here, but it is important to keep these things
stable. I was seeing an increasing number of patches with irrelevant
re-ordering of #include lines because clang-format was used. This patch
at least isolates that churn, makes it easy to skip when resolving
conflicts, and gets us to a clean baseline (again).

llvm-svn: 304787

											
										
										
											2017-06-06 19:49:48 +08:00
+								#include "llvm/CodeGen/MachineFunction.h"
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								#include "llvm/CodeGen/MachineInstrBuilder.h"
 								#include "llvm/CodeGen/MachineMemOperand.h"
-												GlobalISel: Handle llvm.localescape

This one is pretty easy and shrinks the list of unhandled
intrinsics. I'm not sure how relevant the insert point is. Using the
insert position of EntryBuilder will place this after
constants. SelectionDAG seems to end up emitting these after argument
copies and before anything else, but I don't think it really
matters. This also ends up emitting these in the opposite order from
SelectionDAG, but I don't think that matters either.

This also needs a fix to stop the later passes dropping this as a dead
instruction. DeadMachineInstructionElim's version of isDead special
cases LOCAL_ESCAPE for some reason, and I'm not sure why it's excluded
from MachineInstr::isLabel (or why isDead doesn't check it).

I also noticed DeadMachineInstructionElim never considers inline asm
as dead, but GlobalISel will drop asm with no constraints.

											
										
										
											2020-07-29 21:48:26 +08:00
+								#include "llvm/CodeGen/MachineModuleInfo.h"
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								#include "llvm/CodeGen/MachineOperand.h"
-												[GlobalISel][IRTranslator] Teach the pass how to translate Add instructions.

llvm-svn: 260549

											
										
										
											2016-02-12 01:51:31 +08:00
+								#include "llvm/CodeGen/MachineRegisterInfo.h"
-												CodeGen: Remove pipeline dependencies on StackProtector; NFC

This re-applies r336929 with a fix to accommodate the Mips target
scheduling multiple SelectionDAG instances into the pass pipeline.

PrologEpilogInserter and StackColoring depend on the StackProtector analysis
being alive from the point it is run until PEI, which requires that they are all
scheduled in the same FunctionPassManager. Inserting a (machine) ModulePass
between StackProtector and PEI results in these passes being in separate
FunctionPassManagers and the StackProtector is not available for PEI.

PEI and StackColoring don't use much information from the StackProtector pass,
so transferring the required information to MachineFrameInfo is cleaner than
keeping the StackProtector pass around. This commit moves the SSP layout
information to MFI instead of keeping it in the pass.

This patch set (D37580, D37581, D37582, D37583, D37584, D37585, D37586, D37587)
is a first draft of the pagerando implementation described in
http://lists.llvm.org/pipermail/llvm-dev/2017-June/113794.html.

Patch by Stephen Crane <sjc@immunant.com>

Differential Revision: https://reviews.llvm.org/D49256

llvm-svn: 336964

											
										
										
											2018-07-13 08:08:38 +08:00
+								#include "llvm/CodeGen/StackProtector.h"
-												[GlobalISel][IRTranslator] Generate better conditional branch lowering.

This is a port of the functionality from SelectionDAG, which tries to find
a tree of conditions from compares that are then combined using OR or AND,
before using that result as the input to a branch. Instead of naively
lowering the code as is, this change converts that into a sequence of
conditional branches on the sub-expressions of the tree.

Like SelectionDAG, we re-use the case block codegen functionality from
the switch lowering utils, which causes us to generate some different code.
The result of which I've tried to mitigate in earlier combine patches.

Differential Revision: https://reviews.llvm.org/D86665

											
										
										
											2020-08-25 05:10:38 +08:00
+								#include "llvm/CodeGen/SwitchLoweringUtils.h"
-												Fix a bunch more layering of CodeGen headers that are in Target

All these headers already depend on CodeGen headers so moving them into
CodeGen fixes the layering (since CodeGen depends on Target, not the
other way around).

llvm-svn: 318490

											
										
										
											2017-11-17 09:07:10 +08:00
+								#include "llvm/CodeGen/TargetFrameLowering.h"
-												[GlobalISel] When a tail call is emitted in a block, stop translating it

This fixes a crash in tail call translation caused by assume and lifetime_end
intrinsics.

It's possible to have instructions other than a return after a tail call which
will still have `Analysis::isInTailCallPosition` return true. (Namely,
lifetime_end and assume intrinsics.)

If we emit a tail call, we should stop translating instructions in the block.
Otherwise, we can end up emitting an extra return, or dead instructions in
general. This makes the verifier unhappy, and is generally unfortunate for
codegen.

This also removes the code from AArch64CallLowering that checks if we have a
tail call when lowering a return. This is covered by the new code now.

Also update call-translator-tail-call.ll to show that we now properly tail call
in the presence of lifetime_end and assume.

Differential Revision: https://reviews.llvm.org/D67415

llvm-svn: 371572

											
										
										
											2019-09-11 07:34:45 +08:00
+								#include "llvm/CodeGen/TargetInstrInfo.h"
-												Fix a bunch more layering of CodeGen headers that are in Target

All these headers already depend on CodeGen headers so moving them into
CodeGen fixes the layering (since CodeGen depends on Target, not the
other way around).

llvm-svn: 318490

											
										
										
											2017-11-17 09:07:10 +08:00
+								#include "llvm/CodeGen/TargetLowering.h"
-												[IRTranslator] Do not abort when the target wants to fall back.

Every pass in the GlobalISel pipeline will need to do something similar.

llvm-svn: 279886

											
										
										
											2016-08-27 07:49:05 +08:00
+								#include "llvm/CodeGen/TargetPassConfig.h"
-												Fix a bunch more layering of CodeGen headers that are in Target

All these headers already depend on CodeGen headers so moving them into
CodeGen fixes the layering (since CodeGen depends on Target, not the
other way around).

llvm-svn: 318490

											
										
										
											2017-11-17 09:07:10 +08:00
+								#include "llvm/CodeGen/TargetRegisterInfo.h"
 								#include "llvm/CodeGen/TargetSubtargetInfo.h"
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								#include "llvm/IR/BasicBlock.h"
-												[GlobalISel][IRTranslator] Use RPO traversal when visiting blocks to translate.

Previously we were just visiting the blocks in the function in IR order, which
is rather arbitrary. Therefore we wouldn't always visit defs before uses, but
the translation code relies on this assumption in some places.

Only codegen change seen in tests is an elision of a redundant copy.

Fixes PR38396

llvm-svn: 338476

											
										
										
											2018-08-01 10:17:42 +08:00
+								#include "llvm/IR/CFG.h"
-												[GlobalISel][IRTranslator] Teach the pass how to translate Add instructions.

llvm-svn: 260549

											
										
										
											2016-02-12 01:51:31 +08:00
+								#include "llvm/IR/Constant.h"
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								#include "llvm/IR/Constants.h"
 								#include "llvm/IR/DataLayout.h"
-												GlobalISel: support debug intrinsics.

The translation scheme is mostly cribbed from FastISel, and it's not entirely
convincing semantically. But it does seem to work in the common cases and allow
variables to be printed so it can't be all wrong.

llvm-svn: 293228

											
										
										
											2017-01-27 07:39:14 +08:00
+								#include "llvm/IR/DebugInfo.h"
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								#include "llvm/IR/DerivedTypes.h"
-												[GlobalISel] More detailed skeleton for the IRTranslator.

llvm-svn: 260456

											
										
										
											2016-02-11 06:59:27 +08:00
+								#include "llvm/IR/Function.h"
-												GlobalISel: translate GEP instructions.

Unlike SDag, we use a separate G_GEP instruction (much simplified, only taking
a single byte offset) to preserve the pointer type information through
selection.

llvm-svn: 281205

											
										
										
											2016-09-12 19:20:22 +08:00
+								#include "llvm/IR/GetElementPtrTypeIterator.h"
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								#include "llvm/IR/InlineAsm.h"
-												[GlobalISel][IRTranslator] Generate better conditional branch lowering.

This is a port of the functionality from SelectionDAG, which tries to find
a tree of conditions from compares that are then combined using OR or AND,
before using that result as the input to a branch. Instead of naively
lowering the code as is, this change converts that into a sequence of
conditional branches on the sub-expressions of the tree.

Like SelectionDAG, we re-use the case block codegen functionality from
the switch lowering utils, which causes us to generate some different code.
The result of which I've tried to mitigate in earlier combine patches.

Differential Revision: https://reviews.llvm.org/D86665

											
										
										
											2020-08-25 05:10:38 +08:00
+								#include "llvm/IR/InstrTypes.h"
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								#include "llvm/IR/Instructions.h"
-												GlobalISel: support translation of intrinsic calls.

These come in two variants for now: G_INTRINSIC and G_INTRINSIC_W_SIDE_EFFECTS.
We may decide to split the latter up with finer-grained restrictions later, if
necessary.

llvm-svn: 277224

											
										
										
											2016-07-30 06:32:36 +08:00
+								#include "llvm/IR/IntrinsicInst.h"
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								#include "llvm/IR/Intrinsics.h"
 								#include "llvm/IR/LLVMContext.h"
 								#include "llvm/IR/Metadata.h"
-												[GlobalISel][IRTranslator] Generate better conditional branch lowering.

This is a port of the functionality from SelectionDAG, which tries to find
a tree of conditions from compares that are then combined using OR or AND,
before using that result as the input to a branch. Instead of naively
lowering the code as is, this change converts that into a sequence of
conditional branches on the sub-expressions of the tree.

Like SelectionDAG, we re-use the case block codegen functionality from
the switch lowering utils, which causes us to generate some different code.
The result of which I've tried to mitigate in earlier combine patches.

Differential Revision: https://reviews.llvm.org/D86665

											
										
										
											2020-08-25 05:10:38 +08:00
+								#include "llvm/IR/PatternMatch.h"
-												[GlobalISel][IRTranslator] Teach the pass how to translate Add instructions.

llvm-svn: 260549

											
										
										
											2016-02-12 01:51:31 +08:00
+								#include "llvm/IR/Type.h"
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								#include "llvm/IR/User.h"
-												[GlobalISel][IRTranslator] Teach the pass how to translate Add instructions.

llvm-svn: 260549

											
										
										
											2016-02-12 01:51:31 +08:00
+								#include "llvm/IR/Value.h"
-												Sink all InitializePasses.h includes

This file lists every pass in LLVM, and is included by Pass.h, which is
very popular. Every time we add, remove, or rename a pass in LLVM, it
caused lots of recompilation.

I found this fact by looking at this table, which is sorted by the
number of times a file was changed over the last 100,000 git commits
multiplied by the number of object files that depend on it in the
current checkout:
  recompiles    touches affected_files  header
  342380        95      3604    llvm/include/llvm/ADT/STLExtras.h
  314730        234     1345    llvm/include/llvm/InitializePasses.h
  307036        118     2602    llvm/include/llvm/ADT/APInt.h
  213049        59      3611    llvm/include/llvm/Support/MathExtras.h
  170422        47      3626    llvm/include/llvm/Support/Compiler.h
  162225        45      3605    llvm/include/llvm/ADT/Optional.h
  158319        63      2513    llvm/include/llvm/ADT/Triple.h
  140322        39      3598    llvm/include/llvm/ADT/StringRef.h
  137647        59      2333    llvm/include/llvm/Support/Error.h
  131619        73      1803    llvm/include/llvm/Support/FileSystem.h

Before this change, touching InitializePasses.h would cause 1345 files
to recompile. After this change, touching it only causes 550 compiles in
an incremental rebuild.

Reviewers: bkramer, asbirlea, bollu, jdoerfert

Differential Revision: https://reviews.llvm.org/D70211

											
										
										
											2019-11-14 05:15:01 +08:00
+								#include "llvm/InitializePasses.h"
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								#include "llvm/MC/MCContext.h"
 								#include "llvm/Pass.h"
 								#include "llvm/Support/Casting.h"
 								#include "llvm/Support/CodeGen.h"
 								#include "llvm/Support/Debug.h"
 								#include "llvm/Support/ErrorHandling.h"
 								#include "llvm/Support/LowLevelTypeImpl.h"
 								#include "llvm/Support/MathExtras.h"
 								#include "llvm/Support/raw_ostream.h"
-												GlobalISel: support translation of intrinsic calls.

These come in two variants for now: G_INTRINSIC and G_INTRINSIC_W_SIDE_EFFECTS.
We may decide to split the latter up with finer-grained restrictions later, if
necessary.

llvm-svn: 277224

											
										
										
											2016-07-30 06:32:36 +08:00
+								#include "llvm/Target/TargetIntrinsicInfo.h"
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								#include "llvm/Target/TargetMachine.h"
 								#include <algorithm>
 								#include <cassert>
-												[GlobalISel] Enable usage of BranchProbabilityInfo in IRTranslator.

We weren't using this before, so none of the MachineFunction CFG edges had the
branch probability information added. As a result, block placement later in the
pipeline was flying blind.

This is enabled only with optimizations enabled like SelectionDAG.

Differential Revision: https://reviews.llvm.org/D86824

											
										
										
											2020-08-29 07:21:34 +08:00
+								#include <cstddef>
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
+								#include <cstdint>
 								#include <iterator>
 								#include <string>
 								#include <utility>
 								#include <vector>
-												[GlobalISel] More detailed skeleton for the IRTranslator.

llvm-svn: 260456

											
										
										
											2016-02-11 06:59:27 +08:00
 								#define DEBUG_TYPE "irtranslator"
-												[GlobalISel] Add the proper cmake plumbing.

This patch adds the necessary plumbing to cmake to build the sources related to
GlobalISel.

To build the sources related to GlobalISel, we need to add -DBUILD_GLOBAL_ISEL=ON.
By default, this is OFF, thus GlobalISel sources will not impact people that do
not explicitly opt-in.

Differential Revision: http://reviews.llvm.org/D15983

llvm-svn: 258344

											
										
										
											2016-01-21 04:58:56 +08:00
+								using namespace llvm;
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								static cl::opt<bool>
 								    EnableCSEInIRTranslator("enable-cse-in-irtranslator",
 								                            cl::desc("Should enable CSE in irtranslator"),
 								                            cl::Optional, cl::init(false));
-												[GlobalISel] Add the proper cmake plumbing.

This patch adds the necessary plumbing to cmake to build the sources related to
GlobalISel.

To build the sources related to GlobalISel, we need to add -DBUILD_GLOBAL_ISEL=ON.
By default, this is OFF, thus GlobalISel sources will not impact people that do
not explicitly opt-in.

Differential Revision: http://reviews.llvm.org/D15983

llvm-svn: 258344

											
										
										
											2016-01-21 04:58:56 +08:00
+								char IRTranslator::ID = 0;
-												[CodeGen] Fix some Clang-tidy modernize-use-using and Include What You Use warnings; other minor fixes (NFC).

llvm-svn: 306341

											
										
										
											2017-06-27 06:44:03 +08:00
-												[IRTranslator] Do not abort when the target wants to fall back.

Every pass in the GlobalISel pipeline will need to do something similar.

llvm-svn: 279886

											
										
										
											2016-08-27 07:49:05 +08:00
+								INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
 								                false, false)
 								INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
-												[GlobalISel] Add missing pass dependencies for IRTranslator

The IRTranslator depends on the branch probability info pass when the
optimization level is not None, and it always depends on
the StackProtector pass.

We have to explicitly call out pass dependencies otherwise the pass manager
may not be able to schedule the IRTranslator.

Before this patch, we were lucky because previous passes depend on the branch
probability info pass (like the Global Variable Optimization) and the stack
protector pass is initialized in initializeCodeGen.
However, if the target has a custom pipeline without any passes like Global
Variable Optimization, the pipeline creation will fail, at least because of
the branch probability info pass dependency (it is unlikely that
initializeCodeGen is not called).

This patch adds the missing dependencies to the IRTranslator.

Differential Revision: https://reviews.llvm.org/D89063

											
										
										
											2020-10-09 04:54:24 +08:00
+								INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
 								INITIALIZE_PASS_DEPENDENCY(StackProtector)
-												[IRTranslator] Do not abort when the target wants to fall back.

Every pass in the GlobalISel pipeline will need to do something similar.

llvm-svn: 279886

											
										
										
											2016-08-27 07:49:05 +08:00
+								INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
-												GlobalISel: remove redundant ';'s. NFC

llvm-svn: 276723

											
										
										
											2016-07-26 11:29:18 +08:00
+								                false, false)
-												[GlobalISel] Add the proper cmake plumbing.

This patch adds the necessary plumbing to cmake to build the sources related to
GlobalISel.

To build the sources related to GlobalISel, we need to add -DBUILD_GLOBAL_ISEL=ON.
By default, this is OFF, thus GlobalISel sources will not impact people that do
not explicitly opt-in.

Differential Revision: http://reviews.llvm.org/D15983

llvm-svn: 258344

											
										
										
											2016-01-21 04:58:56 +08:00
-												[GlobalISel] Emit opt remarks on isel fallbacks.

Having more fine-grained information on the specific construct that
caused us to fall back is valuable for large-scale data collection.

We still have the fallback warning, which is also used for FastISel.
We still need to remove the fallback warning, and teach FastISel to also
emit remarks (it currently has a combination of the warning, stats, and
debug prints: the remarks could unify all three).

The abort-on-fallback path could also be better handled using remarks:
one could imagine a "-Rpass-error", analogous to "-Werror", which would
promote missed/failed remarks to errors.  It's not clear whether that
would be useful for other remarks though, so we're not there yet.

llvm-svn: 296013

											
										
										
											2017-02-24 05:05:42 +08:00
+								static void reportTranslationError(MachineFunction &MF,
 								                                   const TargetPassConfig &TPC,
 								                                   OptimizationRemarkEmitter &ORE,
 								                                   OptimizationRemarkMissed &R) {
 								  MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
 								  // Print the function name explicitly if we don't have a debug location (which
 								  // makes the diagnostic less useful) or if we're going to emit a raw error.
 								  if (!R.getLocation().isValid() || TPC.isGlobalISelAbortEnabled())
 								    R << (" (in function: " + MF.getName() + ")").str();
 								  if (TPC.isGlobalISelAbortEnabled())
 								    report_fatal_error(R.getMsg());
 								  else
 								    ORE.emit(R);
-												GlobalISel: improve error diagnostics when IRTranslation fails.

llvm-svn: 286190

											
										
										
											2016-11-08 09:12:17 +08:00
+								}
-												[GlobalISel] Enable usage of BranchProbabilityInfo in IRTranslator.

We weren't using this before, so none of the MachineFunction CFG edges had the
branch probability information added. As a result, block placement later in the
pipeline was flying blind.

This is enabled only with optimizations enabled like SelectionDAG.

Differential Revision: https://reviews.llvm.org/D86824

											
										
										
											2020-08-29 07:21:34 +08:00
+								IRTranslator::IRTranslator(CodeGenOpt::Level optlevel)
 								    : MachineFunctionPass(ID), OptLevel(optlevel) {}
-												[GlobalISel][IRTranslator] Change the ownership of the MIRBuilder field.

llvm-svn: 260551

											
										
										
											2016-02-12 01:53:23 +08:00
-												[globalisel][irtranslator] Verify that DILocations aren't lost in translation

Summary:
Also fix a couple bugs where DILocations are lost. EntryBuilder wasn't passing
on debug locations for PHI's, constants, GLOBAL_VALUE, etc.

Reviewers: aprantl, vsk, bogner, aditya_nandakumar, volkan, rtereshin, aemerson

Reviewed By: aemerson

Subscribers: aemerson, rovka, kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D53740

llvm-svn: 345743

											
										
										
											2018-11-01 01:31:23 +08:00
+								#ifndef NDEBUG
-												Give helper classes/functions local linkage. NFC.

llvm-svn: 351016

											
										
										
											2019-01-13 02:36:22 +08:00
+								namespace {
-												[globalisel][irtranslator] Verify that DILocations aren't lost in translation

Summary:
Also fix a couple bugs where DILocations are lost. EntryBuilder wasn't passing
on debug locations for PHI's, constants, GLOBAL_VALUE, etc.

Reviewers: aprantl, vsk, bogner, aditya_nandakumar, volkan, rtereshin, aemerson

Reviewed By: aemerson

Subscribers: aemerson, rovka, kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D53740

llvm-svn: 345743

											
										
										
											2018-11-01 01:31:23 +08:00
+								/// Verify that every instruction created has the same DILocation as the
 								/// instruction being translated.
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								class DILocationVerifier : public GISelChangeObserver {
-												[globalisel][irtranslator] Verify that DILocations aren't lost in translation

Summary:
Also fix a couple bugs where DILocations are lost. EntryBuilder wasn't passing
on debug locations for PHI's, constants, GLOBAL_VALUE, etc.

Reviewers: aprantl, vsk, bogner, aditya_nandakumar, volkan, rtereshin, aemerson

Reviewed By: aemerson

Subscribers: aemerson, rovka, kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D53740

llvm-svn: 345743

											
										
										
											2018-11-01 01:31:23 +08:00
+								  const Instruction *CurrInst = nullptr;
 								public:
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								  DILocationVerifier() = default;
 								  ~DILocationVerifier() = default;
-												[globalisel][irtranslator] Verify that DILocations aren't lost in translation

Summary:
Also fix a couple bugs where DILocations are lost. EntryBuilder wasn't passing
on debug locations for PHI's, constants, GLOBAL_VALUE, etc.

Reviewers: aprantl, vsk, bogner, aditya_nandakumar, volkan, rtereshin, aemerson

Reviewed By: aemerson

Subscribers: aemerson, rovka, kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D53740

llvm-svn: 345743

											
										
										
											2018-11-01 01:31:23 +08:00
 								  const Instruction *getCurrentInst() const { return CurrInst; }
 								  void setCurrentInst(const Instruction *Inst) { CurrInst = Inst; }
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								  void erasingInstr(MachineInstr &MI) override {}
 								  void changingInstr(MachineInstr &MI) override {}
 								  void changedInstr(MachineInstr &MI) override {}
 								  void createdInstr(MachineInstr &MI) override {
-												[globalisel][irtranslator] Verify that DILocations aren't lost in translation

Summary:
Also fix a couple bugs where DILocations are lost. EntryBuilder wasn't passing
on debug locations for PHI's, constants, GLOBAL_VALUE, etc.

Reviewers: aprantl, vsk, bogner, aditya_nandakumar, volkan, rtereshin, aemerson

Reviewed By: aemerson

Subscribers: aemerson, rovka, kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D53740

llvm-svn: 345743

											
										
										
											2018-11-01 01:31:23 +08:00
+								    assert(getCurrentInst() && "Inserted instruction without a current MI");
 								    // Only print the check message if we're actually checking it.
 								#ifndef NDEBUG
 								    LLVM_DEBUG(dbgs() << "Checking DILocation from " << *CurrInst
 								                      << " was copied to " << MI);
 								#endif
-												[GlobalISel][IRTranslator] Add debug loc with line 0 to constants emitted into the entry block.

Constants, including G_GLOBAL_VALUE, are all emitted into the entry block which
lets us use the vreg def assuming it dominates all other users. However, it can
cause jumpy debug behaviour since the DebugLoc attached to these MIs are from
a user instruction that could be in a different block.

Fixes PR40887.

Differential Revision: https://reviews.llvm.org/D63286

llvm-svn: 363331

											
										
										
											2019-06-14 06:15:35 +08:00
+								    // We allow insts in the entry block to have a debug loc line of 0 because
 								    // they could have originated from constants, and we don't want a jumpy
 								    // debug experience.
 								    assert((CurrInst->getDebugLoc() == MI.getDebugLoc() ||
 								            MI.getDebugLoc().getLine() == 0) &&
-												[globalisel][irtranslator] Verify that DILocations aren't lost in translation

Summary:
Also fix a couple bugs where DILocations are lost. EntryBuilder wasn't passing
on debug locations for PHI's, constants, GLOBAL_VALUE, etc.

Reviewers: aprantl, vsk, bogner, aditya_nandakumar, volkan, rtereshin, aemerson

Reviewed By: aemerson

Subscribers: aemerson, rovka, kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D53740

llvm-svn: 345743

											
										
										
											2018-11-01 01:31:23 +08:00
+								           "Line info was not transferred to all instructions");
 								  }
 								};
-												Give helper classes/functions local linkage. NFC.

llvm-svn: 351016

											
										
										
											2019-01-13 02:36:22 +08:00
+								} // namespace
-												[globalisel][irtranslator] Verify that DILocations aren't lost in translation

Summary:
Also fix a couple bugs where DILocations are lost. EntryBuilder wasn't passing
on debug locations for PHI's, constants, GLOBAL_VALUE, etc.

Reviewers: aprantl, vsk, bogner, aditya_nandakumar, volkan, rtereshin, aemerson

Reviewed By: aemerson

Subscribers: aemerson, rovka, kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D53740

llvm-svn: 345743

											
										
										
											2018-11-01 01:31:23 +08:00
+								#endif // ifndef NDEBUG
-												[IRTranslator] Do not abort when the target wants to fall back.

Every pass in the GlobalISel pipeline will need to do something similar.

llvm-svn: 279886

											
										
										
											2016-08-27 07:49:05 +08:00
+								void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
-												CodeGen: Remove pipeline dependencies on StackProtector; NFC

This re-applies r336929 with a fix to accommodate the Mips target
scheduling multiple SelectionDAG instances into the pass pipeline.

PrologEpilogInserter and StackColoring depend on the StackProtector analysis
being alive from the point it is run until PEI, which requires that they are all
scheduled in the same FunctionPassManager. Inserting a (machine) ModulePass
between StackProtector and PEI results in these passes being in separate
FunctionPassManagers and the StackProtector is not available for PEI.

PEI and StackColoring don't use much information from the StackProtector pass,
so transferring the required information to MachineFrameInfo is cleaner than
keeping the StackProtector pass around. This commit moves the SSP layout
information to MFI instead of keeping it in the pass.

This patch set (D37580, D37581, D37582, D37583, D37584, D37585, D37586, D37587)
is a first draft of the pagerando implementation described in
http://lists.llvm.org/pipermail/llvm-dev/2017-June/113794.html.

Patch by Stephen Crane <sjc@immunant.com>

Differential Revision: https://reviews.llvm.org/D49256

llvm-svn: 336964

											
										
										
											2018-07-13 08:08:38 +08:00
+								  AU.addRequired<StackProtector>();
-												[IRTranslator] Do not abort when the target wants to fall back.

Every pass in the GlobalISel pipeline will need to do something similar.

llvm-svn: 279886

											
										
										
											2016-08-27 07:49:05 +08:00
+								  AU.addRequired<TargetPassConfig>();
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								  AU.addRequired<GISelCSEAnalysisWrapperPass>();
-												[GlobalISel] Enable usage of BranchProbabilityInfo in IRTranslator.

We weren't using this before, so none of the MachineFunction CFG edges had the
branch probability information added. As a result, block placement later in the
pipeline was flying blind.

This is enabled only with optimizations enabled like SelectionDAG.

Differential Revision: https://reviews.llvm.org/D86824

											
										
										
											2020-08-29 07:21:34 +08:00
+								  if (OptLevel != CodeGenOpt::None)
 								    AU.addRequired<BranchProbabilityInfoWrapperPass>();
-												CodeGen: Remove pipeline dependencies on StackProtector; NFC

This re-applies r336929 with a fix to accommodate the Mips target
scheduling multiple SelectionDAG instances into the pass pipeline.

PrologEpilogInserter and StackColoring depend on the StackProtector analysis
being alive from the point it is run until PEI, which requires that they are all
scheduled in the same FunctionPassManager. Inserting a (machine) ModulePass
between StackProtector and PEI results in these passes being in separate
FunctionPassManagers and the StackProtector is not available for PEI.

PEI and StackColoring don't use much information from the StackProtector pass,
so transferring the required information to MachineFrameInfo is cleaner than
keeping the StackProtector pass around. This commit moves the SSP layout
information to MFI instead of keeping it in the pass.

This patch set (D37580, D37581, D37582, D37583, D37584, D37585, D37586, D37587)
is a first draft of the pagerando implementation described in
http://lists.llvm.org/pipermail/llvm-dev/2017-June/113794.html.

Patch by Stephen Crane <sjc@immunant.com>

Differential Revision: https://reviews.llvm.org/D49256

llvm-svn: 336964

											
										
										
											2018-07-13 08:08:38 +08:00
+								  getSelectionDAGFallbackAnalysisUsage(AU);
-												[IRTranslator] Do not abort when the target wants to fall back.

Every pass in the GlobalISel pipeline will need to do something similar.

llvm-svn: 279886

											
										
										
											2016-08-27 07:49:05 +08:00
+								  MachineFunctionPass::getAnalysisUsage(AU);
 								}
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								IRTranslator::ValueToVRegInfo::VRegListT &
 								IRTranslator::allocateVRegs(const Value &Val) {
-												[GlobalISel][IRTranslator] Fix a crash when the use of an extractvalue is a non-dominated metadata use.

We don't expect uses to come before defs in the CFG, so allocateVRegs() asserted.

Fixes PR48211

											
										
										
											2020-12-13 06:57:36 +08:00
+								  auto VRegsIt = VMap.findVRegs(Val);
 								  if (VRegsIt != VMap.vregs_end())
 								    return *VRegsIt->second;
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  auto *Regs = VMap.getVRegs(Val);
 								  auto *Offsets = VMap.getOffsets(Val);
 								  SmallVector<LLT, 4> SplitTys;
 								  computeValueLLTs(*DL, *Val.getType(), SplitTys,
 								                   Offsets->empty() ? Offsets : nullptr);
 								  for (unsigned i = 0; i < SplitTys.size(); ++i)
 								    Regs->push_back(0);
 								  return *Regs;
 								}
-												CodeGen: Introduce a class for registers

Avoids using a plain unsigned for registers throughout codegen.
Doesn't attempt to change every register use, just something a little
more than the set needed to build after changing the return type of
MachineOperand::getReg().

llvm-svn: 364191

											
										
										
											2019-06-24 23:50:29 +08:00
+								ArrayRef<Register> IRTranslator::getOrCreateVRegs(const Value &Val) {
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  auto VRegsIt = VMap.findVRegs(Val);
 								  if (VRegsIt != VMap.vregs_end())
 								    return *VRegsIt->second;
 								  if (Val.getType()->isVoidTy())
 								    return *VMap.getVRegs(Val);
 								  // Create entry for this type.
 								  auto *VRegs = VMap.getVRegs(Val);
 								  auto *Offsets = VMap.getOffsets(Val);
-												GlobalISel: rework getOrCreateVReg to avoid double lookup. NFC.

Thanks to Quentin for suggesting the refactoring.

llvm-svn: 293087

											
										
										
											2017-01-26 04:58:22 +08:00
 								  assert(Val.getType()->isSized() &&
 								         "Don't know how to create an empty vreg");
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  SmallVector<LLT, 4> SplitTys;
 								  computeValueLLTs(*DL, *Val.getType(), SplitTys,
 								                   Offsets->empty() ? Offsets : nullptr);
 								  if (!isa<Constant>(Val)) {
 								    for (auto Ty : SplitTys)
 								      VRegs->push_back(MRI->createGenericVirtualRegister(Ty));
 								    return *VRegs;
 								  }
 								  if (Val.getType()->isAggregateType()) {
 								    // UndefValue, ConstantAggregateZero
 								    auto &C = cast<Constant>(Val);
 								    unsigned Idx = 0;
 								    while (auto Elt = C.getAggregateElement(Idx++)) {
 								      auto EltRegs = getOrCreateVRegs(*Elt);
-												Use llvm::copy. NFC

llvm-svn: 347126

											
										
										
											2018-11-17 09:44:25 +08:00
+								      llvm::copy(EltRegs, std::back_inserter(*VRegs));
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								    }
 								  } else {
 								    assert(SplitTys.size() == 1 && "unexpectedly split LLT");
 								    VRegs->push_back(MRI->createGenericVirtualRegister(SplitTys[0]));
 								    bool Success = translate(cast<Constant>(Val), VRegs->front());
-												GlobalISel: rework getOrCreateVReg to avoid double lookup. NFC.

Thanks to Quentin for suggesting the refactoring.

llvm-svn: 293087

											
										
										
											2017-01-26 04:58:22 +08:00
+								    if (!Success) {
-												[GlobalISel] Emit opt remarks on isel fallbacks.

Having more fine-grained information on the specific construct that
caused us to fallback is valuable for large-scale data collection.

We still have the fallback warning, that's also used for FastISel.
We still need to remove the fallback warning, and teach FastISel to also
emit remarks (it currently has a combination of the warning, stats, and
debug prints: the remarks could unify all three).

The abort-on-fallback path could also be better handled using remarks:
one could imagine a "-Rpass-error", analogous to "-Werror", which would
promote missed/failed remarks to errors.  It's not clear whether that
would be useful for other remarks though, so we're not there yet.

llvm-svn: 296013

											
										
										
											2017-02-24 05:05:42 +08:00
+								      OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
-												MachineFunction: Return reference from getFunction(); NFC

The Function can never be nullptr so we can return a reference.

llvm-svn: 320884

											
										
										
											2017-12-16 06:22:58 +08:00
+								                                 MF->getFunction().getSubprogram(),
 								                                 &MF->getFunction().getEntryBlock());
-												[GlobalISel] Emit opt remarks on isel fallbacks.

Having more fine-grained information on the specific construct that
caused us to fallback is valuable for large-scale data collection.

We still have the fallback warning, that's also used for FastISel.
We still need to remove the fallback warning, and teach FastISel to also
emit remarks (it currently has a combination of the warning, stats, and
debug prints: the remarks could unify all three).

The abort-on-fallback path could also be better handled using remarks:
one could imagine a "-Rpass-error", analogous to "-Werror", which would
promote missed/failed remarks to errors.  It's not clear whether that
would be useful for other remarks though, so we're not there yet.

llvm-svn: 296013

											
										
										
											2017-02-24 05:05:42 +08:00
+								      R << "unable to translate constant: " << ore::NV("Type", Val.getType());
 								      reportTranslationError(*MF, *TPC, *ORE, R);
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								      return *VRegs;
-												GlobalISel: first translation support for Constants.

For now put them all in the entry block. This should be correct but may give
poor runtime performance. Hopefully MachineSinking combined with
isReMaterializable can solve those issues, but if not the interface is sound
enough to support alternatives.

llvm-svn: 278168

											
										
										
											2016-08-10 05:28:04 +08:00
+								    }
-												[GlobalISel][IRTranslator] Teach the pass how to translate Add instructions.

llvm-svn: 260549

											
										
										
											2016-02-12 01:51:31 +08:00
+								  }
-												GlobalISel: prevent heap use-after-free when looking up VReg.

Translating the constant can create more VRegs, which can invalidate the
reference into the DenseMap. So we have to look up the value again after all
that's happened.

llvm-svn: 292675

											
										
										
											2017-01-21 07:25:17 +08:00
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  return *VRegs;
-												[GlobalISel][IRTranslator] Teach the pass how to translate Add instructions.

llvm-svn: 260549

											
										
										
											2016-02-12 01:51:31 +08:00
+								}
-												GlobalISel: translate stack protector intrinsics

llvm-svn: 285614

											
										
										
											2016-11-01 02:30:59 +08:00
+								int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
-												GlobalISel: Use result of find rather than rechecking map

											
										
										
											2020-07-29 21:42:03 +08:00
+								  auto MapEntry = FrameIndices.find(&AI);
 								  if (MapEntry != FrameIndices.end())
 								    return MapEntry->second;
-												GlobalISel: translate stack protector intrinsics

llvm-svn: 285614

											
										
										
											2016-11-01 02:30:59 +08:00
-												[AArch64] Fix issues with large arrays on stack

Summary:
This patch fixes a few issues when large arrays are allocated on the
stack. Currently, clang has inconsistent behaviour, for debug builds
there is an assertion failure when the array size on stack is around 2GB
but there is no assertion when the stack is around 8GB. For release
builds there is no assertion, the compilation succeeds but generates
incorrect code. The incorrect code generated is due to using
int/unsigned int instead of their 64-bit counterparts. This patch,
1) Removes the assertion in frame legality check.
2) Converts int/unsigned int in some places to the 64-bit variants. This
helps in generating correct code and removes the inconsistent behaviour.
3) Adds a test which runs without optimisations.

Reviewers: sdesmalen, efriedma, fhahn, aemerson

Reviewed By: efriedma

Subscribers: eli.friedman, fpetrogalli, kristof.beyls, hiraditya,
llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70496

											
										
										
											2019-11-20 20:45:26 +08:00
+								  uint64_t ElementSize = DL->getTypeAllocSize(AI.getAllocatedType());
 								  uint64_t Size =
-												GlobalISel: translate stack protector intrinsics

llvm-svn: 285614

											
										
										
											2016-11-01 02:30:59 +08:00
+								      ElementSize * cast<ConstantInt>(AI.getArraySize())->getZExtValue();
 								  // Always allocate at least one byte.
-												[AArch64] Fix issues with large arrays on stack

Summary:
This patch fixes a few issues when large arrays are allocated on the
stack. Currently, clang has inconsistent behaviour, for debug builds
there is an assertion failure when the array size on stack is around 2GB
but there is no assertion when the stack is around 8GB. For release
builds there is no assertion, the compilation succeeds but generates
incorrect code. The incorrect code generated is due to using
int/unsigned int instead of their 64-bit counterparts. This patch,
1) Removes the assertion in frame legality check.
2) Converts int/unsigned int in some places to the 64-bit variants. This
helps in generating correct code and removes the inconsistent behaviour.
3) Adds a test which runs without optimisations.

Reviewers: sdesmalen, efriedma, fhahn, aemerson

Reviewed By: efriedma

Subscribers: eli.friedman, fpetrogalli, kristof.beyls, hiraditya,
llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70496

											
										
										
											2019-11-20 20:45:26 +08:00
+								  Size = std::max<uint64_t>(Size, 1u);
-												GlobalISel: translate stack protector intrinsics

llvm-svn: 285614

											
										
										
											2016-11-01 02:30:59 +08:00
 								  int &FI = FrameIndices[&AI];
-												[Alignment][NFC] Migrate MachineFrameInfo::CreateStackObject to Align

This patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790

Differential Revision: https://reviews.llvm.org/D82894

											
										
										
											2020-07-01 15:28:11 +08:00
+								  FI = MF->getFrameInfo().CreateStackObject(Size, AI.getAlign(), false, &AI);
-												GlobalISel: translate stack protector intrinsics

llvm-svn: 285614

											
										
										
											2016-11-01 02:30:59 +08:00
+								  return FI;
 								}
-												[Alignment][NFC] Simplify IRTranslator::getMemOpAlignment

Summary:
This patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790

Reviewers: courbet

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77078

											
										
										
											2020-03-31 02:50:49 +08:00
+								Align IRTranslator::getMemOpAlign(const Instruction &I) {
-												StoreInst should store Align, not MaybeAlign

This is D77454, except for stores.  All the infrastructure work was done
for loads, so the remaining changes necessary are relatively small.

Differential Revision: https://reviews.llvm.org/D79968

											
										
										
											2020-05-15 05:48:10 +08:00
+								  if (const StoreInst *SI = dyn_cast<StoreInst>(&I))
 								    return SI->getAlign();
-												Use getAlign() on atomicrmw/cmpxchg instructions, now that it's available.

These locations were missed as part of adding alignment to the
instructions, and were still making their own alignment assumptions.

											
										
										
											2021-02-23 04:36:20 +08:00
+								  if (const LoadInst *LI = dyn_cast<LoadInst>(&I))
-												[Alignment] Remove unnecessary getValueOrABITypeAlignment calls (NFC)

Now that load/store alignment is required, we no longer need most
of them. Also switch the getLoadStoreAlignment() helper to return
Align instead of MaybeAlign.

											
										
										
											2020-05-18 04:14:42 +08:00
+								    return LI->getAlign();
-												Use getAlign() on atomicrmw/cmpxchg instructions, now that it's available.

These locations were missed as part of adding alignment to the
instructions, and were still making their own alignment assumptions.

											
										
										
											2021-02-23 04:36:20 +08:00
+								  if (const AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I))
 								    return AI->getAlign();
 								  if (const AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I))
 								    return AI->getAlign();
-												[Alignment][NFC] Simplify IRTranslator::getMemOpAlignment

Summary:
This patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790

Reviewers: courbet

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77078

											
										
										
											2020-03-31 02:50:49 +08:00
+								  OptimizationRemarkMissed R("gisel-irtranslator", "", &I);
 								  R << "unable to translate memop: " << ore::NV("Opcode", &I);
 								  reportTranslationError(*MF, *TPC, *ORE, R);
 								  return Align(1);
-												GlobalISel: add generic load and store instructions.

Pretty straightforward, the only oddity is the MachineMemOperand (which it's
surprisingly difficult to share code for).

llvm-svn: 276799

											
										
										
											2016-07-27 04:23:26 +08:00
+								}
-												[GlobalISel] Preserve IR block layout.

It makes the output function layout more predictable;  the layout has
an effect on performance, we don't want it to be at the mercy of the
translator's visitation order and such.
The predictable output is also easier to digest.

getOrCreateBB isn't appropriately named anymore, as it never needs to
create anything.  Rename it and extract the MBB creation logic out of it.

A couple tests were sensitive to the order. Update them.

llvm-svn: 297868

											
										
										
											2017-03-16 02:22:33 +08:00
+								MachineBasicBlock &IRTranslator::getMBB(const BasicBlock &BB) {
-												[IRTranslator] Update getOrCreateBB API to use references.
A null basic block is invalid, so just pass a reference.

llvm-svn: 263260

											
										
										
											2016-03-12 01:27:43 +08:00
+								  MachineBasicBlock *&MBB = BBToMBB[&BB];
-												[GlobalISel] Preserve IR block layout.

It makes the output function layout more predictable;  the layout has
an effect on performance, we don't want it to be at the mercy of the
translator's visitation order and such.
The predictable output is also easier to digest.

getOrCreateBB isn't appropriately named anymore, as it never needs to
create anything.  Rename it and extract the MBB creation logic out of it.

A couple tests were sensitive to the order. Update them.

llvm-svn: 297868

											
										
										
											2017-03-16 02:22:33 +08:00
+								  assert(MBB && "BasicBlock was not encountered before");
-												[GlobalISel][IRTranslator] Teach the pass how to translate Add instructions.

llvm-svn: 260549

											
										
										
											2016-02-12 01:51:31 +08:00
+								  return *MBB;
 								}
-												[GlobalISel] track predecessor mapping during switch lowering.

Correctly populating Machine PHIs relies on knowing exactly how the IR level
CFG was lowered to MachineIR. This needs to be tracked by any translation
phases that meddle (currently only SwitchInst handling).

This reapplies r291973 which was reverted because of testing failures. Fixes:

 + Don't return an ArrayRef to a local temporary.
 + Incorporate Kristof's suggested comment improvements.

llvm-svn: 292278

											
										
										
											2017-01-18 06:13:50 +08:00
+								void IRTranslator::addMachineCFGPred(CFGEdge Edge, MachineBasicBlock *NewPred) {
 								  assert(NewPred && "new predecessor must be a real MachineBasicBlock");
 								  MachinePreds[Edge].push_back(NewPred);
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
 								                                     MachineIRBuilder &MIRBuilder) {
-												[GlobalISel] More detailed skeleton for the IRTranslator.

llvm-svn: 260456

											
										
										
											2016-02-11 06:59:27 +08:00
+								  // Get or create a virtual register for each value.
 								  // Unless the value is a Constant => loadimm cst?
 								  // or inline constant each time?
 								  // Creation of a virtual register needs to have a size.
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								  Register Op0 = getOrCreateVReg(*U.getOperand(0));
 								  Register Op1 = getOrCreateVReg(*U.getOperand(1));
 								  Register Res = getOrCreateVReg(U);
-												Move IR flag handling directly into builder calls for cases translated from Instructions in GlobalIsel

Reviewers: aditya_nandakumar, volkan

Reviewed By: aditya_nandakumar

Subscribers: rovka, kristof.beyls, volkan, Petar.Avramovic

Differential Revision: https://reviews.llvm.org/D57630

llvm-svn: 353336

											
										
										
											2019-02-07 03:57:06 +08:00
+								  uint16_t Flags = 0;
-												Copy utilities updated and added for MI flags

Summary: This patch adds a GlobalIsel copy utility into MI for flags and updates the instruction emitter for the SDAG path.  Some tests show new behavior and I added one for GlobalIsel which mirrors an SDAG test for handling nsw/nuw.

Reviewers: spatel, wristow, arsenm

Reviewed By: arsenm

Subscribers: wdng

Differential Revision: https://reviews.llvm.org/D52006

llvm-svn: 342576

											
										
										
											2018-09-20 02:52:08 +08:00
+								  if (isa<Instruction>(U)) {
 								    const Instruction &I = cast<Instruction>(U);
-												Move IR flag handling directly into builder calls for cases translated from Instructions in GlobalIsel

Reviewers: aditya_nandakumar, volkan

Reviewed By: aditya_nandakumar

Subscribers: rovka, kristof.beyls, volkan, Petar.Avramovic

Differential Revision: https://reviews.llvm.org/D57630

llvm-svn: 353336

											
										
										
											2019-02-07 03:57:06 +08:00
+								    Flags = MachineInstr::copyFlagsFromInstruction(I);
-												Copy utilities updated and added for MI flags

Summary: This patch adds a GlobalIsel copy utility into MI for flags and updates the instruction emitter for the SDAG path.  Some tests show new behavior and I added one for GlobalIsel which mirrors an SDAG test for handling nsw/nuw.

Reviewers: spatel, wristow, arsenm

Reviewed By: arsenm

Subscribers: wdng

Differential Revision: https://reviews.llvm.org/D52006

llvm-svn: 342576

											
										
										
											2018-09-20 02:52:08 +08:00
+								  }
-												Move IR flag handling directly into builder calls for cases translated from Instructions in GlobalIsel

Reviewers: aditya_nandakumar, volkan

Reviewed By: aditya_nandakumar

Subscribers: rovka, kristof.beyls, volkan, Petar.Avramovic

Differential Revision: https://reviews.llvm.org/D57630

llvm-svn: 353336

											
										
										
											2019-02-07 03:57:06 +08:00
 								  MIRBuilder.buildInstr(Opcode, {Res}, {Op0, Op1}, Flags);
-												[GlobalISel][IRTranslator] Teach the pass how to translate Add instructions.

llvm-svn: 260549

											
										
										
											2016-02-12 01:51:31 +08:00
+								  return true;
-												[GlobalISel] Add the proper cmake plumbing.

This patch adds the necessary plumbing to cmake to build the sources related to
GlobalISel.

To build the sources related to GlobalISel, we need to add -DBUILD_GLOBAL_ISEL=ON.
By default, this is OFF, thus GlobalISel sources will not impact people that do
not explicitly opt-in.

Differential Revision: http://reviews.llvm.org/D15983

llvm-svn: 258344

											
										
										
											2016-01-21 04:58:56 +08:00
+								}
-												[GlobalISel] Add G_ABS

This is equivalent to the new llvm.abs intrinsic added by D84125 with
is_int_min_poison=0.

Differential Revision: https://reviews.llvm.org/D85718

											
										
										
											2020-08-11 17:50:58 +08:00
+								bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U,
 								                                    MachineIRBuilder &MIRBuilder) {
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								  Register Op0 = getOrCreateVReg(*U.getOperand(0));
 								  Register Res = getOrCreateVReg(U);
-												Propagate fmf in IRTranslate for fneg

Summary: This case is related to D63405 in that we need to be propagating FMF on negates.

Reviewers: volkan, spatel, arsenm

Reviewed By: arsenm

Subscribers: wdng, javed.absar

Differential Revision: https://reviews.llvm.org/D63458

llvm-svn: 363631

											
										
										
											2019-06-18 07:19:40 +08:00
+								  uint16_t Flags = 0;
 								  if (isa<Instruction>(U)) {
 								    const Instruction &I = cast<Instruction>(U);
 								    Flags = MachineInstr::copyFlagsFromInstruction(I);
 								  }
-												[GlobalISel] Add G_ABS

This is equivalent to the new llvm.abs intrinsic added by D84125 with
is_int_min_poison=0.

Differential Revision: https://reviews.llvm.org/D85718

											
										
										
											2020-08-11 17:50:58 +08:00
+								  MIRBuilder.buildInstr(Opcode, {Res}, {Op0}, Flags);
-												[IR] Add a dedicated FNeg IR Instruction

The IEEE-754 Standard makes it clear that fneg(x) and
fsub(-0.0, x) are two different operations. The former is a bitwise
operation, while the latter is an arithmetic operation. This patch
creates a dedicated FNeg IR Instruction to model that behavior.

Differential Revision: https://reviews.llvm.org/D53877

llvm-svn: 346774

											
										
										
											2018-11-14 02:15:47 +08:00
+								  return true;
 								}
-												[GlobalISel] Add G_ABS

This is equivalent to the new llvm.abs intrinsic added by D84125 with
is_int_min_poison=0.

Differential Revision: https://reviews.llvm.org/D85718

											
										
										
											2020-08-11 17:50:58 +08:00
+								bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
 								  return translateUnaryOp(TargetOpcode::G_FNEG, U, MIRBuilder);
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateCompare(const User &U,
 								                                    MachineIRBuilder &MIRBuilder) {
-												IRTranslator - silence static analyzer null dereference warnings. NFCI.

The CmpInst::getType() calls can be replaced by just using User::getType() that it was dyn_cast from, and we then need to assert that any default predicate cases came from the CmpInst.

llvm-svn: 374716

											
										
										
											2019-10-13 19:29:35 +08:00
+								  auto *CI = dyn_cast<CmpInst>(&U);
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								  Register Op0 = getOrCreateVReg(*U.getOperand(0));
 								  Register Op1 = getOrCreateVReg(*U.getOperand(1));
 								  Register Res = getOrCreateVReg(U);
-												GlobalISel: translate floating-point comparisons

llvm-svn: 279319

											
										
										
											2016-08-20 04:48:16 +08:00
+								  CmpInst::Predicate Pred =
 								      CI ? CI->getPredicate() : static_cast<CmpInst::Predicate>(
 								                                    cast<ConstantExpr>(U).getPredicate());
 								  if (CmpInst::isIntPredicate(Pred))
-												GlobalISel: move type information to MachineRegisterInfo.

We want each register to have a canonical type, which means the best place to
store this is in MachineRegisterInfo rather than on every MachineInstr that
happens to use or define that register.

Most changes following from this are pretty simple (you need an MRI anyway if
you're going to be doing any transformations, so just check the type there).
But legalization doesn't really want to check redundant operands (when, for
example, a G_ADD only ever has one type) so I've made use of MCInstrDesc's
operand type field to encode these constraints and limit legalization's work.

As an added bonus, more validation is possible, both in MachineVerifier and
MachineIRBuilder (coming soon).

llvm-svn: 281035

											
										
										
											2016-09-09 19:46:34 +08:00
+								    MIRBuilder.buildICmp(Pred, Res, Op0, Op1);
-												GlobalISel: correctly handle trivial fcmp predicates.

It makes sense to only do them once in IRTranslator rather than making everyone
deal with them.

llvm-svn: 297304

											
										
										
											2017-03-09 02:49:54 +08:00
+								  else if (Pred == CmpInst::FCMP_FALSE)
-												[GlobalISel] Avoid translating synthetic constants to new G_CONSTANTS.

Currently, we create a G_CONSTANT for every "synthetic" integer
constant operand (for instance, for the G_GEP offset).
Instead, share the G_CONSTANTs we might have created by going through
the ValueToVReg machinery.

When we're emitting synthetic constants, we do need to get Constants from
the context.  One could argue that we shouldn't modify the context at
all (for instance, this means that we're going to use a tad more memory
if the constant wasn't used elsewhere), but constants are mostly
harmless.  We currently do this for extractvalue and all.

For constant fcmp, this does mean we'll emit an extra COPY, which is not
necessarily more optimal than an extra materialized constant.
But that preserves the current intended design of uniqued G_CONSTANTs,
and the rematerialization problem exists elsewhere and should be
resolved with a single coherent solution.

llvm-svn: 297875

											
										
										
											2017-03-16 03:21:11 +08:00
+								    MIRBuilder.buildCopy(
-												IRTranslator - silence static analyzer null dereference warnings. NFCI.

The CmpInst::getType() calls can be replaced by just using User::getType() that it was dyn_cast from, and we then need to assert that any default predicate cases came from the CmpInst.

llvm-svn: 374716

											
										
										
											2019-10-13 19:29:35 +08:00
+								        Res, getOrCreateVReg(*Constant::getNullValue(U.getType())));
-												[GlobalISel] Avoid translating synthetic constants to new G_CONSTANTS.

Currently, we create a G_CONSTANT for every "synthetic" integer
constant operand (for instance, for the G_GEP offset).
Instead, share the G_CONSTANTs we might have created by going through
the ValueToVReg machinery.

When we're emitting synthetic constants, we do need to get Constants from
the context.  One could argue that we shouldn't modify the context at
all (for instance, this means that we're going to use a tad more memory
if the constant wasn't used elsewhere), but constants are mostly
harmless.  We currently do this for extractvalue and all.

For constant fcmp, this does mean we'll emit an extra COPY, which is not
necessarily more optimal than an extra materialized constant.
But that preserves the current intended design of uniqued G_CONSTANTs,
and the rematerialization problem exists elsewhere and should be
resolved with a single coherent solution.

llvm-svn: 297875

											
										
										
											2017-03-16 03:21:11 +08:00
+								  else if (Pred == CmpInst::FCMP_TRUE)
 								    MIRBuilder.buildCopy(
-												IRTranslator - silence static analyzer null dereference warnings. NFCI.

The CmpInst::getType() calls can be replaced by just using User::getType() that it was dyn_cast from, and we then need to assert that any default predicate cases came from the CmpInst.

llvm-svn: 374716

											
										
										
											2019-10-13 19:29:35 +08:00
+								        Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType())));
-												Add FMF management to common fp intrinsics in GlobalIsel

Summary: This is the initial code change to facilitate managing FMF flags from Instructions to MI wrt Intrinsics in Global Isel.  Eventually the GlobalObserver interface will be added as well, where FMF additions can be tracked for the builder and CSE.

Reviewers: aditya_nandakumar, bogner

Reviewed By: bogner

Subscribers: rovka, kristof.beyls, javed.absar

Differential Revision: https://reviews.llvm.org/D55668

llvm-svn: 349514

											
										
										
											2018-12-19 01:54:52 +08:00
+								  else {
-												IRTranslator - silence static analyzer null dereference warnings. NFCI.

The CmpInst::getType() calls can be replaced by just using User::getType() that it was dyn_cast from, and we then need to assert that any default predicate cases came from the CmpInst.

llvm-svn: 374716

											
										
										
											2019-10-13 19:29:35 +08:00
+								    assert(CI && "Instruction should be CmpInst");
-												[GlobalISel] Use more MachineIRBuilder helper methods

Reviewers: arsenm, nhaehnle

Subscribers: wdng, rovka, hiraditya, volkan, Petar.Avramovic, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D72833

											
										
										
											2020-01-16 20:09:48 +08:00
+								    MIRBuilder.buildFCmp(Pred, Res, Op0, Op1,
 								                         MachineInstr::copyFlagsFromInstruction(*CI));
-												Add FMF management to common fp intrinsics in GlobalIsel

Summary: This is the initial code change to facilitate managing FMF flags from Instructions to MI wrt Intrinsics in Global Isel.  Eventually the GlobalObserver interface will be added as well, where FMF additions can be tracked for the builder and CSE.

Reviewers: aditya_nandakumar, bogner

Reviewed By: bogner

Subscribers: rovka, kristof.beyls, javed.absar

Differential Revision: https://reviews.llvm.org/D55668

llvm-svn: 349514

											
										
										
											2018-12-19 01:54:52 +08:00
+								  }
-												GlobalISel: translate floating-point comparisons

llvm-svn: 279319

											
										
										
											2016-08-20 04:48:16 +08:00
-												GlobalISel: support irtranslation of icmp instructions.

llvm-svn: 278969

											
										
										
											2016-08-18 04:25:25 +08:00
+								  return true;
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
-												GlobalISel: support same ConstantExprs as Instructions.

It's more than just inttoptr, but the others can't be tested until we have
support for non-trivial constants (they currently get unavoidably folded to a
ConstantInt).

llvm-svn: 278303

											
										
										
											2016-08-11 07:02:41 +08:00
+								  const ReturnInst &RI = cast<ReturnInst>(U);
-												GlobalISel: make translate* functions take the most specialized class possible.

NFC.

llvm-svn: 277188

											
										
										
											2016-07-30 02:11:21 +08:00
+								  const Value *Ret = RI.getReturnValue();
-												[GlobalISel][IRTranslator] Fix crash during translation of zero sized loads/stores/args/returns.

This fixes PR35358.

rdar://35619533

Differential Revision: https://reviews.llvm.org/D40604

llvm-svn: 319465

											
										
										
											2017-12-01 04:06:02 +08:00
+								  if (Ret && DL->getTypeStoreSize(Ret->getType()) == 0)
 								    Ret = nullptr;
-												[GlobalISel] Rewrite CallLowering::lowerReturn to accept multiple VRegs per Value

This is logical continuation of https://reviews.llvm.org/D46018 (r332449)

Differential Revision: https://reviews.llvm.org/D49660

llvm-svn: 338685

											
										
										
											2018-08-02 16:33:31 +08:00
-												CodeGen: Introduce a class for registers

Avoids using a plain unsigned for registers throughout codegen.
Doesn't attempt to change every register use, just something a little
more than the set needed to build after changing the return type of
MachineOperand::getReg().

llvm-svn: 364191

											
										
										
											2019-06-24 23:50:29 +08:00
+								  ArrayRef<Register> VRegs;
-												[GlobalISel] Rewrite CallLowering::lowerReturn to accept multiple VRegs per Value

This is logical continuation of https://reviews.llvm.org/D46018 (r332449)

Differential Revision: https://reviews.llvm.org/D49660

llvm-svn: 338685

											
										
										
											2018-08-02 16:33:31 +08:00
+								  if (Ret)
 								    VRegs = getOrCreateVRegs(*Ret);
-												CodeGen: Introduce a class for registers

Avoids using a plain unsigned for registers throughout codegen.
Doesn't attempt to change every register use, just something a little
more than the set needed to build after changing the return type of
MachineOperand::getReg().

llvm-svn: 364191

											
										
										
											2019-06-24 23:50:29 +08:00
+								  Register SwiftErrorVReg = 0;
-												GlobalISel: support swifterror attribute on AArch64.

swifterror marks an argument as a register pretending to be a pointer, so we
need a guaranteed mem2reg-like analysis of its uses. Fortunately most of the
infrastructure can be reused from the DAG world.

llvm-svn: 361608

											
										
										
											2019-05-24 16:40:13 +08:00
+								  if (CLI->supportSwiftError() && SwiftError.getFunctionArg()) {
 								    SwiftErrorVReg = SwiftError.getOrCreateVRegUseAt(
 								        &RI, &MIRBuilder.getMBB(), SwiftError.getFunctionArg());
 								  }
-												[GlobalISel] Teach the IRTranslator how to lower returns.

llvm-svn: 260562

											
										
										
											2016-02-12 02:53:28 +08:00
+								  // The target may mess with the insertion point, but
 								  // this is not important as a return is the last instruction
 								  // of the block anyway.
-												[GlobalISel] Base implementation for sret demotion.

If the return values can't be lowered to registers
SelectionDAG performs the sret demotion. This patch
contains the basic implementation for the same in
the GlobalISel pipeline.

Furthermore, targets should bring relevant changes
during lowerFormalArguments, lowerReturn and
lowerCall to make use of this feature.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D92953

											
										
										
											2020-12-23 14:52:36 +08:00
+								  return CLI->lowerReturn(MIRBuilder, Ret, VRegs, FuncInfo, SwiftErrorVReg);
-												[GlobalISel] Teach the IRTranslator how to lower returns.

llvm-svn: 260562

											
										
										
											2016-02-12 02:53:28 +08:00
+								}
-												[GlobalISel][IRTranslator] Generate better conditional branch lowering.

This is a port of the functionality from SelectionDAG, which tries to find
a tree of conditions from compares that are then combined using OR or AND,
before using that result as the input to a branch. Instead of naively
lowering the code as is, this change converts that into a sequence of
conditional branches on the sub-expressions of the tree.

Like SelectionDAG, we re-use the case block codegen functionality from
the switch lowering utils, which causes us to generate some different code.
The result of which I've tried to mitigate in earlier combine patches.

Differential Revision: https://reviews.llvm.org/D86665

											
										
										
											2020-08-25 05:10:38 +08:00
+								void IRTranslator::emitBranchForMergedCondition(
 								    const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
 								    MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
 								    BranchProbability TProb, BranchProbability FProb, bool InvertCond) {
 								  // If the leaf of the tree is a comparison, merge the condition into
 								  // the caseblock.
 								  if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
 								    CmpInst::Predicate Condition;
 								    if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
 								      Condition = InvertCond ? IC->getInversePredicate() : IC->getPredicate();
 								    } else {
 								      const FCmpInst *FC = cast<FCmpInst>(Cond);
 								      Condition = InvertCond ? FC->getInversePredicate() : FC->getPredicate();
 								    }
 								    SwitchCG::CaseBlock CB(Condition, false, BOp->getOperand(0),
 								                           BOp->getOperand(1), nullptr, TBB, FBB, CurBB,
 								                           CurBuilder->getDebugLoc(), TProb, FProb);
 								    SL->SwitchCases.push_back(CB);
 								    return;
 								  }
 								  // Create a CaseBlock record representing this branch.
 								  CmpInst::Predicate Pred = InvertCond ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
 								  SwitchCG::CaseBlock CB(
 								      Pred, false, Cond, ConstantInt::getTrue(MF->getFunction().getContext()),
 								      nullptr, TBB, FBB, CurBB, CurBuilder->getDebugLoc(), TProb, FProb);
 								  SL->SwitchCases.push_back(CB);
 								}
 								static bool isValInBlock(const Value *V, const BasicBlock *BB) {
 								  if (const Instruction *I = dyn_cast<Instruction>(V))
 								    return I->getParent() == BB;
 								  return true;
 								}
 								void IRTranslator::findMergedConditions(
 								    const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
 								    MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
 								    Instruction::BinaryOps Opc, BranchProbability TProb,
 								    BranchProbability FProb, bool InvertCond) {
 								  using namespace PatternMatch;
 								  assert((Opc == Instruction::And || Opc == Instruction::Or) &&
 								         "Expected Opc to be AND/OR");
 								  // Skip over a `not` that is part of the tree, and remember to invert the
 								  // op and operands at the next level.
 								  Value *NotCond;
 								  if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
 								      isValInBlock(NotCond, CurBB->getBasicBlock())) {
 								    findMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
 								                         !InvertCond);
 								    return;
 								  }
 								  const Instruction *BOp = dyn_cast<Instruction>(Cond);
-												[CodeGen] recognize select form of and/ors when splitting branch conditions

Recently a few patches are made to move towards using select i1 instead of and/or i1 to represent "a && b"/"a || b" in C/C++.
"a && b" in C/C++ does not evaluate b if a is false whereas 'and a, b' in IR evaluates b and uses its result regardless of the result of a.
This is problematic because it can cause miscompilation if b was an erroneous operation (https://llvm.org/pr48353).
In C/C++, the result is simply false because b is not evaluated, but in IR the result is poison.
The discussion at D93065 has more context about this.

This patch makes two branch-splitting optimizations (one in SelectionDAGBuilder, one in CodeGenPrepare) recognize
select form of and/or as well using m_LogicalAnd/Or.
Since it is CodeGen, I think this is semantically ok (at least as safe as what codegen already did).

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D93853

											
										
										
											2021-01-01 03:46:10 +08:00
+								  const Value *BOpOp0, *BOpOp1;
-												[GlobalISel][IRTranslator] Generate better conditional branch lowering.

This is a port of the functionality from SelectionDAG, which tries to find
a tree of conditions from compares that are then combined using OR or AND,
before using that result as the input to a branch. Instead of naively
lowering the code as is, this change converts that into a sequence of
conditional branches on the sub-expressions of the tree.

Like SelectionDAG, we re-use the case block codegen functionality from
the switch lowering utils, which causes us to generate some different code.
The result of which I've tried to mitigate in earlier combine patches.

Differential Revision: https://reviews.llvm.org/D86665

											
										
										
											2020-08-25 05:10:38 +08:00
+								  // Compute the effective opcode for Cond, taking into account whether it needs
 								  // to be inverted, e.g.
 								  //   and (not (or A, B)), C
 								  // gets lowered as
 								  //   and (and (not A, not B), C)
-												[CodeGen] recognize select form of and/ors when splitting branch conditions

Recently a few patches are made to move towards using select i1 instead of and/or i1 to represent "a && b"/"a || b" in C/C++.
"a && b" in C/C++ does not evaluate b if a is false whereas 'and a, b' in IR evaluates b and uses its result regardless of the result of a.
This is problematic because it can cause miscompilation if b was an erroneous operation (https://llvm.org/pr48353).
In C/C++, the result is simply false because b is not evaluated, but in IR the result is poison.
The discussion at D93065 has more context about this.

This patch makes two branch-splitting optimizations (one in SelectionDAGBuilder, one in CodeGenPrepare) recognize
select form of and/or as well using m_LogicalAnd/Or.
Since it is CodeGen, I think this is semantically ok (at least as safe as what codegen already did).

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D93853

											
										
										
											2021-01-01 03:46:10 +08:00
+								  Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0;
-												[GlobalISel][IRTranslator] Generate better conditional branch lowering.

This is a port of the functionality from SelectionDAG, which tries to find
a tree of conditions from compares that are then combined using OR or AND,
before using that result as the input to a branch. Instead of naively
lowering the code as is, this change converts that into a sequence of
conditional branches on the sub-expressions of the tree.

Like SelectionDAG, we re-use the case block codegen functionality from
the switch lowering utils, which causes us to generate some different code.
The result of which I've tried to mitigate in earlier combine patches.

Differential Revision: https://reviews.llvm.org/D86665

											
										
										
											2020-08-25 05:10:38 +08:00
+								  if (BOp) {
-												[CodeGen] recognize select form of and/ors when splitting branch conditions

Recently a few patches are made to move towards using select i1 instead of and/or i1 to represent "a && b"/"a || b" in C/C++.
"a && b" in C/C++ does not evaluate b if a is false whereas 'and a, b' in IR evaluates b and uses its result regardless of the result of a.
This is problematic because it can cause miscompilation if b was an erroneous operation (https://llvm.org/pr48353).
In C/C++, the result is simply false because b is not evaluated, but in IR the result is poison.
The discussion at D93065 has more context about this.

This patch makes two branch-splitting optimizations (one in SelectionDAGBuilder, one in CodeGenPrepare) recognize
select form of and/or as well using m_LogicalAnd/Or.
Since it is CodeGen, I think this is semantically ok (at least as safe as what codegen already did).

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D93853

											
										
										
											2021-01-01 03:46:10 +08:00
+								    BOpc = match(BOp, m_LogicalAnd(m_Value(BOpOp0), m_Value(BOpOp1)))
 								               ? Instruction::And
 								               : (match(BOp, m_LogicalOr(m_Value(BOpOp0), m_Value(BOpOp1)))
 								                      ? Instruction::Or
 								                      : (Instruction::BinaryOps)0);
-												[GlobalISel][IRTranslator] Generate better conditional branch lowering.

This is a port of the functionality from SelectionDAG, which tries to find
a tree of conditions from compares that are then combined using OR or AND,
before using that result as the input to a branch. Instead of naively
lowering the code as is, this change converts that into a sequence of
conditional branches on the sub-expressions of the tree.

Like SelectionDAG, we re-use the case block codegen functionality from
the switch lowering utils, which causes us to generate some different code.
The result of which I've tried to mitigate in earlier combine patches.

Differential Revision: https://reviews.llvm.org/D86665

											
										
										
											2020-08-25 05:10:38 +08:00
+								    if (InvertCond) {
 								      if (BOpc == Instruction::And)
 								        BOpc = Instruction::Or;
 								      else if (BOpc == Instruction::Or)
 								        BOpc = Instruction::And;
 								    }
 								  }
 								  // If this node is not part of the or/and tree, emit it as a branch.
-												[CodeGen] recognize select form of and/ors when splitting branch conditions

Recently a few patches are made to move towards using select i1 instead of and/or i1 to represent "a && b"/"a || b" in C/C++.
"a && b" in C/C++ does not evaluate b if a is false whereas 'and a, b' in IR evaluates b and uses its result regardless of the result of a.
This is problematic because it can cause miscompilation if b was an erroneous operation (https://llvm.org/pr48353).
In C/C++, the result is simply false because b is not evaluated, but in IR the result is poison.
The discussion at D93065 has more context about this.

This patch makes two branch-splitting optimizations (one in SelectionDAGBuilder, one in CodeGenPrepare) recognize
select form of and/or as well using m_LogicalAnd/Or.
Since it is CodeGen, I think this is semantically ok (at least as safe as what codegen already did).

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D93853

											
										
										
											2021-01-01 03:46:10 +08:00
+								  // Note that all nodes in the tree should have same opcode.
 								  bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse();
 								  if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() ||
 								      !isValInBlock(BOpOp0, CurBB->getBasicBlock()) ||
 								      !isValInBlock(BOpOp1, CurBB->getBasicBlock())) {
-												[GlobalISel][IRTranslator] Generate better conditional branch lowering.

This is a port of the functionality from SelectionDAG, which tries to find
a tree of conditions from compares that are then combined using OR or AND,
before using that result as the input to a branch. Instead of naively
lowering the code as is, this change converts that into a sequence of
conditional branches on the sub-expressions of the tree.

Like SelectionDAG, we re-use the case block codegen functionality from
the switch lowering utils, which causes us to generate some different code.
The result of which I've tried to mitigate in earlier combine patches.

Differential Revision: https://reviews.llvm.org/D86665

											
										
										
											2020-08-25 05:10:38 +08:00
+								    emitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, TProb, FProb,
 								                                 InvertCond);
 								    return;
 								  }
 								  //  Create TmpBB after CurBB.
 								  MachineFunction::iterator BBI(CurBB);
 								  MachineBasicBlock *TmpBB =
 								      MF->CreateMachineBasicBlock(CurBB->getBasicBlock());
 								  CurBB->getParent()->insert(++BBI, TmpBB);
 								  if (Opc == Instruction::Or) {
 								    // Codegen X | Y as:
 								    // BB1:
 								    //   jmp_if_X TBB
 								    //   jmp TmpBB
 								    // TmpBB:
 								    //   jmp_if_Y TBB
 								    //   jmp FBB
 								    //
 								    // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
 								    // The requirement is that
 								    //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
 								    //     = TrueProb for original BB.
 								    // Assuming the original probabilities are A and B, one choice is to set
 								    // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
 								    // A/(1+B) and 2B/(1+B). This choice assumes that
 								    //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
 								    // Another choice is to assume TrueProb for BB1 equals to TrueProb for
 								    // TmpBB, but the math is more complicated.
 								    auto NewTrueProb = TProb / 2;
 								    auto NewFalseProb = TProb / 2 + FProb;
 								    // Emit the LHS condition.
-												[CodeGen] recognize select form of and/ors when splitting branch conditions

Recently, a few patches were made to move towards using select i1 instead of and/or i1 to represent "a && b"/"a || b" in C/C++.
"a && b" in C/C++ does not evaluate b if a is false whereas 'and a, b' in IR evaluates b and uses its result regardless of the result of a.
This is problematic because it can cause miscompilation if b was an erroneous operation (https://llvm.org/pr48353).
In C/C++, the result is simply false because b is not evaluated, but in IR the result is poison.
The discussion at D93065 has more context about this.

This patch makes two branch-splitting optimizations (one in SelectionDAGBuilder, one in CodeGenPrepare) recognize
select form of and/or as well using m_LogicalAnd/Or.
Since it is CodeGen, I think this is semantically ok (at least as safe as what codegen already did).

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D93853

											
										
										
											2021-01-01 03:46:10 +08:00
+								    findMergedConditions(BOpOp0, TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb,
 								                         NewFalseProb, InvertCond);
-												[GlobalISel][IRTranslator] Generate better conditional branch lowering.

This is a port of the functionality from SelectionDAG, which tries to find
a tree of conditions from compares that are then combined using OR or AND,
before using that result as the input to a branch. Instead of naively
lowering the code as is, this change converts that into a sequence of
conditional branches on the sub-expressions of the tree.

Like SelectionDAG, we re-use the case block codegen functionality from
the switch lowering utils, which causes us to generate some different code.
The result of which I've tried to mitigate in earlier combine patches.

Differential Revision: https://reviews.llvm.org/D86665

											
										
										
											2020-08-25 05:10:38 +08:00
 								    // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
 								    SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
 								    BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
 								    // Emit the RHS condition into TmpBB.
-												[CodeGen] recognize select form of and/ors when splitting branch conditions

Recently, a few patches were made to move towards using select i1 instead of and/or i1 to represent "a && b"/"a || b" in C/C++.
"a && b" in C/C++ does not evaluate b if a is false whereas 'and a, b' in IR evaluates b and uses its result regardless of the result of a.
This is problematic because it can cause miscompilation if b was an erroneous operation (https://llvm.org/pr48353).
In C/C++, the result is simply false because b is not evaluated, but in IR the result is poison.
The discussion at D93065 has more context about this.

This patch makes two branch-splitting optimizations (one in SelectionDAGBuilder, one in CodeGenPrepare) recognize
select form of and/or as well using m_LogicalAnd/Or.
Since it is CodeGen, I think this is semantically ok (at least as safe as what codegen already did).

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D93853

											
										
										
											2021-01-01 03:46:10 +08:00
+								    findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
 								                         Probs[1], InvertCond);
-												[GlobalISel][IRTranslator] Generate better conditional branch lowering.

This is a port of the functionality from SelectionDAG, which tries to find
a tree of conditions from compares that are then combined using OR or AND,
before using that result as the input to a branch. Instead of naively
lowering the code as is, this change converts that into a sequence of
conditional branches on the sub-expressions of the tree.

Like SelectionDAG, we re-use the case block codegen functionality from
the switch lowering utils, which causes us to generate some different code.
The result of which I've tried to mitigate in earlier combine patches.

Differential Revision: https://reviews.llvm.org/D86665

											
										
										
											2020-08-25 05:10:38 +08:00
+								  } else {
 								    assert(Opc == Instruction::And && "Unknown merge op!");
 								    // Codegen X & Y as:
 								    // BB1:
 								    //   jmp_if_X TmpBB
 								    //   jmp FBB
 								    // TmpBB:
 								    //   jmp_if_Y TBB
 								    //   jmp FBB
 								    //
 								    //  This requires creation of TmpBB after CurBB.
 								    // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
 								    // The requirement is that
 								    //   FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
 								    //     = FalseProb for original BB.
 								    // Assuming the original probabilities are A and B, one choice is to set
 								    // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
 								    // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
 								    // TrueProb for BB1 * FalseProb for TmpBB.
 								    auto NewTrueProb = TProb + FProb / 2;
 								    auto NewFalseProb = FProb / 2;
 								    // Emit the LHS condition.
-												[CodeGen] recognize select form of and/ors when splitting branch conditions

Recently, a few patches were made to move towards using select i1 instead of and/or i1 to represent "a && b"/"a || b" in C/C++.
"a && b" in C/C++ does not evaluate b if a is false whereas 'and a, b' in IR evaluates b and uses its result regardless of the result of a.
This is problematic because it can cause miscompilation if b was an erroneous operation (https://llvm.org/pr48353).
In C/C++, the result is simply false because b is not evaluated, but in IR the result is poison.
The discussion at D93065 has more context about this.

This patch makes two branch-splitting optimizations (one in SelectionDAGBuilder, one in CodeGenPrepare) recognize
select form of and/or as well using m_LogicalAnd/Or.
Since it is CodeGen, I think this is semantically ok (at least as safe as what codegen already did).

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D93853

											
										
										
											2021-01-01 03:46:10 +08:00
+								    findMergedConditions(BOpOp0, TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb,
 								                         NewFalseProb, InvertCond);
-												[GlobalISel][IRTranslator] Generate better conditional branch lowering.

This is a port of the functionality from SelectionDAG, which tries to find
a tree of conditions from compares that are then combined using OR or AND,
before using that result as the input to a branch. Instead of naively
lowering the code as is, this change converts that into a sequence of
conditional branches on the sub-expressions of the tree.

Like SelectionDAG, we re-use the case block codegen functionality from
the switch lowering utils, which causes us to generate some different code.
The result of which I've tried to mitigate in earlier combine patches.

Differential Revision: https://reviews.llvm.org/D86665

											
										
										
											2020-08-25 05:10:38 +08:00
 								    // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
 								    SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
 								    BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
 								    // Emit the RHS condition into TmpBB.
-												[CodeGen] recognize select form of and/ors when splitting branch conditions

Recently, a few patches were made to move towards using select i1 instead of and/or i1 to represent "a && b"/"a || b" in C/C++.
"a && b" in C/C++ does not evaluate b if a is false whereas 'and a, b' in IR evaluates b and uses its result regardless of the result of a.
This is problematic because it can cause miscompilation if b was an erroneous operation (https://llvm.org/pr48353).
In C/C++, the result is simply false because b is not evaluated, but in IR the result is poison.
The discussion at D93065 has more context about this.

This patch makes two branch-splitting optimizations (one in SelectionDAGBuilder, one in CodeGenPrepare) recognize
select form of and/or as well using m_LogicalAnd/Or.
Since it is CodeGen, I think this is semantically ok (at least as safe as what codegen already did).

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D93853

											
										
										
											2021-01-01 03:46:10 +08:00
+								    findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
 								                         Probs[1], InvertCond);
-												[GlobalISel][IRTranslator] Generate better conditional branch lowering.

This is a port of the functionality from SelectionDAG, which tries to find
a tree of conditions from compares that are then combined using OR or AND,
before using that result as the input to a branch. Instead of naively
lowering the code as is, this change converts that into a sequence of
conditional branches on the sub-expressions of the tree.

Like SelectionDAG, we re-use the case block codegen functionality from
the switch lowering utils, which causes us to generate some different code.
The result of which I've tried to mitigate in earlier combine patches.

Differential Revision: https://reviews.llvm.org/D86665

											
										
										
											2020-08-25 05:10:38 +08:00
+								  }
 								}
 								/// Decide whether the condition described by \p Cases should be emitted as a
 								/// chain of conditional branches. Returns false only for the two-case shapes
 								/// described below, which are expected to fold into a single comparison.
 								bool IRTranslator::shouldEmitAsBranches(
 								    const std::vector<SwitchCG::CaseBlock> &Cases) {
 								  // Anything other than exactly two cases is always emitted as branches.
 								  if (Cases.size() != 2)
 								    return true;
 								  const SwitchCG::CaseBlock &First = Cases[0];
 								  const SwitchCG::CaseBlock &Second = Cases[1];
 								  // Two comparisons of the same pair of values (in either operand order) that
 								  // are or'd or and'd together will get folded into a single comparison, so
 								  // don't emit two blocks for them.
 								  const bool SameOperands =
 								      First.CmpLHS == Second.CmpLHS && First.CmpRHS == Second.CmpRHS;
 								  const bool SwappedOperands =
 								      First.CmpRHS == Second.CmpLHS && First.CmpLHS == Second.CmpRHS;
 								  if (SameOperands || SwappedOperands)
 								    return false;
 								  // Handle: (X != null) | (Y != null) --> (X|Y) != 0
 								  // Handle: (X == null) & (Y == null) --> (X|Y) == 0
 								  // Both cases must compare against the same null constant with the same
 								  // predicate before either pattern can apply.
 								  if (First.CmpRHS != Second.CmpRHS ||
 								      First.PredInfo.Pred != Second.PredInfo.Pred)
 								    return true;
 								  if (!isa<Constant>(First.CmpRHS) ||
 								      !cast<Constant>(First.CmpRHS)->isNullValue())
 								    return true;
 								  const auto Pred = First.PredInfo.Pred;
 								  // The second case must be reached through the matching edge of the first:
 								  // the true edge for the '==' form, the false edge for the '!=' form.
 								  const bool ChainsOnTrue =
 								      Pred == CmpInst::ICMP_EQ && First.TrueBB == Second.ThisBB;
 								  const bool ChainsOnFalse =
 								      Pred == CmpInst::ICMP_NE && First.FalseBB == Second.ThisBB;
 								  return !(ChainsOnTrue || ChainsOnFalse);
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
-												GlobalISel: support same ConstantExprs as Instructions.

It's more than just inttoptr, but the others can't be tested until we have
support for non-trivial constants (they currently get unavoidably folded to a
ConstantInt).

llvm-svn: 278303

											
										
										
											2016-08-11 07:02:41 +08:00
+								  const BranchInst &BrInst = cast<BranchInst>(U);
-												[GlobalISel][IRTranslator] Generate better conditional branch lowering.

This is a port of the functionality from SelectionDAG, which tries to find
a tree of conditions from compares that are then combined using OR or AND,
before using that result as the input to a branch. Instead of naively
lowering the code as is, this change converts that into a sequence of
conditional branches on the sub-expressions of the tree.

Like SelectionDAG, we re-use the case block codegen functionality from
the switch lowering utils, which causes us to generate some different code.
The result of which I've tried to mitigate in earlier combine patches.

Differential Revision: https://reviews.llvm.org/D86665

											
										
										
											2020-08-25 05:10:38 +08:00
+								  auto &CurMBB = MIRBuilder.getMBB();
 								  auto *Succ0MBB = &getMBB(*BrInst.getSuccessor(0));
 								  if (BrInst.isUnconditional()) {
 								    // If the unconditional target is the layout successor, fallthrough.
 								    if (!CurMBB.isLayoutSuccessor(Succ0MBB))
 								      MIRBuilder.buildBr(*Succ0MBB);
 								    // Link successors.
 								    for (const BasicBlock *Succ : successors(&BrInst))
 								      CurMBB.addSuccessor(&getMBB(*Succ));
 								    return true;
-												[IRTranslator] Translate unconditional branches.

llvm-svn: 263265

											
										
										
											2016-03-12 01:28:03 +08:00
+								  }
-												GlobalISel: add generic conditional branch.

Just the basic equivalent to DAG's condbr for now, we'll get to things like
br_cc when we start doing more legalization.

llvm-svn: 277184

											
										
										
											2016-07-30 01:58:00 +08:00
-												[GlobalISel][IRTranslator] Generate better conditional branch lowering.

This is a port of the functionality from SelectionDAG, which tries to find
a tree of conditions from compares that are then combined using OR or AND,
before using that result as the input to a branch. Instead of naively
lowering the code as is, this change converts that into a sequence of
conditional branches on the sub-expressions of the tree.

Like SelectionDAG, we re-use the case block codegen functionality from
the switch lowering utils, which causes us to generate some different code.
The result of which I've tried to mitigate in earlier combine patches.

Differential Revision: https://reviews.llvm.org/D86665

											
										
										
											2020-08-25 05:10:38 +08:00
+								  // If this condition is one of the special cases we handle, do special stuff
 								  // now.
 								  const Value *CondVal = BrInst.getCondition();
 								  MachineBasicBlock *Succ1MBB = &getMBB(*BrInst.getSuccessor(1));
-												[GlobalISel] Don't translate br to layout successor.

MI can represent fallthrough to layout successor blocks, and our
post-isel representation uses that extensively.

We might as well use it too, to avoid translating and carrying along
unnecessary branches.

llvm-svn: 298459

											
										
										
											2017-03-22 07:42:50 +08:00
-												[GlobalISel][IRTranslator] Generate better conditional branch lowering.

This is a port of the functionality from SelectionDAG, which tries to find
a tree of conditions from compares that are then combined using OR or AND,
before using that result as the input to a branch. Instead of naively
lowering the code as is, this change converts that into a sequence of
conditional branches on the sub-expressions of the tree.

Like SelectionDAG, we re-use the case block codegen functionality from
the switch lowering utils, which causes us to generate some different code.
The result of which I've tried to mitigate in earlier combine patches.

Differential Revision: https://reviews.llvm.org/D86665

											
										
										
											2020-08-25 05:10:38 +08:00
+								  const auto &TLI = *MF->getSubtarget().getTargetLowering();
-												GlobalISel: add generic conditional branch.

Just the basic equivalent to DAG's condbr for now, we'll get to things like
br_cc when we start doing more legalization.

llvm-svn: 277184

											
										
										
											2016-07-30 01:58:00 +08:00
-												[GlobalISel][IRTranslator] Generate better conditional branch lowering.

This is a port of the functionality from SelectionDAG, which tries to find
a tree of conditions from compares that are then combined using OR or AND,
before using that result as the input to a branch. Instead of naively
lowering the code as is, this change converts that into a sequence of
conditional branches on the sub-expressions of the tree.

Like SelectionDAG, we re-use the case block codegen functionality from
the switch lowering utils, which causes us to generate some different code.
The result of which I've tried to mitigate in earlier combine patches.

Differential Revision: https://reviews.llvm.org/D86665

											
										
										
											2020-08-25 05:10:38 +08:00
+								  // If this is a series of conditions that are or'd or and'd together, emit
 								  // this as a sequence of branches instead of setcc's with and/or operations.
 								  // As long as jumps are not expensive (exceptions for multi-use logic ops,
 								  // unpredictable branches, and vector extracts because those jumps are likely
 								  // expensive for any target), this should improve performance.
 								  // For example, instead of something like:
 								  //     cmp A, B
 								  //     C = seteq
 								  //     cmp D, E
 								  //     F = setle
 								  //     or C, F
 								  //     jnz foo
 								  // Emit:
 								  //     cmp A, B
 								  //     je foo
 								  //     cmp D, E
 								  //     jle foo
 								  using namespace PatternMatch;
-												[CodeGen] recognize select form of and/ors when splitting branch conditions

Recently, a few patches were made to move towards using select i1 instead of and/or i1 to represent "a && b"/"a || b" in C/C++.
"a && b" in C/C++ does not evaluate b if a is false whereas 'and a, b' in IR evaluates b and uses its result regardless of the result of a.
This is problematic because it can cause miscompilation if b was an erroneous operation (https://llvm.org/pr48353).
In C/C++, the result is simply false because b is not evaluated, but in IR the result is poison.
The discussion at D93065 has more context about this.

This patch makes two branch-splitting optimizations (one in SelectionDAGBuilder, one in CodeGenPrepare) recognize
select form of and/or as well using m_LogicalAnd/Or.
Since it is CodeGen, I think this is semantically ok (at least as safe as what codegen already did).

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D93853

											
										
										
											2021-01-01 03:46:10 +08:00
+								  const Instruction *CondI = dyn_cast<Instruction>(CondVal);
 								  if (!TLI.isJumpExpensive() && CondI && CondI->hasOneUse() &&
 								      !BrInst.hasMetadata(LLVMContext::MD_unpredictable)) {
 								    Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0;
 								    Value *Vec;
 								    const Value *BOp0, *BOp1;
 								    if (match(CondI, m_LogicalAnd(m_Value(BOp0), m_Value(BOp1))))
 								      Opcode = Instruction::And;
 								    else if (match(CondI, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
 								      Opcode = Instruction::Or;
 								    if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
 								                    match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
 								      findMergedConditions(CondI, Succ0MBB, Succ1MBB, &CurMBB, &CurMBB, Opcode,
-												[GlobalISel][IRTranslator] Generate better conditional branch lowering.

This is a port of the functionality from SelectionDAG, which tries to find
a tree of conditions from compares that are then combined using OR or AND,
before using that result as the input to a branch. Instead of naively
lowering the code as is, this change converts that into a sequence of
conditional branches on the sub-expressions of the tree.

Like SelectionDAG, we re-use the case block codegen functionality from
the switch lowering utils, which causes us to generate some different code.
The result of which I've tried to mitigate in earlier combine patches.

Differential Revision: https://reviews.llvm.org/D86665

											
										
										
											2020-08-25 05:10:38 +08:00
+								                           getEdgeProbability(&CurMBB, Succ0MBB),
 								                           getEdgeProbability(&CurMBB, Succ1MBB),
 								                           /*InvertCond=*/false);
 								      assert(SL->SwitchCases[0].ThisBB == &CurMBB && "Unexpected lowering!");
 								      // Allow some cases to be rejected.
 								      if (shouldEmitAsBranches(SL->SwitchCases)) {
 								        // Emit the branch for this block.
 								        emitSwitchCase(SL->SwitchCases[0], &CurMBB, *CurBuilder);
 								        SL->SwitchCases.erase(SL->SwitchCases.begin());
 								        return true;
 								      }
 								      // Okay, we decided not to do this, remove any inserted MBB's and clear
 								      // SwitchCases.
 								      for (unsigned I = 1, E = SL->SwitchCases.size(); I != E; ++I)
 								        MF->erase(SL->SwitchCases[I].ThisBB);
 								      SL->SwitchCases.clear();
 								    }
 								  }
 								  // Create a CaseBlock record representing this branch.
 								  SwitchCG::CaseBlock CB(CmpInst::ICMP_EQ, false, CondVal,
 								                         ConstantInt::getTrue(MF->getFunction().getContext()),
 								                         nullptr, Succ0MBB, Succ1MBB, &CurMBB,
 								                         CurBuilder->getDebugLoc());
 								  // Use emitSwitchCase to actually insert the fast branch sequence for this
 								  // cond branch.
 								  emitSwitchCase(CB, &CurMBB, *CurBuilder);
-												[IRTranslator] Translate unconditional branches.

llvm-svn: 263265

											
										
										
											2016-03-12 01:28:03 +08:00
+								  return true;
 								}
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								/// Register \p Dst as a CFG successor of \p Src. When branch probability
 								/// info is available the edge carries \p Prob, or the probability looked up
 								/// via getEdgeProbability() if \p Prob is unknown; otherwise the edge is added
 								/// without any probability.
 								void IRTranslator::addSuccessorWithProb(MachineBasicBlock *Src,
 								                                        MachineBasicBlock *Dst,
 								                                        BranchProbability Prob) {
 								  if (FuncInfo.BPI) {
 								    // Only fall back to the looked-up edge probability when the caller did
 								    // not supply one.
 								    const BranchProbability EdgeProb =
 								        Prob.isUnknown() ? getEdgeProbability(Src, Dst) : Prob;
 								    Src->addSuccessor(Dst, EdgeProb);
 								    return;
 								  }
 								  // No BPI: record the edge without attaching a probability.
 								  Src->addSuccessorWithoutProb(Dst);
 								}
 								/// Return the probability of the edge from \p Src to \p Dst, expressed in
 								/// terms of the IR basic blocks the two machine blocks were created from.
 								BranchProbability
 								IRTranslator::getEdgeProbability(const MachineBasicBlock *Src,
 								                                 const MachineBasicBlock *Dst) const {
 								  const BasicBlock *FromBB = Src->getBasicBlock();
 								  const BasicBlock *ToBB = Dst->getBasicBlock();
 								  if (auto *BPI = FuncInfo.BPI)
 								    return BPI->getEdgeProbability(FromBB, ToBB);
 								  // If BPI is not available, set the default probability as 1 / N, where N is
 								  // the number of successors (clamped to at least 1 to avoid dividing by 0).
 								  const auto NumSuccs = std::max<uint32_t>(succ_size(FromBB), 1);
 								  return BranchProbability(1, NumSuccs);
 								}
 								bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) {
 								  using namespace SwitchCG;
 								  // Extract cases from the switch.
 								  const SwitchInst &SI = cast<SwitchInst>(U);
 								  BranchProbabilityInfo *BPI = FuncInfo.BPI;
 								  CaseClusterVector Clusters;
 								  Clusters.reserve(SI.getNumCases());
 								  for (auto &I : SI.cases()) {
 								    MachineBasicBlock *Succ = &getMBB(*I.getCaseSuccessor());
 								    assert(Succ && "Could not find successor mbb in mapping");
 								    const ConstantInt *CaseVal = I.getCaseValue();
 								    BranchProbability Prob =
 								        BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())
 								            : BranchProbability(1, SI.getNumCases() + 1);
 								    Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob));
 								  }
 								  MachineBasicBlock *DefaultMBB = &getMBB(*SI.getDefaultDest());
 								  // Cluster adjacent cases with the same destination. We do this at all
 								  // optimization levels because it's cheap to do and will make codegen faster
 								  // if there are many clusters.
 								  sortAndRangeify(Clusters);
 								  MachineBasicBlock *SwitchMBB = &getMBB(*SI.getParent());
 								  // If there is only the default destination, jump there directly.
 								  if (Clusters.empty()) {
 								    SwitchMBB->addSuccessor(DefaultMBB);
 								    if (DefaultMBB != SwitchMBB->getNextNode())
 								      MIB.buildBr(*DefaultMBB);
 								    return true;
 								  }
-												[PGO][PGSO] TargetLowering/TargetTransformationInfo/SwitchLoweringUtils part.

Summary:
(Split off from D67120)

TargetLowering/TargetTransformationInfo/SwitchLoweringUtils changes for profile
guided size optimization.

Reviewers: davidxl

Subscribers: eraman, hiraditya, haicheng, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D69580

											
										
										
											2019-10-30 02:30:30 +08:00
+								  SL->findJumpTables(Clusters, &SI, DefaultMBB, nullptr, nullptr);
-												[GlobalISel] Implement bit-test switch table optimization.

This is mostly a straight port from SelectionDAG. We re-use the actual bit-test
analysis part from SwitchLoweringUtils, which was factored out earlier to
support jump-tables.

Differential Revision: https://reviews.llvm.org/D85233

											
										
										
											2020-08-05 01:55:27 +08:00
+								  SL->findBitTestClusters(Clusters, &SI);
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
 								  LLVM_DEBUG({
 								    dbgs() << "Case clusters: ";
 								    for (const CaseCluster &C : Clusters) {
 								      if (C.Kind == CC_JumpTable)
 								        dbgs() << "JT:";
 								      if (C.Kind == CC_BitTests)
 								        dbgs() << "BT:";
 								      C.Low->getValue().print(dbgs(), true);
 								      if (C.Low != C.High) {
 								        dbgs() << '-';
 								        C.High->getValue().print(dbgs(), true);
 								      }
 								      dbgs() << ' ';
 								    }
 								    dbgs() << '\n';
 								  });
 								  assert(!Clusters.empty());
 								  SwitchWorkList WorkList;
 								  CaseClusterIt First = Clusters.begin();
 								  CaseClusterIt Last = Clusters.end() - 1;
 								  auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB);
 								  WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
 								  // FIXME: At the moment we don't do any splitting optimizations here like
 								  // SelectionDAG does, so this worklist only has one entry.
 								  while (!WorkList.empty()) {
 								    SwitchWorkListItem W = WorkList.back();
 								    WorkList.pop_back();
 								    if (!lowerSwitchWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB, MIB))
 								      return false;
 								  }
 								  return true;
 								}
 								/// Emit the jump-table dispatch for \p JT into \p MBB: build the jump table
 								/// value for JT.JTI and then the jump-table branch on JT.Reg. The header
 								/// must have been lowered first so that JT.Reg is set.
 								void IRTranslator::emitJumpTable(SwitchCG::JumpTable &JT,
 								                                 MachineBasicBlock *MBB) {
 								  assert(JT.Reg != -1U && "Should lower JT Header first!");
 								  // The jump table value is typed as the LLT of an i8 pointer.
 								  LLVMContext &Ctx = MF->getFunction().getContext();
 								  const LLT TablePtrTy = getLLTForType(*Type::getInt8PtrTy(Ctx), *DL);
 								  // Use a dedicated builder positioned in MBB, carrying over the current
 								  // builder's debug location.
 								  MachineIRBuilder Builder(*MBB->getParent());
 								  Builder.setMBB(*MBB);
 								  Builder.setDebugLoc(CurBuilder->getDebugLoc());
 								  auto TableAddr = Builder.buildJumpTable(TablePtrTy, JT.JTI);
 								  Builder.buildBrJT(TableAddr.getReg(0), JT.JTI, JT.Reg);
 								}
 								bool IRTranslator::emitJumpTableHeader(SwitchCG::JumpTable &JT,
 								                                       SwitchCG::JumpTableHeader &JTH,
-												[GlobalISel][IRTranslator] Fix some PHI bugs related to jump tables when optimizations are used.

The new switch lowering code that tries to generate jump tables and range checks
were tested at -O0 on arm64, but on -O3 the generic switch lowering code goes to
town on trying to generate optimized lowerings, e.g. multiple jump tables, range
checks etc. This exposed bugs in the way PHI nodes are handled because the CFG
looks even stranger after all of this is done.

llvm-svn: 364613

											
										
										
											2019-06-28 07:56:34 +08:00
+								                                       MachineBasicBlock *HeaderBB) {
 								  MachineIRBuilder MIB(*HeaderBB->getParent());
 								  MIB.setMBB(*HeaderBB);
 								  MIB.setDebugLoc(CurBuilder->getDebugLoc());
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG to in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
 								  const Value &SValue = *JTH.SValue;
 								  // Subtract the lowest switch case value from the value being switched on.
 								  const LLT SwitchTy = getLLTForType(*SValue.getType(), *DL);
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								  Register SwitchOpReg = getOrCreateVReg(SValue);
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG to in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								  auto FirstCst = MIB.buildConstant(SwitchTy, JTH.First);
 								  auto Sub = MIB.buildSub({SwitchTy}, SwitchOpReg, FirstCst);
 								  // This value may be smaller or larger than the target's pointer type, and
 								  // therefore require extension or truncating.
 								  Type *PtrIRTy = SValue.getType()->getPointerTo();
 								  const LLT PtrScalarTy = LLT::scalar(DL->getTypeSizeInBits(PtrIRTy));
 								  Sub = MIB.buildZExtOrTrunc(PtrScalarTy, Sub);
 								  JT.Reg = Sub.getReg(0);
 								  if (JTH.OmitRangeCheck) {
-												[GlobalISel][IRTranslator] Fix some PHI bugs related to jump tables when optimizations are used.

The new switch lowering code that tries to generate jump tables and range checks
were tested at -O0 on arm64, but on -O3 the generic switch lowering code goes to
town on trying to generate optimized lowerings, e.g. multiple jump tables, range
checks etc. This exposed bugs in the way PHI nodes are handled because the CFG
looks even stranger after all of this is done.

llvm-svn: 364613

											
										
										
											2019-06-28 07:56:34 +08:00
+								    if (JT.MBB != HeaderBB->getNextNode())
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG to in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								      MIB.buildBr(*JT.MBB);
 								    return true;
 								  }
 								  // Emit the range check for the jump table, and branch to the default block
 								  // for the switch statement if the value being switched on exceeds the
 								  // largest case in the switch.
 								  auto Cst = getOrCreateVReg(
 								      *ConstantInt::get(SValue.getType(), JTH.Last - JTH.First));
 								  Cst = MIB.buildZExtOrTrunc(PtrScalarTy, Cst).getReg(0);
 								  auto Cmp = MIB.buildICmp(CmpInst::ICMP_UGT, LLT::scalar(1), Sub, Cst);
 								  auto BrCond = MIB.buildBrCond(Cmp.getReg(0), *JT.Default);
 								  // Avoid emitting unnecessary branches to the next block.
-												[GlobalISel][IRTranslator] Fix some PHI bugs related to jump tables when optimizations are used.

The new switch lowering code that tries to generate jump tables and range checks
were tested at -O0 on arm64, but on -O3 the generic switch lowering code goes to
town on trying to generate optimized lowerings, e.g. multiple jump tables, range
checks etc. This exposed bugs in the way PHI nodes are handled because the CFG
looks even stranger after all of this is done.

llvm-svn: 364613

											
										
										
											2019-06-28 07:56:34 +08:00
+								  if (JT.MBB != HeaderBB->getNextNode())
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG to in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								    BrCond = MIB.buildBr(*JT.MBB);
 								  return true;
 								}
 								void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
 								                                  MachineBasicBlock *SwitchBB,
 								                                  MachineIRBuilder &MIB) {
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								  Register CondLHS = getOrCreateVReg(*CB.CmpLHS);
 								  Register Cond;
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG to in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								  DebugLoc OldDbgLoc = MIB.getDebugLoc();
 								  MIB.setDebugLoc(CB.DbgLoc);
 								  MIB.setMBB(*CB.ThisBB);
 								  if (CB.PredInfo.NoCmp) {
 								    // Branch or fall through to TrueBB.
 								    addSuccessorWithProb(CB.ThisBB, CB.TrueBB, CB.TrueProb);
 								    addMachineCFGPred({SwitchBB->getBasicBlock(), CB.TrueBB->getBasicBlock()},
 								                      CB.ThisBB);
 								    CB.ThisBB->normalizeSuccProbs();
 								    if (CB.TrueBB != CB.ThisBB->getNextNode())
 								      MIB.buildBr(*CB.TrueBB);
 								    MIB.setDebugLoc(OldDbgLoc);
 								    return;
 								  }
 								  const LLT i1Ty = LLT::scalar(1);
 								  // Build the compare.
 								  if (!CB.CmpMHS) {
-												[GlobalISel][IRTranslator] Generate better conditional branch lowering.

This is a port of the functionality from SelectionDAG, which tries to find
a tree of conditions from compares that are then combined using OR or AND,
before using that result as the input to a branch. Instead of naively
lowering the code as is, this change converts that into a sequence of
conditional branches on the sub-expressions of the tree.

Like SelectionDAG, we re-use the case block codegen functionality from
the switch lowering utils, which causes us to generate some different code.
The result of which I've tried to mitigate in earlier combine patches.

Differential Revision: https://reviews.llvm.org/D86665

											
										
										
											2020-08-25 05:10:38 +08:00
+								    const auto *CI = dyn_cast<ConstantInt>(CB.CmpRHS);
 								    // For conditional branch lowering, we might try to do something silly like
 								    // emit an G_ICMP to compare an existing G_ICMP i1 result with true. If so,
 								    // just re-use the existing condition vreg.
-												GlobalISel: check type size before getZExtValue()ing it.

Otherwise getZExtValue() asserts.

											
										
										
											2021-02-01 20:43:33 +08:00
+								    if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI &&
 								        CI->getZExtValue() == 1 && CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
-												[GlobalISel][IRTranslator] Generate better conditional branch lowering.

This is a port of the functionality from SelectionDAG, which tries to find
a tree of conditions from compares that are then combined using OR or AND,
before using that result as the input to a branch. Instead of naively
lowering the code as is, this change converts that into a sequence of
conditional branches on the sub-expressions of the tree.

Like SelectionDAG, we re-use the case block codegen functionality from
the switch lowering utils, which causes us to generate some different code.
The result of which I've tried to mitigate in earlier combine patches.

Differential Revision: https://reviews.llvm.org/D86665

											
										
										
											2020-08-25 05:10:38 +08:00
+								      Cond = CondLHS;
 								    } else {
 								      Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
 								      if (CmpInst::isFPPredicate(CB.PredInfo.Pred))
 								        Cond =
 								            MIB.buildFCmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
 								      else
 								        Cond =
 								            MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
 								    }
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG to in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								  } else {
-												[GlobalISel][IRTranslator] Fix switch table lowering to use signed LE not unsigned.

We were miscompiling switch value comparisons with the wrong signedness, which
shows up when we have things like switch case values with i1 types, which end up
being legalized incorrectly.

Fixes PR43383

llvm-svn: 372675

											
										
										
											2019-09-24 08:09:23 +08:00
+								    assert(CB.PredInfo.Pred == CmpInst::ICMP_SLE &&
 								           "Can only handle SLE ranges");
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG to in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
 								    const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
 								    const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								    Register CmpOpReg = getOrCreateVReg(*CB.CmpMHS);
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG to in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								      Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG to in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								      Cond =
-												[GlobalISel][IRTranslator] Fix switch table lowering to use signed LE not unsigned.

We were miscompiling switch value comparisons with the wrong signedness, which
shows up when we have things like switch case values with i1 types, which end up
being legalized incorrectly.

Fixes PR43383

llvm-svn: 372675

											
										
										
											2019-09-24 08:09:23 +08:00
+								          MIB.buildICmp(CmpInst::ICMP_SLE, i1Ty, CmpOpReg, CondRHS).getReg(0);
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG to in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								    } else {
-												GlobalISel: Don't use LLT references

These should always be passed by value

											
										
										
											2020-02-14 03:30:50 +08:00
+								      const LLT CmpTy = MRI->getType(CmpOpReg);
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG to in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								      auto Sub = MIB.buildSub({CmpTy}, CmpOpReg, CondLHS);
 								      auto Diff = MIB.buildConstant(CmpTy, High - Low);
 								      Cond = MIB.buildICmp(CmpInst::ICMP_ULE, i1Ty, Sub, Diff).getReg(0);
 								    }
 								  }
 								  // Update successor info
 								  addSuccessorWithProb(CB.ThisBB, CB.TrueBB, CB.TrueProb);
 								  addMachineCFGPred({SwitchBB->getBasicBlock(), CB.TrueBB->getBasicBlock()},
 								                    CB.ThisBB);
 								  // TrueBB and FalseBB are always different unless the incoming IR is
 								  // degenerate. This only happens when running llc on weird IR.
 								  if (CB.TrueBB != CB.FalseBB)
 								    addSuccessorWithProb(CB.ThisBB, CB.FalseBB, CB.FalseProb);
 								  CB.ThisBB->normalizeSuccProbs();
-												[GlobalISel][IRTranslator] Generate better conditional branch lowering.

This is a port of the functionality from SelectionDAG, which tries to find
a tree of conditions from compares that are then combined using OR or AND,
before using that result as the input to a branch. Instead of naively
lowering the code as is, this change converts that into a sequence of
conditional branches on the sub-expressions of the tree.

Like SelectionDAG, we re-use the case block codegen functionality from
the switch lowering utils, which causes us to generate some different code.
The result of which I've tried to mitigate in earlier combine patches.

Differential Revision: https://reviews.llvm.org/D86665

											
										
										
											2020-08-25 05:10:38 +08:00
+								  addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()},
 								                    CB.ThisBB);
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG to in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
 								  MIB.buildBrCond(Cond, *CB.TrueBB);
 								  MIB.buildBr(*CB.FalseBB);
 								  MIB.setDebugLoc(OldDbgLoc);
 								}
 								bool IRTranslator::lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W,
 								                                          MachineBasicBlock *SwitchMBB,
-												[GlobalISel][IRTranslator] Fix some PHI bugs related to jump tables when optimizations are used.

The new switch lowering code that tries to generate jump tables and range checks
was tested at -O0 on arm64, but on -O3 the generic switch lowering code goes to
town on trying to generate optimized lowerings, e.g. multiple jump tables, range
checks etc. This exposed bugs in the way PHI nodes are handled because the CFG
looks even stranger after all of this is done.

llvm-svn: 364613

											
										
										
											2019-06-28 07:56:34 +08:00
+								                                          MachineBasicBlock *CurMBB,
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								                                          MachineBasicBlock *DefaultMBB,
 								                                          MachineIRBuilder &MIB,
 								                                          MachineFunction::iterator BBI,
 								                                          BranchProbability UnhandledProbs,
 								                                          SwitchCG::CaseClusterIt I,
 								                                          MachineBasicBlock *Fallthrough,
 								                                          bool FallthroughUnreachable) {
 								  using namespace SwitchCG;
 								  MachineFunction *CurMF = SwitchMBB->getParent();
 								  // FIXME: Optimize away range check based on pivot comparisons.
 								  JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first;
 								  SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second;
 								  BranchProbability DefaultProb = W.DefaultProb;
 								  // The jump block hasn't been inserted yet; insert it here.
 								  MachineBasicBlock *JumpMBB = JT->MBB;
 								  CurMF->insert(BBI, JumpMBB);
 								  // Since the jump table block is separate from the switch block, we need
 								  // to keep track of it as a machine predecessor to the default block,
 								  // otherwise we lose the phi edges.
 								  addMachineCFGPred({SwitchMBB->getBasicBlock(), DefaultMBB->getBasicBlock()},
-												[GlobalISel][IRTranslator] Fix some PHI bugs related to jump tables when optimizations are used.

The new switch lowering code that tries to generate jump tables and range checks
was tested at -O0 on arm64, but on -O3 the generic switch lowering code goes to
town on trying to generate optimized lowerings, e.g. multiple jump tables, range
checks etc. This exposed bugs in the way PHI nodes are handled because the CFG
looks even stranger after all of this is done.

llvm-svn: 364613

											
										
										
											2019-06-28 07:56:34 +08:00
+								                    CurMBB);
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								  addMachineCFGPred({SwitchMBB->getBasicBlock(), DefaultMBB->getBasicBlock()},
 								                    JumpMBB);
 								  auto JumpProb = I->Prob;
 								  auto FallthroughProb = UnhandledProbs;
 								  // If the default statement is a target of the jump table, we evenly
 								  // distribute the default probability to successors of CurMBB. Also
 								  // update the probability on the edge from JumpMBB to Fallthrough.
 								  for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
 								                                        SE = JumpMBB->succ_end();
 								       SI != SE; ++SI) {
 								    if (*SI == DefaultMBB) {
 								      JumpProb += DefaultProb / 2;
 								      FallthroughProb -= DefaultProb / 2;
 								      JumpMBB->setSuccProbability(SI, DefaultProb / 2);
 								      JumpMBB->normalizeSuccProbs();
-												[GlobalISel][IRTranslator] Fix some PHI bugs related to jump tables when optimizations are used.

The new switch lowering code that tries to generate jump tables and range checks
was tested at -O0 on arm64, but on -O3 the generic switch lowering code goes to
town on trying to generate optimized lowerings, e.g. multiple jump tables, range
checks etc. This exposed bugs in the way PHI nodes are handled because the CFG
looks even stranger after all of this is done.

llvm-svn: 364613

											
										
										
											2019-06-28 07:56:34 +08:00
+								    } else {
 								      // Also record edges from the jump table block to its successors.
 								      addMachineCFGPred({SwitchMBB->getBasicBlock(), (*SI)->getBasicBlock()},
 								                        JumpMBB);
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								    }
 								  }
 								  // Skip the range check if the fallthrough block is unreachable.
 								  if (FallthroughUnreachable)
 								    JTH->OmitRangeCheck = true;
 								  if (!JTH->OmitRangeCheck)
 								    addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
 								  addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
 								  CurMBB->normalizeSuccProbs();
 								  // The jump table header will be inserted in our current block, do the
 								  // range check, and fall through to our fallthrough block.
 								  JTH->HeaderBB = CurMBB;
 								  JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
 								  // If we're in the right place, emit the jump table header right now.
 								  if (CurMBB == SwitchMBB) {
-												[GlobalISel][IRTranslator] Fix some PHI bugs related to jump tables when optimizations are used.

The new switch lowering code that tries to generate jump tables and range checks
was tested at -O0 on arm64, but on -O3 the generic switch lowering code goes to
town on trying to generate optimized lowerings, e.g. multiple jump tables, range
checks etc. This exposed bugs in the way PHI nodes are handled because the CFG
looks even stranger after all of this is done.

llvm-svn: 364613

											
										
										
											2019-06-28 07:56:34 +08:00
+								    if (!emitJumpTableHeader(*JT, *JTH, CurMBB))
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								      return false;
 								    JTH->Emitted = true;
 								  }
 								  return true;
 								}
 								/// Lower a range cluster into a single compare-and-branch CaseBlock and emit
 								/// it through emitSwitchCase().
 								///
 								/// A cluster whose Low and High are equal is tested with `Cond == Low`. Any
 								/// other cluster is tested with `Low <= Cond <= High` using a signed
 								/// less-or-equal predicate.
 								///
 								/// \param I cluster to lower.
 								/// \param Cond value being switched on.
 								/// \param Fallthrough block handed to the CaseBlock as the alternative target.
 								/// \param FallthroughUnreachable when set, the comparison can be folded away.
 								/// \param UnhandledProbs probability of the false edge, i.e. the sum of all
 								///        cases that have not been handled yet.
 								/// \param CurMBB block the CaseBlock is attached to.
 								/// \param MIB builder that supplies the debug location.
 								/// \param SwitchMBB block forwarded to emitSwitchCase().
 								/// \returns true unconditionally.
 								bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I,
 								                                            Value *Cond,
 								                                            MachineBasicBlock *Fallthrough,
 								                                            bool FallthroughUnreachable,
 								                                            BranchProbability UnhandledProbs,
 								                                            MachineBasicBlock *CurMBB,
 								                                            MachineIRBuilder &MIB,
 								                                            MachineBasicBlock *SwitchMBB) {
 								  using namespace SwitchCG;
 								  // Start from the single-value form: Cond == I->Low.
 								  CmpInst::Predicate Pred = CmpInst::ICMP_EQ;
 								  const Value *LHS = Cond;
 								  const Value *MHS = nullptr;
 								  const Value *RHS = I->Low;
 								  if (I->Low != I->High) {
 								    // A real range: I->Low <= Cond <= I->High.
 								    Pred = CmpInst::ICMP_SLE;
 								    LHS = I->Low;
 								    MHS = Cond;
 								    RHS = I->High;
 								  }
 								  // If Fallthrough is unreachable, the comparison gets folded away.
 								  // The false probability is the sum of all unhandled cases.
 								  CaseBlock CB(Pred, FallthroughUnreachable, LHS, RHS, MHS, I->MBB, Fallthrough,
 								               CurMBB, MIB.getDebugLoc(), I->Prob, UnhandledProbs);
 								  emitSwitchCase(CB, SwitchMBB, MIB);
 								  return true;
 								}
-												[GlobalISel] Implement bit-test switch table optimization.

This is mostly a straight port from SelectionDAG. We re-use the actual bit-test
analysis part from SwitchLoweringUtils, which was factored out earlier to
support jump-tables.

Differential Revision: https://reviews.llvm.org/D85233

											
										
										
											2020-08-05 01:55:27 +08:00
+								void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,
 								                                     MachineBasicBlock *SwitchBB) {
 								  MachineIRBuilder &MIB = *CurBuilder;
 								  MIB.setMBB(*SwitchBB);
 								  // Subtract the minimum value.
 								  Register SwitchOpReg = getOrCreateVReg(*B.SValue);
 								  LLT SwitchOpTy = MRI->getType(SwitchOpReg);
 								  Register MinValReg = MIB.buildConstant(SwitchOpTy, B.First).getReg(0);
 								  auto RangeSub = MIB.buildSub(SwitchOpTy, SwitchOpReg, MinValReg);
 								  // Ensure that the type will fit the mask value.
 								  LLT MaskTy = SwitchOpTy;
 								  for (unsigned I = 0, E = B.Cases.size(); I != E; ++I) {
 								    if (!isUIntN(SwitchOpTy.getSizeInBits(), B.Cases[I].Mask)) {
 								      // Switch table case range are encoded into series of masks.
 								      // Just use pointer type, it's guaranteed to fit.
 								      MaskTy = LLT::scalar(64);
 								      break;
 								    }
 								  }
 								  Register SubReg = RangeSub.getReg(0);
 								  if (SwitchOpTy != MaskTy)
 								    SubReg = MIB.buildZExtOrTrunc(MaskTy, SubReg).getReg(0);
 								  B.RegVT = getMVTForLLT(MaskTy);
 								  B.Reg = SubReg;
 								  MachineBasicBlock *MBB = B.Cases[0].ThisBB;
 								  if (!B.OmitRangeCheck)
 								    addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
 								  addSuccessorWithProb(SwitchBB, MBB, B.Prob);
 								  SwitchBB->normalizeSuccProbs();
 								  if (!B.OmitRangeCheck) {
 								    // Conditional branch to the default block.
 								    auto RangeCst = MIB.buildConstant(SwitchOpTy, B.Range);
 								    auto RangeCmp = MIB.buildICmp(CmpInst::Predicate::ICMP_UGT, LLT::scalar(1),
 								                                  RangeSub, RangeCst);
 								    MIB.buildBrCond(RangeCmp, *B.Default);
 								  }
 								  // Avoid emitting unnecessary branches to the next block.
 								  if (MBB != SwitchBB->getNextNode())
 								    MIB.buildBr(*MBB);
 								}
 								/// Emit one bit-test case into \p SwitchBB: compute a 1-bit condition from
 								/// \p Reg and B.Mask, branch to B.TargetBB when it holds, and otherwise
 								/// continue to \p NextMBB.
 								///
 								/// \param BB enclosing bit-test block (supplies RegVT, Range and Parent).
 								/// \param NextMBB block reached when the test fails.
 								/// \param BranchProbToNext relative probability of the edge to \p NextMBB.
 								/// \param Reg register compared against / shifted by the mask tests.
 								/// \param B the case being emitted.
 								/// \param SwitchBB block that receives the instructions.
 								void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB,
 								                                   MachineBasicBlock *NextMBB,
 								                                   BranchProbability BranchProbToNext,
 								                                   Register Reg, SwitchCG::BitTestCase &B,
 								                                   MachineBasicBlock *SwitchBB) {
 								  MachineIRBuilder &Builder = *CurBuilder;
 								  Builder.setMBB(*SwitchBB);

 								  const LLT MaskTy = getLLTForMVT(BB.RegVT);
 								  const LLT BoolTy = LLT::scalar(1);
 								  const unsigned NumSetBits = countPopulation(B.Mask);

 								  Register Taken;
 								  if (NumSetBits == 1) {
 								    // Testing for a single bit; just compare the shift count with what it
 								    // would need to be to shift a 1 bit in that position.
 								    auto BitIndex =
 								        Builder.buildConstant(MaskTy, countTrailingZeros(B.Mask));
 								    Taken = Builder.buildICmp(CmpInst::ICMP_EQ, BoolTy, Reg, BitIndex)
 								                .getReg(0);
 								  } else if (NumSetBits == BB.Range) {
 								    // There is only one zero bit in the range, test for it directly.
 								    auto ZeroIndex =
 								        Builder.buildConstant(MaskTy, countTrailingOnes(B.Mask));
 								    Taken = Builder.buildICmp(CmpInst::ICMP_NE, BoolTy, Reg, ZeroIndex)
 								                .getReg(0);
 								  } else {
 								    // General form: ((1 << Reg) & Mask) != 0.
 								    auto One = Builder.buildConstant(MaskTy, 1);
 								    auto Shifted = Builder.buildShl(MaskTy, One, Reg);
 								    auto Mask = Builder.buildConstant(MaskTy, B.Mask);
 								    auto Masked = Builder.buildAnd(MaskTy, Shifted, Mask);
 								    auto Zero = Builder.buildConstant(MaskTy, 0);
 								    Taken = Builder.buildICmp(CmpInst::ICMP_NE, BoolTy, Masked, Zero)
 								                .getReg(0);
 								  }

 								  // Edge SwitchBB -> B.TargetBB carries B.ExtraProb; edge SwitchBB -> NextMBB
 								  // carries BranchProbToNext.
 								  addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
 								  addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
 								  // The two values are relative probabilities (they behave like weights) and
 								  // are not guaranteed to sum to one, so normalize them.
 								  SwitchBB->normalizeSuccProbs();

 								  // Record the fact that the IR edge from the header to the bit test target
 								  // will go through our new block. Needed for PHIs to have nodes added.
 								  addMachineCFGPred({BB.Parent->getBasicBlock(), B.TargetBB->getBasicBlock()},
 								                    SwitchBB);

 								  Builder.buildBrCond(Taken, *B.TargetBB);

 								  // Avoid emitting unnecessary branches to the next block.
 								  if (NextMBB != SwitchBB->getNextNode())
 								    Builder.buildBr(*NextMBB);
 								}
 								/// Prepare the bit-test block referenced by cluster \p I for emission.
 								///
 								/// Inserts the per-case blocks into the function at \p BBI, fills in the
 								/// Parent/Default/DefaultProb fields of the BitTestBlock, adjusts the
 								/// probabilities for non-contiguous ranges, and emits the header immediately
 								/// when \p CurMBB is the switch block itself.
 								///
 								/// \p W, \p DefaultMBB and \p MIB are not used by this function.
 								///
 								/// \returns true unconditionally.
 								bool IRTranslator::lowerBitTestWorkItem(
 								    SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB,
 								    MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB,
 								    MachineIRBuilder &MIB, MachineFunction::iterator BBI,
 								    BranchProbability DefaultProb, BranchProbability UnhandledProbs,
 								    SwitchCG::CaseClusterIt I, MachineBasicBlock *Fallthrough,
 								    bool FallthroughUnreachable) {
 								  using namespace SwitchCG;
 								  MachineFunction &MF = *SwitchMBB->getParent();
 								  // FIXME: Optimize away range check based on pivot comparisons.
 								  BitTestBlock &BTB = SL->BitTestCases[I->BTCasesIndex];

 								  // The bit test blocks haven't been inserted yet; insert them here.
 								  for (BitTestCase &Case : BTB.Cases)
 								    MF.insert(BBI, Case.ThisBB);

 								  // Fill in fields of the BitTestBlock.
 								  BTB.Parent = CurMBB;
 								  BTB.Default = Fallthrough;
 								  BTB.DefaultProb = UnhandledProbs;

 								  // If the cases in bit test don't form a contiguous range, we evenly
 								  // distribute the probability on the edge to Fallthrough to two
 								  // successors of CurMBB.
 								  if (!BTB.ContiguousRange) {
 								    const BranchProbability HalfDefault = DefaultProb / 2;
 								    BTB.Prob += HalfDefault;
 								    BTB.DefaultProb -= HalfDefault;
 								  }

 								  // Skip the range check if the fallthrough block is unreachable.
 								  if (FallthroughUnreachable)
 								    BTB.OmitRangeCheck = true;

 								  // If we're in the right place, emit the bit test header right now.
 								  if (CurMBB == SwitchMBB) {
 								    emitBitTestHeader(BTB, SwitchMBB);
 								    BTB.Emitted = true;
 								  }
 								  return true;
 								}
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W,
 								                                       Value *Cond,
 								                                       MachineBasicBlock *SwitchMBB,
 								                                       MachineBasicBlock *DefaultMBB,
 								                                       MachineIRBuilder &MIB) {
 								  using namespace SwitchCG;
 								  MachineFunction *CurMF = FuncInfo.MF;
 								  MachineBasicBlock *NextMBB = nullptr;
 								  MachineFunction::iterator BBI(W.MBB);
 								  if (++BBI != FuncInfo.MF->end())
 								    NextMBB = &*BBI;
 								  if (EnableOpts) {
 								    // Here, we order cases by probability so the most likely case will be
 								    // checked first. However, two clusters can have the same probability in
 								    // which case their relative ordering is non-deterministic. So we use Low
 								    // as a tie-breaker as clusters are guaranteed to never overlap.
 								    llvm::sort(W.FirstCluster, W.LastCluster + 1,
 								               [](const CaseCluster &a, const CaseCluster &b) {
 								                 return a.Prob != b.Prob
 								                            ? a.Prob > b.Prob
 								                            : a.Low->getValue().slt(b.Low->getValue());
 								               });
 								    // Rearrange the case blocks so that the last one falls through if possible
 								    // without changing the order of probabilities.
 								    for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster;) {
 								      --I;
 								      if (I->Prob > W.LastCluster->Prob)
 								        break;
 								      if (I->Kind == CC_Range && I->MBB == NextMBB) {
 								        std::swap(*I, *W.LastCluster);
 								        break;
 								      }
 								    }
 								  }
 								  // Compute total probability.
 								  BranchProbability DefaultProb = W.DefaultProb;
 								  BranchProbability UnhandledProbs = DefaultProb;
 								  for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
 								    UnhandledProbs += I->Prob;
 								  MachineBasicBlock *CurMBB = W.MBB;
 								  for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
 								    bool FallthroughUnreachable = false;
 								    MachineBasicBlock *Fallthrough;
 								    if (I == W.LastCluster) {
 								      // For the last cluster, fall through to the default destination.
 								      Fallthrough = DefaultMBB;
 								      FallthroughUnreachable = isa<UnreachableInst>(
 								          DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg());
 								    } else {
 								      Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
 								      CurMF->insert(BBI, Fallthrough);
 								    }
 								    UnhandledProbs -= I->Prob;
 								    switch (I->Kind) {
 								    case CC_BitTests: {
-												[GlobalISel] Implement bit-test switch table optimization.

This is mostly a straight port from SelectionDAG. We re-use the actual bit-test
analysis part from SwitchLoweringUtils, which was factored out earlier to
support jump-tables.

Differential Revision: https://reviews.llvm.org/D85233

											
										
										
											2020-08-05 01:55:27 +08:00
+								      if (!lowerBitTestWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
 								                                DefaultProb, UnhandledProbs, I, Fallthrough,
 								                                FallthroughUnreachable)) {
 								        LLVM_DEBUG(dbgs() << "Failed to lower bit test for switch");
 								        return false;
 								      }
 								      break;
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG to in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								    }
-												[GlobalISel] Implement bit-test switch table optimization.

This is mostly a straight port from SelectionDAG. We re-use the actual bit-test
analysis part from SwitchLoweringUtils, which was factored out earlier to
support jump-tables.

Differential Revision: https://reviews.llvm.org/D85233

											
										
										
											2020-08-05 01:55:27 +08:00
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG to in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								    case CC_JumpTable: {
-												[GlobalISel][IRTranslator] Fix some PHI bugs related to jump tables when optimizations are used.

The new switch lowering code that tries to generate jump tables and range checks
were tested at -O0 on arm64, but on -O3 the generic switch lowering code goes to
town on trying to generate optimized lowerings, e.g. multiple jump tables, range
checks etc. This exposed bugs in the way PHI nodes are handled because the CFG
looks even stranger after all of this is done.

llvm-svn: 364613

											
										
										
											2019-06-28 07:56:34 +08:00
+								      if (!lowerJumpTableWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG to in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								                                  UnhandledProbs, I, Fallthrough,
 								                                  FallthroughUnreachable)) {
 								        LLVM_DEBUG(dbgs() << "Failed to lower jump table");
 								        return false;
 								      }
 								      break;
 								    }
 								    case CC_Range: {
 								      if (!lowerSwitchRangeWorkItem(I, Cond, Fallthrough,
 								                                    FallthroughUnreachable, UnhandledProbs,
 								                                    CurMBB, MIB, SwitchMBB)) {
 								        LLVM_DEBUG(dbgs() << "Failed to lower switch range");
 								        return false;
 								      }
 								      break;
 								    }
 								    }
 								    CurMBB = Fallthrough;
 								  }
-												[GlobalISel] Add support for switch statements

This commit does this using a trivial chain of conditional branches.  In the
future, we probably want to reuse the optimized switch lowering used in
SelectionDAG.

Differential Revision: https://reviews.llvm.org/D28176

llvm-svn: 291099

											
										
										
											2017-01-05 19:28:51 +08:00
 								  return true;
 								}
-												[GlobalISel] Add support for indirectbr

Differential Revision: https://reviews.llvm.org/D28079

llvm-svn: 293470

											
										
										
											2017-01-30 17:13:18 +08:00
+								bool IRTranslator::translateIndirectBr(const User &U,
 								                                       MachineIRBuilder &MIRBuilder) {
 								  const IndirectBrInst &BrInst = cast<IndirectBrInst>(U);
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								  const Register Tgt = getOrCreateVReg(*BrInst.getAddress());
-												[GlobalISel] Add support for indirectbr

Differential Revision: https://reviews.llvm.org/D28079

llvm-svn: 293470

											
										
										
											2017-01-30 17:13:18 +08:00
+								  MIRBuilder.buildBrIndirect(Tgt);
 								  // Link successors.
-												[GlobalISel] Don't add duplicate successors to MBBs when translating indirectbr

This fixes a verifier failure on a bot:

http://green.lab.llvm.org/green/job/test-suite-verify-machineinstrs-aarch64-O0-g/

```
*** Bad machine code: MBB has duplicate entries in its successor list. ***
- function:    foo
- basic block: %bb.5 indirectgoto (0x7fe3d687ca08)
```

One of the GCC torture suite tests (pr70460.c) has an indirectbr instruction
which has duplicate blocks in its destination list.

According to the langref this is allowed:

> Blocks are allowed to occur multiple times in the destination list, though
> this isn’t particularly useful.
(https://www.llvm.org/docs/LangRef.html#indirectbr-instruction)

We don't allow this in MIR. So, when we translate such an instruction, the
verifier screams.

This patch makes `translateIndirectBr` check if a successor has already been
added to a block. If the successor is present, it is skipped rather than added
twice.

Differential Revision: https://reviews.llvm.org/D79609

											
										
										
											2020-05-08 07:25:34 +08:00
+								  SmallPtrSet<const BasicBlock *, 32> AddedSuccessors;
-												[GlobalISel] Add support for indirectbr

Differential Revision: https://reviews.llvm.org/D28079

llvm-svn: 293470

											
										
										
											2017-01-30 17:13:18 +08:00
+								  MachineBasicBlock &CurBB = MIRBuilder.getMBB();
-												[GlobalISel] Don't add duplicate successors to MBBs when translating indirectbr

This fixes a verifier failure on a bot:

http://green.lab.llvm.org/green/job/test-suite-verify-machineinstrs-aarch64-O0-g/

```
*** Bad machine code: MBB has duplicate entries in its successor list. ***
- function:    foo
- basic block: %bb.5 indirectgoto (0x7fe3d687ca08)
```

One of the GCC torture suite tests (pr70460.c) has an indirectbr instruction
which has duplicate blocks in its destination list.

According to the langref this is allowed:

> Blocks are allowed to occur multiple times in the destination list, though
> this isn’t particularly useful.
(https://www.llvm.org/docs/LangRef.html#indirectbr-instruction)

We don't allow this in MIR. So, when we translate such an instruction, the
verifier screams.

This patch makes `translateIndirectBr` check if a successor has already been
added to a block. If the successor is present, it is skipped rather than added
twice.

Differential Revision: https://reviews.llvm.org/D79609

											
										
										
											2020-05-08 07:25:34 +08:00
+								  for (const BasicBlock *Succ : successors(&BrInst)) {
 								    // It's legal for indirectbr instructions to have duplicate blocks in the
 								    // destination list. We don't allow this in MIR. Skip anything that's
 								    // already a successor.
 								    if (!AddedSuccessors.insert(Succ).second)
 								      continue;
-												[GlobalISel] Preserve IR block layout.

It makes the output function layout more predictable;  the layout has
an effect on performance, we don't want it to be at the mercy of the
translator's visitation order and such.
The predictable output is also easier to digest.

getOrCreateBB isn't appropriately named anymore, as it never needs to
create anything.  Rename it and extract the MBB creation logic out of it.

A couple tests were sensitive to the order. Update them.

llvm-svn: 297868

											
										
										
											2017-03-16 02:22:33 +08:00
+								    CurBB.addSuccessor(&getMBB(*Succ));
-												[GlobalISel] Don't add duplicate successors to MBBs when translating indirectbr

This fixes a verifier failure on a bot:

http://green.lab.llvm.org/green/job/test-suite-verify-machineinstrs-aarch64-O0-g/

```
*** Bad machine code: MBB has duplicate entries in its successor list. ***
- function:    foo
- basic block: %bb.5 indirectgoto (0x7fe3d687ca08)
```

One of the GCC torture suite tests (pr70460.c) has an indirectbr instruction
which has duplicate blocks in its destination list.

According to the langref this is allowed:

> Blocks are allowed to occur multiple times in the destination list, though
> this isn’t particularly useful.
(https://www.llvm.org/docs/LangRef.html#indirectbr-instruction)

We don't allow this in MIR. So, when we translate such an instruction, the
verifier screams.

This patch makes `translateIndirectBr` check if a successor has already been
added to a block. If the successor is present, it is skipped rather than added
twice.

Differential Revision: https://reviews.llvm.org/D79609

											
										
										
											2020-05-08 07:25:34 +08:00
+								  }
-												[GlobalISel] Add support for indirectbr

Differential Revision: https://reviews.llvm.org/D28079

llvm-svn: 293470

											
										
										
											2017-01-30 17:13:18 +08:00
 								  return true;
 								}
-												GlobalISel: support swifterror attribute on AArch64.

swifterror marks an argument as a register pretending to be a pointer, so we
need a guaranteed mem2reg-like analysis of its uses. Fortunately most of the
infrastructure can be reused from the DAG world.

llvm-svn: 361608

											
										
										
											2019-05-24 16:40:13 +08:00
+								static bool isSwiftError(const Value *V) {
 								  // Returns true when V is a swifterror value. Only two kinds of value are
 								  // recognised: function arguments and allocas.
 								  // An Argument qualifies when it carries the swifterror attribute.
 								  if (auto Arg = dyn_cast<Argument>(V))
 								    return Arg->hasSwiftErrorAttr();
 								  // An AllocaInst qualifies when it is itself marked swifterror.
 								  if (auto AI = dyn_cast<AllocaInst>(V))
 								    return AI->isSwiftError();
 								  // Any other kind of value is never treated as swifterror.
 								  return false;
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
-												GlobalISel: support same ConstantExprs as Instructions.

It's more than just inttoptr, but the others can't be tested until we have
support for non-trivial constants (they currently get unavoidably folded to a
ConstantInt).

llvm-svn: 278303

											
										
										
											2016-08-11 07:02:41 +08:00
+								  const LoadInst &LI = cast<LoadInst>(U);
-												[GlobalISel][IRTranslator] Fix crash during translation of zero sized loads/stores/args/returns.

This fixes PR35358.

rdar://35619533

Differential Revision: https://reviews.llvm.org/D40604

llvm-svn: 319465

											
										
										
											2017-12-01 04:06:02 +08:00
+								  if (DL->getTypeStoreSize(LI.getType()) == 0)
 								    return true;
-												CodeGen: Introduce a class for registers

Avoids using a plain unsigned for registers throughout codegen.
Doesn't attempt to change every register use, just something a little
more than the set needed to build after changing the return type of
MachineOperand::getReg().

llvm-svn: 364191

											
										
										
											2019-06-24 23:50:29 +08:00
+								  ArrayRef<Register> Regs = getOrCreateVRegs(LI);
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(LI);
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								  Register Base = getOrCreateVReg(*LI.getPointerOperand());
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
-												[IRTranslator] Don't hardcode GEP index type

When breaking up loads and stores of aggregates, the IRTranslator uses
LLT::scalar(64) for the index type of the G_GEP instructions that
compute the addresses. This is unnecessarily large for 32-bit targets.
Use the int ptr type provided by the DataLayout instead.

Note that we're already doing the right thing when translating
getelementptr instructions from the IR. This is just an oversight when
generating new ones while translating loads/stores.

Both x86 and AArch64 already have tests confirming that the old
behaviour is preserved for 64-bit targets.

Differential Revision: https://reviews.llvm.org/D61852

llvm-svn: 360656

											
										
										
											2019-05-14 17:25:17 +08:00
+								  Type *OffsetIRTy = DL->getIntPtrType(LI.getPointerOperandType());
 								  LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
-												GlobalISel: support swifterror attribute on AArch64.

swifterror marks an argument as a register pretending to be a pointer, so we
need a guaranteed mem2reg-like analysis of its uses. Fortunately most of the
infrastructure can be reused from the DAG world.

llvm-svn: 361608

											
										
										
											2019-05-24 16:40:13 +08:00
+								  if (CLI->supportSwiftError() && isSwiftError(LI.getPointerOperand())) {
 								    assert(Regs.size() == 1 && "swifterror should be single pointer");
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								    Register VReg = SwiftError.getOrCreateVRegUseAt(&LI, &MIRBuilder.getMBB(),
-												GlobalISel: support swifterror attribute on AArch64.

swifterror marks an argument as a register pretending to be a pointer, so we
need a guaranteed mem2reg-like analysis of its uses. Fortunately most of the
infrastructure can be reused from the DAG world.

llvm-svn: 361608

											
										
										
											2019-05-24 16:40:13 +08:00
+								                                                    LI.getPointerOperand());
 								    MIRBuilder.buildCopy(Regs[0], VReg);
 								    return true;
 								  }
-												GlobalISel: Preserve load/store metadata in IRTranslator

This was dropping the invariant metadata on dead argument loads, so
they weren't deleted.

Atomics still need to be fixed the same way. Also, apparently store
was never preserving dereferenceable which should also be fixed.

											
										
										
											2020-01-13 03:10:42 +08:00
+								  auto &TLI = *MF->getSubtarget().getTargetLowering();
 								  MachineMemOperand::Flags Flags = TLI.getLoadMemOperandFlags(LI, *DL);
-												[GISel]: Attach missing range metadata while translating G_LOADs

https://reviews.llvm.org/D65048

Attach range information to G_LOAD when only defining one register.

reviewed by: arsenm

llvm-svn: 366656

											
										
										
											2019-07-21 22:07:54 +08:00
+								  const MDNode *Ranges =
 								      Regs.size() == 1 ? LI.getMetadata(LLVMContext::MD_range) : nullptr;
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  for (unsigned i = 0; i < Regs.size(); ++i) {
-												CodeGen: Introduce a class for registers

Avoids using a plain unsigned for registers throughout codegen.
Doesn't attempt to change every register use, just something a little
more than the set needed to build after changing the return type of
MachineOperand::getReg().

llvm-svn: 364191

											
										
										
											2019-06-24 23:50:29 +08:00
+								    Register Addr;
-												[globalisel] Rename G_GEP to G_PTR_ADD

Summary:
G_GEP is rather poorly named. It's a simple pointer+scalar addition and
doesn't support any of the complexities of getelementptr. I therefore
propose that we rename it. There's a G_PTR_MASK so let's follow that
convention and go with G_PTR_ADD

Reviewers: volkan, aditya_nandakumar, bogner, rovka, arsenm

Subscribers: sdardis, jvesely, wdng, nhaehnle, hiraditya, jrtc27, atanasyan, arphaman, Petar.Avramovic, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D69734

											
										
										
											2019-11-02 04:18:00 +08:00
+								    MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8);
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
 								    MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8);
-												[Alignment][NFC] Transitionning more getMachineMemOperand call sites

Summary:
This patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790

Reviewers: courbet

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77127

											
										
										
											2020-03-31 17:43:50 +08:00
+								    Align BaseAlign = getMemOpAlign(LI);
-												[globalisel][irtanslator] The IRTranslator should preserve TBAA information

											
										
										
											2019-11-15 04:11:00 +08:00
+								    AAMDNodes AAMetadata;
 								    LI.getAAMetadata(AAMetadata);
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								    auto MMO = MF->getMachineMemOperand(
-												[Alignment][NFC] Transitionning more getMachineMemOperand call sites

Summary:
This patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790

Reviewers: courbet

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77127

											
										
										
											2020-03-31 17:43:50 +08:00
+								        Ptr, Flags, MRI->getType(Regs[i]).getSizeInBytes(),
 								        commonAlignment(BaseAlign, Offsets[i] / 8), AAMetadata, Ranges,
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								        LI.getSyncScopeID(), LI.getOrdering());
 								    MIRBuilder.buildLoad(Regs[i], Addr, *MMO);
 								  }
-												Recommit: [globalisel] Change LLT constructor string into an LLT-based object that knows how to generate it.

Summary:
This will allow future patches to inspect the details of the LLT. The implementation is now split between
the Support and CodeGen libraries to allow TableGen to use this class without introducing layering concerns.

Thanks to Ahmed Bougacha for finding a reasonable way to avoid the layering issue and providing the version of this patch without that problem.

The problem with the previous commit appears to have been that TableGen was including CodeGen/LowLevelType.h instead of Support/LowLevelTypeImpl.h.

Reviewers: t.p.northover, qcolombet, rovka, aditya_nandakumar, ab, javed.absar

Subscribers: arsenm, nhaehnle, mgorny, dberris, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30046

llvm-svn: 297241

											
										
										
											2017-03-08 07:20:35 +08:00
-												GlobalISel: add generic load and store instructions.

Pretty straightforward, the only oddity is the MachineMemOperand (which it's
surprisingly difficult to share code for).

llvm-svn: 276799

											
										
										
											2016-07-27 04:23:26 +08:00
+								  return true;
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
-												GlobalISel: support same ConstantExprs as Instructions.

It's more than just inttoptr, but the others can't be tested until we have
support for non-trivial constants (they currently get unavoidably folded to a
ConstantInt).

llvm-svn: 278303

											
										
										
											2016-08-11 07:02:41 +08:00
+								  const StoreInst &SI = cast<StoreInst>(U);
-												[GlobalISel][IRTranslator] Fix crash during translation of zero sized loads/stores/args/returns.

This fixes PR35358.

rdar://35619533

Differential Revision: https://reviews.llvm.org/D40604

llvm-svn: 319465

											
										
										
											2017-12-01 04:06:02 +08:00
+								  if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0)
 								    return true;
-												CodeGen: Introduce a class for registers

Avoids using a plain unsigned for registers throughout codegen.
Doesn't attempt to change every register use, just something a little
more than the set needed to build after changing the return type of
MachineOperand::getReg().

llvm-svn: 364191

											
										
										
											2019-06-24 23:50:29 +08:00
+								  ArrayRef<Register> Vals = getOrCreateVRegs(*SI.getValueOperand());
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*SI.getValueOperand());
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								  Register Base = getOrCreateVReg(*SI.getPointerOperand());
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
-												[IRTranslator] Don't hardcode GEP index type

When breaking up loads and stores of aggregates, the IRTranslator uses
LLT::scalar(64) for the index type of the G_GEP instructions that
compute the addresses. This is unnecessarily large for 32-bit targets.
Use the int ptr type provided by the DataLayout instead.

Note that we're already doing the right thing when translating
getelementptr instructions from the IR. This is just an oversight when
generating new ones while translating loads/stores.

Both x86 and AArch64 already have tests confirming that the old
behaviour is preserved for 64-bit targets.

Differential Revision: https://reviews.llvm.org/D61852

llvm-svn: 360656

											
										
										
											2019-05-14 17:25:17 +08:00
+								  Type *OffsetIRTy = DL->getIntPtrType(SI.getPointerOperandType());
 								  LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
-												GlobalISel: support swifterror attribute on AArch64.

swifterror marks an argument as a register pretending to be a pointer, so we
need a guaranteed mem2reg-like analysis of its uses. Fortunately most of the
infrastructure can be reused from the DAG world.

llvm-svn: 361608

											
										
										
											2019-05-24 16:40:13 +08:00
+								  if (CLI->supportSwiftError() && isSwiftError(SI.getPointerOperand())) {
 								    assert(Vals.size() == 1 && "swifterror should be single pointer");
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								    Register VReg = SwiftError.getOrCreateVRegDefAt(&SI, &MIRBuilder.getMBB(),
-												GlobalISel: support swifterror attribute on AArch64.

swifterror marks an argument as a register pretending to be a pointer, so we
need a guaranteed mem2reg-like analysis of its uses. Fortunately most of the
infrastructure can be reused from the DAG world.

llvm-svn: 361608

											
										
										
											2019-05-24 16:40:13 +08:00
+								                                                    SI.getPointerOperand());
 								    MIRBuilder.buildCopy(VReg, Vals[0]);
 								    return true;
 								  }
-												GlobalISel: Preserve load/store metadata in IRTranslator

This was dropping the invariant metadata on dead argument loads, so
they weren't deleted.

Atomics still need to be fixed the same way. Also, apparently store
was never preserving dereferenceable which should also be fixed.

											
										
										
											2020-01-13 03:10:42 +08:00
+								  auto &TLI = *MF->getSubtarget().getTargetLowering();
 								  MachineMemOperand::Flags Flags = TLI.getStoreMemOperandFlags(SI, *DL);
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  for (unsigned i = 0; i < Vals.size(); ++i) {
-												CodeGen: Introduce a class for registers

Avoids using a plain unsigned for registers throughout codegen.
Doesn't attempt to change every register use, just something a little
more than the set needed to build after changing the return type of
MachineOperand::getReg().

llvm-svn: 364191

											
										
										
											2019-06-24 23:50:29 +08:00
+								    Register Addr;
-												[globalisel] Rename G_GEP to G_PTR_ADD

Summary:
G_GEP is rather poorly named. It's a simple pointer+scalar addition and
doesn't support any of the complexities of getelementptr. I therefore
propose that we rename it. There's a G_PTR_MASK so let's follow that
convention and go with G_PTR_ADD

Reviewers: volkan, aditya_nandakumar, bogner, rovka, arsenm

Subscribers: sdardis, jvesely, wdng, nhaehnle, hiraditya, jrtc27, atanasyan, arphaman, Petar.Avramovic, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D69734

											
										
										
											2019-11-02 04:18:00 +08:00
+								    MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8);
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
 								    MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8);
-												[Alignment][NFC] Transitionning more getMachineMemOperand call sites

Summary:
This patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790

Reviewers: courbet

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77127

											
										
										
											2020-03-31 17:43:50 +08:00
+								    Align BaseAlign = getMemOpAlign(SI);
-												[globalisel][irtanslator] The IRTranslator should preserve TBAA information

											
										
										
											2019-11-15 04:11:00 +08:00
+								    AAMDNodes AAMetadata;
 								    SI.getAAMetadata(AAMetadata);
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								    auto MMO = MF->getMachineMemOperand(
-												[Alignment][NFC] Transitionning more getMachineMemOperand call sites

Summary:
This patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790

Reviewers: courbet

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77127

											
										
										
											2020-03-31 17:43:50 +08:00
+								        Ptr, Flags, MRI->getType(Vals[i]).getSizeInBytes(),
 								        commonAlignment(BaseAlign, Offsets[i] / 8), AAMetadata, nullptr,
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								        SI.getSyncScopeID(), SI.getOrdering());
 								    MIRBuilder.buildStore(Vals[i], Addr, *MMO);
 								  }
-												GlobalISel: add generic load and store instructions.

Pretty straightforward, the only oddity is the MachineMemOperand (which it's
surprisingly difficult to share code for).

llvm-svn: 276799

											
										
										
											2016-07-27 04:23:26 +08:00
+								  return true;
 								}
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								static uint64_t getOffsetFromIndices(const User &U, const DataLayout &DL) {
-												GlobalISel: fix insert/extract to work on ConstantExprs too.

No tests yet unfortunately (ConstantFolding reduces all supported constants to
ConstantInts before we get to translation). Soon.

llvm-svn: 279308

											
										
										
											2016-08-20 04:09:03 +08:00
+								  const Value *Src = U.getOperand(0);
 								  Type *Int32Ty = Type::getInt32Ty(U.getContext());
-												[GlobalISel] IRTranslator: Translate ConstantStruct

Reviewers: qcolombet, ab, t.p.northover, aditya_nandakumar, dsanders

Reviewed By: qcolombet

Subscribers: rovka, kristof.beyls, javed.absar, igorb, llvm-commits

Differential Revision: https://reviews.llvm.org/D33317

llvm-svn: 303412

											
										
										
											2017-05-19 17:47:02 +08:00
-												GlobalISel: support translation of extractvalue instructions.

llvm-svn: 279285

											
										
										
											2016-08-20 01:47:05 +08:00
+								  // getIndexedOffsetInType is designed for GEPs, so the first index is the
 								  // usual array element rather than looking into the actual aggregate.
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  SmallVector<Value *, 1> Indices;
-												GlobalISel: support translation of extractvalue instructions.

llvm-svn: 279285

											
										
										
											2016-08-20 01:47:05 +08:00
+								  Indices.push_back(ConstantInt::get(Int32Ty, 0));
-												GlobalISel: fix insert/extract to work on ConstantExprs too.

No tests yet unfortunately (ConstantFolding reduces all supported constants to
ConstantInts before we get to translation). Soon.

llvm-svn: 279308

											
										
										
											2016-08-20 04:09:03 +08:00
 								  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&U)) {
 								    for (auto Idx : EVI->indices())
 								      Indices.push_back(ConstantInt::get(Int32Ty, Idx));
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&U)) {
 								    for (auto Idx : IVI->indices())
 								      Indices.push_back(ConstantInt::get(Int32Ty, Idx));
-												GlobalISel: fix insert/extract to work on ConstantExprs too.

No tests yet unfortunately (ConstantFolding reduces all supported constants to
ConstantInts before we get to translation). Soon.

llvm-svn: 279308

											
										
										
											2016-08-20 04:09:03 +08:00
+								  } else {
 								    for (unsigned i = 1; i < U.getNumOperands(); ++i)
 								      Indices.push_back(U.getOperand(i));
 								  }
-												GlobalISel: support translation of extractvalue instructions.

llvm-svn: 279285

											
										
										
											2016-08-20 01:47:05 +08:00
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  return 8 * static_cast<uint64_t>(
 								                 DL.getIndexedOffsetInType(Src->getType(), Indices));
 								}
-												GlobalISel: support translation of extractvalue instructions.

llvm-svn: 279285

											
										
										
											2016-08-20 01:47:05 +08:00
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control; currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								bool IRTranslator::translateExtractValue(const User &U,
 								                                         MachineIRBuilder &MIRBuilder) {
 								  const Value *Src = U.getOperand(0);
 								  uint64_t Offset = getOffsetFromIndices(U, *DL);
-												CodeGen: Introduce a class for registers

Avoids using a plain unsigned for registers throughoug codegen.
Doesn't attempt to change every register use, just something a little
more than the set needed to build after changing the return type of
MachineOperand::getReg().

llvm-svn: 364191

											
										
										
											2019-06-24 23:50:29 +08:00
+								  ArrayRef<Register> SrcRegs = getOrCreateVRegs(*Src);
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*Src);
-												Use llvm::lower_bound. NFC

This reapplies rL358161. That commit inadvertently reverted an exegesis file to an old version.

llvm-svn: 358246

											
										
										
											2019-04-12 10:02:06 +08:00
+								  unsigned Idx = llvm::lower_bound(Offsets, Offset) - Offsets.begin();
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  auto &DstRegs = allocateVRegs(U);
 								  for (unsigned i = 0; i < DstRegs.size(); ++i)
 								    DstRegs[i] = SrcRegs[Idx++];
-												GlobalISel: support translation of extractvalue instructions.

llvm-svn: 279285

											
										
										
											2016-08-20 01:47:05 +08:00
 								  return true;
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateInsertValue(const User &U,
 								                                        MachineIRBuilder &MIRBuilder) {
-												GlobalISel: fix insert/extract to work on ConstantExprs too.

No tests yet unfortunately (ConstantFolding reduces all supported constants to
ConstantInts before we get to translation). Soon.

llvm-svn: 279308

											
										
										
											2016-08-20 04:09:03 +08:00
+								  const Value *Src = U.getOperand(0);
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  uint64_t Offset = getOffsetFromIndices(U, *DL);
 								  auto &DstRegs = allocateVRegs(U);
 								  ArrayRef<uint64_t> DstOffsets = *VMap.getOffsets(U);
-												CodeGen: Introduce a class for registers

Avoids using a plain unsigned for registers throughoug codegen.
Doesn't attempt to change every register use, just something a little
more than the set needed to build after changing the return type of
MachineOperand::getReg().

llvm-svn: 364191

											
										
										
											2019-06-24 23:50:29 +08:00
+								  ArrayRef<Register> SrcRegs = getOrCreateVRegs(*Src);
 								  ArrayRef<Register> InsertedRegs = getOrCreateVRegs(*U.getOperand(1));
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  auto InsertedIt = InsertedRegs.begin();
 								  for (unsigned i = 0; i < DstRegs.size(); ++i) {
 								    if (DstOffsets[i] >= Offset && InsertedIt != InsertedRegs.end())
 								      DstRegs[i] = *InsertedIt++;
 								    else
 								      DstRegs[i] = SrcRegs[i];
-												GlobalISel: fix insert/extract to work on ConstantExprs too.

No tests yet unfortunately (ConstantFolding reduces all supported constants to
ConstantInts before we get to translation). Soon.

llvm-svn: 279308

											
										
										
											2016-08-20 04:09:03 +08:00
+								  }
-												GlobalISel: translate insertvalue instructions.

This adds a G_INSERT instruction, which technically makes G_SEQUENCE redundant
(it's equivalent to a G_INSERT into an IMPLICIT_DEF). We'll leave G_SEQUENCE
for now though: it's likely to be far more common as it's a fundamental part of
legalization, so avoiding the mess and bloat of the extra IMPLICIT_DEFs is
probably worthwhile.

llvm-svn: 279306

											
										
										
											2016-08-20 04:08:55 +08:00
 								  return true;
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateSelect(const User &U,
 								                                   MachineIRBuilder &MIRBuilder) {
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								  Register Tst = getOrCreateVReg(*U.getOperand(0));
-												CodeGen: Introduce a class for registers

Avoids using a plain unsigned for registers throughoug codegen.
Doesn't attempt to change every register use, just something a little
more than the set needed to build after changing the return type of
MachineOperand::getReg().

llvm-svn: 364191

											
										
										
											2019-06-24 23:50:29 +08:00
+								  ArrayRef<Register> ResRegs = getOrCreateVRegs(U);
 								  ArrayRef<Register> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
 								  ArrayRef<Register> Op1Regs = getOrCreateVRegs(*U.getOperand(2));
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
-												Move IR flag handling directly into builder calls for cases translated from Instructions in GlobalIsel

Reviewers: aditya_nandakumar, volkan

Reviewed By: aditya_nandakumar

Subscribers: rovka, kristof.beyls, volkan, Petar.Avramovic

Differential Revision: https://reviews.llvm.org/D57630

llvm-svn: 353336

											
										
										
											2019-02-07 03:57:06 +08:00
+								  uint16_t Flags = 0;
-												GlobalISel: Copy correct flags to select

This was looking for a compare condition, and copying the compare
flags. I don't think this was ever correct outside of certain min/max
patterns which aren't checked, but this probably predates select
instructions having fast math flags.

											
										
										
											2020-05-19 21:27:25 +08:00
+								  if (const SelectInst *SI = dyn_cast<SelectInst>(&U))
 								    Flags = MachineInstr::copyFlagsFromInstruction(*SI);
-												Move IR flag handling directly into builder calls for cases translated from Instructions in GlobalIsel

Reviewers: aditya_nandakumar, volkan

Reviewed By: aditya_nandakumar

Subscribers: rovka, kristof.beyls, volkan, Petar.Avramovic

Differential Revision: https://reviews.llvm.org/D57630

llvm-svn: 353336

											
										
										
											2019-02-07 03:57:06 +08:00
-												Add FMF management to common fp intrinsics in GlobalIsel

Summary: This the initial code change to facilitate managing FMF flags from Instructions to MI wrt Intrinsics in Global Isel.  Eventually the GlobalObserver interface will be added as well, where FMF additions can be tracked for the builder and CSE.

Reviewers: aditya_nandakumar, bogner

Reviewed By: bogner

Subscribers: rovka, kristof.beyls, javed.absar

Differential Revision: https://reviews.llvm.org/D55668

llvm-svn: 349514

											
										
										
											2018-12-19 01:54:52 +08:00
+								  for (unsigned i = 0; i < ResRegs.size(); ++i) {
-												[GlobalISel] Use more MachineIRBuilder helper methods

Reviewers: arsenm, nhaehnle

Subscribers: wdng, rovka, hiraditya, volkan, Petar.Avramovic, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D72833

											
										
										
											2020-01-16 20:09:48 +08:00
+								    MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i], Flags);
-												Add FMF management to common fp intrinsics in GlobalIsel

Summary: This the initial code change to facilitate managing FMF flags from Instructions to MI wrt Intrinsics in Global Isel.  Eventually the GlobalObserver interface will be added as well, where FMF additions can be tracked for the builder and CSE.

Reviewers: aditya_nandakumar, bogner

Reviewed By: bogner

Subscribers: rovka, kristof.beyls, javed.absar

Differential Revision: https://reviews.llvm.org/D55668

llvm-svn: 349514

											
										
										
											2018-12-19 01:54:52 +08:00
+								  }
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
-												GlobalISel: support translating select instructions.

llvm-svn: 279309

											
										
										
											2016-08-20 04:09:07 +08:00
+								  return true;
 								}
-												[GlobalISel][IRTranslator] New helper function translateCopy. NFC.

Reviewers: arsenm, volkan, t.p.northover, aditya_nandakumar

Subscribers: wdng, rovka, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D78377

											
										
										
											2020-04-17 21:36:01 +08:00
+								bool IRTranslator::translateCopy(const User &U, const Value &V,
 								                                 MachineIRBuilder &MIRBuilder) {
 								  Register Src = getOrCreateVReg(V);
 								  auto &Regs = *VMap.getVRegs(U);
 								  if (Regs.empty()) {
 								    Regs.push_back(Src);
 								    VMap.getOffsets(U)->push_back(0);
 								  } else {
 								    // If we already assigned a vreg for this instruction, we can't change that.
 								    // Emit a copy to satisfy the users we already emitted.
 								    MIRBuilder.buildCopy(Regs[0], Src);
 								  }
 								  return true;
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateBitCast(const User &U,
 								                                    MachineIRBuilder &MIRBuilder) {
-												[GlobalISel] Avoid invalidating ValToVReg when translating no-op bitcast.

When we translate a no-op (same type) bitcast, we try to be clever and
only emit a COPY if we already assigned a vreg to the defined value.
However, when we didn't, we tried to assign to a reference into the
ValToVReg DenseMap, even though the RHS of the assignment
(getOrCreateVReg) could potentially grow that DenseMap, invalidating the
reference.

Avoid that by getting the source vreg first.
I audited the rest of the translator; this is the only tricky case.

The test is quite unwieldy, as the problem is caused by the DenseMap
growing, which happens after the 47th mapped value.

llvm-svn: 297208

											
										
										
											2017-03-08 04:53:06 +08:00
+								  // If we're bitcasting to the source type, we can reuse the source vreg.
-												Recommit: [globalisel] Change LLT constructor string into an LLT-based object that knows how to generate it.

Summary:
This will allow future patches to inspect the details of the LLT. The implementation is now split between
the Support and CodeGen libraries to allow TableGen to use this class without introducing layering concerns.

Thanks to Ahmed Bougacha for finding a reasonable way to avoid the layering issue and providing the version of this patch without that problem.

The problem with the previous commit appears to have been that TableGen was including CodeGen/LowLevelType.h instead of Support/LowLevelTypeImpl.h.

Reviewers: t.p.northover, qcolombet, rovka, aditya_nandakumar, ab, javed.absar

Subscribers: arsenm, nhaehnle, mgorny, dberris, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30046

llvm-svn: 297241

											
										
										
											2017-03-08 07:20:35 +08:00
+								  if (getLLTForType(*U.getOperand(0)->getType(), *DL) ==
-												[GlobalISel][IRTranslator] New helper function translateCopy. NFC.

Reviewers: arsenm, volkan, t.p.northover, aditya_nandakumar

Subscribers: wdng, rovka, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D78377

											
										
										
											2020-04-17 21:36:01 +08:00
+								      getLLTForType(*U.getType(), *DL))
 								    return translateCopy(U, *U.getOperand(0), MIRBuilder);
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								  return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder);
-												GlobalISel: add generic casts to IRTranslator

This adds LLVM's 3 main cast instructions (inttoptr, ptrtoint, bitcast) to the
IRTranslator. The first two are direct translations (with 2 MachineInstr types
each). Since LLT discards information, a bitcast might become trivial and we
emit a COPY in those cases instead.

llvm-svn: 276690

											
										
										
											2016-07-26 05:01:29 +08:00
+								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateCast(unsigned Opcode, const User &U,
 								                                 MachineIRBuilder &MIRBuilder) {
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								  Register Op = getOrCreateVReg(*U.getOperand(0));
 								  Register Res = getOrCreateVReg(U);
-												[GISel]:IRTranslator: Prefer a buidInstr form that allows CSE of cast instructions

https://reviews.llvm.org/D60844

Use the style of buildInstr that allows CSEing.

llvm-svn: 358637

											
										
										
											2019-04-18 10:19:29 +08:00
+								  MIRBuilder.buildInstr(Opcode, {Res}, {Op});
-												GlobalISel: add generic casts to IRTranslator

This adds LLVM's 3 main cast instructions (inttoptr, ptrtoint, bitcast) to the
IRTranslator. The first two are direct translations (with 2 MachineInstr types
each). Since LLT discards information, a bitcast might become trivial and we
emit a COPY in those cases instead.

llvm-svn: 276690

											
										
										
											2016-07-26 05:01:29 +08:00
+								  return true;
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateGetElementPtr(const User &U,
 								                                          MachineIRBuilder &MIRBuilder) {
-												GlobalISel: translate GEP instructions.

Unlike SDag, we use a separate G_GEP instruction (much simplified, only taking
a single byte offset) to preserve the pointer type information through
selection.

llvm-svn: 281205

											
										
										
											2016-09-12 19:20:22 +08:00
+								  Value &Op0 = *U.getOperand(0);
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								  Register BaseReg = getOrCreateVReg(Op0);
-												[GlobalISel] Avoid translating synthetic constants to new G_CONSTANTS.

Currently, we create a G_CONSTANT for every "synthetic" integer
constant operand (for instance, for the G_GEP offset).
Instead, share the G_CONSTANTs we might have created by going through
the ValueToVReg machinery.

When we're emitting synthetic constants, we do need to get Constants from
the context.  One could argue that we shouldn't modify the context at
all (for instance, this means that we're going to use a tad more memory
if the constant wasn't used elsewhere), but constants are mostly
harmless.  We currently do this for extractvalue and all.

For constant fcmp, this does mean we'll emit an extra COPY, which is not
necessarily more optimal than an extra materialized constant.
But that preserves the current intended design of uniqued G_CONSTANTs,
and the rematerialization problem exists elsewhere and should be
resolved with a single coherent solution.

llvm-svn: 297875

											
										
										
											2017-03-16 03:21:11 +08:00
+								  Type *PtrIRTy = Op0.getType();
 								  LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
 								  Type *OffsetIRTy = DL->getIntPtrType(PtrIRTy);
 								  LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
-												GlobalISel: translate GEP instructions.

Unlike SDag, we use a separate G_GEP instruction (much simplified, only taking
a single byte offset) to preserve the pointer type information through
selection.

llvm-svn: 281205

											
										
										
											2016-09-12 19:20:22 +08:00
-												GlobalISel: Translate vector GEPs

											
										
										
											2020-01-25 11:57:49 +08:00
+								  // Normalize Vector GEP - all scalar operands should be converted to the
 								  // splat vector.
 								  unsigned VectorWidth = 0;
 								  if (auto *VT = dyn_cast<VectorType>(U.getType()))
-												[SVE] Remove calls to VectorType::getNumElements from CodeGen

Reviewers: efriedma, fpetrogalli, sdesmalen, RKSimon, arsenm

Reviewed By: RKSimon

Subscribers: wdng, tschuett, hiraditya, rkruppe, psnobl, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D82210

											
										
										
											2020-07-10 02:51:03 +08:00
+								    VectorWidth = cast<FixedVectorType>(VT)->getNumElements();
-												GlobalISel: Translate vector GEPs

											
										
										
											2020-01-25 11:57:49 +08:00
-												[GlobalISel][IRTranslator] When translating vector geps, splat the base pointer if required.

We can have geps that have a scalar base pointer, and a vector index value, which
means that the base pointer must be splatted into a vector of pointers.

This fixes crashes on arm64 GlobalISel with optimizations enabled.

											
										
										
											2020-01-31 08:25:20 +08:00
+								  // We might need to splat the base pointer into a vector if the offsets
 								  // are vectors.
 								  if (VectorWidth && !PtrTy.isVector()) {
 								    BaseReg =
 								        MIRBuilder.buildSplatVector(LLT::vector(VectorWidth, PtrTy), BaseReg)
 								            .getReg(0);
-												[SVE] Eliminate calls to default-false VectorType::get() from CodeGen

Reviewers: efriedma, c-rhodes, david-arm, spatel, craig.topper, aqjune, paquette, arsenm, gchatelet

Reviewed By: spatel, gchatelet

Subscribers: wdng, tschuett, hiraditya, rkruppe, psnobl, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D80313

											
										
										
											2020-06-09 01:12:08 +08:00
+								    PtrIRTy = FixedVectorType::get(PtrIRTy, VectorWidth);
-												[GlobalISel][IRTranslator] When translating vector geps, splat the base pointer if required.

We can have geps that have a scalar base pointer, and a vector index value, which
means that the base pointer must be splatted into a vector of pointers.

This fixes crashes on arm64 GlobalISel with optimizations enabled.

											
										
										
											2020-01-31 08:25:20 +08:00
+								    PtrTy = getLLTForType(*PtrIRTy, *DL);
 								    OffsetIRTy = DL->getIntPtrType(PtrIRTy);
 								    OffsetTy = getLLTForType(*OffsetIRTy, *DL);
 								  }
-												GlobalISel: translate GEP instructions.

Unlike SDag, we use a separate G_GEP instruction (much simplified, only taking
a single byte offset) to preserve the pointer type information through
selection.

llvm-svn: 281205

											
										
										
											2016-09-12 19:20:22 +08:00
+								  int64_t Offset = 0;
 								  for (gep_type_iterator GTI = gep_type_begin(&U), E = gep_type_end(&U);
 								       GTI != E; ++GTI) {
 								    const Value *Idx = GTI.getOperand();
-												Fix GlobalISel build.

llvm-svn: 288460

											
										
										
											2016-12-02 10:55:30 +08:00
+								    if (StructType *StTy = GTI.getStructTypeOrNull()) {
-												GlobalISel: translate GEP instructions.

Unlike SDag, we use a separate G_GEP instruction (much simplified, only taking
a single byte offset) to preserve the pointer type information through
selection.

llvm-svn: 281205

											
										
										
											2016-09-12 19:20:22 +08:00
+								      unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
 								      Offset += DL->getStructLayout(StTy)->getElementOffset(Field);
 								      continue;
 								    } else {
 								      uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
 								      // If this is a scalar constant or a splat vector of constants,
 								      // handle it quickly.
 								      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
 								        Offset += ElementSize * CI->getSExtValue();
 								        continue;
 								      }
 								      if (Offset != 0) {
-												[GlobalISel] Enable CSE in the IRTranslator & legalizer for -O0 with constants only.

Other opcodes shouldn't be CSE'd until we can be sure debug info quality won't
be degraded.

This change also improves the IRTranslator so that in most places, but not all,
it creates constants using the MIRBuilder directly instead of first creating a
new destination vreg and then creating a constant. By doing this, the
buildConstant() method can just return the vreg of an existing G_CONSTANT
instead of having to create a COPY from it.

I measured a 0.2% improvement in compile time and a 0.9% improvement in code
size at -O0 ARM64.

Compile time:
Program                                        base   cse    diff
test-suite...ark/tramp3d-v4/tramp3d-v4.test     9.04   9.12  0.8%
test-suite...Mark/mafft/pairlocalalign.test     2.68   2.66 -0.7%
test-suite...-typeset/consumer-typeset.test     5.53   5.51 -0.4%
test-suite :: CTMark/lencod/lencod.test         5.30   5.28 -0.3%
test-suite :: CTMark/Bullet/bullet.test        25.82  25.76 -0.2%
test-suite...:: CTMark/ClamAV/clamscan.test     6.92   6.90 -0.2%
test-suite...TMark/7zip/7zip-benchmark.test    34.24  34.17 -0.2%
test-suite :: CTMark/SPASS/SPASS.test           6.25   6.24 -0.1%
test-suite...:: CTMark/sqlite3/sqlite3.test     1.66   1.66 -0.1%
test-suite :: CTMark/kimwitu++/kc.test         13.61  13.60 -0.0%
Geomean difference                                          -0.2%

Code size:
Program                                        base     cse      diff
test-suite...-typeset/consumer-typeset.test    1315632  1266480 -3.7%
test-suite...:: CTMark/ClamAV/clamscan.test    1313892  1297508 -1.2%
test-suite :: CTMark/lencod/lencod.test        1439504  1423112 -1.1%
test-suite...TMark/7zip/7zip-benchmark.test    2936980  2904172 -1.1%
test-suite :: CTMark/Bullet/bullet.test        3478276  3445460 -0.9%
test-suite...ark/tramp3d-v4/tramp3d-v4.test    8082868  8033492 -0.6%
test-suite :: CTMark/kimwitu++/kc.test         3870380  3853972 -0.4%
test-suite :: CTMark/SPASS/SPASS.test          1434904  1434896 -0.0%
test-suite...Mark/mafft/pairlocalalign.test    764528   764528   0.0%
test-suite...:: CTMark/sqlite3/sqlite3.test    782092   782092   0.0%
Geomean difference                                              -0.9%

Differential Revision: https://reviews.llvm.org/D60580

llvm-svn: 358369

											
										
										
											2019-04-15 13:04:20 +08:00
+								        auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset);
-												[globalisel] Rename G_GEP to G_PTR_ADD

Summary:
G_GEP is rather poorly named. It's a simple pointer+scalar addition and
doesn't support any of the complexities of getelementptr. I therefore
propose that we rename it. There's a G_PTR_MASK so let's follow that
convention and go with G_PTR_ADD

Reviewers: volkan, aditya_nandakumar, bogner, rovka, arsenm

Subscribers: sdardis, jvesely, wdng, nhaehnle, hiraditya, jrtc27, atanasyan, arphaman, Petar.Avramovic, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D69734

											
										
										
											2019-11-02 04:18:00 +08:00
+								        BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, OffsetMIB.getReg(0))
 								                      .getReg(0);
-												GlobalISel: translate GEP instructions.

Unlike SDag, we use a separate G_GEP instruction (much simplified, only taking
a single byte offset) to preserve the pointer type information through
selection.

llvm-svn: 281205

											
										
										
											2016-09-12 19:20:22 +08:00
+								        Offset = 0;
 								      }
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								      Register IdxReg = getOrCreateVReg(*Idx);
-												GlobalISel: Translate vector GEPs

											
										
										
											2020-01-25 11:57:49 +08:00
+								      LLT IdxTy = MRI->getType(IdxReg);
 								      if (IdxTy != OffsetTy) {
 								        if (!IdxTy.isVector() && VectorWidth) {
 								          IdxReg = MIRBuilder.buildSplatVector(
 								            OffsetTy.changeElementType(IdxTy), IdxReg).getReg(0);
 								        }
-												[GlobalISel] CSEMIRBuilder: Add support for G_GEP

Summary:
This patch adds G_GEP to `shouldCSEOpc` so that it can be CSEd. It also refactors
`translateGetElementPtr` by replacing `createGenericVirtualRegister` calls with types.

Reviewers: aditya_nandakumar, arsenm, dsanders, paquette, aemerson

Reviewed By: aditya_nandakumar

Subscribers: wdng, rovka, javed.absar, hiraditya, Petar.Avramovic, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D66316

llvm-svn: 369070

											
										
										
											2019-08-16 07:45:45 +08:00
+								        IdxReg = MIRBuilder.buildSExtOrTrunc(OffsetTy, IdxReg).getReg(0);
-												GlobalISel: Translate vector GEPs

											
										
										
											2020-01-25 11:57:49 +08:00
+								      }
-												GlobalISel: translate GEP instructions.

Unlike SDag, we use a separate G_GEP instruction (much simplified, only taking
a single byte offset) to preserve the pointer type information through
selection.

llvm-svn: 281205

											
										
										
											2016-09-12 19:20:22 +08:00
-												[GISel]: Don't create G_MUL with 1 during translation of GEP

When element size is 1, it's just wasteful to create MUL with 1.
https://reviews.llvm.org/D41738

llvm-svn: 321857

											
										
										
											2018-01-05 10:56:28 +08:00
+								      // N = N + Idx * ElementSize;
 								      // Avoid doing it for ElementSize of 1.
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								      Register GepOffsetReg;
-												[GISel]: Don't create G_MUL with 1 during translation of GEP

When element size is 1, it's just wasteful to create MUL with 1.
https://reviews.llvm.org/D41738

llvm-svn: 321857

											
										
										
											2018-01-05 10:56:28 +08:00
+								      if (ElementSize != 1) {
-												[GlobalISel] Enable CSE in the IRTranslator & legalizer for -O0 with constants only.

Other opcodes shouldn't be CSE'd until we can be sure debug info quality won't
be degraded.

This change also improves the IRTranslator so that in most places, but not all,
it creates constants using the MIRBuilder directly instead of first creating a
new destination vreg and then creating a constant. By doing this, the
buildConstant() method can just return the vreg of an existing G_CONSTANT
instead of having to create a COPY from it.

I measured a 0.2% improvement in compile time and a 0.9% improvement in code
size at -O0 ARM64.

Compile time:
Program                                        base   cse    diff
test-suite...ark/tramp3d-v4/tramp3d-v4.test     9.04   9.12  0.8%
test-suite...Mark/mafft/pairlocalalign.test     2.68   2.66 -0.7%
test-suite...-typeset/consumer-typeset.test     5.53   5.51 -0.4%
test-suite :: CTMark/lencod/lencod.test         5.30   5.28 -0.3%
test-suite :: CTMark/Bullet/bullet.test        25.82  25.76 -0.2%
test-suite...:: CTMark/ClamAV/clamscan.test     6.92   6.90 -0.2%
test-suite...TMark/7zip/7zip-benchmark.test    34.24  34.17 -0.2%
test-suite :: CTMark/SPASS/SPASS.test           6.25   6.24 -0.1%
test-suite...:: CTMark/sqlite3/sqlite3.test     1.66   1.66 -0.1%
test-suite :: CTMark/kimwitu++/kc.test         13.61  13.60 -0.0%
Geomean difference                                          -0.2%

Code size:
Program                                        base     cse      diff
test-suite...-typeset/consumer-typeset.test    1315632  1266480 -3.7%
test-suite...:: CTMark/ClamAV/clamscan.test    1313892  1297508 -1.2%
test-suite :: CTMark/lencod/lencod.test        1439504  1423112 -1.1%
test-suite...TMark/7zip/7zip-benchmark.test    2936980  2904172 -1.1%
test-suite :: CTMark/Bullet/bullet.test        3478276  3445460 -0.9%
test-suite...ark/tramp3d-v4/tramp3d-v4.test    8082868  8033492 -0.6%
test-suite :: CTMark/kimwitu++/kc.test         3870380  3853972 -0.4%
test-suite :: CTMark/SPASS/SPASS.test          1434904  1434896 -0.0%
test-suite...Mark/mafft/pairlocalalign.test    764528   764528   0.0%
test-suite...:: CTMark/sqlite3/sqlite3.test    782092   782092   0.0%
Geomean difference                                              -0.9%

Differential Revision: https://reviews.llvm.org/D60580

llvm-svn: 358369

											
										
										
											2019-04-15 13:04:20 +08:00
+								        auto ElementSizeMIB = MIRBuilder.buildConstant(
 								            getLLTForType(*OffsetIRTy, *DL), ElementSize);
-												[GlobalISel] CSEMIRBuilder: Add support for G_GEP

Summary:
This patch adds G_GEP to `shouldCSEOpc` so that it can be CSEd. It also refactors
`translateGetElementPtr` by replacing `createGenericVirtualRegister` calls with types.

Reviewers: aditya_nandakumar, arsenm, dsanders, paquette, aemerson

Reviewed By: aditya_nandakumar

Subscribers: wdng, rovka, javed.absar, hiraditya, Petar.Avramovic, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D66316

llvm-svn: 369070

											
										
										
											2019-08-16 07:45:45 +08:00
+								        GepOffsetReg =
-												[GlobalISel][IRTranslator] Follow convention and put constant offset of getelementptr arithmetic on RHS.

We were needlessly putting known constant values on the LHS of a G_MUL, which
is suboptimal.

Differential Revision: https://reviews.llvm.org/D73650

											
										
										
											2020-01-30 01:34:33 +08:00
+								            MIRBuilder.buildMul(OffsetTy, IdxReg, ElementSizeMIB).getReg(0);
-												[GISel]: Don't create G_MUL with 1 during translation of GEP

When element size is 1, it's just wasteful to create MUL with 1.
https://reviews.llvm.org/D41738

llvm-svn: 321857

											
										
										
											2018-01-05 10:56:28 +08:00
+								      } else
 								        GepOffsetReg = IdxReg;
-												GlobalISel: translate GEP instructions.

Unlike SDag, we use a separate G_GEP instruction (much simplified, only taking
a single byte offset) to preserve the pointer type information through
selection.

llvm-svn: 281205

											
										
										
											2016-09-12 19:20:22 +08:00
-												[globalisel] Rename G_GEP to G_PTR_ADD

Summary:
G_GEP is rather poorly named. It's a simple pointer+scalar addition and
doesn't support any of the complexities of getelementptr. I therefore
propose that we rename it. There's a G_PTR_MASK so let's follow that
convention and go with G_PTR_ADD

Reviewers: volkan, aditya_nandakumar, bogner, rovka, arsenm

Subscribers: sdardis, jvesely, wdng, nhaehnle, hiraditya, jrtc27, atanasyan, arphaman, Petar.Avramovic, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D69734

											
										
										
											2019-11-02 04:18:00 +08:00
+								      BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, GepOffsetReg).getReg(0);
-												GlobalISel: translate GEP instructions.

Unlike SDag, we use a separate G_GEP instruction (much simplified, only taking
a single byte offset) to preserve the pointer type information through
selection.

llvm-svn: 281205

											
										
										
											2016-09-12 19:20:22 +08:00
+								    }
 								  }
 								  if (Offset != 0) {
-												[GlobalISel] Enable CSE in the IRTranslator & legalizer for -O0 with constants only.

Other opcodes shouldn't be CSE'd until we can be sure debug info quality won't
be degraded.

This change also improves the IRTranslator so that in most places, but not all,
it creates constants using the MIRBuilder directly instead of first creating a
new destination vreg and then creating a constant. By doing this, the
buildConstant() method can just return the vreg of an existing G_CONSTANT
instead of having to create a COPY from it.

I measured a 0.2% improvement in compile time and a 0.9% improvement in code
size at -O0 ARM64.

Compile time:
Program                                        base   cse    diff
test-suite...ark/tramp3d-v4/tramp3d-v4.test     9.04   9.12  0.8%
test-suite...Mark/mafft/pairlocalalign.test     2.68   2.66 -0.7%
test-suite...-typeset/consumer-typeset.test     5.53   5.51 -0.4%
test-suite :: CTMark/lencod/lencod.test         5.30   5.28 -0.3%
test-suite :: CTMark/Bullet/bullet.test        25.82  25.76 -0.2%
test-suite...:: CTMark/ClamAV/clamscan.test     6.92   6.90 -0.2%
test-suite...TMark/7zip/7zip-benchmark.test    34.24  34.17 -0.2%
test-suite :: CTMark/SPASS/SPASS.test           6.25   6.24 -0.1%
test-suite...:: CTMark/sqlite3/sqlite3.test     1.66   1.66 -0.1%
test-suite :: CTMark/kimwitu++/kc.test         13.61  13.60 -0.0%
Geomean difference                                          -0.2%

Code size:
Program                                        base     cse      diff
test-suite...-typeset/consumer-typeset.test    1315632  1266480 -3.7%
test-suite...:: CTMark/ClamAV/clamscan.test    1313892  1297508 -1.2%
test-suite :: CTMark/lencod/lencod.test        1439504  1423112 -1.1%
test-suite...TMark/7zip/7zip-benchmark.test    2936980  2904172 -1.1%
test-suite :: CTMark/Bullet/bullet.test        3478276  3445460 -0.9%
test-suite...ark/tramp3d-v4/tramp3d-v4.test    8082868  8033492 -0.6%
test-suite :: CTMark/kimwitu++/kc.test         3870380  3853972 -0.4%
test-suite :: CTMark/SPASS/SPASS.test          1434904  1434896 -0.0%
test-suite...Mark/mafft/pairlocalalign.test    764528   764528   0.0%
test-suite...:: CTMark/sqlite3/sqlite3.test    782092   782092   0.0%
Geomean difference                                              -0.9%

Differential Revision: https://reviews.llvm.org/D60580

llvm-svn: 358369

											
										
										
											2019-04-15 13:04:20 +08:00
+								    auto OffsetMIB =
-												GlobalISel: Translate vector GEPs

											
										
										
											2020-01-25 11:57:49 +08:00
+								        MIRBuilder.buildConstant(OffsetTy, Offset);
-												[globalisel] Rename G_GEP to G_PTR_ADD

Summary:
G_GEP is rather poorly named. It's a simple pointer+scalar addition and
doesn't support any of the complexities of getelementptr. I therefore
propose that we rename it. There's a G_PTR_MASK so let's follow that
convention and go with G_PTR_ADD

Reviewers: volkan, aditya_nandakumar, bogner, rovka, arsenm

Subscribers: sdardis, jvesely, wdng, nhaehnle, hiraditya, jrtc27, atanasyan, arphaman, Petar.Avramovic, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D69734

											
										
										
											2019-11-02 04:18:00 +08:00
+								    MIRBuilder.buildPtrAdd(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0));
-												GlobalISel: translate GEP instructions.

Unlike SDag, we use a separate G_GEP instruction (much simplified, only taking
a single byte offset) to preserve the pointer type information through
selection.

llvm-svn: 281205

											
										
										
											2016-09-12 19:20:22 +08:00
+								    return true;
 								  }
 								  MIRBuilder.buildCopy(getOrCreateVReg(U), BaseReg);
 								  return true;
 								}
-												[GlobalISel] Translate calls to memcpy et al to G_INTRINSIC_W_SIDE_EFFECTs and legalize later.

I plan on adding memcpy optimizations in the GlobalISel pipeline, but we can't
do that unless we delay lowering to actual function calls. This patch changes
the translator to generate G_INTRINSIC_W_SIDE_EFFECTS for these functions, and
then has each target specify, using the new custom legalizer hook for
intrinsics, that it wants them expanded into a libcall.

Differential Revision: https://reviews.llvm.org/D64895

llvm-svn: 366516

											
										
										
											2019-07-19 08:24:45 +08:00
+								bool IRTranslator::translateMemFunc(const CallInst &CI,
-												GlobalISel: translate memset & memmove.

llvm-svn: 293541

											
										
										
											2017-01-31 03:33:07 +08:00
+								                                    MachineIRBuilder &MIRBuilder,
-												GlobalISel: Add generic instructions for memory intrinsics

AArch64, X86 and Mips currently directly consumes these and custom
lowering to produce a libcall, but really these should follow the
normal legalization process through the libcall/lower action.

											
										
										
											2020-08-03 21:00:24 +08:00
+								                                    unsigned Opcode) {
-												[GlobalISel] Translate memset/memmove/memcpy from undef ptrs into nops

If the source is undef, then just don't do anything.

This matches SelectionDAG's behaviour in SelectionDAG.cpp.

Also add a test showing that we do the right thing here.
(irtranslator-memfunc-undef.ll)

Differential Revision: https://reviews.llvm.org/D63095

llvm-svn: 362989

											
										
										
											2019-06-11 05:53:56 +08:00
 								  // If the source is undef, then just emit a nop.
-												[GlobalISel] Translate calls to memcpy et al to G_INTRINSIC_W_SIDE_EFFECTs and legalize later.

I plan on adding memcpy optimizations in the GlobalISel pipeline, but we can't
do that unless we delay lowering to actual function calls. This patch changes
the translator to generate G_INTRINSIC_W_SIDE_EFFECTS for these functions, and
then have each target specify that using the new custom legalizer for intrinsics
hook that they want it expanded it a libcall.

Differential Revision: https://reviews.llvm.org/D64895

llvm-svn: 366516

											
										
										
											2019-07-19 08:24:45 +08:00
+								  if (isa<UndefValue>(CI.getArgOperand(1)))
 								    return true;
-												GlobalISel: translate memcpy intrinsics.

llvm-svn: 284525

											
										
										
											2016-10-19 04:03:45 +08:00
-												GlobalISel: IRTranslate minimum of pointer sizes on memcpy

I forgot to squash this with 0b7f6cc71a72a85f8a0cbee836a7a8e31876951a

											
										
										
											2020-08-17 10:17:12 +08:00
+								  SmallVector<Register, 3> SrcRegs;
 								  unsigned MinPtrSize = UINT_MAX;
 								  for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI) {
 								    Register SrcReg = getOrCreateVReg(**AI);
 								    LLT SrcTy = MRI->getType(SrcReg);
 								    if (SrcTy.isPointer())
 								      MinPtrSize = std::min(SrcTy.getSizeInBits(), MinPtrSize);
 								    SrcRegs.push_back(SrcReg);
 								  }
 								  LLT SizeTy = LLT::scalar(MinPtrSize);
 								  // The size operand should be the minimum of the pointer sizes.
 								  Register &SizeOpReg = SrcRegs[SrcRegs.size() - 1];
 								  if (MRI->getType(SizeOpReg) != SizeTy)
 								    SizeOpReg = MIRBuilder.buildZExtOrTrunc(SizeTy, SizeOpReg).getReg(0);
-												GlobalISel: Add generic instructions for memory intrinsics

AArch64, X86 and Mips currently directly consumes these and custom
lowering to produce a libcall, but really these should follow the
normal legalization process through the libcall/lower action.

											
										
										
											2020-08-03 21:00:24 +08:00
+								  auto ICall = MIRBuilder.buildInstr(Opcode);
-												GlobalISel: IRTranslate minimum of pointer sizes on memcpy

I forgot to squash this with 0b7f6cc71a72a85f8a0cbee836a7a8e31876951a

											
										
										
											2020-08-17 10:17:12 +08:00
+								  for (Register SrcReg : SrcRegs)
 								    ICall.addUse(SrcReg);
-												[GlobalISel] Translate calls to memcpy et al to G_INTRINSIC_W_SIDE_EFFECTs and legalize later.

I plan on adding memcpy optimizations in the GlobalISel pipeline, but we can't
do that unless we delay lowering to actual function calls. This patch changes
the translator to generate G_INTRINSIC_W_SIDE_EFFECTS for these functions, and
then have each target specify that using the new custom legalizer for intrinsics
hook that they want it expanded it a libcall.

Differential Revision: https://reviews.llvm.org/D64895

llvm-svn: 366516

											
										
										
											2019-07-19 08:24:45 +08:00
-												[Alignment][NFC] Transitionning more getMachineMemOperand call sites

Summary:
This is patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790

Reviewers: courbet

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77127

											
										
										
											2020-03-31 17:43:50 +08:00
+								  Align DstAlign;
 								  Align SrcAlign;
-												[GlobalISel] Translate calls to memcpy et al to G_INTRINSIC_W_SIDE_EFFECTs and legalize later.

I plan on adding memcpy optimizations in the GlobalISel pipeline, but we can't
do that unless we delay lowering to actual function calls. This patch changes
the translator to generate G_INTRINSIC_W_SIDE_EFFECTS for these functions, and
then have each target specify that using the new custom legalizer for intrinsics
hook that they want it expanded it a libcall.

Differential Revision: https://reviews.llvm.org/D64895

llvm-svn: 366516

											
										
										
											2019-07-19 08:24:45 +08:00
+								  unsigned IsVol =
 								      cast<ConstantInt>(CI.getArgOperand(CI.getNumArgOperands() - 1))
 								          ->getZExtValue();
 								  if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) {
-												[Alignment][NFC] Transitionning more getMachineMemOperand call sites

Summary:
This is patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790

Reviewers: courbet

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77127

											
										
										
											2020-03-31 17:43:50 +08:00
+								    DstAlign = MCI->getDestAlign().valueOrOne();
 								    SrcAlign = MCI->getSourceAlign().valueOrOne();
-												[GlobalISel] Translate calls to memcpy et al to G_INTRINSIC_W_SIDE_EFFECTs and legalize later.

I plan on adding memcpy optimizations in the GlobalISel pipeline, but we can't
do that unless we delay lowering to actual function calls. This patch changes
the translator to generate G_INTRINSIC_W_SIDE_EFFECTS for these functions, and
then have each target specify that using the new custom legalizer for intrinsics
hook that they want it expanded it a libcall.

Differential Revision: https://reviews.llvm.org/D64895

llvm-svn: 366516

											
										
										
											2019-07-19 08:24:45 +08:00
+								  } else if (auto *MMI = dyn_cast<MemMoveInst>(&CI)) {
-												[Alignment][NFC] Transitionning more getMachineMemOperand call sites

Summary:
This is patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790

Reviewers: courbet

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77127

											
										
										
											2020-03-31 17:43:50 +08:00
+								    DstAlign = MMI->getDestAlign().valueOrOne();
 								    SrcAlign = MMI->getSourceAlign().valueOrOne();
-												[GlobalISel] Translate calls to memcpy et al to G_INTRINSIC_W_SIDE_EFFECTs and legalize later.

I plan on adding memcpy optimizations in the GlobalISel pipeline, but we can't
do that unless we delay lowering to actual function calls. This patch changes
the translator to generate G_INTRINSIC_W_SIDE_EFFECTS for these functions, and
then have each target specify that using the new custom legalizer for intrinsics
hook that they want it expanded it a libcall.

Differential Revision: https://reviews.llvm.org/D64895

llvm-svn: 366516

											
										
										
											2019-07-19 08:24:45 +08:00
+								  } else {
 								    auto *MSI = cast<MemSetInst>(&CI);
-												[Alignment][NFC] Transitionning more getMachineMemOperand call sites

Summary:
This is patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790

Reviewers: courbet

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77127

											
										
										
											2020-03-31 17:43:50 +08:00
+								    DstAlign = MSI->getDestAlign().valueOrOne();
-												GlobalISel: translate memcpy intrinsics.

llvm-svn: 284525

											
										
										
											2016-10-19 04:03:45 +08:00
+								  }
-												Add an operand to memory intrinsics to denote the "tail" marker.

We need to propagate this information from the IR in order to be able to safely
do tail call optimizations on the intrinsics during legalization. Assuming
it's safe to do tail call opt without checking for the marker isn't safe because
the mem libcall may use allocas from the caller.

This adds an extra immediate operand to the end of the intrinsics and fixes the
legalizer to handle it.

Differential Revision: https://reviews.llvm.org/D68151

llvm-svn: 373140

											
										
										
											2019-09-28 13:33:21 +08:00
+								  // We need to propagate the tail call flag from the IR inst as an argument.
 								  // Otherwise, we have to pessimize and assume later that we cannot tail call
 								  // any memory intrinsics.
 								  ICall.addImm(CI.isTailCall() ? 1 : 0);
-												[GlobalISel] Translate calls to memcpy et al to G_INTRINSIC_W_SIDE_EFFECTs and legalize later.

I plan on adding memcpy optimizations in the GlobalISel pipeline, but we can't
do that unless we delay lowering to actual function calls. This patch changes
the translator to generate G_INTRINSIC_W_SIDE_EFFECTS for these functions, and
then have each target specify that using the new custom legalizer for intrinsics
hook that they want it expanded it a libcall.

Differential Revision: https://reviews.llvm.org/D64895

llvm-svn: 366516

											
										
										
											2019-07-19 08:24:45 +08:00
+								  // Create mem operands to store the alignment and volatile info.
 								  auto VolFlag = IsVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
 								  ICall.addMemOperand(MF->getMachineMemOperand(
 								      MachinePointerInfo(CI.getArgOperand(0)),
 								      MachineMemOperand::MOStore | VolFlag, 1, DstAlign));
-												GlobalISel: Add generic instructions for memory intrinsics

AArch64, X86 and Mips currently directly consumes these and custom
lowering to produce a libcall, but really these should follow the
normal legalization process through the libcall/lower action.

											
										
										
											2020-08-03 21:00:24 +08:00
+								  if (Opcode != TargetOpcode::G_MEMSET)
-												[GlobalISel] Translate calls to memcpy et al to G_INTRINSIC_W_SIDE_EFFECTs and legalize later.

I plan on adding memcpy optimizations in the GlobalISel pipeline, but we can't
do that unless we delay lowering to actual function calls. This patch changes
the translator to generate G_INTRINSIC_W_SIDE_EFFECTS for these functions, and
then have each target specify that using the new custom legalizer for intrinsics
hook that they want it expanded it a libcall.

Differential Revision: https://reviews.llvm.org/D64895

llvm-svn: 366516

											
										
										
											2019-07-19 08:24:45 +08:00
+								    ICall.addMemOperand(MF->getMachineMemOperand(
 								        MachinePointerInfo(CI.getArgOperand(1)),
 								        MachineMemOperand::MOLoad | VolFlag, 1, SrcAlign));
-												GlobalISel: translate memcpy intrinsics.

llvm-svn: 284525

											
										
										
											2016-10-19 04:03:45 +08:00
-												[GlobalISel] Translate calls to memcpy et al to G_INTRINSIC_W_SIDE_EFFECTs and legalize later.

I plan on adding memcpy optimizations in the GlobalISel pipeline, but we can't
do that unless we delay lowering to actual function calls. This patch changes
the translator to generate G_INTRINSIC_W_SIDE_EFFECTS for these functions, and
then have each target specify that using the new custom legalizer for intrinsics
hook that they want it expanded it a libcall.

Differential Revision: https://reviews.llvm.org/D64895

llvm-svn: 366516

											
										
										
											2019-07-19 08:24:45 +08:00
+								  return true;
-												GlobalISel: translate memcpy intrinsics.

llvm-svn: 284525

											
										
										
											2016-10-19 04:03:45 +08:00
+								}
-												GlobalISel: translate GEP instructions.

Unlike SDag, we use a separate G_GEP instruction (much simplified, only taking
a single byte offset) to preserve the pointer type information through
selection.

llvm-svn: 281205

											
										
										
											2016-09-12 19:20:22 +08:00
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								void IRTranslator::getStackGuard(Register DstReg,
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								                                 MachineIRBuilder &MIRBuilder) {
-												GlobalISel: set correct regclass for LOAD_STACK_GUARD.

Since it's not actually a generic MI, its register operands need a RegClass,
which is conveniently the target's pointer RegClass.

llvm-svn: 293335

											
										
										
											2017-01-28 05:31:24 +08:00
+								  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
 								  MRI->setRegClass(DstReg, TRI->getPointerRegClass(*MF));
-												[GlobalISel] Use more MachineIRBuilder helper methods

Reviewers: arsenm, nhaehnle

Subscribers: wdng, rovka, hiraditya, volkan, Petar.Avramovic, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D72833

											
										
										
											2020-01-16 20:09:48 +08:00
+								  auto MIB =
 								      MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD, {DstReg}, {});
-												GlobalISel: translate stack protector intrinsics

llvm-svn: 285614

											
										
										
											2016-11-01 02:30:59 +08:00
-												GlobalISel: store the current MachineFunction as direct state. NFC.

Having to ask the MIRBuilder for the current function is a little awkward, and
I'm intending to improve how that's threaded through anyway.

llvm-svn: 288983

											
										
										
											2016-12-08 05:17:47 +08:00
+								  auto &TLI = *MF->getSubtarget().getTargetLowering();
-												MachineFunction: Return reference from getFunction(); NFC

The Function can never be nullptr so we can return a reference.

llvm-svn: 320884

											
										
										
											2017-12-16 06:22:58 +08:00
+								  Value *Global = TLI.getSDagStackGuard(*MF->getFunction().getParent());
-												GlobalISel: translate stack protector intrinsics

llvm-svn: 285614

											
										
										
											2016-11-01 02:30:59 +08:00
+								  if (!Global)
 								    return;
 								  MachinePointerInfo MPInfo(Global);
 								  auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
 								               MachineMemOperand::MODereferenceable;
-												[MI] Change the array of `MachineMemOperand` pointers to be
a generically extensible collection of extra info attached to
a `MachineInstr`.

The primary change here is cleaning up the APIs used for setting and
manipulating the `MachineMemOperand` pointer arrays so that we can
change how they are allocated.

Then we introduce an extra info object that uses the trailing object
pattern to attach some number of MMOs but also other extra info. The
design of this is specifically so that this extra info has a fixed
necessary cost (the header tracking what extra info is included) and
everything else can be tail allocated. This pattern works especially
well with a `BumpPtrAllocator` which we use here.

I've also added the basic scaffolding for putting interesting pointers
into this, namely pre- and post-instruction symbols. These aren't used
anywhere yet, they're just there to ensure I've actually gotten the data
structure types correct. I'll flesh out support for these in
a subsequent patch (MIR dumping, parsing, the works).

Finally, I've included an optimization where we store any single pointer
inline in the `MachineInstr` to avoid the allocation overhead. This is
expected to be the overwhelmingly most common case and so should avoid
any memory usage growth due to slightly less clever / dense allocation
when dealing with >1 MMO. This did require several ergonomic
improvements to the `PointerSumType` to reasonably support the various
usage models.

This also has a side effect of freeing up 8 bits within the
`MachineInstr` which could be repurposed for something else.

The suggested direction here came largely from Hal Finkel. I hope it was
worth it. ;] It does hopefully clear a path for subsequent extensions
w/o nearly as much leg work. Lots of thanks to Reid and Justin for
careful reviews and ideas about how to do all of this.

Differential Revision: https://reviews.llvm.org/D50701

llvm-svn: 339940

											
										
										
											2018-08-17 05:30:05 +08:00
+								  MachineMemOperand *MemRef =
-												GlobalISel: store the current MachineFunction as direct state. NFC.

Having to ask the MIRBuilder for the current function is a little awkward, and
I'm intending to improve how that's threaded through anyway.

llvm-svn: 288983

											
										
										
											2016-12-08 05:17:47 +08:00
+								      MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8,
-												[Alignment][NFC] Use Align version of getMachineMemOperand

Summary:
This patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790

Reviewers: courbet

Subscribers: jyknight, sdardis, nemanjai, hiraditya, kbarton, fedor.sergeev, asb, rbar, johnrusso, simoncook, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, jfb, PkmX, jocewei, Jim, lenary, s.egerton, pzheng, sameer.abuasal, apazos, luismarques, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77059

											
										
										
											2020-03-30 22:45:57 +08:00
+								                               DL->getPointerABIAlignment(0));
-												[MI] Change the array of `MachineMemOperand` pointers to be
a generically extensible collection of extra info attached to
a `MachineInstr`.

The primary change here is cleaning up the APIs used for setting and
manipulating the `MachineMemOperand` pointer arrays so that we can
change how they are allocated.

Then we introduce an extra info object that uses the trailing object
pattern to attach some number of MMOs but also other extra info. The
design of this is specifically so that this extra info has a fixed
necessary cost (the header tracking what extra info is included) and
everything else can be tail allocated. This pattern works especially
well with a `BumpPtrAllocator` which we use here.

I've also added the basic scaffolding for putting interesting pointers
into this, namely pre- and post-instruction symbols. These aren't used
anywhere yet, they're just there to ensure I've actually gotten the data
structure types correct. I'll flesh out support for these in
a subsequent patch (MIR dumping, parsing, the works).

Finally, I've included an optimization where we store any single pointer
inline in the `MachineInstr` to avoid the allocation overhead. This is
expected to be the overwhelmingly most common case and so should avoid
any memory usage growth due to slightly less clever / dense allocation
when dealing with >1 MMO. This did require several ergonomic
improvements to the `PointerSumType` to reasonably support the various
usage models.

This also has a side effect of freeing up 8 bits within the
`MachineInstr` which could be repurposed for something else.

The suggested direction here came largely from Hal Finkel. I hope it was
worth it. ;] It does hopefully clear a path for subsequent extensions
w/o nearly as much leg work. Lots of thanks to Reid and Justin for
careful reviews and ideas about how to do all of this.

Differential Revision: https://reviews.llvm.org/D50701

llvm-svn: 339940

											
										
										
											2018-08-17 05:30:05 +08:00
+								  MIB.setMemRefs({MemRef});
-												GlobalISel: translate stack protector intrinsics

llvm-svn: 285614

											
										
										
											2016-11-01 02:30:59 +08:00
+								}
-												GlobalISel: factor overflow handling into separate function. NFC.

llvm-svn: 289149

											
										
										
											2016-12-09 06:44:00 +08:00
+								bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
 								                                              MachineIRBuilder &MIRBuilder) {
-												CodeGen: Introduce a class for registers

Avoids using a plain unsigned for registers throughout codegen.
Doesn't attempt to change every register use, just something a little
more than the set needed to build after changing the return type of
MachineOperand::getReg().

llvm-svn: 364191

											
										
										
											2019-06-24 23:50:29 +08:00
+								  ArrayRef<Register> ResRegs = getOrCreateVRegs(CI);
-												[GlobalISel] Use more MachineIRBuilder helper methods

Reviewers: arsenm, nhaehnle

Subscribers: wdng, rovka, hiraditya, volkan, Petar.Avramovic, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D72833

											
										
										
											2020-01-16 20:09:48 +08:00
+								  MIRBuilder.buildInstr(
 								      Op, {ResRegs[0], ResRegs[1]},
 								      {getOrCreateVReg(*CI.getOperand(0)), getOrCreateVReg(*CI.getOperand(1))});
-												GlobalISel: factor overflow handling into separate function. NFC.

llvm-svn: 289149

											
										
										
											2016-12-09 06:44:00 +08:00
 								  return true;
 								}
-												GlobalISel: Define mulfix/divfix opcodes

The full expansion involves the funnel shifts, which depend on another
patch to expand those.

											
										
										
											2020-07-20 01:09:48 +08:00
+								// Emits a single instruction with opcode \p Op for the call \p CI. The vreg
 								// of the call itself is the def; the vregs of call operands 0 and 1 are the
 								// first two sources, and the zero-extended value of operand 2 (read through
 								// cast<ConstantInt>) is appended as a plain integer source. Always returns
 								// true.
 								bool IRTranslator::translateFixedPointIntrinsic(unsigned Op, const CallInst &CI,
 								                                                MachineIRBuilder &MIRBuilder) {
 								  // Registers are requested result-first, then the operands in order.
 								  const Register Result = getOrCreateVReg(CI);
 								  const Register LHS = getOrCreateVReg(*CI.getOperand(0));
 								  const Register RHS = getOrCreateVReg(*CI.getOperand(1));
 								  const auto *ScaleC = cast<ConstantInt>(CI.getOperand(2));
 								  const uint64_t ScaleVal = ScaleC->getZExtValue();
 								  MIRBuilder.buildInstr(Op, {Result}, {LHS, RHS, ScaleVal});
 								  return true;
 								}
-												[GlobalISel][NFC] Gardening: Make translateSimpleUnaryIntrinsic general

Instead of only having this code work for unary intrinsics, have it work for
an arbitrary number of parameters.

Factor out the cases that fall under this (fma, pow).

This makes it a bit easier to add more intrinsics which don't require any
special work.

Differential Revision: https://reviews.llvm.org/D58079

llvm-svn: 353863

											
										
										
											2019-02-13 01:38:34 +08:00
+								unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
-												[GlobalISel][NFC] Gardening: Factor out code for simple unary intrinsics

There was a lot of repeated code wrt unary math intrinsics in
translateKnownIntrinsic. This factors out the repeated MIRBuilder code into
two functions: translateSimpleUnaryIntrinsic and getSimpleUnaryIntrinsicOpcode.

This simplifies adding simple unary intrinsics, since after this, all you have
to do is add the mapping to SimpleUnaryIntrinsicOpcodes.

Differential Revision: https://reviews.llvm.org/D57774

llvm-svn: 353316

											
										
										
											2019-02-07 01:25:54 +08:00
+								  switch (ID) {
 								    default:
 								      break;
-												[GlobalISel][AArch64] Select llvm.bswap* for non-vector types

This teaches the IRTranslator to emit G_BSWAP when it runs into
Intrinsic::bswap. This allows us to select G_BSWAP for non-vector types in
AArch64.

Add a select-bswap.mir test, and add global isel checks to a couple existing
tests in test/CodeGen/AArch64.

This doesn't handle every bswap case, since some of these rely on known bits
stuff. This just lets us handle the naive case.

Differential Revision: https://reviews.llvm.org/D58081

llvm-svn: 353861

											
										
										
											2019-02-13 01:28:17 +08:00
+								    case Intrinsic::bswap:
 								      return TargetOpcode::G_BSWAP;
-												GlobalISel: Translate llvm.fshl/llvm.fshr

											
										
										
											2020-03-20 08:46:08 +08:00
+								    case Intrinsic::bitreverse:
-												GlobalISel: Add G_BITREVERSE

This is the first failing pattern for AMDGPU and is trivial to handle.

llvm-svn: 370927

											
										
										
											2019-09-05 01:06:53 +08:00
+								      return TargetOpcode::G_BITREVERSE;
-												GlobalISel: Translate llvm.fshl/llvm.fshr

											
										
										
											2020-03-20 08:46:08 +08:00
+								    case Intrinsic::fshl:
 								      return TargetOpcode::G_FSHL;
 								    case Intrinsic::fshr:
 								      return TargetOpcode::G_FSHR;
-												[GlobalISel][NFC] Gardening: Factor out code for simple unary intrinsics

There was a lot of repeated code wrt unary math intrinsics in
translateKnownIntrinsic. This factors out the repeated MIRBuilder code into
two functions: translateSimpleUnaryIntrinsic and getSimpleUnaryIntrinsicOpcode.

This simplifies adding simple unary intrinsics, since after this, all you have
to do is add the mapping to SimpleUnaryIntrinsicOpcodes.

Differential Revision: https://reviews.llvm.org/D57774

llvm-svn: 353316

											
										
										
											2019-02-07 01:25:54 +08:00
+								    case Intrinsic::ceil:
 								      return TargetOpcode::G_FCEIL;
 								    case Intrinsic::cos:
 								      return TargetOpcode::G_FCOS;
 								    case Intrinsic::ctpop:
 								      return TargetOpcode::G_CTPOP;
 								    case Intrinsic::exp:
 								      return TargetOpcode::G_FEXP;
 								    case Intrinsic::exp2:
 								      return TargetOpcode::G_FEXP2;
 								    case Intrinsic::fabs:
 								      return TargetOpcode::G_FABS;
-												GlobalISel: Add G_FCOPYSIGN

llvm-svn: 360850

											
										
										
											2019-05-16 12:08:39 +08:00
+								    case Intrinsic::copysign:
 								      return TargetOpcode::G_FCOPYSIGN;
-												GlobalISel: Define the full family of FP min/max instructions

llvm-svn: 365657

											
										
										
											2019-07-11 00:31:15 +08:00
+								    case Intrinsic::minnum:
 								      return TargetOpcode::G_FMINNUM;
 								    case Intrinsic::maxnum:
 								      return TargetOpcode::G_FMAXNUM;
 								    case Intrinsic::minimum:
 								      return TargetOpcode::G_FMINIMUM;
 								    case Intrinsic::maximum:
 								      return TargetOpcode::G_FMAXIMUM;
-												GlobalISel: Add G_FCANONICALIZE instruction

llvm-svn: 353719

											
										
										
											2019-02-12 01:05:20 +08:00
+								    case Intrinsic::canonicalize:
 								      return TargetOpcode::G_FCANONICALIZE;
-												Recommit "[GlobalISel] Add IRTranslator support for G_FFLOOR"

After the changes introduced in r353586, this instruction doesn't cause any
issues for any backend.

Original review: https://reviews.llvm.org/D57485

llvm-svn: 353720

											
										
										
											2019-02-12 01:16:32 +08:00
+								    case Intrinsic::floor:
 								      return TargetOpcode::G_FFLOOR;
-												[GlobalISel][NFC] Gardening: Make translateSimpleUnaryIntrinsic general

Instead of only having this code work for unary intrinsics, have it work for
an arbitrary number of parameters.

Factor out the cases that fall under this (fma, pow).

This makes it a bit easier to add more intrinsics which don't require any
special work.

Differential Revision: https://reviews.llvm.org/D58079

llvm-svn: 353863

											
										
										
											2019-02-13 01:38:34 +08:00
+								    case Intrinsic::fma:
 								      return TargetOpcode::G_FMA;
-												[GlobalISel][NFC] Gardening: Factor out code for simple unary intrinsics

There was a lot of repeated code wrt unary math intrinsics in
translateKnownIntrinsic. This factors out the repeated MIRBuilder code into
two functions: translateSimpleUnaryIntrinsic and getSimpleUnaryIntrinsicOpcode.

This simplifies adding simple unary intrinsics, since after this, all you have
to do is add the mapping to SimpleUnaryIntrinsicOpcodes.

Differential Revision: https://reviews.llvm.org/D57774

llvm-svn: 353316

											
										
										
											2019-02-07 01:25:54 +08:00
+								    case Intrinsic::log:
 								      return TargetOpcode::G_FLOG;
 								    case Intrinsic::log2:
 								      return TargetOpcode::G_FLOG2;
 								    case Intrinsic::log10:
 								      return TargetOpcode::G_FLOG10;
-												[GlobalISel] Add IRTranslator support for G_FNEARBYINT

Translate llvm.nearbyint into G_FNEARBYINT as a simple intrinsic. Update
arm64-irtranslator.ll.

Differential Revision: https://reviews.llvm.org/D60922

llvm-svn: 359203

											
										
										
											2019-04-26 00:39:28 +08:00
+								    case Intrinsic::nearbyint:
 								      return TargetOpcode::G_FNEARBYINT;
-												[GlobalISel][NFC] Gardening: Make translateSimpleUnaryIntrinsic general

Instead of only having this code work for unary intrinsics, have it work for
an arbitrary number of parameters.

Factor out the cases that fall under this (fma, pow).

This makes it a bit easier to add more intrinsics which don't require any
special work.

Differential Revision: https://reviews.llvm.org/D58079

llvm-svn: 353863

											
										
										
											2019-02-13 01:38:34 +08:00
+								    case Intrinsic::pow:
 								      return TargetOpcode::G_FPOW;
-												GlobalISel: Translate llvm.powi intrinsic

There are a few questionable things about this intrinsic and existing
DAG implementation. For some reason the intrinsic hardcodes the second
operand to be scalar-only i32, and SelectionDAG builder makes a
legalization decision based on whether the operand is constant.

											
										
										
											2020-07-17 22:26:23 +08:00
+								    case Intrinsic::powi:
 								      return TargetOpcode::G_FPOWI;
-												[GlobalISel] Add IRTranslator support for G_FRINT

Add it as a simple intrinsic, update arm64-irtranslator.ll.

Differential Revision: https://reviews.llvm.org/D60893

llvm-svn: 358787

											
										
										
											2019-04-20 05:46:12 +08:00
+								    case Intrinsic::rint:
 								      return TargetOpcode::G_FRINT;
-												[GlobalISel][NFC] Gardening: Factor out code for simple unary intrinsics

There was a lot of repeated code wrt unary math intrinsics in
translateKnownIntrinsic. This factors out the repeated MIRBuilder code into
two functions: translateSimpleUnaryIntrinsic and getSimpleUnaryIntrinsicOpcode.

This simplifies adding simple unary intrinsics, since after this, all you have
to do is add the mapping to SimpleUnaryIntrinsicOpcodes.

Differential Revision: https://reviews.llvm.org/D57774

llvm-svn: 353316

											
										
										
											2019-02-07 01:25:54 +08:00
+								    case Intrinsic::round:
 								      return TargetOpcode::G_INTRINSIC_ROUND;
-												GlobalISel: Handle llvm.roundeven

I still think it's highly questionable that we have two intrinsics
with identical behavior and only vary by the name of the libcall used
if it happens to be lowered that way, but try to reduce the feature
delta between SDAG and GlobalISel for recently added intrinsics. I'm
not sure which opcode should be considered the canonical one, but
lower roundeven back to round.

											
										
										
											2020-07-19 21:56:15 +08:00
+								    case Intrinsic::roundeven:
 								      return TargetOpcode::G_INTRINSIC_ROUNDEVEN;
-												[GlobalISel][NFC] Gardening: Factor out code for simple unary intrinsics

There was a lot of repeated code wrt unary math intrinsics in
translateKnownIntrinsic. This factors out the repeated MIRBuilder code into
two functions: translateSimpleUnaryIntrinsic and getSimpleUnaryIntrinsicOpcode.

This simplifies adding simple unary intrinsics, since after this, all you have
to do is add the mapping to SimpleUnaryIntrinsicOpcodes.

Differential Revision: https://reviews.llvm.org/D57774

llvm-svn: 353316

											
										
										
											2019-02-07 01:25:54 +08:00
+								    case Intrinsic::sin:
 								      return TargetOpcode::G_FSIN;
 								    case Intrinsic::sqrt:
 								      return TargetOpcode::G_FSQRT;
 								    case Intrinsic::trunc:
 								      return TargetOpcode::G_INTRINSIC_TRUNC;
-												GlobalISel: Define G_READCYCLECOUNTER

											
										
										
											2020-01-05 01:46:58 +08:00
+								    case Intrinsic::readcyclecounter:
 								      return TargetOpcode::G_READCYCLECOUNTER;
-												GlobalISel: Merge G_PTR_MASK with llvm.ptrmask intrinsic

Confusingly, these were unrelated and had different semantics. The
G_PTR_MASK instruction predates the llvm.ptrmask intrinsic, but has a
different format. G_PTR_MASK only allows clearing the low bits of a
pointer, and only a constant number of bits. The ptrmask intrinsic
allows an arbitrary mask. Replace G_PTR_MASK to match the intrinsic.

Only selects the cases that look like the old instruction. More work
is needed to select the general case. Also new legalization code is
still needed to deal with the case where the incoming mask size does
not match the pointer size, which has a specified behavior in the
langref.

											
										
										
											2020-05-16 06:33:01 +08:00
+								    case Intrinsic::ptrmask:
 								      return TargetOpcode::G_PTRMASK;
-												[GlobalISel] Add G_INTRINSIC_LRINT and translate from llvm.lrint

Differential Revision: https://reviews.llvm.org/D84551

											
										
										
											2020-07-25 04:00:12 +08:00
+								    case Intrinsic::lrint:
 								      return TargetOpcode::G_INTRINSIC_LRINT;
-												[GlobalISel] Add translation support for vector reduction intrinsics.

In order to prevent the ExpandReductions pass from expanding some intrinsics
before they get to codegen, I had to add a -disable-expand-reductions flag
for testing purposes.

Differential Revision: https://reviews.llvm.org/D89028

											
										
										
											2020-10-08 15:17:02 +08:00
+								    // FADD/FMUL require checking the FMF, so are handled elsewhere.
 								    case Intrinsic::vector_reduce_fmin:
 								      return TargetOpcode::G_VECREDUCE_FMIN;
 								    case Intrinsic::vector_reduce_fmax:
 								      return TargetOpcode::G_VECREDUCE_FMAX;
 								    case Intrinsic::vector_reduce_add:
 								      return TargetOpcode::G_VECREDUCE_ADD;
 								    case Intrinsic::vector_reduce_mul:
 								      return TargetOpcode::G_VECREDUCE_MUL;
 								    case Intrinsic::vector_reduce_and:
 								      return TargetOpcode::G_VECREDUCE_AND;
 								    case Intrinsic::vector_reduce_or:
 								      return TargetOpcode::G_VECREDUCE_OR;
 								    case Intrinsic::vector_reduce_xor:
 								      return TargetOpcode::G_VECREDUCE_XOR;
 								    case Intrinsic::vector_reduce_smax:
 								      return TargetOpcode::G_VECREDUCE_SMAX;
 								    case Intrinsic::vector_reduce_smin:
 								      return TargetOpcode::G_VECREDUCE_SMIN;
 								    case Intrinsic::vector_reduce_umax:
 								      return TargetOpcode::G_VECREDUCE_UMAX;
 								    case Intrinsic::vector_reduce_umin:
 								      return TargetOpcode::G_VECREDUCE_UMIN;
-												[GlobalISel][NFC] Gardening: Factor out code for simple unary intrinsics

There was a lot of repeated code wrt unary math intrinsics in
translateKnownIntrinsic. This factors out the repeated MIRBuilder code into
two functions: translateSimpleUnaryIntrinsic and getSimpleUnaryIntrinsicOpcode.

This simplifies adding simple unary intrinsics, since after this, all you have
to do is add the mapping to SimpleUnaryIntrinsicOpcodes.

Differential Revision: https://reviews.llvm.org/D57774

llvm-svn: 353316

											
										
										
											2019-02-07 01:25:54 +08:00
+								  }
 								  return Intrinsic::not_intrinsic;
 								}
-												[GlobalISel][NFC] Gardening: Make translateSimpleUnaryIntrinsic general

Instead of only having this code work for unary intrinsics, have it work for
an arbitrary number of parameters.

Factor out the cases that fall under this (fma, pow).

This makes it a bit easier to add more intrinsics which don't require any
special work.

Differential Revision: https://reviews.llvm.org/D58079

llvm-svn: 353863

											
										
										
											2019-02-13 01:38:34 +08:00
+								bool IRTranslator::translateSimpleIntrinsic(const CallInst &CI,
 								                                            Intrinsic::ID ID,
 								                                            MachineIRBuilder &MIRBuilder) {
-												[GlobalISel][NFC] Gardening: Factor out code for simple unary intrinsics

There was a lot of repeated code wrt unary math intrinsics in
translateKnownIntrinsic. This factors out the repeated MIRBuilder code into
two functions: translateSimpleUnaryIntrinsic and getSimpleUnaryIntrinsicOpcode.

This simplifies adding simple unary intrinsics, since after this, all you have
to do is add the mapping to SimpleUnaryIntrinsicOpcodes.

Differential Revision: https://reviews.llvm.org/D57774

llvm-svn: 353316

											
										
										
											2019-02-07 01:25:54 +08:00
-												[GlobalISel][NFC] Gardening: Make translateSimpleUnaryIntrinsic general

Instead of only having this code work for unary intrinsics, have it work for
an arbitrary number of parameters.

Factor out the cases that fall under this (fma, pow).

This makes it a bit easier to add more intrinsics which don't require any
special work.

Differential Revision: https://reviews.llvm.org/D58079

llvm-svn: 353863

											
										
										
											2019-02-13 01:38:34 +08:00
+								  unsigned Op = getSimpleIntrinsicOpcode(ID);
-												[GlobalISel][NFC] Gardening: Factor out code for simple unary intrinsics

There was a lot of repeated code wrt unary math intrinsics in
translateKnownIntrinsic. This factors out the repeated MIRBuilder code into
two functions: translateSimpleUnaryIntrinsic and getSimpleUnaryIntrinsicOpcode.

This simplifies adding simple unary intrinsics, since after this, all you have
to do is add the mapping to SimpleUnaryIntrinsicOpcodes.

Differential Revision: https://reviews.llvm.org/D57774

llvm-svn: 353316

											
										
										
											2019-02-07 01:25:54 +08:00
-												[GlobalISel][NFC] Gardening: Make translateSimpleUnaryIntrinsic general

Instead of only having this code work for unary intrinsics, have it work for
an arbitrary number of parameters.

Factor out the cases that fall under this (fma, pow).

This makes it a bit easier to add more intrinsics which don't require any
special work.

Differential Revision: https://reviews.llvm.org/D58079

llvm-svn: 353863

											
										
										
											2019-02-13 01:38:34 +08:00
+								  // Is this a simple intrinsic?
-												[GlobalISel][NFC] Gardening: Factor out code for simple unary intrinsics

There was a lot of repeated code wrt unary math intrinsics in
translateKnownIntrinsic. This factors out the repeated MIRBuilder code into
two functions: translateSimpleUnaryIntrinsic and getSimpleUnaryIntrinsicOpcode.

This simplifies adding simple unary intrinsics, since after this, all you have
to do is add the mapping to SimpleUnaryIntrinsicOpcodes.

Differential Revision: https://reviews.llvm.org/D57774

llvm-svn: 353316

											
										
										
											2019-02-07 01:25:54 +08:00
+								  if (Op == Intrinsic::not_intrinsic)
 								    return false;
 								  // Yes. Let's translate it.
-												[GlobalISel][NFC] Gardening: Make translateSimpleUnaryIntrinsic general

Instead of only having this code work for unary intrinsics, have it work for
an arbitrary number of parameters.

Factor out the cases that fall under this (fma, pow).

This makes it a bit easier to add more intrinsics which don't require any
special work.

Differential Revision: https://reviews.llvm.org/D58079

llvm-svn: 353863

											
										
										
											2019-02-13 01:38:34 +08:00
+								  SmallVector<llvm::SrcOp, 4> VRegs;
 								  for (auto &Arg : CI.arg_operands())
 								    VRegs.push_back(getOrCreateVReg(*Arg));
 								  MIRBuilder.buildInstr(Op, {getOrCreateVReg(CI)}, VRegs,
-												Move IR flag handling directly into builder calls for cases translated from Instructions in GlobalIsel

Reviewers: aditya_nandakumar, volkan

Reviewed By: aditya_nandakumar

Subscribers: rovka, kristof.beyls, volkan, Petar.Avramovic

Differential Revision: https://reviews.llvm.org/D57630

llvm-svn: 353336

											
										
										
											2019-02-07 03:57:06 +08:00
+								                        MachineInstr::copyFlagsFromInstruction(CI));
-												[GlobalISel][NFC] Gardening: Factor out code for simple unary intrinsics

There was a lot of repeated code wrt unary math intrinsics in
translateKnownIntrinsic. This factors out the repeated MIRBuilder code into
two functions: translateSimpleUnaryIntrinsic and getSimpleUnaryIntrinsicOpcode.

This simplifies adding simple unary intrinsics, since after this, all you have
to do is add the mapping to SimpleUnaryIntrinsicOpcodes.

Differential Revision: https://reviews.llvm.org/D57774

llvm-svn: 353316

											
										
										
											2019-02-07 01:25:54 +08:00
+								  return true;
 								}
-												GlobalISel: Start defining strict FP instructions

The AMDGPU lowering for unconstrained G_FDIV sometimes needs to
introduce a mode switch in the middle, so it's helpful to have
constrained instructions available to legalize this. Right now nothing
is preventing reordering of the mode switch with the other
instructions in the expansion.

											
										
										
											2020-05-31 23:58:56 +08:00
+								// TODO: Include ConstrainedOps.def when all strict instructions are defined.
 								// Maps a constrained floating-point intrinsic ID to the corresponding
 								// G_STRICT_* opcode. Any ID that is not listed below yields 0.
 								static unsigned getConstrainedOpcode(Intrinsic::ID ID) {
 								  unsigned StrictOpc = 0;
 								  switch (ID) {
 								  case Intrinsic::experimental_constrained_fadd:
 								    StrictOpc = TargetOpcode::G_STRICT_FADD;
 								    break;
 								  case Intrinsic::experimental_constrained_fsub:
 								    StrictOpc = TargetOpcode::G_STRICT_FSUB;
 								    break;
 								  case Intrinsic::experimental_constrained_fmul:
 								    StrictOpc = TargetOpcode::G_STRICT_FMUL;
 								    break;
 								  case Intrinsic::experimental_constrained_fdiv:
 								    StrictOpc = TargetOpcode::G_STRICT_FDIV;
 								    break;
 								  case Intrinsic::experimental_constrained_frem:
 								    StrictOpc = TargetOpcode::G_STRICT_FREM;
 								    break;
 								  case Intrinsic::experimental_constrained_fma:
 								    StrictOpc = TargetOpcode::G_STRICT_FMA;
 								    break;
 								  case Intrinsic::experimental_constrained_sqrt:
 								    StrictOpc = TargetOpcode::G_STRICT_FSQRT;
 								    break;
 								  default:
 								    // Not a constrained intrinsic handled here; keep the 0 sentinel.
 								    break;
 								  }
 								  return StrictOpc;
 								}
 								bool IRTranslator::translateConstrainedFPIntrinsic(
 								  const ConstrainedFPIntrinsic &FPI, MachineIRBuilder &MIRBuilder) {
 								  fp::ExceptionBehavior EB = FPI.getExceptionBehavior().getValue();
 								  unsigned Opcode = getConstrainedOpcode(FPI.getIntrinsicID());
 								  if (!Opcode)
 								    return false;
 								  unsigned Flags = MachineInstr::copyFlagsFromInstruction(FPI);
 								  if (EB == fp::ExceptionBehavior::ebIgnore)
 								    Flags |= MachineInstr::NoFPExcept;
 								  SmallVector<llvm::SrcOp, 4> VRegs;
 								  VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(0)));
 								  if (!FPI.isUnaryOp())
 								    VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(1)));
 								  if (FPI.isTernaryOp())
 								    VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(2)));
 								  MIRBuilder.buildInstr(Opcode, {getOrCreateVReg(FPI)}, VRegs, Flags);
 								  return true;
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
 								                                           MachineIRBuilder &MIRBuilder) {
-												[GlobalISel][NFC] Gardening: Factor out code for simple unary intrinsics

There was a lot of repeated code wrt unary math intrinsics in
translateKnownIntrinsic. This factors out the repeated MIRBuilder code into
two functions: translateSimpleUnaryIntrinsic and getSimpleUnaryIntrinsicOpcode.

This simplifies adding simple unary intrinsics, since after this, all you have
to do is add the mapping to SimpleUnaryIntrinsicOpcodes.

Differential Revision: https://reviews.llvm.org/D57774

llvm-svn: 353316

											
										
										
											2019-02-07 01:25:54 +08:00
-												[GlobalISel][NFC] Gardening: Make translateSimpleUnaryIntrinsic general

Instead of only having this code work for unary intrinsics, have it work for
an arbitrary number of parameters.

Factor out the cases that fall under this (fma, pow).

This makes it a bit easier to add more intrinsics which don't require any
special work.

Differential Revision: https://reviews.llvm.org/D58079

llvm-svn: 353863

											
										
										
											2019-02-13 01:38:34 +08:00
+								  // If this is a simple intrinsic (that is, we just need to add a def of
 								  // a vreg, and uses for each arg operand, then translate it.
 								  if (translateSimpleIntrinsic(CI, ID, MIRBuilder))
-												[GlobalISel][NFC] Gardening: Factor out code for simple unary intrinsics

There was a lot of repeated code wrt unary math intrinsics in
translateKnownIntrinsic. This factors out the repeated MIRBuilder code into
two functions: translateSimpleUnaryIntrinsic and getSimpleUnaryIntrinsicOpcode.

This simplifies adding simple unary intrinsics, since after this, all you have
to do is add the mapping to SimpleUnaryIntrinsicOpcodes.

Differential Revision: https://reviews.llvm.org/D57774

llvm-svn: 353316

											
										
										
											2019-02-07 01:25:54 +08:00
+								    return true;
-												GlobalISel: support overflow arithmetic intrinsics.

Unsigned addition and subtraction can reuse the instructions created to
legalize large width operations (i.e. both produce and consume a carry flag).
Signed operations and multiplies get a dedicated op-with-overflow instruction.

Once this is produced the two values are combined into a struct register (which
will almost always be merged with a corresponding G_EXTRACT as part of
legalization).

llvm-svn: 279278

											
										
										
											2016-08-20 01:17:06 +08:00
+								  switch (ID) {
-												GlobalISel: factor overflow handling into separate function. NFC.

llvm-svn: 289149

											
										
										
											2016-12-09 06:44:00 +08:00
+								  default:
 								    break;
-												GlobalISel: drop lifetime intrinsics during translation.

We don't use them yet and they just cause problems.

llvm-svn: 294770

											
										
										
											2017-02-11 03:10:38 +08:00
+								  case Intrinsic::lifetime_start:
-												[GlobalISel] Add ISel support for @llvm.lifetime.start and @llvm.lifetime.end

This adds ISel support for lifetime markers in opt levels above O0.

It also updates the arm64-irtranslator test, and updates some AArch64 tests that
use them for added coverage.

It also adds a testcase taken from the X86 codegen tests which verified a bug
caused by lifetime markers + stack colouring in the past. This is intended to
make sure that GISel doesn't re-introduce the bug.

(This is basically a straight copy from what SelectionDAG does in
SelectionDAGBuilder.cpp)

https://reviews.llvm.org/D57187

llvm-svn: 352410

											
										
										
											2019-01-29 03:22:29 +08:00
+								  case Intrinsic::lifetime_end: {
 								    // No stack colouring in O0, discard region information.
 								    if (MF->getTarget().getOptLevel() == CodeGenOpt::None)
 								      return true;
 								    unsigned Op = ID == Intrinsic::lifetime_start ? TargetOpcode::LIFETIME_START
 								                                                  : TargetOpcode::LIFETIME_END;
 								    // Get the underlying objects for the location passed on the lifetime
 								    // marker.
-												Add "const" in GetUnderlyingObjects. NFC

Summary:
Both the input Value pointer and the returned Value
pointers in GetUnderlyingObjects are now declared as
const.

It turned out that all current (in-tree) uses of
GetUnderlyingObjects were trivial to update, being
satisfied with having those Value pointers declared
as const. Actually, in the past several of the users
had to use const_cast, just because of ValueTracking
not providing a version of GetUnderlyingObjects with
"const" Value pointers. With this patch we get rid
of those const casts.

Reviewers: hfinkel, materi, jkorous

Reviewed By: jkorous

Subscribers: dexonsmith, jkorous, jholewinski, sdardis, eraman, hiraditya, jrtc27, atanasyan, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D61038

llvm-svn: 359072

											
										
										
											2019-04-24 14:55:50 +08:00
+								    SmallVector<const Value *, 4> Allocas;
-												[NFC] Remove unused GetUnderlyingObject parameter

 Depends on D84617.

Differential Revision: https://reviews.llvm.org/D84621

											
										
										
											2020-07-31 17:09:54 +08:00
+								    getUnderlyingObjects(CI.getArgOperand(1), Allocas);
-												[GlobalISel] Add ISel support for @llvm.lifetime.start and @llvm.lifetime.end

This adds ISel support for lifetime markers in opt levels above O0.

It also updates the arm64-irtranslator test, and updates some AArch64 tests that
use them for added coverage.

It also adds a testcase taken from the X86 codegen tests which verified a bug
caused by lifetime markers + stack colouring in the past. This is intended to
make sure that GISel doesn't re-introduce the bug.

(This is basically a straight copy from what SelectionDAG does in
SelectionDAGBuilder.cpp)

https://reviews.llvm.org/D57187

llvm-svn: 352410

											
										
										
											2019-01-29 03:22:29 +08:00
 								    // Iterate over each underlying object, creating lifetime markers for each
 								    // static alloca. Quit if we find a non-static alloca.
-												Add "const" in GetUnderlyingObjects. NFC

Summary:
Both the input Value pointer and the returned Value
pointers in GetUnderlyingObjects are now declared as
const.

It turned out that all current (in-tree) uses of
GetUnderlyingObjects were trivial to update, being
satisfied with having those Value pointers declared
as const. Actually, in the past several of the users
had to use const_cast, just because of ValueTracking
not providing a version of GetUnderlyingObjects with
"const" Value pointers. With this patch we get rid
of those const casts.

Reviewers: hfinkel, materi, jkorous

Reviewed By: jkorous

Subscribers: dexonsmith, jkorous, jholewinski, sdardis, eraman, hiraditya, jrtc27, atanasyan, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D61038

llvm-svn: 359072

											
										
										
											2019-04-24 14:55:50 +08:00
+								    for (const Value *V : Allocas) {
 								      const AllocaInst *AI = dyn_cast<AllocaInst>(V);
-												[GlobalISel] Add ISel support for @llvm.lifetime.start and @llvm.lifetime.end

This adds ISel support for lifetime markers in opt levels above O0.

It also updates the arm64-irtranslator test, and updates some AArch64 tests that
use them for added coverage.

It also adds a testcase taken from the X86 codegen tests which verified a bug
caused by lifetime markers + stack colouring in the past. This is intended to
make sure that GISel doesn't re-introduce the bug.

(This is basically a straight copy from what SelectionDAG does in
SelectionDAGBuilder.cpp)

https://reviews.llvm.org/D57187

llvm-svn: 352410

											
										
										
											2019-01-29 03:22:29 +08:00
+								      if (!AI)
 								        continue;
 								      if (!AI->isStaticAlloca())
 								        return true;
 								      MIRBuilder.buildInstr(Op).addFrameIndex(getOrCreateFrameIndex(*AI));
 								    }
-												GlobalISel: drop lifetime intrinsics during translation.

We don't use them yet and they just cause problems.

llvm-svn: 294770

											
										
										
											2017-02-11 03:10:38 +08:00
+								    return true;
-												[GlobalISel] Add ISel support for @llvm.lifetime.start and @llvm.lifetime.end

This adds ISel support for lifetime markers in opt levels above O0.

It also updates the arm64-irtranslator test, and updates some AArch64 tests that
use them for added coverage.

It also adds a testcase taken from the X86 codegen tests which verified a bug
caused by lifetime markers + stack colouring in the past. This is intended to
make sure that GISel doesn't re-introduce the bug.

(This is basically a straight copy from what SelectionDAG does in
SelectionDAGBuilder.cpp)

https://reviews.llvm.org/D57187

llvm-svn: 352410

											
										
										
											2019-01-29 03:22:29 +08:00
+								  }
-												GlobalISel: support debug intrinsics.

The translation scheme is mostly cribbed from FastISel, and it's not entirely
convincing semantically. But it does seem to work in the common cases and allow
variables to be printed so it can't be all wrong.

llvm-svn: 293228

											
										
										
											2017-01-27 07:39:14 +08:00
+								  case Intrinsic::dbg_declare: {
 								    const DbgDeclareInst &DI = cast<DbgDeclareInst>(CI);
 								    assert(DI.getVariable() && "Missing variable");
 								    const Value *Address = DI.getAddress();
 								    if (!Address || isa<UndefValue>(Address)) {
-												Rename DEBUG macro to LLVM_DEBUG.
    
The DEBUG() macro is very generic so it might clash with other projects.
The renaming was done as follows:
- git grep -l 'DEBUG' | xargs sed -i 's/\bDEBUG\s\?(/LLVM_DEBUG(/g'
- git diff -U0 master | ../clang/tools/clang-format/clang-format-diff.py -i -p1 -style LLVM
- Manual change to APInt
- Manually change DOCS as regex doesn't match it.

In the transition period the DEBUG() macro is still present and aliased
to the LLVM_DEBUG() one.

Differential Revision: https://reviews.llvm.org/D43624

llvm-svn: 332240

											
										
										
											2018-05-14 20:53:11 +08:00
+								      LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
-												GlobalISel: support debug intrinsics.

The translation scheme is mostly cribbed from FastISel, and it's not entirely
convincing semantically. But it does seem to work in the common cases and allow
variables to be printed so it can't be all wrong.

llvm-svn: 293228

											
										
										
											2017-01-27 07:39:14 +08:00
+								      return true;
 								    }
 								    assert(DI.getVariable()->isValidLocationForIntrinsic(
 								               MIRBuilder.getDebugLoc()) &&
 								           "Expected inlined-at fields to agree");
-												GlobalISel: put debug info for static allocas in the MachineFunction.

The good reason to do this is that static allocas are pretty simple to handle
(especially at -O0) and avoiding tracking DBG_VALUEs throughout the pipeline
should give some kind of performance benefit.

The bad reason is that the debug pipeline is an unholy mess of implicit
contracts, where determining whether "DBG_VALUE %reg, imm" actually implies a
load or not involves the services of at least 3 soothsayers and the sacrifice
of at least one chicken.  And it still gets it wrong if the variable is at SP
directly.

llvm-svn: 297410

											
										
										
											2017-03-10 05:12:06 +08:00
+								    auto AI = dyn_cast<AllocaInst>(Address);
 								    if (AI && AI->isStaticAlloca()) {
 								      // Static allocas are tracked at the MF level, no need for DBG_VALUE
 								      // instructions (in fact, they get ignored if they *do* exist).
 								      MF->setVariableDbgInfo(DI.getVariable(), DI.getExpression(),
 								                             getOrCreateFrameIndex(*AI), DI.getDebugLoc());
-												[GlobalISel] Lower dbg.declare into indirect DBG_VALUE

Summary:
D31439 changed the semantics of dbg.declare to take the address of a
variable as the first argument, making it indirect.  It specifically
updated FastISel for this change here:

https://reviews.llvm.org/D31439#change-WVArzi177jPl

GlobalISel needs to follow suit, or else it will be missing a level of
indirection in the generated debuginfo.  This problem was seen in a Rust
debuginfo test on aarch64, since GlobalISel is used at -O0 for aarch64.

https://github.com/rust-lang/rust/issues/49807
https://bugzilla.redhat.com/show_bug.cgi?id=1611597
https://bugzilla.redhat.com/show_bug.cgi?id=1625768

Reviewers: dblaikie, aprantl, t.p.northover, javed.absar, rnk

Reviewed By: rnk

Subscribers: #debug-info, rovka, kristof.beyls, JDevlieghere, llvm-commits, tstellar

Differential Revision: https://reviews.llvm.org/D51749

llvm-svn: 341969

											
										
										
											2018-09-12 01:52:01 +08:00
+								    } else {
 								      // A dbg.declare describes the address of a source variable, so lower it
 								      // into an indirect DBG_VALUE.
 								      MIRBuilder.buildIndirectDbgValue(getOrCreateVReg(*Address),
 								                                       DI.getVariable(), DI.getExpression());
 								    }
-												GlobalISel: fall back gracefully for debug intrinsics.

Supporting them properly is a reasonably complex chunk of work, so to allow bot
testing before then we should at least be able to fall back to DAG ISel.

llvm-svn: 289150

											
										
										
											2016-12-09 06:44:13 +08:00
+								    return true;
-												GlobalISel: support debug intrinsics.

The translation scheme is mostly cribbed from FastISel, and it's not entirely
convincing semantically. But it does seem to work in the common cases and allow
variables to be printed so it can't be all wrong.

llvm-svn: 293228

											
										
										
											2017-01-27 07:39:14 +08:00
+								  }
-												[DebugInfo] Generate DWARF debug information for labels. (Fix leak problems)

There are two forms for label debug information in DWARF format.

1. Labels in a non-inlined function:

DW_TAG_label
  DW_AT_name
  DW_AT_decl_file
  DW_AT_decl_line
  DW_AT_low_pc

2. Labels in an inlined function:

DW_TAG_label
  DW_AT_abstract_origin
  DW_AT_low_pc

We will collect label information from DBG_LABEL. Before every DBG_LABEL,
we will generate a temporary symbol to denote the location of the label.
The symbol could be used to get DW_AT_low_pc afterwards. So, we create a
mapping between 'inlined label' and DBG_LABEL MachineInstr in DebugHandlerBase.
The DBG_LABEL in the mapping is used to query the symbol before it.

The AbstractLabels in DwarfCompileUnit is used to process labels in inlined
functions.

We also keep a mapping between scope and labels in DwarfFile to help to
generate correct tree structure of DIEs.

It also generates label debug information under global isel.

Differential Revision: https://reviews.llvm.org/D45556

llvm-svn: 340039

											
										
										
											2018-08-17 23:22:04 +08:00
+								  case Intrinsic::dbg_label: {
 								    const DbgLabelInst &DI = cast<DbgLabelInst>(CI);
 								    assert(DI.getLabel() && "Missing label");
 								    assert(DI.getLabel()->isValidLocationForIntrinsic(
 								               MIRBuilder.getDebugLoc()) &&
 								           "Expected inlined-at fields to agree");
 								    MIRBuilder.buildDbgLabel(DI.getLabel());
 								    return true;
 								  }
-												GlobalISel: translate @llvm.va_end intrinsic.

Turns out no-one actually cares about this one (at least) in tree so we can
just drop it entirely.

llvm-svn: 294345

											
										
										
											2017-02-08 04:08:59 +08:00
+								  case Intrinsic::vaend:
 								    // No target I know of cares about va_end. Certainly no in-tree target
 								    // does. Simplest intrinsic ever!
 								    return true;
-												GlobalISel: translate @llvm.va_start intrinsic.

Because we need to preserve the memory access being performed we need a
separate instruction to represent this.

llvm-svn: 294492

											
										
										
											2017-02-09 01:57:20 +08:00
+								  case Intrinsic::vastart: {
 								    auto &TLI = *MF->getSubtarget().getTargetLowering();
 								    Value *Ptr = CI.getArgOperand(0);
 								    unsigned ListSize = TLI.getVaListSizeInBits(*DL) / 8;
-												GlobalISel: Fix creating MMOs with align 0

llvm-svn: 352712

											
										
										
											2019-01-31 09:38:47 +08:00
+								    // FIXME: Get alignment
-												[GlobalISel] Use more MachineIRBuilder helper methods

Reviewers: arsenm, nhaehnle

Subscribers: wdng, rovka, hiraditya, volkan, Petar.Avramovic, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D72833

											
										
										
											2020-01-16 20:09:48 +08:00
+								    MIRBuilder.buildInstr(TargetOpcode::G_VASTART, {}, {getOrCreateVReg(*Ptr)})
-												[Alignment][NFC] Use Align version of getMachineMemOperand

Summary:
This is patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790

Reviewers: courbet

Subscribers: jyknight, sdardis, nemanjai, hiraditya, kbarton, fedor.sergeev, asb, rbar, johnrusso, simoncook, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, jfb, PkmX, jocewei, Jim, lenary, s.egerton, pzheng, sameer.abuasal, apazos, luismarques, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77059

											
										
										
											2020-03-30 22:45:57 +08:00
+								        .addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Ptr),
 								                                                MachineMemOperand::MOStore,
 								                                                ListSize, Align(1)));
-												GlobalISel: translate @llvm.va_start intrinsic.

Because we need to preserve the memory access being performed we need a
separate instruction to represent this.

llvm-svn: 294492

											
										
										
											2017-02-09 01:57:20 +08:00
+								    return true;
 								  }
-												GlobalISel: support debug intrinsics.

The translation scheme is mostly cribbed from FastISel, and it's not entirely
convincing semantically. But it does seem to work in the common cases and allow
variables to be printed so it can't be all wrong.

llvm-svn: 293228

											
										
										
											2017-01-27 07:39:14 +08:00
+								  case Intrinsic::dbg_value: {
 								    // This form of DBG_VALUE is target-independent.
 								    const DbgValueInst &DI = cast<DbgValueInst>(CI);
 								    const Value *V = DI.getValue();
 								    assert(DI.getVariable()->isValidLocationForIntrinsic(
 								               MIRBuilder.getDebugLoc()) &&
 								           "Expected inlined-at fields to agree");
 								    if (!V) {
 								      // Currently the optimizer can produce this; insert an undef to
 								      // help debugging.  Probably the optimizer should not do this.
-												Revert "[DebugInfo] Remove some users of DBG_VALUEs IsIndirect field"

This reverts commit ed29dbaafa49bb8c9039a35f768244c394411fea.

I'm backing out D68945, which as the discussion for D73526 shows, doesn't
seem to handle the -O0 path through the codegen backend correctly. I'll
reland the patch when a fix is worked out, apologies for all the churn.
The two parent commits are part of this revert too.

Conflicts:
	llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
	llvm/test/DebugInfo/X86/dbg-addr-dse.ll

SelectionDAGBuilder conflict is due to a nearby change in e39e2b4a79c6
that's technically unrelated. dbg-addr-dse.ll conflicted because
41206b61e30c (legitimately) changes the order of two lines.

There are further modifications to dbg-value-func-arg.ll: it landed after
the patch being reverted, and I've converted indirection to be represented
by the isIndirect field rather than DW_OP_deref.

											
										
										
											2020-02-06 01:27:44 +08:00
+								      MIRBuilder.buildIndirectDbgValue(0, DI.getVariable(), DI.getExpression());
-												GlobalISel: support debug intrinsics.

The translation scheme is mostly cribbed from FastISel, and it's not entirely
convincing semantically. But it does seem to work in the common cases and allow
variables to be printed so it can't be all wrong.

llvm-svn: 293228

											
										
										
											2017-01-27 07:39:14 +08:00
+								    } else if (const auto *CI = dyn_cast<Constant>(V)) {
-												Remove the unused DBG_VALUE offset parameter from GlobalISel (NFC)

Followup to r309426.
rdar://problem/33580047

llvm-svn: 309449

											
										
										
											2017-07-29 06:46:20 +08:00
+								      MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression());
-												GlobalISel: support debug intrinsics.

The translation scheme is mostly cribbed from FastISel, and it's not entirely
convincing semantically. But it does seem to work in the common cases and allow
variables to be printed so it can't be all wrong.

llvm-svn: 293228

											
										
										
											2017-01-27 07:39:14 +08:00
+								    } else {
-												[GlobalISel] Handle multiple registers in dbg.value intrinsic

https://reviews.llvm.org/D66077

The value passed into dbg.value may relate to multiple registers,
each of which needs a DBG_VALUE.

This fix calls MIRBuilder.buildDirectDbgValue for each register.

Without this, IR passed in from flang-compiler/flang may fail an
assertion in getOrCreateVReg.

Patch by : peterwaller-arm.

llvm-svn: 369403

											
										
										
											2019-08-21 00:28:37 +08:00
+								      for (Register Reg : getOrCreateVRegs(*V)) {
 								        // FIXME: This does not handle register-indirect values at offset 0. The
 								        // direct/indirect thing shouldn't really be handled by something as
 								        // implicit as reg+noreg vs reg+imm in the first place, but it seems
 								        // pretty baked in right now.
 								        MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression());
 								      }
-												GlobalISel: support debug intrinsics.

The translation scheme is mostly cribbed from FastISel, and it's not entirely
convincing semantically. But it does seem to work in the common cases and allow
variables to be printed so it can't be all wrong.

llvm-svn: 293228

											
										
										
											2017-01-27 07:39:14 +08:00
+								    }
 								    return true;
 								  }
-												GlobalISel: factor overflow handling into separate function. NFC.

llvm-svn: 289149

											
										
										
											2016-12-09 06:44:00 +08:00
+								  case Intrinsic::uadd_with_overflow:
-												[GISel]: Add missing opcodes for overflow intrinsics

https://reviews.llvm.org/D51197

Currently, IRTranslator (and GISel) seems to be arbitrarily picking
which overflow intrinsics get mapped into opcodes which either have a
carry as an input or not.
For intrinsics such as Intrinsic::uadd_with_overflow, translate it to an
opcode (G_UADDO) which doesn't have any carry inputs (similar to LLVM
IR).

This patch adds 4 missing opcodes for completeness - G_UADDO, G_USUBO,
G_SSUBE and G_SADDE.

llvm-svn: 340865

											
										
										
											2018-08-29 02:54:10 +08:00
+								    return translateOverflowIntrinsic(CI, TargetOpcode::G_UADDO, MIRBuilder);
-												GlobalISel: factor overflow handling into separate function. NFC.

llvm-svn: 289149

											
										
										
											2016-12-09 06:44:00 +08:00
+								  case Intrinsic::sadd_with_overflow:
 								    return translateOverflowIntrinsic(CI, TargetOpcode::G_SADDO, MIRBuilder);
 								  case Intrinsic::usub_with_overflow:
-												[GISel]: Add missing opcodes for overflow intrinsics

https://reviews.llvm.org/D51197

Currently, IRTranslator (and GISel) seems to be arbitrarily picking
which overflow intrinsics get mapped into opcodes which either have a
carry as an input or not.
For intrinsics such as Intrinsic::uadd_with_overflow, translate it to an
opcode (G_UADDO) which doesn't have any carry inputs (similar to LLVM
IR).

This patch adds 4 missing opcodes for completeness - G_UADDO, G_USUBO,
G_SSUBE and G_SADDE.

llvm-svn: 340865

											
										
										
											2018-08-29 02:54:10 +08:00
+								    return translateOverflowIntrinsic(CI, TargetOpcode::G_USUBO, MIRBuilder);
-												GlobalISel: factor overflow handling into separate function. NFC.

llvm-svn: 289149

											
										
										
											2016-12-09 06:44:00 +08:00
+								  case Intrinsic::ssub_with_overflow:
 								    return translateOverflowIntrinsic(CI, TargetOpcode::G_SSUBO, MIRBuilder);
 								  case Intrinsic::umul_with_overflow:
 								    return translateOverflowIntrinsic(CI, TargetOpcode::G_UMULO, MIRBuilder);
 								  case Intrinsic::smul_with_overflow:
 								    return translateOverflowIntrinsic(CI, TargetOpcode::G_SMULO, MIRBuilder);
-												[GlobalISel] Add generic opcodes for saturating add/subtract

Summary:
Add new generic MIR opcodes G_SADDSAT etc. Add support in IRTranslator
for translating the saturating add/subtract intrinsics to the new
opcodes.

Reviewers: aemerson, dsanders, paquette, arsenm

Subscribers: jvesely, wdng, nhaehnle, rovka, hiraditya, volkan, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D76600

											
										
										
											2020-02-24 19:13:05 +08:00
+								  case Intrinsic::uadd_sat:
 								    return translateBinaryOp(TargetOpcode::G_UADDSAT, CI, MIRBuilder);
 								  case Intrinsic::sadd_sat:
 								    return translateBinaryOp(TargetOpcode::G_SADDSAT, CI, MIRBuilder);
 								  case Intrinsic::usub_sat:
 								    return translateBinaryOp(TargetOpcode::G_USUBSAT, CI, MIRBuilder);
 								  case Intrinsic::ssub_sat:
 								    return translateBinaryOp(TargetOpcode::G_SSUBSAT, CI, MIRBuilder);
-												[Intrinsic] Add sshl.sat/ushl.sat, saturated shift intrinsics.

Summary:
This patch adds two intrinsics, llvm.sshl.sat and llvm.ushl.sat,
which perform signed and unsigned saturating left shift,
respectively.

These are useful for implementing the Embedded-C fixed point
support in Clang, originally discussed in
http://lists.llvm.org/pipermail/llvm-dev/2018-August/125433.html
and
http://lists.llvm.org/pipermail/cfe-dev/2018-May/058019.html

Reviewers: leonardchan, craig.topper, bjope, jdoerfert

Subscribers: hiraditya, jdoerfert, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D83216

											
										
										
											2020-07-16 23:02:04 +08:00
+								  case Intrinsic::ushl_sat:
 								    return translateBinaryOp(TargetOpcode::G_USHLSAT, CI, MIRBuilder);
 								  case Intrinsic::sshl_sat:
 								    return translateBinaryOp(TargetOpcode::G_SSHLSAT, CI, MIRBuilder);
-												[IR] Add min/max/abs intrinsics

This adds the llvm.abs(), llvm.umin(), llvm.umax(), llvm.smin(),
and llvm.smax() intrinsics specified in D81829. For SelectionDAG,
the ISD opcodes and all the legalization and lowering already exist,
so this just wires them up to the intrinsic in the SDAG builder and
adds rudimentary tests. For GlobalISel only the min/max intrinsics
are wired up, as llvm.abs() will require the addition of a G_ABS op,
and corresponding legalization support.

Differential Revision: https://reviews.llvm.org/D84125

											
										
										
											2020-07-19 23:06:17 +08:00
+								  case Intrinsic::umin:
 								    return translateBinaryOp(TargetOpcode::G_UMIN, CI, MIRBuilder);
 								  case Intrinsic::umax:
 								    return translateBinaryOp(TargetOpcode::G_UMAX, CI, MIRBuilder);
 								  case Intrinsic::smin:
 								    return translateBinaryOp(TargetOpcode::G_SMIN, CI, MIRBuilder);
 								  case Intrinsic::smax:
 								    return translateBinaryOp(TargetOpcode::G_SMAX, CI, MIRBuilder);
-												[GlobalISel] Add G_ABS

This is equivalent to the new llvm.abs intrinsic added by D84125 with
is_int_min_poison=0.

Differential Revision: https://reviews.llvm.org/D85718

											
										
										
											2020-08-11 17:50:58 +08:00
+								  case Intrinsic::abs:
 								    // TODO: Preserve "int min is poison" arg in GMIR?
 								    return translateUnaryOp(TargetOpcode::G_ABS, CI, MIRBuilder);
-												GlobalISel: Define mulfix/divfix opcodes

The full expansion involves the funnel shifts, which depend on another
patch to expand those.

											
										
										
											2020-07-20 01:09:48 +08:00
+								  case Intrinsic::smul_fix:
 								    return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIX, CI, MIRBuilder);
 								  case Intrinsic::umul_fix:
 								    return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIX, CI, MIRBuilder);
 								  case Intrinsic::smul_fix_sat:
 								    return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIXSAT, CI, MIRBuilder);
 								  case Intrinsic::umul_fix_sat:
 								    return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIXSAT, CI, MIRBuilder);
 								  case Intrinsic::sdiv_fix:
 								    return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIX, CI, MIRBuilder);
 								  case Intrinsic::udiv_fix:
 								    return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIX, CI, MIRBuilder);
 								  case Intrinsic::sdiv_fix_sat:
 								    return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIXSAT, CI, MIRBuilder);
 								  case Intrinsic::udiv_fix_sat:
 								    return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIXSAT, CI, MIRBuilder);
-												GlobalISel: IRTranslate llvm.fmuladd.* intrinsic

Reviewers: qcolombet, ab, dsanders, aditya_nandakumar, bogner

Reviewed By: qcolombet

Subscribers: rovka, kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D43090

llvm-svn: 324971

											
										
										
											2018-02-13 08:47:46 +08:00
+								  case Intrinsic::fmuladd: {
 								    const TargetMachine &TM = MF->getTarget();
 								    const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								    Register Dst = getOrCreateVReg(CI);
 								    Register Op0 = getOrCreateVReg(*CI.getArgOperand(0));
 								    Register Op1 = getOrCreateVReg(*CI.getArgOperand(1));
 								    Register Op2 = getOrCreateVReg(*CI.getArgOperand(2));
-												GlobalISel: IRTranslate llvm.fmuladd.* intrinsic

Reviewers: qcolombet, ab, dsanders, aditya_nandakumar, bogner

Reviewed By: qcolombet

Subscribers: rovka, kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D43090

llvm-svn: 324971

											
										
										
											2018-02-13 08:47:46 +08:00
+								    if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
-												DAG: Add function context to isFMAFasterThanFMulAndFAdd

AMDGPU needs to know the FP mode for the function to answer this
correctly when this is removed from the subtarget.

AArch64 had to make this more complicated by using this from an IR
hook, so add an IR typed overload.

											
										
										
											2019-10-29 08:38:44 +08:00
+								        TLI.isFMAFasterThanFMulAndFAdd(*MF,
 								                                       TLI.getValueType(*DL, CI.getType()))) {
-												GlobalISel: IRTranslate llvm.fmuladd.* intrinsic

Reviewers: qcolombet, ab, dsanders, aditya_nandakumar, bogner

Reviewed By: qcolombet

Subscribers: rovka, kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D43090

llvm-svn: 324971

											
										
										
											2018-02-13 08:47:46 +08:00
+								      // TODO: Revisit this to see if we should move this part of the
 								      // lowering to the combiner.
-												[GlobalISel] Use more MachineIRBuilder helper methods

Reviewers: arsenm, nhaehnle

Subscribers: wdng, rovka, hiraditya, volkan, Petar.Avramovic, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D72833

											
										
										
											2020-01-16 20:09:48 +08:00
+								      MIRBuilder.buildFMA(Dst, Op0, Op1, Op2,
 								                          MachineInstr::copyFlagsFromInstruction(CI));
-												GlobalISel: IRTranslate llvm.fmuladd.* intrinsic

Reviewers: qcolombet, ab, dsanders, aditya_nandakumar, bogner

Reviewed By: qcolombet

Subscribers: rovka, kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D43090

llvm-svn: 324971

											
										
										
											2018-02-13 08:47:46 +08:00
+								    } else {
 								      LLT Ty = getLLTForType(*CI.getType(), *DL);
-												[GlobalISel] Use more MachineIRBuilder helper methods

Reviewers: arsenm, nhaehnle

Subscribers: wdng, rovka, hiraditya, volkan, Petar.Avramovic, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D72833

											
										
										
											2020-01-16 20:09:48 +08:00
+								      auto FMul = MIRBuilder.buildFMul(
 								          Ty, Op0, Op1, MachineInstr::copyFlagsFromInstruction(CI));
 								      MIRBuilder.buildFAdd(Dst, FMul, Op2,
 								                           MachineInstr::copyFlagsFromInstruction(CI));
-												GlobalISel: IRTranslate llvm.fmuladd.* intrinsic

Reviewers: qcolombet, ab, dsanders, aditya_nandakumar, bogner

Reviewed By: qcolombet

Subscribers: rovka, kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D43090

llvm-svn: 324971

											
										
										
											2018-02-13 08:47:46 +08:00
+								    }
 								    return true;
 								  }
-												GlobalISel: Translate llvm.convert.{to|from}.fp16 intrinsics

I think these were added as a workaround for SelectionDAG lacking half
legalization support in the past. I think they should probably be
removed from the IR, but clang does still have a target control to
emit these instead of the native half fpext/fptrunc.

											
										
										
											2020-07-26 04:58:15 +08:00
+								  case Intrinsic::convert_from_fp16:
 								    // FIXME: This intrinsic should probably be removed from the IR.
 								    MIRBuilder.buildFPExt(getOrCreateVReg(CI),
 								                          getOrCreateVReg(*CI.getArgOperand(0)),
 								                          MachineInstr::copyFlagsFromInstruction(CI));
 								    return true;
 								  case Intrinsic::convert_to_fp16:
 								    // FIXME: This intrinsic should probably be removed from the IR.
 								    MIRBuilder.buildFPTrunc(getOrCreateVReg(CI),
 								                            getOrCreateVReg(*CI.getArgOperand(0)),
 								                            MachineInstr::copyFlagsFromInstruction(CI));
 								    return true;
-												GlobalISel: translate memcpy intrinsics.

llvm-svn: 284525

											
										
										
											2016-10-19 04:03:45 +08:00
+								  case Intrinsic::memcpy:
-												GlobalISel: Add generic instructions for memory intrinsics

AArch64, X86 and Mips currently consume these directly and custom-lower
them to produce a libcall, but really these should follow the
normal legalization process through the libcall/lower action.

											
										
										
											2020-08-03 21:00:24 +08:00
+								    return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY);
-												GlobalISel: translate memset & memmove.

llvm-svn: 293541

											
										
										
											2017-01-31 03:33:07 +08:00
+								  case Intrinsic::memmove:
-												GlobalISel: Add generic instructions for memory intrinsics

AArch64, X86 and Mips currently consume these directly and custom-lower
them to produce a libcall, but really these should follow the
normal legalization process through the libcall/lower action.

											
										
										
											2020-08-03 21:00:24 +08:00
+								    return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMMOVE);
-												GlobalISel: translate memset & memmove.

llvm-svn: 293541

											
										
										
											2017-01-31 03:33:07 +08:00
+								  case Intrinsic::memset:
-												GlobalISel: Add generic instructions for memory intrinsics

AArch64, X86 and Mips currently consume these directly and custom-lower
them to produce a libcall, but really these should follow the
normal legalization process through the libcall/lower action.

											
										
										
											2020-08-03 21:00:24 +08:00
+								    return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMSET);
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
+								  case Intrinsic::eh_typeid_for: {
 								    GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0));
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								    Register Reg = getOrCreateVReg(CI);
-												GlobalISel: store the current MachineFunction as direct state. NFC.

Having to ask the MIRBuilder for the current function is a little awkward, and
I'm intending to improve how that's threaded through anyway.

llvm-svn: 288983

											
										
										
											2016-12-08 05:17:47 +08:00
+								    unsigned TypeID = MF->getTypeIDFor(GV);
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
+								    MIRBuilder.buildConstant(Reg, TypeID);
 								    return true;
 								  }
-												Reapply r374743 with a fix for the ocaml binding

Add a pass to lower is.constant and objectsize intrinsics

This pass lowers is.constant and objectsize intrinsics not simplified by
earlier constant folding, i.e. if the object given is not constant or if
not using the optimized pass chain. The result is recursively simplified
and constant conditionals are pruned, so that dead blocks are removed
even for -O0. This allows inline asm blocks with operand constraints to
work all the time.

The new pass replaces the existing lowering in the codegen-prepare pass
and fallbacks in SDAG/GlobalISEL and FastISel. The latter now assert
on the intrinsics.

Differential Revision: https://reviews.llvm.org/D65280

llvm-svn: 374784

											
										
										
											2019-10-15 00:15:14 +08:00
+								  case Intrinsic::objectsize:
 								    llvm_unreachable("llvm.objectsize.* should have been lowered already");
-												GlobalISel: translate the @llvm.objectsize intrinsic.

llvm-svn: 284527

											
										
										
											2016-10-19 04:03:51 +08:00
-												Add support for llvm.is.constant intrinsic (PR4898)

This adds the llvm-side support for post-inlining evaluation of the
__builtin_constant_p GCC intrinsic.

Also fixed SCCPSolver::visitCallSite to not blow up when seeing a call
to a function where canConstantFoldTo returns true, and one of the
arguments is a struct.

Updated from patch initially by Janusz Sobczak.

Differential Revision: https://reviews.llvm.org/D4276

llvm-svn: 346322

											
										
										
											2018-11-07 23:24:12 +08:00
+								  case Intrinsic::is_constant:
-												Reapply r374743 with a fix for the ocaml binding

Add a pass to lower is.constant and objectsize intrinsics

This pass lowers is.constant and objectsize intrinsics not simplified by
earlier constant folding, i.e. if the object given is not constant or if
not using the optimized pass chain. The result is recursively simplified
and constant conditionals are pruned, so that dead blocks are removed
even for -O0. This allows inline asm blocks with operand constraints to
work all the time.

The new pass replaces the existing lowering in the codegen-prepare pass
and fallbacks in SDAG/GlobalISEL and FastISel. The latter now assert
on the intrinsics.

Differential Revision: https://reviews.llvm.org/D65280

llvm-svn: 374784

											
										
										
											2019-10-15 00:15:14 +08:00
+								    llvm_unreachable("llvm.is.constant.* should have been lowered already");
-												GlobalISel: translate stack protector intrinsics

llvm-svn: 285614

											
										
										
											2016-11-01 02:30:59 +08:00
+								  case Intrinsic::stackguard:
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								    getStackGuard(getOrCreateVReg(CI), MIRBuilder);
-												GlobalISel: translate stack protector intrinsics

llvm-svn: 285614

											
										
										
											2016-11-01 02:30:59 +08:00
+								    return true;
 								  case Intrinsic::stackprotector: {
-												Recommit: [globalisel] Change LLT constructor string into an LLT-based object that knows how to generate it.

Summary:
This will allow future patches to inspect the details of the LLT. The implementation is now split between
the Support and CodeGen libraries to allow TableGen to use this class without introducing layering concerns.

Thanks to Ahmed Bougacha for finding a reasonable way to avoid the layering issue and providing the version of this patch without that problem.

The problem with the previous commit appears to have been that TableGen was including CodeGen/LowLevelType.h instead of Support/LowLevelTypeImpl.h.

Reviewers: t.p.northover, qcolombet, rovka, aditya_nandakumar, ab, javed.absar

Subscribers: arsenm, nhaehnle, mgorny, dberris, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30046

llvm-svn: 297241

											
										
										
											2017-03-08 07:20:35 +08:00
+								    LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								    Register GuardVal = MRI->createGenericVirtualRegister(PtrTy);
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								    getStackGuard(GuardVal, MIRBuilder);
-												GlobalISel: translate stack protector intrinsics

llvm-svn: 285614

											
										
										
											2016-11-01 02:30:59 +08:00
 								    AllocaInst *Slot = cast<AllocaInst>(CI.getArgOperand(1));
-												[GlobalISel] Set stack protector index when translating Intrinsic::stackprotector

Record the stack protector index in MachineFrameInfo when translating
Intrinsic::stackprotector similarly as is done by SelectionDAG when
processing the same intrinsic.

Setting this index allows the Prologue/Epilogue Insertion to recognize
that the stack protection is enabled. The pass can then make sure that
the stack protector comes before local variables on the stack and
assigns potentially vulnerable objects first so they are close to the
stack protector slot.

Differential Revision: https://reviews.llvm.org/D55418

llvm-svn: 348761

											
										
										
											2018-12-10 23:15:05 +08:00
+								    int FI = getOrCreateFrameIndex(*Slot);
 								    MF->getFrameInfo().setStackProtectorIndex(FI);
-												GlobalISel: translate stack protector intrinsics

llvm-svn: 285614

											
										
										
											2016-11-01 02:30:59 +08:00
+								    MIRBuilder.buildStore(
 								        GuardVal, getOrCreateVReg(*Slot),
-												[GlobalISel] Set stack protector index when translating Intrinsic::stackprotector

Record the stack protector index in MachineFrameInfo when translating
Intrinsic::stackprotector similarly as is done by SelectionDAG when
processing the same intrinsic.

Setting this index allows the Prologue/Epilogue Insertion to recognize
that the stack protection is enabled. The pass can then make sure that
the stack protector comes before local variables on the stack and
assigns potentially vulnerable objects first so they are close to the
stack protector slot.

Differential Revision: https://reviews.llvm.org/D55418

llvm-svn: 348761

											
										
										
											2018-12-10 23:15:05 +08:00
+								        *MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
 								                                  MachineMemOperand::MOStore |
 								                                      MachineMemOperand::MOVolatile,
-												[Alignment][NFC] Transitionning more getMachineMemOperand call sites

Summary:
This patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790

Reviewers: courbet

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77127

											
										
										
											2020-03-31 17:43:50 +08:00
+								                                  PtrTy.getSizeInBits() / 8, Align(8)));
-												GlobalISel: translate stack protector intrinsics

llvm-svn: 285614

											
										
										
											2016-11-01 02:30:59 +08:00
+								    return true;
 								  }
-												[GlobalISel] Add IRTranslator support for llvm.stacksave and llvm.stackrestore

Also update arm64-irtranslator.ll.

Differential Revision: https://reviews.llvm.org/D60140

llvm-svn: 357538

											
										
										
											2019-04-03 06:46:31 +08:00
+								  case Intrinsic::stacksave: {
 								    // Save the stack pointer to the location provided by the intrinsic.
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								    Register Reg = getOrCreateVReg(CI);
 								    Register StackPtr = MF->getSubtarget()
-												[GlobalISel] Add IRTranslator support for llvm.stacksave and llvm.stackrestore

Also update arm64-irtranslator.ll.

Differential Revision: https://reviews.llvm.org/D60140

llvm-svn: 357538

											
										
										
											2019-04-03 06:46:31 +08:00
+								                            .getTargetLowering()
 								                            ->getStackPointerRegisterToSaveRestore();
 								    // If the target doesn't specify a stack pointer, then fall back.
 								    if (!StackPtr)
 								      return false;
 								    MIRBuilder.buildCopy(Reg, StackPtr);
 								    return true;
 								  }
 								  case Intrinsic::stackrestore: {
 								    // Restore the stack pointer from the location provided by the intrinsic.
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								    Register Reg = getOrCreateVReg(*CI.getArgOperand(0));
 								    Register StackPtr = MF->getSubtarget()
-												[GlobalISel] Add IRTranslator support for llvm.stacksave and llvm.stackrestore

Also update arm64-irtranslator.ll.

Differential Revision: https://reviews.llvm.org/D60140

llvm-svn: 357538

											
										
										
											2019-04-03 06:46:31 +08:00
+								                            .getTargetLowering()
 								                            ->getStackPointerRegisterToSaveRestore();
 								    // If the target doesn't specify a stack pointer, then fall back.
 								    if (!StackPtr)
 								      return false;
 								    MIRBuilder.buildCopy(StackPtr, Reg);
 								    return true;
 								  }
-												[GISel]: Add Opcodes for CTLZ/CTTZ/CTPOP

https://reviews.llvm.org/D48600

Added IRTranslator support to translate these known intrinsics into GISel opcodes.

llvm-svn: 338944

											
										
										
											2018-08-04 09:22:12 +08:00
+								  case Intrinsic::cttz:
 								  case Intrinsic::ctlz: {
 								    ConstantInt *Cst = cast<ConstantInt>(CI.getArgOperand(1));
 								    bool isTrailing = ID == Intrinsic::cttz;
 								    unsigned Opcode = isTrailing
 								                          ? Cst->isZero() ? TargetOpcode::G_CTTZ
 								                                          : TargetOpcode::G_CTTZ_ZERO_UNDEF
 								                          : Cst->isZero() ? TargetOpcode::G_CTLZ
 								                                          : TargetOpcode::G_CTLZ_ZERO_UNDEF;
-												[GlobalISel] Use more MachineIRBuilder helper methods

Reviewers: arsenm, nhaehnle

Subscribers: wdng, rovka, hiraditya, volkan, Petar.Avramovic, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D72833

											
										
										
											2020-01-16 20:09:48 +08:00
+								    MIRBuilder.buildInstr(Opcode, {getOrCreateVReg(CI)},
 								                          {getOrCreateVReg(*CI.getArgOperand(0))});
-												[GISel]: Add Opcodes for CTLZ/CTTZ/CTPOP

https://reviews.llvm.org/D48600

Added IRTranslator support to translate these known intrinsics into GISel opcodes.

llvm-svn: 338944

											
										
										
											2018-08-04 09:22:12 +08:00
+								    return true;
 								  }
-												[GlobalIsel] Add llvm.invariant.start and llvm.invariant.end

Port over the implementation in SelectionDAGBuilder.cpp into the IRTranslator
and update the arm64-irtranslator test.

These were causing fallbacks in CTMark/Bullet (-Rpass-missed=gisel-select),
and this patch fixes that.

https://reviews.llvm.org/D52945

llvm-svn: 343885

											
										
										
											2018-10-06 05:02:46 +08:00
+								  case Intrinsic::invariant_start: {
 								    LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								    Register Undef = MRI->createGenericVirtualRegister(PtrTy);
-												[GlobalIsel] Add llvm.invariant.start and llvm.invariant.end

Port over the implementation in SelectionDAGBuilder.cpp into the IRTranslator
and update the arm64-irtranslator test.

These were causing fallbacks in CTMark/Bullet (-Rpass-missed=gisel-select),
and this patch fixes that.

https://reviews.llvm.org/D52945

llvm-svn: 343885

											
										
										
											2018-10-06 05:02:46 +08:00
+								    MIRBuilder.buildUndef(Undef);
 								    return true;
 								  }
 								  case Intrinsic::invariant_end:
 								    return true;
-												GlobalISel: Handle assorted no-op intrinsics

SelectionDAGBuilder just drops these, so do the same.

											
										
										
											2020-07-29 20:55:02 +08:00
+								  case Intrinsic::expect:
 								  case Intrinsic::annotation:
 								  case Intrinsic::ptr_annotation:
 								  case Intrinsic::launder_invariant_group:
 								  case Intrinsic::strip_invariant_group: {
 								    // Drop the intrinsic, but forward the value.
 								    MIRBuilder.buildCopy(getOrCreateVReg(CI),
 								                         getOrCreateVReg(*CI.getArgOperand(0)));
 								    return true;
 								  }
-												[GlobalISel] IRTranslator: Translate the intrinsics ignored by CodeGen

Summary:
Translate `llvm.assume`, `llvm.var.annotation` and `llvm.sideeffect` to nothing
as they have no effect on CodeGen.

Reviewers: qcolombet, aditya_nandakumar, dsanders, paquette, aemerson, arsenm

Reviewed By: arsenm

Subscribers: hiraditya, wdng, rovka, kristof.beyls, javed.absar, Petar.Avramovic, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D63022

llvm-svn: 362834

											
										
										
											2019-06-08 04:19:27 +08:00
+								  case Intrinsic::assume:
-												[GlobalISel][IRTranslator] Ignore the llvm.experimental.noalias.scope.decl intrinsic.

These don't generate any code.

											
										
										
											2021-01-27 04:54:41 +08:00
+								  case Intrinsic::experimental_noalias_scope_decl:
-												[GlobalISel] IRTranslator: Translate the intrinsics ignored by CodeGen

Summary:
Translate `llvm.assume`, `llvm.var.annotation` and `llvm.sideeffect` to nothing
as they have no effect on CodeGen.

Reviewers: qcolombet, aditya_nandakumar, dsanders, paquette, aemerson, arsenm

Reviewed By: arsenm

Subscribers: hiraditya, wdng, rovka, kristof.beyls, javed.absar, Petar.Avramovic, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D63022

llvm-svn: 362834

											
										
										
											2019-06-08 04:19:27 +08:00
+								  case Intrinsic::var_annotation:
 								  case Intrinsic::sideeffect:
 								    // Discard annotate attributes, assumptions, and artificial side-effects.
 								    return true;
-												AArch64+ARM: make LLVM consider system registers volatile.

Some of the system registers readable on AArch64 and ARM platforms
return different values with each read (for example a timer counter).
These shouldn't be hoisted outside loops or otherwise interfered with,
but the normal @llvm.read_register intrinsic is only considered to read
memory.

This introduces a separate @llvm.read_volatile_register intrinsic and
maps all system-registers on ARM platforms to use it for the
__builtin_arm_rsr calls. Registers declared with asm("r9") or similar
are unaffected.

											
										
										
											2020-07-15 16:11:36 +08:00
+								  case Intrinsic::read_volatile_register:
-												GlobalISel: Handle llvm.read_register

Compared to the attempt in bdcc6d3d2638b3a2c99ab3b9bfaa9c02e584993a,
this uses intermediate generic instructions.

											
										
										
											2019-12-28 08:26:51 +08:00
+								  case Intrinsic::read_register: {
 								    Value *Arg = CI.getArgOperand(0);
-												[GlobalISel] Use more MachineIRBuilder helper methods

Reviewers: arsenm, nhaehnle

Subscribers: wdng, rovka, hiraditya, volkan, Petar.Avramovic, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D72833

											
										
										
											2020-01-16 20:09:48 +08:00
+								    MIRBuilder
 								        .buildInstr(TargetOpcode::G_READ_REGISTER, {getOrCreateVReg(CI)}, {})
 								        .addMetadata(cast<MDNode>(cast<MetadataAsValue>(Arg)->getMetadata()));
-												GlobalISel: Handle llvm.read_register

Compared to the attempt in bdcc6d3d2638b3a2c99ab3b9bfaa9c02e584993a,
this uses intermediate generic instructions.

											
										
										
											2019-12-28 08:26:51 +08:00
+								    return true;
 								  }
-												GlobalISel: Lower G_WRITE_REGISTER

											
										
										
											2020-01-13 02:29:44 +08:00
+								  case Intrinsic::write_register: {
 								    Value *Arg = CI.getArgOperand(0);
 								    MIRBuilder.buildInstr(TargetOpcode::G_WRITE_REGISTER)
 								      .addMetadata(cast<MDNode>(cast<MetadataAsValue>(Arg)->getMetadata()))
 								      .addUse(getOrCreateVReg(*CI.getArgOperand(1)));
 								    return true;
 								  }
-												GlobalISel: Handle llvm.localescape

This one is pretty easy and shrinks the list of unhandled
intrinsics. I'm not sure how relevant the insert point is. Using the
insert position of EntryBuilder will place this after
constants. SelectionDAG seems to end up emitting these after argument
copies and before anything else, but I don't think it really
matters. This also ends up emitting these in the opposite order from
SelectionDAG, but I don't think that matters either.

This also needs a fix to stop the later passes dropping this as a dead
instruction. DeadMachineInstructionElim's version of isDead special
cases LOCAL_ESCAPE for some reason, and I'm not sure why it's excluded
from MachineInstr::isLabel (or why isDead doesn't check it).

I also noticed DeadMachineInstructionElim never considers inline asm
as dead, but GlobalISel will drop asm with no constraints.

											
										
										
											2020-07-29 21:48:26 +08:00
+								  case Intrinsic::localescape: {
 								    MachineBasicBlock &EntryMBB = MF->front();
 								    StringRef EscapedName = GlobalValue::dropLLVMManglingEscape(MF->getName());
 								    // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
 								    // is the same on all targets.
 								    for (unsigned Idx = 0, E = CI.getNumArgOperands(); Idx < E; ++Idx) {
 								      Value *Arg = CI.getArgOperand(Idx)->stripPointerCasts();
 								      if (isa<ConstantPointerNull>(Arg))
 								        continue; // Skip null pointers. They represent a hole in index space.
 								      int FI = getOrCreateFrameIndex(*cast<AllocaInst>(Arg));
 								      MCSymbol *FrameAllocSym =
 								          MF->getMMI().getContext().getOrCreateFrameAllocSymbol(EscapedName,
 								                                                                Idx);
 								      // This should be inserted at the start of the entry block.
 								      auto LocalEscape =
 								          MIRBuilder.buildInstrNoInsert(TargetOpcode::LOCAL_ESCAPE)
 								              .addSym(FrameAllocSym)
 								              .addFrameIndex(FI);
 								      EntryMBB.insert(EntryMBB.begin(), LocalEscape);
 								    }
-												[GlobalISel] Add translation support for vector reduction intrinsics.

In order to prevent the ExpandReductions pass from expanding some intrinsics
before they get to codegen, I had to add a -disable-expand-reductions flag
for testing purposes.

Differential Revision: https://reviews.llvm.org/D89028

											
										
										
											2020-10-08 15:17:02 +08:00
+								    return true;
 								  }
 								  case Intrinsic::vector_reduce_fadd:
 								  case Intrinsic::vector_reduce_fmul: {
 								    // Need to check for the reassoc flag to decide whether we want a
 								    // sequential reduction opcode or not.
 								    Register Dst = getOrCreateVReg(CI);
 								    Register ScalarSrc = getOrCreateVReg(*CI.getArgOperand(0));
 								    Register VecSrc = getOrCreateVReg(*CI.getArgOperand(1));
 								    unsigned Opc = 0;
 								    if (!CI.hasAllowReassoc()) {
 								      // The sequential ordering case.
 								      Opc = ID == Intrinsic::vector_reduce_fadd
 								                ? TargetOpcode::G_VECREDUCE_SEQ_FADD
 								                : TargetOpcode::G_VECREDUCE_SEQ_FMUL;
 								      MIRBuilder.buildInstr(Opc, {Dst}, {ScalarSrc, VecSrc},
 								                            MachineInstr::copyFlagsFromInstruction(CI));
 								      return true;
 								    }
 								    // We split the operation into a separate G_FADD/G_FMUL + the reduce,
 								    // since the associativity doesn't matter.
 								    unsigned ScalarOpc;
 								    if (ID == Intrinsic::vector_reduce_fadd) {
 								      Opc = TargetOpcode::G_VECREDUCE_FADD;
 								      ScalarOpc = TargetOpcode::G_FADD;
 								    } else {
 								      Opc = TargetOpcode::G_VECREDUCE_FMUL;
 								      ScalarOpc = TargetOpcode::G_FMUL;
 								    }
 								    LLT DstTy = MRI->getType(Dst);
 								    auto Rdx = MIRBuilder.buildInstr(
 								        Opc, {DstTy}, {VecSrc}, MachineInstr::copyFlagsFromInstruction(CI));
 								    MIRBuilder.buildInstr(ScalarOpc, {Dst}, {ScalarSrc, Rdx},
 								                          MachineInstr::copyFlagsFromInstruction(CI));
-												GlobalISel: Handle llvm.localescape

This one is pretty easy and shrinks the list of unhandled
intrinsics. I'm not sure how relevant the insert point is. Using the
insert position of EntryBuilder will place this after
constants. SelectionDAG seems to end up emitting these after argument
copies and before anything else, but I don't think it really
matters. This also ends up emitting these in the opposite order from
SelectionDAG, but I don't think that matters either.

This also needs a fix to stop the later passes dropping this as a dead
instruction. DeadMachineInstructionElim's version of isDead special
cases LOCAL_ESCAPE for some reason, and I'm not sure why it's excluded
from MachineInstr::isLabel (or why isDead doesn't check it).

I also noticed DeadMachineInstructionElim never considers inline asm
as dead, but GlobalISel will drop asm with no constraints.

											
										
										
											2020-07-29 21:48:26 +08:00
+								    return true;
 								  }
-												GlobalISel: Start defining strict FP instructions

The AMDGPU lowering for unconstrained G_FDIV sometimes needs to
introduce a mode switch in the middle, so it's helpful to have
constrained instructions available to legalize this. Right now nothing
is preventing reordering of the mode switch with the other
instructions in the expansion.

											
										
										
											2020-05-31 23:58:56 +08:00
+								#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC)  \
 								  case Intrinsic::INTRINSIC:
 								#include "llvm/IR/ConstrainedOps.def"
 								    return translateConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(CI),
 								                                           MIRBuilder);
-												GlobalISel: support overflow arithmetic intrinsics.

Unsigned addition and subtraction can reuse the instructions created to
legalize large width operations (i.e. both produce and consume a carry flag).
Signed operations and multiplies get a dedicated op-with-overflow instruction.

Once this is produced the two values are combined into a struct register (which
will almost always be merged with a corresponding G_EXTRACT as part of
legalization).

llvm-svn: 279278

											
										
										
											2016-08-20 01:17:06 +08:00
+								  }
-												GlobalISel: factor overflow handling into separate function. NFC.

llvm-svn: 289149

											
										
										
											2016-12-09 06:44:00 +08:00
+								  return false;
-												GlobalISel: support overflow arithmetic intrinsics.

Unsigned addition and subtraction can reuse the instructions created to
legalize large width operations (i.e. both produce and consume a carry flag).
Signed operations and multiplies get a dedicated op-with-overflow instruction.

Once this is produced the two values are combined into a struct register (which
will almost always be merged with a corresponding G_EXTRACT as part of
legalization).

llvm-svn: 279278

											
										
										
											2016-08-20 01:17:06 +08:00
+								}
-												[GlobalISel] Introduce InlineAsmLowering class

Summary:
Similar to the CallLowering class used for lowering LLVM IR calls to MIR calls,
we introduce a separate class for lowering LLVM IR inline asm to MIR INLINEASM.

There is no functional change yet, all existing tests should pass.

Reviewers: arsenm, dsanders, aemerson, volkan, t.p.northover, paquette

Reviewed By: aemerson

Subscribers: gargaroff, wdng, mgorny, rovka, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D78316

											
										
										
											2020-04-08 20:40:43 +08:00
+								bool IRTranslator::translateInlineAsm(const CallBase &CB,
-												GlobalISel: support trivial inlineasm calls.

They're used for nefarious purposes by ObjC.

llvm-svn: 297422

											
										
										
											2017-03-10 07:36:26 +08:00
+								                                      MachineIRBuilder &MIRBuilder) {
-												[GlobalISel] Introduce InlineAsmLowering class

Summary:
Similar to the CallLowering class used for lowering LLVM IR calls to MIR calls,
we introduce a separate class for lowering LLVM IR inline asm to MIR INLINEASM.

There is no functional change yet, all existing tests should pass.

Reviewers: arsenm, dsanders, aemerson, volkan, t.p.northover, paquette

Reviewed By: aemerson

Subscribers: gargaroff, wdng, mgorny, rovka, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D78316

											
										
										
											2020-04-08 20:40:43 +08:00
+								  const InlineAsmLowering *ALI = MF->getSubtarget().getInlineAsmLowering();
-												[GlobalISel][IRTranslator] Add special case support for ~memory inline asm clobber.

This is a one off special case, since actually implementing full inline asm
support will be much more involved. This lets us compile a lot more code as a
common simple case.

Differential Revision: https://reviews.llvm.org/D74201

											
										
										
											2020-02-07 17:07:57 +08:00
-												[GlobalISel] Introduce InlineAsmLowering class

Summary:
Similar to the CallLowering class used for lowering LLVM IR calls to MIR calls,
we introduce a separate class for lowering LLVM IR inline asm to MIR INLINEASM.

There is no functional change yet, all existing tests should pass.

Reviewers: arsenm, dsanders, aemerson, volkan, t.p.northover, paquette

Reviewed By: aemerson

Subscribers: gargaroff, wdng, mgorny, rovka, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D78316

											
										
										
											2020-04-08 20:40:43 +08:00
+								  if (!ALI) {
 								    LLVM_DEBUG(
 								        dbgs() << "Inline asm lowering is not supported for this target yet\n");
 								    return false;
 								  }
-												GlobalISel: support trivial inlineasm calls.

They're used for nefarious purposes by ObjC.

llvm-svn: 297422

											
										
										
											2017-03-10 07:36:26 +08:00
-												[GlobalISel][InlineAsm] Add support for basic output operand constraints

Reviewers: arsenm, dsanders, aemerson, volkan, t.p.northover, paquette

Reviewed By: arsenm

Subscribers: gargaroff, wdng, rovka, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D78318

											
										
										
											2020-04-09 02:04:13 +08:00
+								  return ALI->lowerInlineAsm(
 								      MIRBuilder, CB, [&](const Value &Val) { return getOrCreateVRegs(Val); });
-												GlobalISel: support trivial inlineasm calls.

They're used for nefarious purposes by ObjC.

llvm-svn: 297422

											
										
										
											2017-03-10 07:36:26 +08:00
+								}
-												[CallSite removal][GlobalISel] Use CallBase instead of CallSite in lowerCall and translateCallBase.

Differential Revision: https://reviews.llvm.org/D78001

											
										
										
											2020-04-14 01:17:29 +08:00
+								bool IRTranslator::translateCallBase(const CallBase &CB,
-												GlobalISel: factor common code from translateCall and translateInvoke. NFC.

llvm-svn: 368166

											
										
										
											2019-08-07 20:43:53 +08:00
+								                                     MachineIRBuilder &MIRBuilder) {
-												[CallSite removal][GlobalISel] Use CallBase instead of CallSite in lowerCall and translateCallBase.

Differential Revision: https://reviews.llvm.org/D78001

											
										
										
											2020-04-14 01:17:29 +08:00
+								  ArrayRef<Register> Res = getOrCreateVRegs(CB);
-												GlobalISel: factor common code from translateCall and translateInvoke. NFC.

llvm-svn: 368166

											
										
										
											2019-08-07 20:43:53 +08:00
 								  SmallVector<ArrayRef<Register>, 8> Args;
 								  Register SwiftInVReg = 0;
 								  Register SwiftErrorVReg = 0;
-												[CallSite removal][GlobalISel] Use CallBase instead of CallSite in lowerCall and translateCallBase.

Differential Revision: https://reviews.llvm.org/D78001

											
										
										
											2020-04-14 01:17:29 +08:00
+								  for (auto &Arg : CB.args()) {
-												GlobalISel: factor common code from translateCall and translateInvoke. NFC.

llvm-svn: 368166

											
										
										
											2019-08-07 20:43:53 +08:00
+								    if (CLI->supportSwiftError() && isSwiftError(Arg)) {
 								      assert(SwiftInVReg == 0 && "Expected only one swift error argument");
 								      LLT Ty = getLLTForType(*Arg->getType(), *DL);
 								      SwiftInVReg = MRI->createGenericVirtualRegister(Ty);
 								      MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt(
-												[CallSite removal][GlobalISel] Use CallBase instead of CallSite in lowerCall and translateCallBase.

Differential Revision: https://reviews.llvm.org/D78001

											
										
										
											2020-04-14 01:17:29 +08:00
+								                                            &CB, &MIRBuilder.getMBB(), Arg));
-												GlobalISel: factor common code from translateCall and translateInvoke. NFC.

llvm-svn: 368166

											
										
										
											2019-08-07 20:43:53 +08:00
+								      Args.emplace_back(makeArrayRef(SwiftInVReg));
 								      SwiftErrorVReg =
-												[CallSite removal][GlobalISel] Use CallBase instead of CallSite in lowerCall and translateCallBase.

Differential Revision: https://reviews.llvm.org/D78001

											
										
										
											2020-04-14 01:17:29 +08:00
+								          SwiftError.getOrCreateVRegDefAt(&CB, &MIRBuilder.getMBB(), Arg);
-												GlobalISel: factor common code from translateCall and translateInvoke. NFC.

llvm-svn: 368166

											
										
										
											2019-08-07 20:43:53 +08:00
+								      continue;
 								    }
 								    Args.push_back(getOrCreateVRegs(*Arg));
 								  }
-												[GlobalISel] Defer setting HasCalls on MachineFrameInfo to selection time.

We currently always set the HasCalls on MFI during translation and legalization if
we're handling a call or legalizing to a libcall. However, if that call is later
optimized to a tail call then we don't need the flag. The flag being set to true
causes frame lowering to always save and restore FP/LR, which adds unnecessary code.

This change does the same thing as SelectionDAG and ports over some code that scans
instructions after selection, using TargetInstrInfo to determine if target opcodes
are known calls.

Code size geomean improvements on CTMark:
 -O0 : 0.1%
 -Os : 0.3%

Differential Revision: https://reviews.llvm.org/D67868

llvm-svn: 372443

											
										
										
											2019-09-21 07:52:07 +08:00
+								  // We don't set HasCalls on MFI here yet because call lowering may decide to
 								  // optimize into tail calls. Instead, we defer that to selection where a final
 								  // scan is done to check if any instructions are calls.
-												GlobalISel: factor common code from translateCall and translateInvoke. NFC.

llvm-svn: 368166

											
										
										
											2019-08-07 20:43:53 +08:00
+								  bool Success =
-												[CallSite removal][GlobalISel] Use CallBase instead of CallSite in lowerCall and translateCallBase.

Differential Revision: https://reviews.llvm.org/D78001

											
										
										
											2020-04-14 01:17:29 +08:00
+								      CLI->lowerCall(MIRBuilder, CB, Res, Args, SwiftErrorVReg,
-												[IR] Replace all uses of CallBase::getCalledValue() with getCalledOperand().

This method has been commented as deprecated for a while. Remove
it and replace all uses with the equivalent getCalledOperand().

I also made a few cleanups in here. For example, this removes the use
of getElementType on a pointer when we could just use getFunctionType
from the call.

Differential Revision: https://reviews.llvm.org/D78882

											
										
										
											2020-04-28 11:15:59 +08:00
+								                     [&]() { return getOrCreateVReg(*CB.getCalledOperand()); });
-												GlobalISel: factor common code from translateCall and translateInvoke. NFC.

llvm-svn: 368166

											
										
										
											2019-08-07 20:43:53 +08:00
-												[GlobalISel] When a tail call is emitted in a block, stop translating it

This fixes a crash in tail call translation caused by assume and lifetime_end
intrinsics.

It's possible to have instructions other than a return after a tail call which
will still have `Analysis::isInTailCallPosition` return true. (Namely,
lifetime_end and assume intrinsics.)

If we emit a tail call, we should stop translating instructions in the block.
Otherwise, we can end up emitting an extra return, or dead instructions in
general. This makes the verifier unhappy, and is generally unfortunate for
codegen.

This also removes the code from AArch64CallLowering that checks if we have a
tail call when lowering a return. This is covered by the new code now.

Also update call-translator-tail-call.ll to show that we now properly tail call
in the presence of lifetime_end and assume.

Differential Revision: https://reviews.llvm.org/D67415

llvm-svn: 371572

											
										
										
											2019-09-11 07:34:45 +08:00
+								  // Check if we just inserted a tail call.
 								  if (Success) {
 								    assert(!HasTailCall && "Can't tail call return twice from block?");
 								    const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
 								    HasTailCall = TII->isTailCall(*std::prev(MIRBuilder.getInsertPt()));
 								  }
-												GlobalISel: factor common code from translateCall and translateInvoke. NFC.

llvm-svn: 368166

											
										
										
											2019-08-07 20:43:53 +08:00
+								  return Success;
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
-												GlobalISel: support same ConstantExprs as Instructions.

It's more than just inttoptr, but the others can't be tested until we have
support for non-trivial constants (they currently get unavoidably folded to a
ConstantInt).

llvm-svn: 278303

											
										
										
											2016-08-11 07:02:41 +08:00
+								  const CallInst &CI = cast<CallInst>(U);
-												GlobalISel: store the current MachineFunction as direct state. NFC.

Having to ask the MIRBuilder for the current function is a little awkward, and
I'm intending to improve how that's threaded through anyway.

llvm-svn: 288983

											
										
										
											2016-12-08 05:17:47 +08:00
+								  auto TII = MF->getTarget().getIntrinsicInfo();
-												GlobalISel: implement simple function calls on AArch64.

We're still limited in the arguments we support, but this at least handles the
basic cases.

llvm-svn: 278293

											
										
										
											2016-08-11 05:44:01 +08:00
+								  const Function *F = CI.getCalledFunction();
-												[GlobalISel] Bail out on calls to dllimported functions

Differential Revision: https://reviews.llvm.org/D42568

llvm-svn: 323811

											
										
										
											2018-01-31 03:50:58 +08:00
+								  // FIXME: support Windows dllimport function calls.
-												[AArch64] [Windows] Use COFF stubs for calls to extern_weak functions

As the extern_weak target might be missing, resolving to the absolute
address zero, we can't use the normal direct PC-relative branch
instructions (as that would result in relocations out of range).

Improve the classifyGlobalFunctionReference method to set
MO_DLLIMPORT/MO_COFFSTUB, and simplify the existing code in
AArch64TargetLowering::LowerCall to use the return value from
classifyGlobalFunctionReference for these cases.

Add code in both AArch64FastISel and GlobalISel/IRTranslator to
bail out for function calls to extern weak functions on windows,
to let SelectionDAG handle them.

This matches what was done for X86 in 6bf108d77a3c.

Differential Revision: https://reviews.llvm.org/D71721

											
										
										
											2019-12-19 20:00:44 +08:00
+								  if (F && (F->hasDLLImportStorageClass() ||
 								            (MF->getTarget().getTargetTriple().isOSWindows() &&
 								             F->hasExternalWeakLinkage())))
-												[GlobalISel] Bail out on calls to dllimported functions

Differential Revision: https://reviews.llvm.org/D42568

llvm-svn: 323811

											
										
										
											2018-01-31 03:50:58 +08:00
+								    return false;
-												Add Windows Control Flow Guard checks (/guard:cf).

Summary:
A new function pass (Transforms/CFGuard/CFGuard.cpp) inserts CFGuard checks on
indirect function calls, using either the check mechanism (X86, ARM, AArch64) or
the dispatch mechanism (X86-64). The check mechanism requires a new calling
convention for the supported targets. The dispatch mechanism adds the target as
an operand bundle, which is processed by SelectionDAG. Another pass
(CodeGen/CFGuardLongjmp.cpp) identifies and emits valid longjmp targets, as
required by /guard:cf. This feature is enabled using the `cfguard` CC1 option.

Reviewers: thakis, rnk, theraven, pcc

Subscribers: ychen, hans, metalcanine, dmajor, tomrittervg, alex, mehdi_amini, mgorny, javed.absar, kristof.beyls, hiraditya, steven_wu, dexonsmith, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D65761

											
										
										
											2019-10-28 21:22:19 +08:00
+								  // FIXME: support control flow guard targets.
 								  if (CI.countOperandBundlesOfType(LLVMContext::OB_cfguardtarget))
 								    return false;
-												AArch64: fall back to DAG ISel for inline assembly.

We can't currently handle "calls" to inlineasm strings so it's better to let
the DAG handle it than generate rubbish.

llvm-svn: 292540

											
										
										
											2017-01-20 07:59:35 +08:00
+								  if (CI.isInlineAsm())
-												GlobalISel: support trivial inlineasm calls.

They're used for nefarious purposes by ObjC.

llvm-svn: 297422

											
										
										
											2017-03-10 07:36:26 +08:00
+								    return translateInlineAsm(CI, MIRBuilder);
-												AArch64: fall back to DAG ISel for inline assembly.

We can't currently handle "calls" to inlineasm strings so it's better to let
the DAG handle it than generate rubbish.

llvm-svn: 292540

											
										
										
											2017-01-20 07:59:35 +08:00
-												[AArch64][GlobalISel] Fix assert fail with unknown intrinsic.

A call may have an intrinsic name but not have a valid intrinsic ID,
for example with llvm.invariant.group.barrier. If so, treat it as a
normal call like FastISel does.

llvm-svn: 321662

											
										
										
											2018-01-03 02:56:39 +08:00
+								  Intrinsic::ID ID = Intrinsic::not_intrinsic;
 								  if (F && F->isIntrinsic()) {
 								    ID = F->getIntrinsicID();
 								    if (TII && ID == Intrinsic::not_intrinsic)
 								      ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));
 								  }
-												GlobalISel: factor common code from translateCall and translateInvoke. NFC.

llvm-svn: 368166

											
										
										
											2019-08-07 20:43:53 +08:00
+								  if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic)
-												[CallSite removal][GlobalISel] Use CallBase instead of CallSite in lowerCall and translateCallBase.

Differential Revision: https://reviews.llvm.org/D78001

											
										
										
											2020-04-14 01:17:29 +08:00
+								    return translateCallBase(CI, MIRBuilder);
-												GlobalISel: implement simple function calls on AArch64.

We're still limited in the arguments we support, but this at least handles the
basic cases.

llvm-svn: 278293

											
										
										
											2016-08-11 05:44:01 +08:00
 								  assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic");
-												GlobalISel: support translation of intrinsic calls.

These come in two variants for now: G_INTRINSIC and G_INTRINSIC_W_SIDE_EFFECTS.
We may decide to split the latter up with finer-grained restrictions later, if
necessary.

llvm-svn: 277224

											
										
										
											2016-07-30 06:32:36 +08:00
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								  if (translateKnownIntrinsic(CI, ID, MIRBuilder))
-												GlobalISel: support overflow arithmetic intrinsics.

Unsigned addition and subtraction can reuse the instructions created to
legalize large width operations (i.e. both produce and consume a carry flag).
Signed operations and multiplies get a dedicated op-with-overflow instruction.

Once this is produced the two values are combined into a struct register (which
will almost always be merged with a corresponding G_EXTRACT as part of
legalization).

llvm-svn: 279278

											
										
										
											2016-08-20 01:17:06 +08:00
+								    return true;
-												CodeGen: Introduce a class for registers

Avoids using a plain unsigned for registers throughout codegen.
Doesn't attempt to change every register use, just something a little
more than the set needed to build after changing the return type of
MachineOperand::getReg().

llvm-svn: 364191

											
										
										
											2019-06-24 23:50:29 +08:00
+								  ArrayRef<Register> ResultRegs;
-												GlobalISel: Use multiple returns for intrinsic structs

This is consistent with what SelectionDAG does and is much easier to
work with than the extract sequence with an artificial wide register.

For the AMDGPU control flow intrinsics, this was producing an s128 for
the i64, i1 tuple return. Any legalization that should apply to a real
s128 value would badly obscure the direct values that need to be seen.

llvm-svn: 356147

											
										
										
											2019-03-14 22:18:56 +08:00
+								  if (!CI.getType()->isVoidTy())
 								    ResultRegs = getOrCreateVRegs(CI);
-												GlobalISel: Ignore callsite attributes when picking intrinsic type

A target intrinsic may be defined as possibly reading memory, but the
call site may have additional knowledge that it doesn't read
memory. The intrinsic lowering will expect the pessimistic assumption
of the intrinsic definition, so the chain should still be used.

I fixed the same bug in SelectionDAG in r287593.

llvm-svn: 363580

											
										
										
											2019-06-18 01:01:35 +08:00
+								  // Ignore the callsite attributes. Backend code is most likely not expecting
 								  // an intrinsic to sometimes have side effects and sometimes not.
-												GlobalISel: support translation of intrinsic calls.

These come in two variants for now: G_INTRINSIC and G_INTRINSIC_W_SIDE_EFFECTS.
We may decide to split the latter up with finer-grained restrictions later, if
necessary.

llvm-svn: 277224

											
										
										
											2016-07-30 06:32:36 +08:00
+								  MachineInstrBuilder MIB =
-												GlobalISel: Ignore callsite attributes when picking intrinsic type

A target intrinsic may be defined as possibly reading memory, but the
call site may have additional knowledge that it doesn't read
memory. The intrinsic lowering will expect the pessimistic assumption
of the intrinsic definition, so the chain should still be used.

I fixed the same bug in SelectionDAG in r287593.

llvm-svn: 363580

											
										
										
											2019-06-18 01:01:35 +08:00
+								      MIRBuilder.buildIntrinsic(ID, ResultRegs, !F->doesNotAccessMemory());
-												[NFC] FMF propagation for GlobalIsel

llvm-svn: 358702

											
										
										
											2019-04-19 02:48:57 +08:00
+								  if (isa<FPMathOperator>(CI))
 								    MIB->copyIRFlags(CI);
-												GlobalISel: support translation of intrinsic calls.

These come in two variants for now: G_INTRINSIC and G_INTRINSIC_W_SIDE_EFFECTS.
We may decide to split the latter up with finer-grained restrictions later, if
necessary.

llvm-svn: 277224

											
										
										
											2016-07-30 06:32:36 +08:00
-												Reapply r372285 "GlobalISel: Don't materialize immarg arguments to intrinsics"

This reverts r372314, reapplying r372285 and the commits which depend
on it (r372286-r372293, and r372296-r372297)

This was missing one switch to getTargetConstant in an untested case.

llvm-svn: 372338

											
										
										
											2019-09-20 00:26:14 +08:00
+								  for (auto &Arg : enumerate(CI.arg_operands())) {
 								    // If this is required to be an immediate, don't materialize it in a
 								    // register.
 								    if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
 								      if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg.value())) {
 								        // imm arguments are more convenient than cimm (and realistically
 								        // probably sufficient), so use them.
 								        assert(CI->getBitWidth() <= 64 &&
 								               "large intrinsic immediates not handled");
 								        MIB.addImm(CI->getSExtValue());
 								      } else {
 								        MIB.addFPImm(cast<ConstantFP>(Arg.value()));
 								      }
-												GlobalISel: Don't fail translate on intrinsics with metadata

											
										
										
											2020-07-22 07:29:37 +08:00
+								    } else if (auto MD = dyn_cast<MetadataAsValue>(Arg.value())) {
 								      auto *MDN = dyn_cast<MDNode>(MD->getMetadata());
 								      if (!MDN) // This was probably an MDString.
 								        return false;
 								      MIB.addMetadata(MDN);
-												Reapply r372285 "GlobalISel: Don't materialize immarg arguments to intrinsics"

This reverts r372314, reapplying r372285 and the commits which depend
on it (r372286-r372293, and r372296-r372297)

This was missing one switch to getTargetConstant in an untested case.

llvm-svn: 372338

											
										
										
											2019-09-20 00:26:14 +08:00
+								    } else {
 								      ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg.value());
 								      if (VRegs.size() > 1)
 								        return false;
 								      MIB.addUse(VRegs[0]);
 								    }
-												GlobalISel: support translation of intrinsic calls.

These come in two variants for now: G_INTRINSIC and G_INTRINSIC_W_SIDE_EFFECTS.
We may decide to split the latter up with finer-grained restrictions later, if
necessary.

llvm-svn: 277224

											
										
										
											2016-07-30 06:32:36 +08:00
+								  }
-												[GlobalISel] IRTranslator: Add MachineMemOperand to target memory intrinsics

Reviewers: qcolombet, ab, t.p.northover, aditya_nandakumar, dsanders

Reviewed By: qcolombet

Subscribers: rovka, kristof.beyls, javed.absar, igorb, llvm-commits

Differential Revision: https://reviews.llvm.org/D33724

llvm-svn: 304743

											
										
										
											2017-06-06 06:17:17 +08:00
 								  // Add a MachineMemOperand if it is a target mem intrinsic.
 								  const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
 								  TargetLowering::IntrinsicInfo Info;
 								  // TODO: Add a GlobalISel version of getTgtMemIntrinsic.
-												TLI: Allow using PSV for intrinsic mem operands

llvm-svn: 320756

											
										
										
											2017-12-15 06:34:10 +08:00
+								  if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) {
-												[Alignment][NFC] Use Align version of getMachineMemOperand

Summary:
This patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790

Reviewers: courbet

Subscribers: jyknight, sdardis, nemanjai, hiraditya, kbarton, fedor.sergeev, asb, rbar, johnrusso, simoncook, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, jfb, PkmX, jocewei, Jim, lenary, s.egerton, pzheng, sameer.abuasal, apazos, luismarques, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77059

											
										
										
											2020-03-30 22:45:57 +08:00
+								    Align Alignment = Info.align.getValueOr(
 								        DL->getABITypeAlign(Info.memVT.getTypeForEVT(F->getContext())));
-												GlobalISel: Fix creating MMOs with align 0

llvm-svn: 352712

											
										
										
											2019-01-31 09:38:47 +08:00
-												GlobalISel: Fix MMO creation with non-power-of-2 mem size

It should probably just be mandatory for getTgtMemIntrinsic to return
the alignment.

llvm-svn: 352817

											
										
										
											2019-02-01 07:41:23 +08:00
+								    uint64_t Size = Info.memVT.getStoreSize();
-												[Alignment][NFC] Use Align version of getMachineMemOperand

Summary:
This patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790

Reviewers: courbet

Subscribers: jyknight, sdardis, nemanjai, hiraditya, kbarton, fedor.sergeev, asb, rbar, johnrusso, simoncook, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, jfb, PkmX, jocewei, Jim, lenary, s.egerton, pzheng, sameer.abuasal, apazos, luismarques, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77059

											
										
										
											2020-03-30 22:45:57 +08:00
+								    MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal),
 								                                               Info.flags, Size, Alignment));
-												[GlobalISel] IRTranslator: Add MachineMemOperand to target memory intrinsics

Reviewers: qcolombet, ab, t.p.northover, aditya_nandakumar, dsanders

Reviewed By: qcolombet

Subscribers: rovka, kristof.beyls, javed.absar, igorb, llvm-commits

Differential Revision: https://reviews.llvm.org/D33724

llvm-svn: 304743

											
										
										
											2017-06-06 06:17:17 +08:00
+								  }
-												GlobalISel: support translation of intrinsic calls.

These come in two variants for now: G_INTRINSIC and G_INTRINSIC_W_SIDE_EFFECTS.
We may decide to split the latter up with finer-grained restrictions later, if
necessary.

llvm-svn: 277224

											
										
										
											2016-07-30 06:32:36 +08:00
+								  return true;
 								}
-												[GlobalISel][IRTranslator] Ensure branch probabilities are added when translating invoke edges.

This uses a straightforward port of findUnwindDestinations() from SelectionDAG.

Differential Revision: https://reviews.llvm.org/D93256

											
										
										
											2020-12-15 07:25:35 +08:00
+								/// Collect the machine basic blocks that an exception unwinding to
 								/// \p EHPadBB may reach, each paired with the probability of getting there.
 								///
 								/// Starting at \p EHPadBB, the first non-PHI instruction of each pad decides
 								/// what happens:
 								///  - landingpad: record the block and stop.
 								///  - cleanuppad: record the block, mark it as an EH scope entry and an EH
 								///    funclet entry, and stop.
 								///  - catchswitch: record every handler block (marked as a funclet entry for
 								///    the MSVC C++ and CoreCLR personalities, and as a scope entry unless the
 								///    personality is asynchronous), then carry on with the catchswitch's
 								///    unwind destination, if it has one.
 								///
 								/// \param EHPadBB     IR block where unwinding starts.
 								/// \param Prob        Probability of reaching \p EHPadBB; scaled by the edge
 								///                    probability on each step when FuncInfo.BPI is set.
 								/// \param UnwindDests Output list; (block, probability) pairs are appended.
 								/// \returns false if the destinations cannot be determined (Wasm C++
 								///          personality, or a pad of an unrecognised kind); true otherwise.
 								bool IRTranslator::findUnwindDestinations(
 								    const BasicBlock *EHPadBB,
 								    BranchProbability Prob,
 								    SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
 								        &UnwindDests) {
 								  EHPersonality Personality = classifyEHPersonality(
 								      EHPadBB->getParent()->getFunction().getPersonalityFn());
 								  bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
 								  bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
 								  bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
 								  bool IsSEH = isAsynchronousEHPersonality(Personality);
 								  if (IsWasmCXX) {
 								    // Ignore this for now.
 								    return false;
 								  }
 								  while (EHPadBB) {
 								    const Instruction *Pad = EHPadBB->getFirstNonPHI();
 								    if (isa<LandingPadInst>(Pad)) {
 								      // Stop on landingpads. They are not funclets.
 								      UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob);
 								      break;
 								    }
 								    if (isa<CleanupPadInst>(Pad)) {
 								      // Stop on cleanup pads. Cleanups are always funclet entries for all known
 								      // personalities.
 								      UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob);
 								      UnwindDests.back().first->setIsEHScopeEntry();
 								      UnwindDests.back().first->setIsEHFuncletEntry();
 								      break;
 								    }
 								    auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad);
 								    if (!CatchSwitch) {
 								      // Not a pad kind handled above. Looping again here would never
 								      // terminate because EHPadBB has not changed, so report failure the
 								      // same way the unsupported Wasm case does.
 								      return false;
 								    }
 								    // Add the catchpad handlers to the possible destinations.
 								    for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
 								      UnwindDests.emplace_back(&getMBB(*CatchPadBB), Prob);
 								      // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
 								      if (IsMSVCCXX || IsCoreCLR)
 								        UnwindDests.back().first->setIsEHFuncletEntry();
 								      if (!IsSEH)
 								        UnwindDests.back().first->setIsEHScopeEntry();
 								    }
 								    // Follow the catchswitch's own unwind edge; a null destination ends the
 								    // walk on the next loop test.
 								    BasicBlock *NewEHPadBB = CatchSwitch->getUnwindDest();
 								    BranchProbabilityInfo *BPI = FuncInfo.BPI;
 								    if (BPI && NewEHPadBB)
 								      Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
 								    EHPadBB = NewEHPadBB;
 								  }
 								  return true;
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateInvoke(const User &U,
 								                                   MachineIRBuilder &MIRBuilder) {
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
+								  const InvokeInst &I = cast<InvokeInst>(U);
-												GlobalISel: store the current MachineFunction as direct state. NFC.

Having to ask the MIRBuilder for the current function is a little awkward, and
I'm intending to improve how that's threaded through anyway.

llvm-svn: 288983

											
										
										
											2016-12-08 05:17:47 +08:00
+								  MCContext &Context = MF->getContext();
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
 								  const BasicBlock *ReturnBB = I.getSuccessor(0);
 								  const BasicBlock *EHPadBB = I.getSuccessor(1);
-												[IR] Replace all uses of CallBase::getCalledValue() with getCalledOperand().

This method has been commented as deprecated for a while. Remove
it and replace all uses with the equivalent getCalledOperand().

I also made a few cleanups in here. For example, this removes the use
of getElementType on a pointer when we could just use getFunctionType
from the call.

Differential Revision: https://reviews.llvm.org/D78882

											
										
										
											2020-04-28 11:15:59 +08:00
+								  const Function *Fn = I.getCalledFunction();
 								  if (I.isInlineAsm())
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
+								    return false;
 								  // FIXME: support invoking patchpoint and statepoint intrinsics.
 								  if (Fn && Fn->isIntrinsic())
 								    return false;
 								  // FIXME: support whatever these are.
 								  if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
 								    return false;
-												Add Windows Control Flow Guard checks (/guard:cf).

Summary:
A new function pass (Transforms/CFGuard/CFGuard.cpp) inserts CFGuard checks on
indirect function calls, using either the check mechanism (X86, ARM, AArch64) or
the dispatch mechanism (X86-64). The check mechanism requires a new calling
convention for the supported targets. The dispatch mechanism adds the target as
an operand bundle, which is processed by SelectionDAG. Another pass
(CodeGen/CFGuardLongjmp.cpp) identifies and emits valid longjmp targets, as
required by /guard:cf. This feature is enabled using the `cfguard` CC1 option.

Reviewers: thakis, rnk, theraven, pcc

Subscribers: ychen, hans, metalcanine, dmajor, tomrittervg, alex, mehdi_amini, mgorny, javed.absar, kristof.beyls, hiraditya, steven_wu, dexonsmith, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D65761

											
										
										
											2019-10-28 21:22:19 +08:00
+								  // FIXME: support control flow guard targets.
 								  if (I.countOperandBundlesOfType(LLVMContext::OB_cfguardtarget))
 								    return false;
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
+								  // FIXME: support Windows exception handling.
-												[GlobalISel][IRTranslator] Support PHI instructions in landingpad blocks

The check for the landingpad instructions was overly restrictive. In optimized builds PHI nodes can appear
before the landingpad instructions, resulting in a fallback to SelectionDAG.

This change relaxes the check to allow PHI nodes.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D86141

											
										
										
											2020-08-18 16:32:44 +08:00
+								  if (!isa<LandingPadInst>(EHPadBB->getFirstNonPHI()))
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
+								    return false;
-												Move most EH from MachineModuleInfo to MachineFunction

Recommitting r288293 with some extra fixes for GlobalISel code.

Most of the exception handling members in MachineModuleInfo are actually
per function data (talks about the "current function") so it is better
to keep it at the function instead of the module.

This is a necessary step to have machine module passes work properly.

Also:
- Rename TidyLandingPads() to tidyLandingPads()
- Use doxygen member groups instead of "//===- EH ---"... so it is clear
  where a group ends.
- I had to add an ugly const_cast at two places in the AsmPrinter
  because the available MachineFunction pointers are const, but the code
  wants to call tidyLandingPads() in between
  (markFunctionEnd()/endFunction()).

Differential Revision: https://reviews.llvm.org/D27227

llvm-svn: 288405

											
										
										
											2016-12-02 03:32:15 +08:00
+								  // Emit the actual call, bracketed by EH_LABELs so that the MF knows about
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
+								  // the region covered by the try.
-												Move most EH from MachineModuleInfo to MachineFunction

Recommitting r288293 with some extra fixes for GlobalISel code.

Most of the exception handling members in MachineModuleInfo are actually
per function data (talks about the "current function") so it is better
to keep it at the function instead of the module.

This is a necessary step to have machine module passes work properly.

Also:
- Rename TidyLandingPads() to tidyLandingPads()
- Use doxygen member groups instead of "//===- EH ---"... so it is clear
  where a group ends.
- I had to add an ugly const_cast at two places in the AsmPrinter
  because the available MachineFunction pointers are const, but the code
  wants to call tidyLandingPads() in between
  (markFunctionEnd()/endFunction()).

Differential Revision: https://reviews.llvm.org/D27227

llvm-svn: 288405

											
										
										
											2016-12-02 03:32:15 +08:00
+								  MCSymbol *BeginSymbol = Context.createTempSymbol();
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
+								  MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
-												[CallSite removal][GlobalISel] Use CallBase instead of CallSite in lowerCall and translateCallBase.

Differential Revision: https://reviews.llvm.org/D78001

											
										
										
											2020-04-14 01:17:29 +08:00
+								  if (!translateCallBase(I, MIRBuilder))
-												[GlobalISel] Fallback when failing to translate invoke.

We unintentionally stopped falling back in r293670.

While there, change an unusual construct.

llvm-svn: 297425

											
										
										
											2017-03-10 08:25:35 +08:00
+								    return false;
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
-												Move most EH from MachineModuleInfo to MachineFunction

Recommitting r288293 with some extra fixes for GlobalISel code.

Most of the exception handling members in MachineModuleInfo are actually
per function data (talks about the "current function") so it is better
to keep it at the function instead of the module.

This is a necessary step to have machine module passes work properly.

Also:
- Rename TidyLandingPads() to tidyLandingPads()
- Use doxygen member groups instead of "//===- EH ---"... so it is clear
  where a group ends.
- I had to add an ugly const_cast at two places in the AsmPrinter
  because the available MachineFunction pointers are const, but the code
  wants to call tidyLandingPads() in between
  (markFunctionEnd()/endFunction()).

Differential Revision: https://reviews.llvm.org/D27227

llvm-svn: 288405

											
										
										
											2016-12-02 03:32:15 +08:00
+								  MCSymbol *EndSymbol = Context.createTempSymbol();
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
+								  MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);
-												[GlobalISel][IRTranslator] Ensure branch probabilities are added when translating invoke edges.

This uses a straightforward port of findUnwindDestinations() from SelectionDAG.

Differential Revision: https://reviews.llvm.org/D93256

											
										
										
											2020-12-15 07:25:35 +08:00
+								  SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
 								  BranchProbabilityInfo *BPI = FuncInfo.BPI;
 								  MachineBasicBlock *InvokeMBB = &MIRBuilder.getMBB();
 								  BranchProbability EHPadBBProb =
 								      BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
 								          : BranchProbability::getZero();
 								  if (!findUnwindDestinations(EHPadBB, EHPadBBProb, UnwindDests))
 								    return false;
-												[GlobalISel] Preserve IR block layout.

It makes the output function layout more predictable;  the layout has
an effect on performance, we don't want it to be at the mercy of the
translator's visitation order and such.
The predictable output is also easier to digest.

getOrCreateBB isn't appropriately named anymore, as it never needs to
create anything.  Rename it and extract the MBB creation logic out of it.

A couple tests were sensitive to the order. Update them.

llvm-svn: 297868

											
										
										
											2017-03-16 02:22:33 +08:00
+								  MachineBasicBlock &EHPadMBB = getMBB(*EHPadBB),
 								                    &ReturnMBB = getMBB(*ReturnBB);
-												[GlobalISel][IRTranslator] Ensure branch probabilities are added when translating invoke edges.

This uses a straightforward port of findUnwindDestinations() from SelectionDAG.

Differential Revision: https://reviews.llvm.org/D93256

											
										
										
											2020-12-15 07:25:35 +08:00
+								  // Update successor info.
 								  addSuccessorWithProb(InvokeMBB, &ReturnMBB);
 								  for (auto &UnwindDest : UnwindDests) {
 								    UnwindDest.first->setIsEHPad();
 								    addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
 								  }
 								  InvokeMBB->normalizeSuccProbs();
-												GlobalISel: store the current MachineFunction as direct state. NFC.

Having to ask the MIRBuilder for the current function is a little awkward, and
I'm intending to improve how that's threaded through anyway.

llvm-svn: 288983

											
										
										
											2016-12-08 05:17:47 +08:00
+								  MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol);
-												GlobalISel: the translation of an invoke must branch to the good block.

Otherwise bad things happen if the basic block order isn't trivial after an
invoke.

llvm-svn: 293679

											
										
										
											2017-02-01 04:12:18 +08:00
+								  MIRBuilder.buildBr(ReturnMBB);
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
+								  return true;
 								}
-												Implementation of asm-goto support in LLVM

This patch accompanies the RFC posted here:
http://lists.llvm.org/pipermail/llvm-dev/2018-October/127239.html

This patch adds a new CallBr IR instruction to support asm-goto
inline assembly like gcc as used by the linux kernel. This
instruction is both a call instruction and a terminator
instruction with multiple successors. Only inline assembly
usage is supported today.

This also adds a new INLINEASM_BR opcode to SelectionDAG and
MachineIR to represent an INLINEASM block that is also
considered a terminator instruction.

There will likely be more bug fixes and optimizations to follow
this, but we felt it had reached a point where we would like to
switch to an incremental development model.

Patch by Craig Topper, Alexander Ivchenko, Mikhail Dvoretckii

Differential Revision: https://reviews.llvm.org/D53765

llvm-svn: 353563

											
										
										
											2019-02-09 04:48:56 +08:00
+								bool IRTranslator::translateCallBr(const User &U,
 								                                   MachineIRBuilder &MIRBuilder) {
 								  // Translation of callbr is not implemented: nothing is emitted through
 								  // MIRBuilder and false is returned unconditionally, so the instruction
 								  // is always reported as untranslated.
 								  // NOTE(review): presumably the caller reacts to false by falling back
 								  // to another instruction selector -- confirm against the driver.
 								  // FIXME: Implement this.
 								  return false;
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translateLandingPad(const User &U,
 								                                       MachineIRBuilder &MIRBuilder) {
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
+								  const LandingPadInst &LP = cast<LandingPadInst>(U);
 								  MachineBasicBlock &MBB = MIRBuilder.getMBB();
 								  MBB.setIsEHPad();
 								  // If there aren't registers to copy the values into (e.g., during SjLj
 								  // exceptions), then don't bother.
-												GlobalISel: store the current MachineFunction as direct state. NFC.

Having to ask the MIRBuilder for the current function is a little awkward, and
I'm intending to improve how that's threaded through anyway.

llvm-svn: 288983

											
										
										
											2016-12-08 05:17:47 +08:00
+								  auto &TLI = *MF->getSubtarget().getTargetLowering();
-												MachineFunction: Return reference from getFunction(); NFC

The Function can never be nullptr so we can return a reference.

llvm-svn: 320884

											
										
										
											2017-12-16 06:22:58 +08:00
+								  const Constant *PersonalityFn = MF->getFunction().getPersonalityFn();
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
+								  if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
 								      TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
 								    return true;
 								  // If landingpad's return type is token type, we don't create DAG nodes
 								  // for its exception pointer and selector value. The extraction of exception
 								  // pointer or selector value from token type landingpads is not currently
 								  // supported.
 								  if (LP.getType()->isTokenTy())
 								    return true;
 								  // Add a label to mark the beginning of the landing pad.  Deletion of the
 								  // landing pad can thus be detected via the MachineModuleInfo.
 								  MIRBuilder.buildInstr(TargetOpcode::EH_LABEL)
-												GlobalISel: store the current MachineFunction as direct state. NFC.

Having to ask the MIRBuilder for the current function is a little awkward, and
I'm intending to improve how that's threaded through anyway.

llvm-svn: 288983

											
										
										
											2016-12-08 05:17:47 +08:00
+								    .addSym(MF->addLandingPad(&MBB));
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
-												[AArch64][SVE] Preserve full vector regs over EH edge.

Unwinders may only preserve the lower 64bits of Neon and SVE registers,
as only the registers in the base ABI are guaranteed to be preserved
over the exception edge. The caller will need to preserve additional
registers for when the call throws an exception and the unwinder has
tried to recover state.

For example:

    svint32_t bar(svint32_t);
    svint32_t foo(svint32_t x, bool *err) {
      try { bar(x); } catch (...) { *err = true; }
      return x;
    }

`z0` needs to be spilled before the call to `bar(x)` and reloaded before
returning from foo, as the exception handler may have clobbered z0.

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D84737

											
										
										
											2020-09-02 17:12:27 +08:00
+								  // If the unwinder does not preserve all registers, ensure that the
 								  // function marks the clobbered registers as used.
 								  const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
 								  if (auto *RegMask = TRI.getCustomEHPadPreservedMask(*MF))
 								    MF->getRegInfo().addPhysRegsUsedFromRegMask(RegMask);
-												Fix additional constructor call missed by r297241.

It was added between my build+test and my commit.

llvm-svn: 297244

											
										
										
											2017-03-08 07:32:10 +08:00
+								  LLT Ty = getLLTForType(*LP.getType(), *DL);
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								  Register Undef = MRI->createGenericVirtualRegister(Ty);
-												GlobalISel: use inserts for landingpad instead of sequences.

llvm-svn: 297237

											
										
										
											2017-03-08 07:04:06 +08:00
+								  MIRBuilder.buildUndef(Undef);
-												GlobalISel: Use the correct types when translating landingpad instructions

There was a bug here where we were using p0 instead of s32 for the
selector type in the landingpad. Instead of hardcoding these types we
should get the types from the landingpad instruction directly.

Note that we replicate an assert from SDAG here to only support
two-valued landingpads.

llvm-svn: 292995

											
										
										
											2017-01-25 08:16:53 +08:00
+								  SmallVector<LLT, 2> Tys;
 								  for (Type *Ty : cast<StructType>(LP.getType())->elements())
-												Recommit: [globalisel] Change LLT constructor string into an LLT-based object that knows how to generate it.

Summary:
This will allow future patches to inspect the details of the LLT. The implementation is now split between
the Support and CodeGen libraries to allow TableGen to use this class without introducing layering concerns.

Thanks to Ahmed Bougacha for finding a reasonable way to avoid the layering issue and providing the version of this patch without that problem.

The problem with the previous commit appears to have been that TableGen was including CodeGen/LowLevelType.h instead of Support/LowLevelTypeImpl.h.

Reviewers: t.p.northover, qcolombet, rovka, aditya_nandakumar, ab, javed.absar

Subscribers: arsenm, nhaehnle, mgorny, dberris, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30046

llvm-svn: 297241

											
										
										
											2017-03-08 07:20:35 +08:00
+								    Tys.push_back(getLLTForType(*Ty, *DL));
-												GlobalISel: Use the correct types when translating landingpad instructions

There was a bug here where we were using p0 instead of s32 for the
selector type in the landingpad. Instead of hardcoding these types we
should get the types from the landingpad instruction directly.

Note that we replicate an assert from SDAG here to only support
two-valued landingpads.

llvm-svn: 292995

											
										
										
											2017-01-25 08:16:53 +08:00
+								  assert(Tys.size() == 2 && "Only two-valued landingpads are supported");
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
+								  // Mark exception register as live in.
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								  Register ExceptionReg = TLI.getExceptionPointerRegister(PersonalityFn);
-												GlobalISel: use inserts for landingpad instead of sequences.

llvm-svn: 297237

											
										
										
											2017-03-08 07:04:06 +08:00
+								  if (!ExceptionReg)
 								    return false;
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
-												GlobalISel: use inserts for landingpad instead of sequences.

llvm-svn: 297237

											
										
										
											2017-03-08 07:04:06 +08:00
+								  MBB.addLiveIn(ExceptionReg);
-												CodeGen: Introduce a class for registers

Avoids using a plain unsigned for registers throughout codegen.
Doesn't attempt to change every register use, just something a little
more than the set needed to build after changing the return type of
MachineOperand::getReg().

llvm-svn: 364191

											
										
										
											2019-06-24 23:50:29 +08:00
+								  ArrayRef<Register> ResRegs = getOrCreateVRegs(LP);
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  MIRBuilder.buildCopy(ResRegs[0], ExceptionReg);
-												GlobalISel: use inserts for landingpad instead of sequences.

llvm-svn: 297237

											
										
										
											2017-03-08 07:04:06 +08:00
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								  Register SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn);
-												GlobalISel: use inserts for landingpad instead of sequences.

llvm-svn: 297237

											
										
										
											2017-03-08 07:04:06 +08:00
+								  if (!SelectorReg)
 								    return false;
 								  MBB.addLiveIn(SelectorReg);
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								  Register PtrVReg = MRI->createGenericVirtualRegister(Tys[0]);
-												GlobalISel: use inserts for landingpad instead of sequences.

llvm-svn: 297237

											
										
										
											2017-03-08 07:04:06 +08:00
+								  MIRBuilder.buildCopy(PtrVReg, SelectorReg);
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  MIRBuilder.buildCast(ResRegs[1], PtrVReg);
-												GlobalISel: translate invoke and landingpad instructions

Pretty bare-bones support for exception handling (no weird MSVC stuff, no SjLj
etc), but it should get things going.

llvm-svn: 286407

											
										
										
											2016-11-10 06:39:54 +08:00
 								  return true;
 								}
-												GlobalISel: translate dynamic alloca instructions.

llvm-svn: 294022

											
										
										
											2017-02-04 02:22:45 +08:00
+								bool IRTranslator::translateAlloca(const User &U,
 								                                   MachineIRBuilder &MIRBuilder) {
 								  auto &AI = cast<AllocaInst>(U);
-												[GlobalISel] Fall back to SDISel for swifterror/swiftself attributes.

We don't currently support these, fall back until we do.

llvm-svn: 337994

											
										
										
											2018-07-26 09:25:58 +08:00
+								  if (AI.isSwiftError())
-												GlobalISel: support swifterror attribute on AArch64.

swifterror marks an argument as a register pretending to be a pointer, so we
need a guaranteed mem2reg-like analysis of its uses. Fortunately most of the
infrastructure can be reused from the DAG world.

llvm-svn: 361608

											
										
										
											2019-05-24 16:40:13 +08:00
+								    return true;
-												[GlobalISel] Fall back to SDISel for swifterror/swiftself attributes.

We don't currently support these, fall back until we do.

llvm-svn: 337994

											
										
										
											2018-07-26 09:25:58 +08:00
-												GlobalISel: translate dynamic alloca instructions.

llvm-svn: 294022

											
										
										
											2017-02-04 02:22:45 +08:00
+								  if (AI.isStaticAlloca()) {
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								    Register Res = getOrCreateVReg(AI);
-												GlobalISel: translate dynamic alloca instructions.

llvm-svn: 294022

											
										
										
											2017-02-04 02:22:45 +08:00
+								    int FI = getOrCreateFrameIndex(AI);
 								    MIRBuilder.buildFrameIndex(Res, FI);
 								    return true;
 								  }
-												[AArch64] Implement dynamic stack probing for windows

This makes sure that alloca() function calls properly probe the
stack as needed.

Differential Revision: https://reviews.llvm.org/D42356

llvm-svn: 325433

											
										
										
											2018-02-17 22:26:32 +08:00
+								  // FIXME: support stack probing for Windows.
 								  if (MF->getTarget().getTargetTriple().isOSWindows())
 								    return false;
-												GlobalISel: translate dynamic alloca instructions.

llvm-svn: 294022

											
										
										
											2017-02-04 02:22:45 +08:00
+								  // Now we're in the harder dynamic case.
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								  Register NumElts = getOrCreateVReg(*AI.getArraySize());
-												[GlobalISel] Avoid translating synthetic constants to new G_CONSTANTS.

Currently, we create a G_CONSTANT for every "synthetic" integer
constant operand (for instance, for the G_GEP offset).
Instead, share the G_CONSTANTs we might have created by going through
the ValueToVReg machinery.

When we're emitting synthetic constants, we do need to get Constants from
the context.  One could argue that we shouldn't modify the context at
all (for instance, this means that we're going to use a tad more memory
if the constant wasn't used elsewhere), but constants are mostly
harmless.  We currently do this for extractvalue and all.

For constant fcmp, this does mean we'll emit an extra COPY, which is not
necessarily more optimal than an extra materialized constant.
But that preserves the current intended design of uniqued G_CONSTANTs,
and the rematerialization problem exists elsewhere and should be
resolved with a single coherent solution.

llvm-svn: 297875

											
										
										
											2017-03-16 03:21:11 +08:00
+								  Type *IntPtrIRTy = DL->getIntPtrType(AI.getType());
 								  LLT IntPtrTy = getLLTForType(*IntPtrIRTy, *DL);
-												GlobalISel: translate dynamic alloca instructions.

llvm-svn: 294022

											
										
										
											2017-02-04 02:22:45 +08:00
+								  if (MRI->getType(NumElts) != IntPtrTy) {
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								    Register ExtElts = MRI->createGenericVirtualRegister(IntPtrTy);
-												GlobalISel: translate dynamic alloca instructions.

llvm-svn: 294022

											
										
										
											2017-02-04 02:22:45 +08:00
+								    MIRBuilder.buildZExtOrTrunc(ExtElts, NumElts);
 								    NumElts = ExtElts;
 								  }
-												[NFC] Preparatory work for D77292

											
										
										
											2020-04-02 17:30:33 +08:00
+								  Type *Ty = AI.getAllocatedType();
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								  Register AllocSize = MRI->createGenericVirtualRegister(IntPtrTy);
 								  Register TySize =
-												[GlobalISel] Replace hard coded dynamic alloca handling with G_DYN_STACKALLOC.

This change moves the actual stack pointer manipulation into the legalizer,
available to targets via lower(). The codegen is slightly different because
we're using explicit masks instead of G_PTRMASK, and using G_SUB rather than
adding a negative amount via G_GEP.

Differential Revision: https://reviews.llvm.org/D66678

llvm-svn: 370104

											
										
										
											2019-08-28 03:54:27 +08:00
+								      getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, DL->getTypeAllocSize(Ty)));
-												GlobalISel: translate dynamic alloca instructions.

llvm-svn: 294022

											
										
										
											2017-02-04 02:22:45 +08:00
+								  MIRBuilder.buildMul(AllocSize, NumElts, TySize);
-												[GlobalISel] Replace hard coded dynamic alloca handling with G_DYN_STACKALLOC.

This change moves the actual stack pointer manipulation into the legalizer,
available to targets via lower(). The codegen is slightly different because
we're using explicit masks instead of G_PTRMASK, and using G_SUB rather than
adding a negative amount via G_GEP.

Differential Revision: https://reviews.llvm.org/D66678

llvm-svn: 370104

											
										
										
											2019-08-28 03:54:27 +08:00
+								  // Round the size of the allocation up to the stack alignment size
 								  // by add SA-1 to the size. This doesn't overflow because we're computing
 								  // an address inside an alloca.
-												[Alignment][NFC] Convert MachineIRBuilder::buildDynStackAlloc to Align

Summary:
The change in IRTranslator is not trivial but is NFC as far as I can tell.

This patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790

Reviewers: courbet

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77292

											
										
										
											2020-04-02 17:15:06 +08:00
+								  Align StackAlign = MF->getSubtarget().getFrameLowering()->getStackAlign();
 								  auto SAMinusOne = MIRBuilder.buildConstant(IntPtrTy, StackAlign.value() - 1);
-												[GlobalISel] Replace hard coded dynamic alloca handling with G_DYN_STACKALLOC.

This change moves the actual stack pointer manipulation into the legalizer,
available to targets via lower(). The codegen is slightly different because
we're using explicit masks instead of G_PTRMASK, and using G_SUB rather than
adding a negative amount via G_GEP.

Differential Revision: https://reviews.llvm.org/D66678

llvm-svn: 370104

											
										
										
											2019-08-28 03:54:27 +08:00
+								  auto AllocAdd = MIRBuilder.buildAdd(IntPtrTy, AllocSize, SAMinusOne,
 								                                      MachineInstr::NoUWrap);
 								  auto AlignCst =
-												[Alignment][NFC] Convert MachineIRBuilder::buildDynStackAlloc to Align

Summary:
The change in IRTranslator is not trivial but is NFC as far as I can tell.

This patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790

Reviewers: courbet

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77292

											
										
										
											2020-04-02 17:15:06 +08:00
+								      MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign.value() - 1));
-												[GlobalISel] Replace hard coded dynamic alloca handling with G_DYN_STACKALLOC.

This change moves the actual stack pointer manipulation into the legalizer,
available to targets via lower(). The codegen is slightly different because
we're using explicit masks instead of G_PTRMASK, and using G_SUB rather than
adding a negative amount via G_GEP.

Differential Revision: https://reviews.llvm.org/D66678

llvm-svn: 370104

											
										
										
											2019-08-28 03:54:27 +08:00
+								  auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst);
-												AllocaInst should store Align instead of MaybeAlign.

Along the lines of D77454 and D79968.  Unlike loads and stores, the
default alignment is getPrefTypeAlign, to match the existing handling in
various places, including SelectionDAG and InstCombine.

Differential Revision: https://reviews.llvm.org/D80044

											
										
										
											2020-05-16 04:23:14 +08:00
+								  Align Alignment = std::max(AI.getAlign(), DL->getPrefTypeAlign(Ty));
-												[Alignment][NFC] Convert MachineIRBuilder::buildDynStackAlloc to Align

Summary:
The change in IRTranslator is not trivial but is NFC as far as I can tell.

This patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790

Reviewers: courbet

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77292

											
										
										
											2020-04-02 17:15:06 +08:00
+								  if (Alignment <= StackAlign)
 								    Alignment = Align(1);
 								  MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Alignment);
-												[IRTranslator] Do not abort when the target wants to fall back.

Every pass in the GlobalISel pipeline will need to do something similar.

llvm-svn: 279886

											
										
										
											2016-08-27 07:49:05 +08:00
-												[Alignment][NFC] Convert MachineIRBuilder::buildDynStackAlloc to Align

Summary:
The change in IRTranslator is not trivial but is NFC as far as I can tell.

This patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790

Reviewers: courbet

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77292

											
										
										
											2020-04-02 17:15:06 +08:00
+								  MF->getFrameInfo().CreateVariableSizedObject(Alignment, &AI);
-												GlobalISel: translate dynamic alloca instructions.

llvm-svn: 294022

											
										
										
											2017-02-04 02:22:45 +08:00
+								  assert(MF->getFrameInfo().hasVarSizedObjects());
-												GlobalISel: implement alloca instruction

llvm-svn: 276433

											
										
										
											2016-07-23 00:59:52 +08:00
+								  return true;
 								}
-												GlobalISel: support translating va_arg

Since (say) i128 and [16 x i8] map to the same type in generic MIR, we also
need to attach the required alignment info.

llvm-svn: 295254

											
										
										
											2017-02-16 07:22:33 +08:00
+								bool IRTranslator::translateVAArg(const User &U, MachineIRBuilder &MIRBuilder) {
 								  // FIXME: We may need more info about the type. Because of how LLT works,
 								  // we're completely discarding the i64/double distinction here (amongst
 								  // others). Fortunately the ABIs I know of where that matters don't use va_arg
 								  // anyway but that's not guaranteed.
-												[GlobalISel] Use more MachineIRBuilder helper methods

Reviewers: arsenm, nhaehnle

Subscribers: wdng, rovka, hiraditya, volkan, Petar.Avramovic, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D72833

											
										
										
											2020-01-16 20:09:48 +08:00
+								  MIRBuilder.buildInstr(TargetOpcode::G_VAARG, {getOrCreateVReg(U)},
 								                        {getOrCreateVReg(*U.getOperand(0)),
-												[Alignment][NFC] Transition and simplify calls to DL::getABITypeAlignment

This patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790

Differential Revision: https://reviews.llvm.org/D82956

											
										
										
											2020-07-01 22:31:56 +08:00
+								                         DL->getABITypeAlign(U.getType()).value()});
-												GlobalISel: support translating va_arg

Since (say) i128 and [16 x i8] map to the same type in generic MIR, we also
need to attach the required alignment info.

llvm-svn: 295254

											
										
										
											2017-02-16 07:22:33 +08:00
+								  return true;
 								}
-												[GlobalISel] Translate insertelement and extractelement

Reviewers: qcolombet, aditya_nandakumar, dsanders, ab, t.p.northover, javed.absar

Reviewed By: qcolombet

Subscribers: dberris, rovka, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30761

llvm-svn: 297495

											
										
										
											2017-03-11 03:08:28 +08:00
+								bool IRTranslator::translateInsertElement(const User &U,
 								                                          MachineIRBuilder &MIRBuilder) {
 								  // If it is a <1 x Ty> vector, use the scalar as it is
 								  // not a legal vector type in LLT.
-												[SVE] Remove calls to VectorType::getNumElements from CodeGen

Reviewers: efriedma, fpetrogalli, sdesmalen, RKSimon, arsenm

Reviewed By: RKSimon

Subscribers: wdng, tschuett, hiraditya, rkruppe, psnobl, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D82210

											
										
										
											2020-07-10 02:51:03 +08:00
+								  if (cast<FixedVectorType>(U.getType())->getNumElements() == 1)
-												[GlobalISel][IRTranslator] New helper function translateCopy. NFC.

Reviewers: arsenm, volkan, t.p.northover, aditya_nandakumar

Subscribers: wdng, rovka, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D78377

											
										
										
											2020-04-17 21:36:01 +08:00
+								    return translateCopy(U, *U.getOperand(1), MIRBuilder);
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								  Register Res = getOrCreateVReg(U);
 								  Register Val = getOrCreateVReg(*U.getOperand(0));
 								  Register Elt = getOrCreateVReg(*U.getOperand(1));
 								  Register Idx = getOrCreateVReg(*U.getOperand(2));
-												[GlobalISel] Remove non-determinism from IRTranslator.

This showed up in r300535/r300537, which were reverted in r300538 due to
some of the introduced tests in there failing on some bots, due to the
non-determinism fixed in this commit.

Re-committing r300535/r300537 will add 2 tests for the change in this
commit.

llvm-svn: 300663

											
										
										
											2017-04-19 14:38:37 +08:00
+								  MIRBuilder.buildInsertVectorElement(Res, Val, Elt, Idx);
-												[GlobalISel] Translate insertelement and extractelement

Reviewers: qcolombet, aditya_nandakumar, dsanders, ab, t.p.northover, javed.absar

Reviewed By: qcolombet

Subscribers: dberris, rovka, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30761

llvm-svn: 297495

											
										
										
											2017-03-11 03:08:28 +08:00
+								  return true;
 								}
 								bool IRTranslator::translateExtractElement(const User &U,
 								                                           MachineIRBuilder &MIRBuilder) {
 								  // If it is a <1 x Ty> vector, use the scalar as it is
 								  // not a legal vector type in LLT.
-												[SVE] Remove calls to VectorType::getNumElements from CodeGen

Reviewers: efriedma, fpetrogalli, sdesmalen, RKSimon, arsenm

Reviewed By: RKSimon

Subscribers: wdng, tschuett, hiraditya, rkruppe, psnobl, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D82210

											
										
										
											2020-07-10 02:51:03 +08:00
+								  if (cast<FixedVectorType>(U.getOperand(0)->getType())->getNumElements() == 1)
-												[GlobalISel][IRTranslator] New helper function translateCopy. NFC.

Reviewers: arsenm, volkan, t.p.northover, aditya_nandakumar

Subscribers: wdng, rovka, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D78377

											
										
										
											2020-04-17 21:36:01 +08:00
+								    return translateCopy(U, *U.getOperand(0), MIRBuilder);
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								  Register Res = getOrCreateVReg(U);
 								  Register Val = getOrCreateVReg(*U.getOperand(0));
-												[GlobalISel] Use the target preferred type for G_EXTRACT_VECTOR_ELT index.

Allows for better imported pattern re-use.

llvm-svn: 345265

											
										
										
											2018-10-25 22:04:54 +08:00
+								  const auto &TLI = *MF->getSubtarget().getTargetLowering();
 								  unsigned PreferredVecIdxWidth = TLI.getVectorIdxTy(*DL).getSizeInBits();
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								  Register Idx;
-												[GlobalISel] Use the target preferred type for G_EXTRACT_VECTOR_ELT index.

Allows for better imported pattern re-use.

llvm-svn: 345265

											
										
										
											2018-10-25 22:04:54 +08:00
+								  if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(1))) {
 								    if (CI->getBitWidth() != PreferredVecIdxWidth) {
 								      APInt NewIdx = CI->getValue().sextOrTrunc(PreferredVecIdxWidth);
 								      auto *NewIdxCI = ConstantInt::get(CI->getContext(), NewIdx);
 								      Idx = getOrCreateVReg(*NewIdxCI);
 								    }
 								  }
 								  if (!Idx)
 								    Idx = getOrCreateVReg(*U.getOperand(1));
 								  if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) {
-												GlobalISel: Don't use LLT references

These should always be passed by value

											
										
										
											2020-02-14 03:30:50 +08:00
+								    const LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth);
-												[CodeGen] Make use of MachineInstrBuilder::getReg

Reviewers: arsenm

Subscribers: wdng, hiraditya, Petar.Avramovic, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D73262

											
										
										
											2020-01-23 19:51:35 +08:00
+								    Idx = MIRBuilder.buildSExtOrTrunc(VecIdxTy, Idx).getReg(0);
-												[GlobalISel] Use the target preferred type for G_EXTRACT_VECTOR_ELT index.

Allows for better imported pattern re-use.

llvm-svn: 345265

											
										
										
											2018-10-25 22:04:54 +08:00
+								  }
-												[GlobalISel] Remove non-determinism from IRTranslator.

This showed up in r300535/r300537, which were reverted in r300538 due to
some of the introduced tests in there failing on some bots, due to the
non-determinism fixed in this commit.

Re-committing r300535/r300537 will add 2 tests for the change in this
commit.

llvm-svn: 300663

											
										
										
											2017-04-19 14:38:37 +08:00
+								  MIRBuilder.buildExtractVectorElement(Res, Val, Idx);
-												[GlobalISel] Translate insertelement and extractelement

Reviewers: qcolombet, aditya_nandakumar, dsanders, ab, t.p.northover, javed.absar

Reviewed By: qcolombet

Subscribers: dberris, rovka, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30761

llvm-svn: 297495

											
										
										
											2017-03-11 03:08:28 +08:00
+								  return true;
 								}
-												[GlobalISel] Translate shufflevector

Reviewers: qcolombet, aditya_nandakumar, t.p.northover, javed.absar, ab, dsanders

Reviewed By: javed.absar

Subscribers: dberris, rovka, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30962

llvm-svn: 298347

											
										
										
											2017-03-21 16:44:13 +08:00
+								bool IRTranslator::translateShuffleVector(const User &U,
 								                                          MachineIRBuilder &MIRBuilder) {
-												Remove "mask" operand from shufflevector.

Instead, represent the mask as out-of-line data in the instruction. This
should be more efficient in the places that currently use
getShuffleVector(), and paves the way for further changes to add new
shuffles for scalable vectors.

This doesn't change the syntax in textual IR. And I don't currently plan
to change the bitcode encoding in this patch, although we'll probably
need to do something once we extend shufflevector for scalable types.

I expect that once this is finished, we can then replace the raw "mask"
with something more appropriate for scalable vectors.  Not sure exactly
what this looks like at the moment, but there are a few different ways
we could handle it.  Maybe we could try to describe specific shuffles.
Or maybe we could define it in terms of a function to convert a fixed-length
array into an appropriate scalable vector, using a "step", or something
like that.

Differential Revision: https://reviews.llvm.org/D72467

											
										
										
											2020-04-01 04:08:59 +08:00
+								  ArrayRef<int> Mask;
 								  if (auto *SVI = dyn_cast<ShuffleVectorInst>(&U))
 								    Mask = SVI->getShuffleMask();
 								  else
 								    Mask = cast<ConstantExpr>(U).getShuffleMask();
-												[GlobalISel] Change representation of shuffle masks in MachineOperand.

We're planning to remove the shufflemask operand from ShuffleVectorInst
(D72467); fix GlobalISel so it doesn't depend on that Constant.

The change to prelegalizercombiner-shuffle-vector.mir happens because
the input contains a literal "-1" in the mask (so the parser/verifier
weren't really handling it properly). We now treat it as equivalent to
"undef" in all contexts.

Differential Revision: https://reviews.llvm.org/D72663

											
										
										
											2020-01-14 07:32:45 +08:00
+								  ArrayRef<int> MaskAlloc = MF->allocateShuffleMask(Mask);
-												[GlobalISel] Use more MachineIRBuilder helper methods

Reviewers: arsenm, nhaehnle

Subscribers: wdng, rovka, hiraditya, volkan, Petar.Avramovic, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D72833

											
										
										
											2020-01-16 20:09:48 +08:00
+								  MIRBuilder
 								      .buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {getOrCreateVReg(U)},
 								                  {getOrCreateVReg(*U.getOperand(0)),
 								                   getOrCreateVReg(*U.getOperand(1))})
-												[GlobalISel] Change representation of shuffle masks in MachineOperand.

We're planning to remove the shufflemask operand from ShuffleVectorInst
(D72467); fix GlobalISel so it doesn't depend on that Constant.

The change to prelegalizercombiner-shuffle-vector.mir happens because
the input contains a literal "-1" in the mask (so the parser/verifier
weren't really handling it properly). We now treat it as equivalent to
"undef" in all contexts.

Differential Revision: https://reviews.llvm.org/D72663

											
										
										
											2020-01-14 07:32:45 +08:00
+								      .addShuffleMask(MaskAlloc);
-												[GlobalISel] Translate shufflevector

Reviewers: qcolombet, aditya_nandakumar, t.p.northover, javed.absar, ab, dsanders

Reviewed By: javed.absar

Subscribers: dberris, rovka, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30962

llvm-svn: 298347

											
										
										
											2017-03-21 16:44:13 +08:00
+								  return true;
 								}
-												GlobalISel: use correct builder for ConstantExprs.

ConstantExpr instances were emitting code into the current block rather than
the entry block. This meant they didn't necessarily dominate all uses, which is
clearly wrong.

llvm-svn: 288985

											
										
										
											2016-12-08 05:29:15 +08:00
+								bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) {
-												GlobalISel: support same ConstantExprs as Instructions.

It's more than just inttoptr, but the others can't be tested until we have
support for non-trivial constants (they currently get unavoidably folded to a
ConstantInt).

llvm-svn: 278303

											
										
										
											2016-08-11 07:02:41 +08:00
+								  const PHINode &PI = cast<PHINode>(U);
-												GlobalISel: IRTranslate PHI instructions

llvm-svn: 277835

											
										
										
											2016-08-06 01:16:40 +08:00
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  SmallVector<MachineInstr *, 4> Insts;
 								  for (auto Reg : getOrCreateVRegs(PI)) {
-												[GISel]: Refactor MachineIRBuilder to allow passing additional parameters to build Instrs

https://reviews.llvm.org/D55294

Previously MachineIRBuilder::buildInstr used to accept variadic
arguments for sources (which were either unsigned or
MachineInstrBuilder). While this worked well in common cases, it doesn't
allow us to build instructions that have multiple destinations.
Additionally passing in other optional parameters in the end (such as
flags) is not possible trivially. Also a trivial call such as

B.buildInstr(Opc, Reg1, Reg2, Reg3)
can be interpreted differently based on the opcode (2defs + 1 src for
unmerge vs 1 def + 2srcs).
This patch refactors the buildInstr to

buildInstr(Opc, ArrayRef<DstOps>, ArrayRef<SrcOps>)
where DstOps and SrcOps are typed unions that know how to add itself to
MachineInstrBuilder.
After this patch, most invocations would look like

B.buildInstr(Opc, {s32, DstReg}, {SrcRegs..., SrcMIBs..});
Now all the other calls (such as buildAdd, buildSub etc) forward to
buildInstr. It also makes it possible to build instructions with
multiple defs.
Additionally in a subsequent patch, we should make it possible to add
flags directly while building instructions.
Additionally, the main buildInstr method is now virtual, so other
builders only have to override buildInstr (say, for constant
folding/CSEing), which is straightforward.

Also attached here (https://reviews.llvm.org/F7675680) is a clang-tidy
patch that should upgrade the API calls if necessary.

llvm-svn: 348815

											
										
										
											2018-12-11 08:48:50 +08:00
+								    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, {Reg}, {});
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								    Insts.push_back(MIB.getInstr());
 								  }
 								  PendingPHIs.emplace_back(&PI, std::move(Insts));
-												GlobalISel: IRTranslate PHI instructions

llvm-svn: 277835

											
										
										
											2016-08-06 01:16:40 +08:00
+								  return true;
 								}
-												[globalisel][irtranslator] Add support for atomicrmw and (strong) cmpxchg

Summary:
This patch adds support for the atomicrmw instructions and the strong
cmpxchg instruction to the IRTranslator.

I've left out weak cmpxchg because LangRef.rst isn't entirely clear on what
difference it makes to the backend. As far as I can tell from the code, it
only matters to AtomicExpandPass which is run at the LLVM-IR level.

Reviewers: ab, t.p.northover, qcolombet, rovka, aditya_nandakumar, volkan, javed.absar

Reviewed By: qcolombet

Subscribers: kristof.beyls, javed.absar, igorb, llvm-commits

Differential Revision: https://reviews.llvm.org/D40092

llvm-svn: 336589

											
										
										
											2018-07-10 03:33:40 +08:00
+								bool IRTranslator::translateAtomicCmpXchg(const User &U,
 								                                          MachineIRBuilder &MIRBuilder) {
 								  const AtomicCmpXchgInst &I = cast<AtomicCmpXchgInst>(U);
-												GlobalISel: Apply target MMO flags to atomics

Unify MMO flag handling with SelectionDAG like with loads and stores.

											
										
										
											2020-01-13 04:54:09 +08:00
+								  auto &TLI = *MF->getSubtarget().getTargetLowering();
 								  auto Flags = TLI.getAtomicMemOperandFlags(I, *DL);
-												[globalisel][irtranslator] Add support for atomicrmw and (strong) cmpxchg

Summary:
This patch adds support for the atomicrmw instructions and the strong
cmpxchg instruction to the IRTranslator.

I've left out weak cmpxchg because LangRef.rst isn't entirely clear on what
difference it makes to the backend. As far as I can tell from the code, it
only matters to AtomicExpandPass which is run at the LLVM-IR level.

Reviewers: ab, t.p.northover, qcolombet, rovka, aditya_nandakumar, volkan, javed.absar

Reviewed By: qcolombet

Subscribers: kristof.beyls, javed.absar, igorb, llvm-commits

Differential Revision: https://reviews.llvm.org/D40092

llvm-svn: 336589

											
										
										
											2018-07-10 03:33:40 +08:00
 								  Type *ResType = I.getType();
 								  Type *ValType = ResType->Type::getStructElementType(0);
 								  auto Res = getOrCreateVRegs(I);
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								  Register OldValRes = Res[0];
 								  Register SuccessRes = Res[1];
 								  Register Addr = getOrCreateVReg(*I.getPointerOperand());
 								  Register Cmp = getOrCreateVReg(*I.getCompareOperand());
 								  Register NewVal = getOrCreateVReg(*I.getNewValOperand());
-												[globalisel][irtranslator] Add support for atomicrmw and (strong) cmpxchg

Summary:
This patch adds support for the atomicrmw instructions and the strong
cmpxchg instruction to the IRTranslator.

I've left out weak cmpxchg because LangRef.rst isn't entirely clear on what
difference it makes to the backend. As far as I can tell from the code, it
only matters to AtomicExpandPass which is run at the LLVM-IR level.

Reviewers: ab, t.p.northover, qcolombet, rovka, aditya_nandakumar, volkan, javed.absar

Reviewed By: qcolombet

Subscribers: kristof.beyls, javed.absar, igorb, llvm-commits

Differential Revision: https://reviews.llvm.org/D40092

llvm-svn: 336589

											
										
										
											2018-07-10 03:33:40 +08:00
-												[globalisel][irtanslator] The IRTranslator should preserve TBAA information

											
										
										
											2019-11-15 04:11:00 +08:00
+								  AAMDNodes AAMetadata;
 								  I.getAAMetadata(AAMetadata);
-												[globalisel][irtranslator] Add support for atomicrmw and (strong) cmpxchg

Summary:
This patch adds support for the atomicrmw instructions and the strong
cmpxchg instruction to the IRTranslator.

I've left out weak cmpxchg because LangRef.rst isn't entirely clear on what
difference it makes to the backend. As far as I can tell from the code, it
only matters to AtomicExpandPass which is run at the LLVM-IR level.

Reviewers: ab, t.p.northover, qcolombet, rovka, aditya_nandakumar, volkan, javed.absar

Reviewed By: qcolombet

Subscribers: kristof.beyls, javed.absar, igorb, llvm-commits

Differential Revision: https://reviews.llvm.org/D40092

llvm-svn: 336589

											
										
										
											2018-07-10 03:33:40 +08:00
+								  MIRBuilder.buildAtomicCmpXchgWithSuccess(
 								      OldValRes, SuccessRes, Addr, Cmp, NewVal,
-												[Alignment][NFC] Transitionning more getMachineMemOperand call sites

Summary:
This patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790

Reviewers: courbet

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77127

											
										
										
											2020-03-31 17:43:50 +08:00
+								      *MF->getMachineMemOperand(
 								          MachinePointerInfo(I.getPointerOperand()), Flags,
 								          DL->getTypeStoreSize(ValType), getMemOpAlign(I), AAMetadata, nullptr,
 								          I.getSyncScopeID(), I.getSuccessOrdering(), I.getFailureOrdering()));
-												[globalisel][irtranslator] Add support for atomicrmw and (strong) cmpxchg

Summary:
This patch adds support for the atomicrmw instructions and the strong
cmpxchg instruction to the IRTranslator.

I've left out weak cmpxchg because LangRef.rst isn't entirely clear on what
difference it makes to the backend. As far as I can tell from the code, it
only matters to AtomicExpandPass which is run at the LLVM-IR level.

Reviewers: ab, t.p.northover, qcolombet, rovka, aditya_nandakumar, volkan, javed.absar

Reviewed By: qcolombet

Subscribers: kristof.beyls, javed.absar, igorb, llvm-commits

Differential Revision: https://reviews.llvm.org/D40092

llvm-svn: 336589

											
										
										
											2018-07-10 03:33:40 +08:00
+								  return true;
 								}
 								bool IRTranslator::translateAtomicRMW(const User &U,
 								                                      MachineIRBuilder &MIRBuilder) {
 								  const AtomicRMWInst &I = cast<AtomicRMWInst>(U);
-												GlobalISel: Apply target MMO flags to atomics

Unify MMO flag handling with SelectionDAG like with loads and stores.

											
										
										
											2020-01-13 04:54:09 +08:00
+								  auto &TLI = *MF->getSubtarget().getTargetLowering();
 								  auto Flags = TLI.getAtomicMemOperandFlags(I, *DL);
-												[globalisel][irtranslator] Add support for atomicrmw and (strong) cmpxchg

Summary:
This patch adds support for the atomicrmw instructions and the strong
cmpxchg instruction to the IRTranslator.

I've left out weak cmpxchg because LangRef.rst isn't entirely clear on what
difference it makes to the backend. As far as I can tell from the code, it
only matters to AtomicExpandPass which is run at the LLVM-IR level.

Reviewers: ab, t.p.northover, qcolombet, rovka, aditya_nandakumar, volkan, javed.absar

Reviewed By: qcolombet

Subscribers: kristof.beyls, javed.absar, igorb, llvm-commits

Differential Revision: https://reviews.llvm.org/D40092

llvm-svn: 336589

											
										
										
											2018-07-10 03:33:40 +08:00
 								  Type *ResType = I.getType();
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								  Register Res = getOrCreateVReg(I);
 								  Register Addr = getOrCreateVReg(*I.getPointerOperand());
 								  Register Val = getOrCreateVReg(*I.getValOperand());
-												[globalisel][irtranslator] Add support for atomicrmw and (strong) cmpxchg

Summary:
This patch adds support for the atomicrmw instructions and the strong
cmpxchg instruction to the IRTranslator.

I've left out weak cmpxchg because LangRef.rst isn't entirely clear on what
difference it makes to the backend. As far as I can tell from the code, it
only matters to AtomicExpandPass which is run at the LLVM-IR level.

Reviewers: ab, t.p.northover, qcolombet, rovka, aditya_nandakumar, volkan, javed.absar

Reviewed By: qcolombet

Subscribers: kristof.beyls, javed.absar, igorb, llvm-commits

Differential Revision: https://reviews.llvm.org/D40092

llvm-svn: 336589

											
										
										
											2018-07-10 03:33:40 +08:00
 								  unsigned Opcode = 0;
 								  switch (I.getOperation()) {
 								  default:
 								    return false;
 								  case AtomicRMWInst::Xchg:
 								    Opcode = TargetOpcode::G_ATOMICRMW_XCHG;
 								    break;
 								  case AtomicRMWInst::Add:
 								    Opcode = TargetOpcode::G_ATOMICRMW_ADD;
 								    break;
 								  case AtomicRMWInst::Sub:
 								    Opcode = TargetOpcode::G_ATOMICRMW_SUB;
 								    break;
 								  case AtomicRMWInst::And:
 								    Opcode = TargetOpcode::G_ATOMICRMW_AND;
 								    break;
 								  case AtomicRMWInst::Nand:
 								    Opcode = TargetOpcode::G_ATOMICRMW_NAND;
 								    break;
 								  case AtomicRMWInst::Or:
 								    Opcode = TargetOpcode::G_ATOMICRMW_OR;
 								    break;
 								  case AtomicRMWInst::Xor:
 								    Opcode = TargetOpcode::G_ATOMICRMW_XOR;
 								    break;
 								  case AtomicRMWInst::Max:
 								    Opcode = TargetOpcode::G_ATOMICRMW_MAX;
 								    break;
 								  case AtomicRMWInst::Min:
 								    Opcode = TargetOpcode::G_ATOMICRMW_MIN;
 								    break;
 								  case AtomicRMWInst::UMax:
 								    Opcode = TargetOpcode::G_ATOMICRMW_UMAX;
 								    break;
 								  case AtomicRMWInst::UMin:
 								    Opcode = TargetOpcode::G_ATOMICRMW_UMIN;
 								    break;
-												GlobalISel: Add G_ATOMICRMW_{FADD|FSUB}

llvm-svn: 367369

											
										
										
											2019-07-31 07:56:30 +08:00
+								  case AtomicRMWInst::FAdd:
 								    Opcode = TargetOpcode::G_ATOMICRMW_FADD;
 								    break;
 								  case AtomicRMWInst::FSub:
 								    Opcode = TargetOpcode::G_ATOMICRMW_FSUB;
 								    break;
-												[globalisel][irtranslator] Add support for atomicrmw and (strong) cmpxchg

Summary:
This patch adds support for the atomicrmw instructions and the strong
cmpxchg instruction to the IRTranslator.

I've left out weak cmpxchg because LangRef.rst isn't entirely clear on what
difference it makes to the backend. As far as I can tell from the code, it
only matters to AtomicExpandPass which is run at the LLVM-IR level.

Reviewers: ab, t.p.northover, qcolombet, rovka, aditya_nandakumar, volkan, javed.absar

Reviewed By: qcolombet

Subscribers: kristof.beyls, javed.absar, igorb, llvm-commits

Differential Revision: https://reviews.llvm.org/D40092

llvm-svn: 336589

											
										
										
											2018-07-10 03:33:40 +08:00
+								  }
-												[globalisel][irtanslator] The IRTranslator should preserve TBAA information

											
										
										
											2019-11-15 04:11:00 +08:00
+								  AAMDNodes AAMetadata;
 								  I.getAAMetadata(AAMetadata);
-												[globalisel][irtranslator] Add support for atomicrmw and (strong) cmpxchg

Summary:
This patch adds support for the atomicrmw instructions and the strong
cmpxchg instruction to the IRTranslator.

I've left out weak cmpxchg because LangRef.rst isn't entirely clear on what
difference it makes to the backend. As far as I can tell from the code, it
only matters to AtomicExpandPass which is run at the LLVM-IR level.

Reviewers: ab, t.p.northover, qcolombet, rovka, aditya_nandakumar, volkan, javed.absar

Reviewed By: qcolombet

Subscribers: kristof.beyls, javed.absar, igorb, llvm-commits

Differential Revision: https://reviews.llvm.org/D40092

llvm-svn: 336589

											
										
										
											2018-07-10 03:33:40 +08:00
+								  MIRBuilder.buildAtomicRMW(
 								      Opcode, Res, Addr, Val,
 								      *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
 								                                Flags, DL->getTypeStoreSize(ResType),
-												[Alignment][NFC] Transitionning more getMachineMemOperand call sites

Summary:
This is patch is part of a series to introduce an Alignment type.
See this thread for context: http://lists.llvm.org/pipermail/llvm-dev/2019-July/133851.html
See this patch for the introduction of the type: https://reviews.llvm.org/D64790

Reviewers: courbet

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77127

											
										
										
											2020-03-31 17:43:50 +08:00
+								                                getMemOpAlign(I), AAMetadata, nullptr,
 								                                I.getSyncScopeID(), I.getOrdering()));
-												[globalisel][irtranslator] Add support for atomicrmw and (strong) cmpxchg

Summary:
This patch adds support for the atomicrmw instructions and the strong
cmpxchg instruction to the IRTranslator.

I've left out weak cmpxchg because LangRef.rst isn't entirely clear on what
difference it makes to the backend. As far as I can tell from the code, it
only matters to AtomicExpandPass which is run at the LLVM-IR level.

Reviewers: ab, t.p.northover, qcolombet, rovka, aditya_nandakumar, volkan, javed.absar

Reviewed By: qcolombet

Subscribers: kristof.beyls, javed.absar, igorb, llvm-commits

Differential Revision: https://reviews.llvm.org/D40092

llvm-svn: 336589

											
										
										
											2018-07-10 03:33:40 +08:00
+								  return true;
 								}
-												GlobalISel: Add G_FENCE

The pattern importer is for some reason emitting checks for G_CONSTANT
for the immediate operands.

llvm-svn: 364926

											
										
										
											2019-07-02 22:16:39 +08:00
+								/// Translate an IR `fence` instruction by building a fence through
 								/// \p MIRBuilder with the instruction's ordering and sync scope.
 								///
 								/// \returns true unconditionally.
 								bool IRTranslator::translateFence(const User &U,
 								                                  MachineIRBuilder &MIRBuilder) {
 								  const FenceInst &FenceI = cast<FenceInst>(U);
 								  // The builder takes the ordering as a plain unsigned value.
 								  const unsigned Ordering = static_cast<unsigned>(FenceI.getOrdering());
 								  MIRBuilder.buildFence(Ordering, FenceI.getSyncScopeID());
 								  return true;
 								}
-												[GlobalISel] translate freeze to new generic G_FREEZE

Summary:
As a follow up to https://reviews.llvm.org/D29014, add translation
support for freeze.

Introduce a new generic instruction G_FREEZE and translate freeze to it.

Reviewers: dsanders, aqjune, arsenm, aditya_nandakumar, t.p.northover, lebedev.ri, paquette, aemerson

Reviewed By: aqjune, arsenm

Subscribers: fhahn, lebedev.ri, wdng, rovka, hiraditya, jfb, volkan, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77795

											
										
										
											2020-04-09 20:33:59 +08:00
+								/// Translate an IR `freeze` by building one freeze per register component,
 								/// pairing each destination register with the matching source register.
 								///
 								/// \returns true unconditionally.
 								bool IRTranslator::translateFreeze(const User &U,
 								                                   MachineIRBuilder &MIRBuilder) {
 								  const ArrayRef<Register> Dsts = getOrCreateVRegs(U);
 								  const ArrayRef<Register> Srcs = getOrCreateVRegs(*U.getOperand(0));
 								  const size_t NumParts = Dsts.size();
 								  assert(NumParts == Srcs.size() &&
 								         "Freeze with different source and destination type?");
 								  for (size_t Part = 0; Part != NumParts; ++Part)
 								    MIRBuilder.buildFreeze(Dsts[Part], Srcs[Part]);
 								  return true;
 								}
-												GlobalISel: IRTranslate PHI instructions

llvm-svn: 277835

											
										
										
											2016-08-06 01:16:40 +08:00
+								void IRTranslator::finishPendingPhis() {
-												[globalisel][irtranslator] Verify that DILocations aren't lost in translation

Summary:
Also fix a couple bugs where DILocations are lost. EntryBuilder wasn't passing
on debug locations for PHI's, constants, GLOBAL_VALUE, etc.

Reviewers: aprantl, vsk, bogner, aditya_nandakumar, volkan, rtereshin, aemerson

Reviewed By: aemerson

Subscribers: aemerson, rovka, kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D53740

llvm-svn: 345743

											
										
										
											2018-11-01 01:31:23 +08:00
+								#ifndef NDEBUG
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								  DILocationVerifier Verifier;
 								  GISelObserverWrapper WrapperObserver(&Verifier);
 								  RAIIDelegateInstaller DelInstall(*MF, &WrapperObserver);
-												[globalisel][irtranslator] Verify that DILocations aren't lost in translation

Summary:
Also fix a couple bugs where DILocations are lost. EntryBuilder wasn't passing
on debug locations for PHI's, constants, GLOBAL_VALUE, etc.

Reviewers: aprantl, vsk, bogner, aditya_nandakumar, volkan, rtereshin, aemerson

Reviewed By: aemerson

Subscribers: aemerson, rovka, kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D53740

llvm-svn: 345743

											
										
										
											2018-11-01 01:31:23 +08:00
+								#endif // ifndef NDEBUG
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  for (auto &Phi : PendingPHIs) {
-												GlobalISel: IRTranslate PHI instructions

llvm-svn: 277835

											
										
										
											2016-08-06 01:16:40 +08:00
+								    const PHINode *PI = Phi.first;
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								    ArrayRef<MachineInstr *> ComponentPHIs = Phi.second;
-												[GlobalISel][IRTranslator] Fix some PHI bugs related to jump tables when optimizations are used.

The new switch lowering code that tries to generate jump tables and range checks
were tested at -O0 on arm64, but on -O3 the generic switch lowering code goes to
town on trying to generate optimized lowerings, e.g. multiple jump tables, range
checks etc. This exposed bugs in the way PHI nodes are handled because the CFG
looks even stranger after all of this is done.

llvm-svn: 364613

											
										
										
											2019-06-28 07:56:34 +08:00
+								    MachineBasicBlock *PhiMBB = ComponentPHIs[0]->getParent();
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								    EntryBuilder->setDebugLoc(PI->getDebugLoc());
-												[globalisel][irtranslator] Verify that DILocations aren't lost in translation

Summary:
Also fix a couple bugs where DILocations are lost. EntryBuilder wasn't passing
on debug locations for PHI's, constants, GLOBAL_VALUE, etc.

Reviewers: aprantl, vsk, bogner, aditya_nandakumar, volkan, rtereshin, aemerson

Reviewed By: aemerson

Subscribers: aemerson, rovka, kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D53740

llvm-svn: 345743

											
										
										
											2018-11-01 01:31:23 +08:00
+								#ifndef NDEBUG
 								    Verifier.setCurrentInst(PI);
 								#endif // ifndef NDEBUG
-												GlobalISel: IRTranslate PHI instructions

llvm-svn: 277835

											
										
										
											2016-08-06 01:16:40 +08:00
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG to in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								    SmallSet<const MachineBasicBlock *, 16> SeenPreds;
-												GlobalISel: IRTranslate PHI instructions

llvm-svn: 277835

											
										
										
											2016-08-06 01:16:40 +08:00
+								    for (unsigned i = 0; i < PI->getNumIncomingValues(); ++i) {
-												[GlobalISel] track predecessor mapping during switch lowering.

Correctly populating Machine PHIs relies on knowing exactly how the IR level
CFG was lowered to MachineIR. This needs to be tracked by any translation
phases that meddle (currently only SwitchInst handling).

This reapplies r291973 which was reverted because of testing failures. Fixes:

 + Don't return an ArrayRef to a local temporary.
 + Incorporate Kristof's suggested comment improvements.

llvm-svn: 292278

											
										
										
											2017-01-18 06:13:50 +08:00
+								      auto IRPred = PI->getIncomingBlock(i);
-												CodeGen: Introduce a class for registers

Avoids using a plain unsigned for registers throughoug codegen.
Doesn't attempt to change every register use, just something a little
more than the set needed to build after changing the return type of
MachineOperand::getReg().

llvm-svn: 364191

											
										
										
											2019-06-24 23:50:29 +08:00
+								      ArrayRef<Register> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i));
-												[GlobalISel] track predecessor mapping during switch lowering.

Correctly populating Machine PHIs relies on knowing exactly how the IR level
CFG was lowered to MachineIR. This needs to be tracked by any translation
phases that meddle (currently only SwitchInst handling).

This reapplies r291973 which was reverted because of testing failures. Fixes:

 + Don't return an ArrayRef to a local temporary.
 + Incorporate Kristof's suggested comment improvements.

llvm-svn: 292278

											
										
										
											2017-01-18 06:13:50 +08:00
+								      for (auto Pred : getMachinePredBBs({IRPred, PI->getParent()})) {
-												[GlobalISel][IRTranslator] Fix some PHI bugs related to jump tables when optimizations are used.

The new switch lowering code that tries to generate jump tables and range checks
were tested at -O0 on arm64, but on -O3 the generic switch lowering code goes to
town on trying to generate optimized lowerings, e.g. multiple jump tables, range
checks etc. This exposed bugs in the way PHI nodes are handled because the CFG
looks even stranger after all of this is done.

llvm-svn: 364613

											
										
										
											2019-06-28 07:56:34 +08:00
+								        if (SeenPreds.count(Pred) || !PhiMBB->isPredecessor(Pred))
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG to in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								          continue;
 								        SeenPreds.insert(Pred);
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								        for (unsigned j = 0; j < ValRegs.size(); ++j) {
 								          MachineInstrBuilder MIB(*MF, ComponentPHIs[j]);
 								          MIB.addUse(ValRegs[j]);
 								          MIB.addMBB(Pred);
 								        }
-												[GlobalISel] track predecessor mapping during switch lowering.

Correctly populating Machine PHIs relies on knowing exactly how the IR level
CFG was lowered to MachineIR. This needs to be tracked by any translation
phases that meddle (currently only SwitchInst handling).

This reapplies r291973 which was reverted because of testing failures. Fixes:

 + Don't return an ArrayRef to a local temporary.
 + Incorporate Kristof's suggested comment improvements.

llvm-svn: 292278

											
										
										
											2017-01-18 06:13:50 +08:00
+								      }
-												GlobalISel: IRTranslate PHI instructions

llvm-svn: 277835

											
										
										
											2016-08-06 01:16:40 +08:00
+								    }
 								  }
 								}
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control; currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								/// Report whether \p V is represented by more than one low-level type.
 								///
 								/// \param V the value whose type is examined.
 								/// \param Offsets optional output list; when non-null it is cleared and then
 								/// handed to computeValueLLTs to be filled in.
 								/// \returns true if computeValueLLTs yields more than one type for \p V.
 								bool IRTranslator::valueIsSplit(const Value &V,
 								                                SmallVectorImpl<uint64_t> *Offsets) {
 								  // Discard any stale entries the caller left in the output list.
 								  if (Offsets)
 								    Offsets->clear();
 								  SmallVector<LLT, 4> ComponentTys;
 								  computeValueLLTs(*DL, *V.getType(), ComponentTys, Offsets);
 								  const bool HasMultipleParts = ComponentTys.size() > 1;
 								  return HasMultipleParts;
 								}
-												[GlobalISel] More detailed skeleton for the IRTranslator.

llvm-svn: 260456

											
										
										
											2016-02-11 06:59:27 +08:00
+								bool IRTranslator::translate(const Instruction &Inst) {
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								  CurBuilder->setDebugLoc(Inst.getDebugLoc());
-												[GlobalISel][IRTranslator] Add debug loc with line 0 to constants emitted into the entry block.

Constants, including G_GLOBAL_VALUE, are all emitted into the entry block which
lets us use the vreg def assuming it dominates all other users. However, it can
cause jumpy debug behaviour since the DebugLoc attached to these MIs are from
a user instruction that could be in a different block.

Fixes PR40887.

Differential Revision: https://reviews.llvm.org/D63286

llvm-svn: 363331

											
										
										
											2019-06-14 06:15:35 +08:00
-												[SVE] Fall back on DAG ISel at -O0 when encountering scalable types

At the moment we use Global ISel by default at -O0, however it is
currently not capable of dealing with scalable vectors for two
reasons:

1. The register banks know nothing about SVE registers.
2. The LLT (Low Level Type) class knows nothing about scalable
   vectors.

For now, the easiest way to avoid users hitting issues when using
the SVE ACLE is to fall back on normal DAG ISel when encountering
instructions that operate on scalable vector types.

I've added a couple of RUN lines to existing SVE tests to ensure
we can compile at -O0. I've also added some new tests to

  CodeGen/AArch64/GlobalISel/arm64-fallback.ll

that demonstrate we correctly fallback to DAG ISel at -O0 when
lowering formal arguments or translating instructions that involve
scalable vector types.

Differential Revision: https://reviews.llvm.org/D81557

											
										
										
											2020-06-09 21:51:38 +08:00
+								  auto &TLI = *MF->getSubtarget().getTargetLowering();
 								  if (TLI.fallBackToDAGISel(Inst))
 								    return false;
-												[GlobalISel][IRTranslator] Add debug loc with line 0 to constants emitted into the entry block.

Constants, including G_GLOBAL_VALUE, are all emitted into the entry block which
lets us use the vreg def assuming it dominates all other users. However, it can
cause jumpy debug behaviour since the DebugLoc attached to these MIs are from
a user instruction that could be in a different block.

Fixes PR40887.

Differential Revision: https://reviews.llvm.org/D63286

llvm-svn: 363331

											
										
										
											2019-06-14 06:15:35 +08:00
+								  switch (Inst.getOpcode()) {
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								#define HANDLE_INST(NUM, OPCODE, CLASS)                                        \
 								  case Instruction::OPCODE:                                                    \
 								    return translate##OPCODE(Inst, *CurBuilder.get());
-												GlobalISel: support same ConstantExprs as Instructions.

It's more than just inttoptr, but the others can't be tested until we have
support for non-trivial constants (they currently get unavoidably folded to a
ConstantInt).

llvm-svn: 278303

											
										
										
											2016-08-11 07:02:41 +08:00
+								#include "llvm/IR/Instruction.def"
-												[GlobalISel] Teach the IRTranslator how to lower returns.

llvm-svn: 260562

											
										
										
											2016-02-12 02:53:28 +08:00
+								  default:
-												[IRTranslator] Simplify error handling for translating constants. NFC.

We don't need to check whether the fallback path is enabled to return
false. Just do that all the time on error cases, the caller knows (or
at least should know!) how to handle the failing case.

llvm-svn: 297535

											
										
										
											2017-03-11 08:28:33 +08:00
+								    return false;
-												[GlobalISel] More detailed skeleton for the IRTranslator.

llvm-svn: 260456

											
										
										
											2016-02-11 06:59:27 +08:00
+								  }
-												[GlobalISel] Add the proper cmake plumbing.

This patch adds the necessary plumbing to cmake to build the sources related to
GlobalISel.

To build the sources related to GlobalISel, we need to add -DBUILD_GLOBAL_ISEL=ON.
By default, this is OFF, thus GlobalISel sources will not impact people that do
not explicitly opt-in.

Differential Revision: http://reviews.llvm.org/D15983

llvm-svn: 258344

											
										
										
											2016-01-21 04:58:56 +08:00
+								}
-												GlobalISel: Remove unsigned variant of SrcOp

Force using Register.

One downside is the generated register enums require explicit
conversion.

llvm-svn: 364194

											
										
										
											2019-06-25 00:16:12 +08:00
+								bool IRTranslator::translate(const Constant &C, Register Reg) {
-												[GlobalISel][IRTranslator] Move line zero DebugLoc creation to constant translation. NFC.

This is a compile time optimization. DILocation:get() is expensive to call, and
we were calling it to create a line zero debug loc for *every* instruction we
translated. We only really need to do this just before we build constants in the
entry block, so I moved this code there. This reduces the LLVM -O0 codegen time
of sqlite3 IR by around 0.7% instructions executed and by about ~2% in CPU time.

We can probably do better with a more involved change, since the reason we need
to create one for each new constant is because we're using the debug scope and
inlined-at loc. If we just use a single instruction's scope and drop the
inlined-at, we can just cache these and have them be free.

											
										
										
											2021-04-29 14:16:54 +08:00
+								  // We only emit constants into the entry block from here. To prevent jumpy
 								  // debug behaviour set the line to 0.
 								  if (auto CurrInstDL = CurBuilder->getDL())
 								    EntryBuilder->setDebugLoc(DILocation::get(C.getContext(), 0, 0,
 								                                              CurrInstDL.getScope(),
 								                                              CurrInstDL.getInlinedAt()));
-												GlobalISel: support 'undef' constant.

llvm-svn: 278174

											
										
										
											2016-08-10 07:01:30 +08:00
+								  if (auto CI = dyn_cast<ConstantInt>(&C))
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								    EntryBuilder->buildConstant(Reg, *CI);
-												GlobalISel: translate floating-point constants

llvm-svn: 279311

											
										
										
											2016-08-20 04:09:15 +08:00
+								  else if (auto CF = dyn_cast<ConstantFP>(&C))
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								    EntryBuilder->buildFConstant(Reg, *CF);
-												GlobalISel: support 'undef' constant.

llvm-svn: 278174

											
										
										
											2016-08-10 07:01:30 +08:00
+								  else if (isa<UndefValue>(C))
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								    EntryBuilder->buildUndef(Reg);
-												Revert "[GISel]: Fix incorrect IRTranslation while translating null pointer types"

This reverts commit b3297ef05179e1fee616b97b1c65b58e4c7fef17.

This change is incorrect. The current semantic of null in the IR is a
pointer with the bitvalue 0. It is not a cast from an integer 0, so
this should preserve the pointer type.

											
										
										
											2020-02-18 03:40:44 +08:00
+								  else if (isa<ConstantPointerNull>(C))
 								    EntryBuilder->buildConstant(Reg, 0);
 								  else if (auto GV = dyn_cast<GlobalValue>(&C))
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								    EntryBuilder->buildGlobalValue(Reg, GV);
-												GlobalISel: Translate ConstantAggregateZero vectors

Reviewers: qcolombet, aditya_nandakumar, dsanders, ab, t.p.northover, javed.absar

Reviewed By: qcolombet

Subscribers: dberris, rovka, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30259

llvm-svn: 297509

											
										
										
											2017-03-11 05:23:13 +08:00
+								  else if (auto CAZ = dyn_cast<ConstantAggregateZero>(&C)) {
 								    if (!CAZ->getType()->isVectorTy())
 								      return false;
-												[GlobalISel] IRTranslator: Return the scalar for <1 x Ty> constant vectors

Summary:
<1 x Ty> is not a legal vector type in LLT, we shouldn’t build G_MERGE_VALUES
instruction for them.

Reviewers: qcolombet, aditya_nandakumar, dsanders, t.p.northover, ab, javed.absar

Reviewed By: qcolombet

Subscribers: dberris, rovka, kristof.beyls, llvm-commits

Differential Revision: https://reviews.llvm.org/D30948

llvm-svn: 297792

											
										
										
											2017-03-15 07:45:06 +08:00
+								    // Return the scalar if it is a <1 x Ty> vector.
 								    if (CAZ->getNumElements() == 1)
-												[GlobalISel][IRTranslator] Fix <1 x Ty> handling in ConstantExprs

Summary:
ConstantExprs involving operations on <1 x Ty> could translate into MIR
that failed to verify with:
*** Bad machine code: Reading virtual register without a def ***

The problem was that translate(const Constant &C, Register Reg) had
recursive calls that passed the same Reg in for the translation of a
subexpression, but without updating VMap for the subexpression first as
translate(const Constant &C, Register Reg) expects.

Fix this by using the same translateCopy helper function that we use for
translating Instructions. In some cases this causes extra G_COPY
MIR instructions to be generated.

Fixes https://bugs.llvm.org/show_bug.cgi?id=45576

Reviewers: arsenm, volkan, t.p.northover, aditya_nandakumar

Subscribers: jvesely, wdng, nhaehnle, rovka, hiraditya, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D78378

											
										
										
											2020-04-17 23:17:26 +08:00
+								      return translateCopy(C, *CAZ->getElementValue(0u), *EntryBuilder.get());
-												CodeGen: Introduce a class for registers

Avoids using a plain unsigned for registers throughout codegen.
Doesn't attempt to change every register use, just something a little
more than the set needed to build after changing the return type of
MachineOperand::getReg().

llvm-svn: 364191

											
										
										
											2019-06-24 23:50:29 +08:00
+								    SmallVector<Register, 4> Ops;
-												GlobalISel: Translate ConstantAggregateZero vectors

Reviewers: qcolombet, aditya_nandakumar, dsanders, ab, t.p.northover, javed.absar

Reviewed By: qcolombet

Subscribers: dberris, rovka, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30259

llvm-svn: 297509

											
										
										
											2017-03-11 05:23:13 +08:00
+								    for (unsigned i = 0; i < CAZ->getNumElements(); ++i) {
 								      Constant &Elt = *CAZ->getElementValue(i);
 								      Ops.push_back(getOrCreateVReg(Elt));
 								    }
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								    EntryBuilder->buildBuildVector(Reg, Ops);
-												GlobalISel: Translate ConstantDataVector

Reviewers: qcolombet, aditya_nandakumar, dsanders, t.p.northover, javed.absar, ab

Reviewed By: qcolombet, dsanders, ab

Subscribers: dberris, rovka, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30216

llvm-svn: 297670

											
										
										
											2017-03-14 05:36:19 +08:00
+								  } else if (auto CV = dyn_cast<ConstantDataVector>(&C)) {
-												[GlobalISel] IRTranslator: Return the scalar for <1 x Ty> constant vectors

Summary:
<1 x Ty> is not a legal vector type in LLT, we shouldn’t build G_MERGE_VALUES
instruction for them.

Reviewers: qcolombet, aditya_nandakumar, dsanders, t.p.northover, ab, javed.absar

Reviewed By: qcolombet

Subscribers: dberris, rovka, kristof.beyls, llvm-commits

Differential Revision: https://reviews.llvm.org/D30948

llvm-svn: 297792

											
										
										
											2017-03-15 07:45:06 +08:00
+								    // Return the scalar if it is a <1 x Ty> vector.
 								    if (CV->getNumElements() == 1)
-												[GlobalISel][IRTranslator] Fix <1 x Ty> handling in ConstantExprs

Summary:
ConstantExprs involving operations on <1 x Ty> could translate into MIR
that failed to verify with:
*** Bad machine code: Reading virtual register without a def ***

The problem was that translate(const Constant &C, Register Reg) had
recursive calls that passed the same Reg in for the translation of a
subexpression, but without updating VMap for the subexpression first as
translate(const Constant &C, Register Reg) expects.

Fix this by using the same translateCopy helper function that we use for
translating Instructions. In some cases this causes extra G_COPY
MIR instructions to be generated.

Fixes https://bugs.llvm.org/show_bug.cgi?id=45576

Reviewers: arsenm, volkan, t.p.northover, aditya_nandakumar

Subscribers: jvesely, wdng, nhaehnle, rovka, hiraditya, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D78378

											
										
										
											2020-04-17 23:17:26 +08:00
+								      return translateCopy(C, *CV->getElementAsConstant(0),
 								                           *EntryBuilder.get());
-												CodeGen: Introduce a class for registers

Avoids using a plain unsigned for registers throughout codegen.
Doesn't attempt to change every register use, just something a little
more than the set needed to build after changing the return type of
MachineOperand::getReg().

llvm-svn: 364191

											
										
										
											2019-06-24 23:50:29 +08:00
+								    SmallVector<Register, 4> Ops;
-												GlobalISel: Translate ConstantDataVector

Reviewers: qcolombet, aditya_nandakumar, dsanders, t.p.northover, javed.absar, ab

Reviewed By: qcolombet, dsanders, ab

Subscribers: dberris, rovka, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30216

llvm-svn: 297670

											
										
										
											2017-03-14 05:36:19 +08:00
+								    for (unsigned i = 0; i < CV->getNumElements(); ++i) {
 								      Constant &Elt = *CV->getElementAsConstant(i);
 								      Ops.push_back(getOrCreateVReg(Elt));
 								    }
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								    EntryBuilder->buildBuildVector(Reg, Ops);
-												GlobalISel: Translate ConstantAggregateZero vectors

Reviewers: qcolombet, aditya_nandakumar, dsanders, ab, t.p.northover, javed.absar

Reviewed By: qcolombet

Subscribers: dberris, rovka, llvm-commits, kristof.beyls

Differential Revision: https://reviews.llvm.org/D30259

llvm-svn: 297509

											
										
										
											2017-03-11 05:23:13 +08:00
+								  } else if (auto CE = dyn_cast<ConstantExpr>(&C)) {
-												GlobalISel: support same ConstantExprs as Instructions.

It's more than just inttoptr, but the others can't be tested until we have
support for non-trivial constants (they currently get unavoidably folded to a
ConstantInt).

llvm-svn: 278303

											
										
										
											2016-08-11 07:02:41 +08:00
+								    switch(CE->getOpcode()) {
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								#define HANDLE_INST(NUM, OPCODE, CLASS)                                        \
 								  case Instruction::OPCODE:                                                    \
 								    return translate##OPCODE(*CE, *EntryBuilder.get());
-												GlobalISel: support same ConstantExprs as Instructions.

It's more than just inttoptr, but the others can't be tested until we have
support for non-trivial constants (they currently get unavoidably folded to a
ConstantInt).

llvm-svn: 278303

											
										
										
											2016-08-11 07:02:41 +08:00
+								#include "llvm/IR/Instruction.def"
 								    default:
-												[IRTranslator] Simplify error handling for translating constants. NFC.

We don't need to check whether the fallback path is enabled to return
false. Just do that all the time on error cases, the caller knows (or
at least should know!) how to handle the failing case.

llvm-svn: 297535

											
										
										
											2017-03-11 08:28:33 +08:00
+								      return false;
-												GlobalISel: support same ConstantExprs as Instructions.

It's more than just inttoptr, but the others can't be tested until we have
support for non-trivial constants (they currently get unavoidably folded to a
ConstantInt).

llvm-svn: 278303

											
										
										
											2016-08-11 07:02:41 +08:00
+								    }
-												[GISel]: Add support to translate ConstantVectors

Reviewed by Quentin
https://reviews.llvm.org/D32814

llvm-svn: 302196

											
										
										
											2017-05-05 05:43:12 +08:00
+								  } else if (auto CV = dyn_cast<ConstantVector>(&C)) {
 								    if (CV->getNumOperands() == 1)
-												[GlobalISel][IRTranslator] Fix <1 x Ty> handling in ConstantExprs

Summary:
ConstantExprs involving operations on <1 x Ty> could translate into MIR
that failed to verify with:
*** Bad machine code: Reading virtual register without a def ***

The problem was that translate(const Constant &C, Register Reg) had
recursive calls that passed the same Reg in for the translation of a
subexpression, but without updating VMap for the subexpression first as
translate(const Constant &C, Register Reg) expects.

Fix this by using the same translateCopy helper function that we use for
translating Instructions. In some cases this causes extra G_COPY
MIR instructions to be generated.

Fixes https://bugs.llvm.org/show_bug.cgi?id=45576

Reviewers: arsenm, volkan, t.p.northover, aditya_nandakumar

Subscribers: jvesely, wdng, nhaehnle, rovka, hiraditya, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D78378

											
										
										
											2020-04-17 23:17:26 +08:00
+								      return translateCopy(C, *CV->getOperand(0), *EntryBuilder.get());
-												CodeGen: Introduce a class for registers

Avoids using a plain unsigned for registers throughout codegen.
Doesn't attempt to change every register use, just something a little
more than the set needed to build after changing the return type of
MachineOperand::getReg().

llvm-svn: 364191

											
										
										
											2019-06-24 23:50:29 +08:00
+								    SmallVector<Register, 4> Ops;
-												[GISel]: Add support to translate ConstantVectors

Reviewed by Quentin
https://reviews.llvm.org/D32814

llvm-svn: 302196

											
										
										
											2017-05-05 05:43:12 +08:00
+								    for (unsigned i = 0; i < CV->getNumOperands(); ++i) {
 								      Ops.push_back(getOrCreateVReg(*CV->getOperand(i)));
 								    }
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								    EntryBuilder->buildBuildVector(Reg, Ops);
-												[GlobalISel] Add a G_BLOCK_ADDR opcode to handle IR blockaddress constants.

Differential Revision: https://reviews.llvm.org/D49900

llvm-svn: 338335

											
										
										
											2018-07-31 08:08:50 +08:00
+								  } else if (auto *BA = dyn_cast<BlockAddress>(&C)) {
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								    EntryBuilder->buildBlockAddress(Reg, BA);
-												[IRTranslator] Simplify error handling for translating constants. NFC.

We don't need to check whether the fallback path is enabled to return
false. Just do that all the time on error cases, the caller knows (or
at least should know!) how to handle the failing case.

llvm-svn: 297535

											
										
										
											2017-03-11 08:28:33 +08:00
+								  } else
-												[IRTranslator] Do not abort when the target wants to fall back.

Every pass in the GlobalISel pipeline will need to do something similar.

llvm-svn: 279886

											
										
										
											2016-08-27 07:49:05 +08:00
+								    return false;
-												GlobalISel: first translation support for Constants.

For now put them all in the entry block. This should be correct but may give
poor runtime performance. Hopefully MachineSinking combined with
isReMaterializable can solve those issues, but if not the interface is sound
enough to support alternatives.

llvm-svn: 278168

											
										
										
											2016-08-10 05:28:04 +08:00
-												GlobalISel: support 'undef' constant.

llvm-svn: 278174

											
										
										
											2016-08-10 07:01:30 +08:00
+								  return true;
-												GlobalISel: first translation support for Constants.

For now put them all in the entry block. This should be correct but may give
poor runtime performance. Hopefully MachineSinking combined with
isReMaterializable can solve those issues, but if not the interface is sound
enough to support alternatives.

llvm-svn: 278168

											
										
										
											2016-08-10 05:28:04 +08:00
+								}
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								void IRTranslator::finalizeBasicBlock() {
-												[GlobalISel] Implement bit-test switch table optimization.

This is mostly a straight port from SelectionDAG. We re-use the actual bit-test
analysis part from SwitchLoweringUtils, which was factored out earlier to
support jump-tables.

Differential Revision: https://reviews.llvm.org/D85233

											
										
										
											2020-08-05 01:55:27 +08:00
+								  for (auto &BTB : SL->BitTestCases) {
 								    // Emit header first, if it wasn't already emitted.
 								    if (!BTB.Emitted)
 								      emitBitTestHeader(BTB, BTB.Parent);
 								    BranchProbability UnhandledProb = BTB.Prob;
 								    for (unsigned j = 0, ej = BTB.Cases.size(); j != ej; ++j) {
 								      UnhandledProb -= BTB.Cases[j].ExtraProb;
 								      // Set the current basic block to the mbb we wish to insert the code into
 								      MachineBasicBlock *MBB = BTB.Cases[j].ThisBB;
 								      // If all cases cover a contiguous range, it is not necessary to jump to
 								      // the default block after the last bit test fails. This is because the
 								      // range check during bit test header creation has guaranteed that every
 								      // case here doesn't go outside the range. In this case, there is no need
 								      // to perform the last bit test, as it will always be true. Instead, make
 								      // the second-to-last bit-test fall through to the target of the last bit
 								      // test, and delete the last bit test.
 								      MachineBasicBlock *NextMBB;
 								      if (BTB.ContiguousRange && j + 2 == ej) {
 								        // Second-to-last bit-test with contiguous range: fall through to the
 								        // target of the final bit test.
 								        NextMBB = BTB.Cases[j + 1].TargetBB;
 								      } else if (j + 1 == ej) {
 								        // For the last bit test, fall through to Default.
 								        NextMBB = BTB.Default;
 								      } else {
 								        // Otherwise, fall through to the next bit test.
 								        NextMBB = BTB.Cases[j + 1].ThisBB;
 								      }
 								      emitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j], MBB);
 								      // FIXME delete this block below?
 								      if (BTB.ContiguousRange && j + 2 == ej) {
 								        // Since we're not going to use the final bit test, remove it.
 								        BTB.Cases.pop_back();
 								        break;
 								      }
 								    }
 								    // This is "default" BB. We have two jumps to it. From "header" BB and from
 								    // last "case" BB, unless the latter was skipped.
 								    CFGEdge HeaderToDefaultEdge = {BTB.Parent->getBasicBlock(),
 								                                   BTB.Default->getBasicBlock()};
 								    addMachineCFGPred(HeaderToDefaultEdge, BTB.Parent);
 								    if (!BTB.ContiguousRange) {
 								      addMachineCFGPred(HeaderToDefaultEdge, BTB.Cases.back().ThisBB);
 								    }
 								  }
 								  SL->BitTestCases.clear();
-												[GlobalISel][IRTranslator] Fix some PHI bugs related to jump tables when optimizations are used.

The new switch lowering code that tries to generate jump tables and range checks
were tested at -O0 on arm64, but on -O3 the generic switch lowering code goes to
town on trying to generate optimized lowerings, e.g. multiple jump tables, range
checks etc. This exposed bugs in the way PHI nodes are handled because the CFG
looks even stranger after all of this is done.

llvm-svn: 364613

											
										
										
											2019-06-28 07:56:34 +08:00
+								  for (auto &JTCase : SL->JTCases) {
 								    // Emit header first, if it wasn't already emitted.
 								    if (!JTCase.first.Emitted)
 								      emitJumpTableHeader(JTCase.second, JTCase.first, JTCase.first.HeaderBB);
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG to in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								    emitJumpTable(JTCase.second, JTCase.second.MBB);
-												[GlobalISel][IRTranslator] Fix some PHI bugs related to jump tables when optimizations are used.

The new switch lowering code that tries to generate jump tables and range checks
were tested at -O0 on arm64, but on -O3 the generic switch lowering code goes to
town on trying to generate optimized lowerings, e.g. multiple jump tables, range
checks etc. This exposed bugs in the way PHI nodes are handled because the CFG
looks even stranger after all of this is done.

llvm-svn: 364613

											
										
										
											2019-06-28 07:56:34 +08:00
+								  }
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG to in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								  SL->JTCases.clear();
-												[GlobalISel][IRTranslator] Generate better conditional branch lowering.

This is a port of the functionality from SelectionDAG, which tries to find
a tree of conditions from compares that are then combined using OR or AND,
before using that result as the input to a branch. Instead of naively
lowering the code as is, this change converts that into a sequence of
conditional branches on the sub-expressions of the tree.

Like SelectionDAG, we re-use the case block codegen functionality from
the switch lowering utils, which causes us to generate some different code.
The result of which I've tried to mitigate in earlier combine patches.

Differential Revision: https://reviews.llvm.org/D86665

											
										
										
											2020-08-25 05:10:38 +08:00
 								  for (auto &SwCase : SL->SwitchCases)
 								    emitSwitchCase(SwCase, &CurBuilder->getMBB(), *CurBuilder);
 								  SL->SwitchCases.clear();
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG to in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								}
-												GlobalISel: clear vreg mapping after translating each function

Otherwise we only materialize (shared) constants in the first function they
appear in. This doesn't go well.

llvm-svn: 278351

											
										
										
											2016-08-12 00:21:29 +08:00
+								void IRTranslator::finalizeFunction() {
-												[GlobalISel] More detailed skeleton for the IRTranslator.

llvm-svn: 260456

											
										
										
											2016-02-11 06:59:27 +08:00
+								  // Release the memory used by the different maps we
 								  // needed during the translation.
-												GlobalISel: avoid looking too closely at PHIs when we bail.

The function used to finish off PHIs by adding the relevant basic blocks can
fail if we're aborting and still don't actually have the needed
MachineBasicBlocks. So avoid trying in that case.

llvm-svn: 288727

											
										
										
											2016-12-06 07:10:19 +08:00
+								  PendingPHIs.clear();
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
+								  VMap.reset();
-												GlobalISel: translate stack protector intrinsics

llvm-svn: 285614

											
										
										
											2016-11-01 02:30:59 +08:00
+								  FrameIndices.clear();
-												[GlobalISel] track predecessor mapping during switch lowering.

Correctly populating Machine PHIs relies on knowing exactly how the IR level
CFG was lowered to MachineIR. This needs to be tracked by any translation
phases that meddle (currently only SwitchInst handling).

This reapplies r291973 which was reverted because of testing failures. Fixes:

 + Don't return an ArrayRef to a local temporary.
 + Incorporate Kristof's suggested comment improvements.

llvm-svn: 292278

											
										
										
											2017-01-18 06:13:50 +08:00
+								  MachinePreds.clear();
-												[GISel]: Fix undefined behavior in IRTranslator

Make sure IRTranslator->MachineIRBuilder->DebugLoc doesn't
outlive the DILocation. Clear it at the end of
IRTranslator::runOnMachineFunction

llvm-svn: 303277

											
										
										
											2017-05-18 01:41:55 +08:00
+								  // MachineIRBuilder::DebugLoc can outlive the DILocation it holds. Clear it
 								  // to avoid accessing free’d memory (in runOnMachineFunction) and to avoid
 								  // destroying it twice (in ~IRTranslator() and ~LLVMContext())
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								  EntryBuilder.reset();
 								  CurBuilder.reset();
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG to in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								  FuncInfo.clear();
-												[GlobalISel] Add the proper cmake plumbing.

This patch adds the necessary plumbing to cmake to build the sources related to
GlobalISel.

To build the sources related to GlobalISel, we need to add -DBUILD_GLOBAL_ISEL=ON.
By default, this is OFF, thus GlobalISel sources will not impact people that do
not explicitly opt-in.

Differential Revision: http://reviews.llvm.org/D15983

llvm-svn: 258344

											
										
										
											2016-01-21 04:58:56 +08:00
+								}
-												[AArch64][GlobalISel] Support lowering variadic musttail calls

This adds support for lowering variadic musttail calls. To do this, we have
to...

- Detect a musttail call in a variadic function before attempting to lower the
  call's formal arguments. This is done in the IRTranslator.
- Compute forwarded registers in `lowerFormalArguments`, and add copies for
  those registers.
- Restore the forwarded registers in `lowerTailCall`.

Because there doesn't seem to be any nice way to wrap these up into the outgoing
argument handler, the restore code in `lowerTailCall` is done separately.

Also, irritatingly, you have to make sure that the registers don't overlap with
any passed parameters. Otherwise, the scheduler doesn't know what to do with the
extra copies and asserts.

Add call-translator-variadic-musttail.ll to test this. This is pretty much the
same as the X86 musttail-varargs.ll test. We didn't have as nice of a test to
base this off of, but the idea is the same.

Differential Revision: https://reviews.llvm.org/D68043

llvm-svn: 373226

											
										
										
											2019-10-01 00:49:13 +08:00
+								/// Reports whether \p BB holds a musttail call, but only when the enclosing
 								/// function is variadic (\p IsVarArg); for a non-variadic function the
 								/// answer is always false and the block is not inspected.
 								static bool checkForMustTailInVarArgFn(bool IsVarArg, const BasicBlock &BB) {
 								  if (IsVarArg) {
 								    // Scan from the last instruction towards the first: tail calls usually
 								    // only appear at the end of a block, so this tends to hit early.
 								    for (auto It = BB.rbegin(), End = BB.rend(); It != End; ++It) {
 								      const auto *Call = dyn_cast<CallInst>(&*It);
 								      if (Call && Call->isMustTailCall())
 								        return true;
 								    }
 								  }
 								  return false;
 								}
-												GlobalISel: store the current MachineFunction as direct state. NFC.

Having to ask the MIRBuilder for the current function is a little awkward, and
I'm intending to improve how that's threaded through anyway.

llvm-svn: 288983

											
										
										
											2016-12-08 05:17:47 +08:00
+								bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
 								  MF = &CurMF;
-												MachineFunction: Return reference from getFunction(); NFC

The Function can never be nullptr so we can return a reference.

llvm-svn: 320884

											
										
										
											2017-12-16 06:22:58 +08:00
+								  const Function &F = MF->getFunction();
-												[GlobalISel] Add the necessary plumbing to lower formal arguments.

llvm-svn: 260579

											
										
										
											2016-02-12 03:59:41 +08:00
+								  if (F.empty())
 								    return false;
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								  GISelCSEAnalysisWrapper &Wrapper =
 								      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
 								  // Set the CSEConfig and run the analysis.
 								  GISelCSEInfo *CSEInfo = nullptr;
 								  TPC = &getAnalysis<TargetPassConfig>();
-												[GISel]: Change how CSE is enabled by default for each pass

https://reviews.llvm.org/D57178

Now add a hook in TargetPassConfig to query if CSE needs to be
enabled. By default this hook returns false only for O0 opt level but
this can be overridden by the target.
As a consequence of the default of enabled for non O0, a few tests
needed to be updated to not use CSE (by passing in -O0) to the run
line.

reviewed by: arsenm

llvm-svn: 352126

											
										
										
											2019-01-25 07:11:25 +08:00
+								  bool EnableCSE = EnableCSEInIRTranslator.getNumOccurrences()
 								                       ? EnableCSEInIRTranslator
 								                       : TPC->isGISelCSEEnabled();
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								  if (EnableCSE) {
-												[llvm] Migrate llvm::make_unique to std::make_unique

Now that we've moved to C++14, we no longer need the llvm::make_unique
implementation from STLExtras.h. This patch is a mechanical replacement
of (hopefully) all the llvm::make_unique instances across the monorepo.

llvm-svn: 369013

											
										
										
											2019-08-15 23:54:37 +08:00
+								    EntryBuilder = std::make_unique<CSEMIRBuilder>(CurMF);
-												[GlobalISel] Introduce a CSEConfigBase class to allow targets to define their own CSE configs.

Because CodeGen can't depend on GlobalISel, we need a way to encapsulate the CSE
configs that can be passed between TargetPassConfig and the targets' custom
pass configs. This CSEConfigBase allows targets to create custom CSE configs
which is then used by the GISel passes for the CSEMIRBuilder.

This support will be used in a follow up commit to allow constant-only CSE for
-O0 compiles in D60580.

llvm-svn: 358368

											
										
										
											2019-04-15 12:53:46 +08:00
+								    CSEInfo = &Wrapper.get(TPC->getCSEConfig());
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								    EntryBuilder->setCSEInfo(CSEInfo);
-												[llvm] Migrate llvm::make_unique to std::make_unique

Now that we've moved to C++14, we no longer need the llvm::make_unique
implementation from STLExtras.h. This patch is a mechanical replacement
of (hopefully) all the llvm::make_unique instances across the monorepo.

llvm-svn: 369013

											
										
										
											2019-08-15 23:54:37 +08:00
+								    CurBuilder = std::make_unique<CSEMIRBuilder>(CurMF);
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								    CurBuilder->setCSEInfo(CSEInfo);
 								  } else {
-												[llvm] Migrate llvm::make_unique to std::make_unique

Now that we've moved to C++14, we no longer need the llvm::make_unique
implementation from STLExtras.h. This patch is a mechanical replacement
of (hopefully) all the llvm::make_unique instances across the monorepo.

llvm-svn: 369013

											
										
										
											2019-08-15 23:54:37 +08:00
+								    EntryBuilder = std::make_unique<MachineIRBuilder>();
 								    CurBuilder = std::make_unique<MachineIRBuilder>();
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								  }
-												GlobalISel: store the current MachineFunction as direct state. NFC.

Having to ask the MIRBuilder for the current function is a little awkward, and
I'm intending to improve how that's threaded through anyway.

llvm-svn: 288983

											
										
										
											2016-12-08 05:17:47 +08:00
+								  CLI = MF->getSubtarget().getCallLowering();
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								  CurBuilder->setMF(*MF);
 								  EntryBuilder->setMF(*MF);
-												GlobalISel: store the current MachineFunction as direct state. NFC.

Having to ask the MIRBuilder for the current function is a little awkward, and
I'm intending to improve how that's threaded through anyway.

llvm-svn: 288983

											
										
										
											2016-12-08 05:17:47 +08:00
+								  MRI = &MF->getRegInfo();
-												GlobalISel: implement alloca instruction

llvm-svn: 276433

											
										
										
											2016-07-23 00:59:52 +08:00
+								  DL = &F.getParent()->getDataLayout();
-												[llvm] Migrate llvm::make_unique to std::make_unique

Now that we've moved to C++14, we no longer need the llvm::make_unique
implementation from STLExtras.h. This patch is a mechanical replacement
of (hopefully) all the llvm::make_unique instances across the monorepo.

llvm-svn: 369013

											
										
										
											2019-08-15 23:54:37 +08:00
+								  ORE = std::make_unique<OptimizationRemarkEmitter>(&F);
-												[GlobalISel] Enable usage of BranchProbabilityInfo in IRTranslator.

We weren't using this before, so none of the MachineFunction CFG edges had the
branch probability information added. As a result, block placement later in the
pipeline was flying blind.

This is enabled only with optimizations enabled like SelectionDAG.

Differential Revision: https://reviews.llvm.org/D86824

											
										
										
											2020-08-29 07:21:34 +08:00
+								  const TargetMachine &TM = MF->getTarget();
-												GlobalISel/IRTranslator resetTargetOptions based on function attributes

Update TargetMachine.Options with function attributes before we start
to generate MIR instructions. This allows access to correct function
attributes via TargetMachine.Options (it used to access attributes of
the function that was translated first).
This affects some existing tests with "no-nans-fp-math" attribute.
Follow-up on D87456.

Differential Revision: https://reviews.llvm.org/D87511

											
										
										
											2020-09-15 16:25:38 +08:00
+								  TM.resetTargetOptions(F);
-												[GlobalISel] Enable usage of BranchProbabilityInfo in IRTranslator.

We weren't using this before, so none of the MachineFunction CFG edges had the
branch probability information added. As a result, block placement later in the
pipeline was flying blind.

This is enabled only with optimizations enabled like SelectionDAG.

Differential Revision: https://reviews.llvm.org/D86824

											
										
										
											2020-08-29 07:21:34 +08:00
+								  EnableOpts = OptLevel != CodeGenOpt::None && !skipFunction(F);
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								  FuncInfo.MF = MF;
-												[GlobalISel] Enable usage of BranchProbabilityInfo in IRTranslator.

We weren't using this before, so none of the MachineFunction CFG edges had the
branch probability information added. As a result, block placement later in the
pipeline was flying blind.

This is enabled only with optimizations enabled like SelectionDAG.

Differential Revision: https://reviews.llvm.org/D86824

											
										
										
											2020-08-29 07:21:34 +08:00
+								  if (EnableOpts)
 								    FuncInfo.BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
 								  else
 								    FuncInfo.BPI = nullptr;
-												[GlobalISel] Base implementation for sret demotion.

If the return values can't be lowered to registers
SelectionDAG performs the sret demotion. This patch
contains the basic implementation for the same in
the GlobalISel pipeline.

Furthermore, targets should bring relevant changes
during lowerFormalArguments, lowerReturn and
lowerCall to make use of this feature.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D92953

											
										
										
											2020-12-23 14:52:36 +08:00
+								  FuncInfo.CanLowerReturn = CLI->checkReturnTypeForCallConv(*MF);
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								  const auto &TLI = *MF->getSubtarget().getTargetLowering();
-												[GlobalISel] Enable usage of BranchProbabilityInfo in IRTranslator.

We weren't using this before, so none of the MachineFunction CFG edges had the
branch probability information added. As a result, block placement later in the
pipeline was flying blind.

This is enabled only with optimizations enabled like SelectionDAG.

Differential Revision: https://reviews.llvm.org/D86824

											
										
										
											2020-08-29 07:21:34 +08:00
-												[llvm] Migrate llvm::make_unique to std::make_unique

Now that we've moved to C++14, we no longer need the llvm::make_unique
implementation from STLExtras.h. This patch is a mechanical replacement
of (hopefully) all the llvm::make_unique instances across the monorepo.

llvm-svn: 369013

											
										
										
											2019-08-15 23:54:37 +08:00
+								  SL = std::make_unique<GISelSwitchLowering>(this, FuncInfo);
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
+								  SL->init(TLI, TM, *DL);
-												[GlobalISel] Enable usage of BranchProbabilityInfo in IRTranslator.

We weren't using this before, so none of the MachineFunction CFG edges had the
branch probability information added. As a result, block placement later in the
pipeline was flying blind.

This is enabled only with optimizations enabled like SelectionDAG.

Differential Revision: https://reviews.llvm.org/D86824

											
										
										
											2020-08-29 07:21:34 +08:00
-												GlobalISel: implement alloca instruction

llvm-svn: 276433

											
										
										
											2016-07-23 00:59:52 +08:00
-												GlobalISel: clear pending phis after MachineFunction translated

Test is just reordering the existing functions (it would trigger for any
function after one with a phi).

llvm-svn: 277841

											
										
										
											2016-08-06 01:50:36 +08:00
+								  assert(PendingPHIs.empty() && "stale PHIs");
-												[GlobalISel] Disable GISel for big endian.

This is due to PR26161 needing to be resolved before we can fix
big endian bugs like PR35359. The work to split aggregates into smaller LLTs
instead of using one large scalar will take some time, so in the meantime
we'll fall back to SDAG.

Some ARM BE tests xfailed for now as a result.

Differential Revision: https://reviews.llvm.org/D40789

llvm-svn: 320388

											
										
										
											2017-12-12 00:58:29 +08:00
+								  if (!DL->isLittleEndian()) {
 								    // Currently we don't properly handle big endian code.
 								    OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
-												MachineFunction: Return reference from getFunction(); NFC

The Function can never be nullptr so we can return a reference.

llvm-svn: 320884

											
										
										
											2017-12-16 06:22:58 +08:00
+								                               F.getSubprogram(), &F.getEntryBlock());
-												[GlobalISel] Disable GISel for big endian.

This is due to PR26161 needing to be resolved before we can fix
big endian bugs like PR35359. The work to split aggregates into smaller LLTs
instead of using one large scalar will take some time, so in the mean time
we'll fall back to SDAG.

Some ARM BE tests xfailed for now as a result.

Differential Revision: https://reviews.llvm.org/D40789

llvm-svn: 320388

											
										
										
											2017-12-12 00:58:29 +08:00
+								    R << "unable to translate in big endian mode";
 								    reportTranslationError(*MF, *TPC, *ORE, R);
 								  }
-												[GlobalISel] Finalize translated function on scope exit. NFC.

This is the compromise between having a per-function IRTranslator
and manually managing the per-function state.

llvm-svn: 296046

											
										
										
											2017-02-24 07:57:28 +08:00
+								  // Release the per-function state when we return, whether we succeeded or not.
 								  auto FinalizeOnReturn = make_scope_exit([this]() { finalizeFunction(); });
-												[GlobalISel] Preserve IR block layout.

It makes the output function layout more predictable;  the layout has
an effect on performance, we don't want it to be at the mercy of the
translator's visitation order and such.
The predictable output is also easier to digest.

getOrCreateBB isn't appropriately named anymore, as it never needs to
create anything.  Rename it and extract the MBB creation logic out of it.

A couple tests were sensitive to the order. Update them.

llvm-svn: 297868

											
										
										
											2017-03-16 02:22:33 +08:00
+								  // Setup a separate basic-block for the arguments and constants
-												GlobalISel: store the current MachineFunction as direct state. NFC.

Having to ask the MIRBuilder for the current function is a little awkward, and
I'm intending to improve how that's threaded through anyway.

llvm-svn: 288983

											
										
										
											2016-12-08 05:17:47 +08:00
+								  MachineBasicBlock *EntryBB = MF->CreateMachineBasicBlock();
 								  MF->push_back(EntryBB);
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								  EntryBuilder->setMBB(*EntryBB);
-												GlobalISel: simplify MachineIRBuilder interface.

MachineIRBuilder had weird before/after and beginning/end flags for the insert
point. Unfortunately the non-default means that instructions will be inserted
in reverse order which is almost never what anyone wants.

Really, I think we just want (like IRBuilder has) the ability to insert at any
C++ iterator-style point (i.e. before any instruction or before MBB.end()). So
this fixes MIRBuilders to behave like IRBuilders in this respect.

llvm-svn: 288980

											
										
										
											2016-12-08 05:05:38 +08:00
-												GlobalISel: support swifterror attribute on AArch64.

swifterror marks an argument as a register pretending to be a pointer, so we
need a guaranteed mem2reg-like analysis of its uses. Fortunately most of the
infrastructure can be reused from the DAG world.

llvm-svn: 361608

											
										
										
											2019-05-24 16:40:13 +08:00
+								  DebugLoc DbgLoc = F.getEntryBlock().getFirstNonPHI()->getDebugLoc();
 								  SwiftError.setFunction(CurMF);
 								  SwiftError.createEntriesInEntryBlock(DbgLoc);
-												[AArch64][GlobalISel] Support lowering variadic musttail calls

This adds support for lowering variadic musttail calls. To do this, we have
to...

- Detect a musttail call in a variadic function before attempting to lower the
  call's formal arguments. This is done in the IRTranslator.
- Compute forwarded registers in `lowerFormalArguments`, and add copies for
  those registers.
- Restore the forwarded registers in `lowerTailCall`.

Because there doesn't seem to be any nice way to wrap these up into the outgoing
argument handler, the restore code in `lowerTailCall` is done separately.

Also, irritatingly, you have to make sure that the registers don't overlap with
any passed parameters. Otherwise, the scheduler doesn't know what to do with the
extra copies and asserts.

Add call-translator-variadic-musttail.ll to test this. This is pretty much the
same as the X86 musttail-varargs.ll test. We didn't have as nice of a test to
base this off of, but the idea is the same.

Differential Revision: https://reviews.llvm.org/D68043

llvm-svn: 373226

											
										
										
											2019-10-01 00:49:13 +08:00
+								  bool IsVarArg = F.isVarArg();
 								  bool HasMustTailInVarArgFn = false;
-												[GlobalISel] Preserve IR block layout.

It makes the output function layout more predictable;  the layout has
an effect on performance, we don't want it to be at the mercy of the
translator's visitation order and such.
The predictable output is also easier to digest.

getOrCreateBB isn't appropriately named anymore, as it never needs to
create anything.  Rename it and extract the MBB creation logic out of it.

A couple tests were sensitive to the order. Update them.

llvm-svn: 297868

											
										
										
											2017-03-16 02:22:33 +08:00
+								  // Create all blocks, in IR order, to preserve the layout.
 								  for (const BasicBlock &BB: F) {
 								    auto *&MBB = BBToMBB[&BB];
 								    MBB = MF->CreateMachineBasicBlock(&BB);
 								    MF->push_back(MBB);
 								    if (BB.hasAddressTaken())
 								      MBB->setHasAddressTaken();
-												[AArch64][GlobalISel] Support lowering variadic musttail calls

This adds support for lowering variadic musttail calls. To do this, we have
to...

- Detect a musttail call in a variadic function before attempting to lower the
  call's formal arguments. This is done in the IRTranslator.
- Compute forwarded registers in `lowerFormalArguments`, and add copies for
  those registers.
- Restore the forwarded registers in `lowerTailCall`.

Because there doesn't seem to be any nice way to wrap these up into the outgoing
argument handler, the restore code in `lowerTailCall` is done separately.

Also, irritatingly, you have to make sure that the registers don't overlap with
any passed parameters. Otherwise, the scheduler doesn't know what to do with the
extra copies and asserts.

Add call-translator-variadic-musttail.ll to test this. This is pretty much the
same as the X86 musttail-varargs.ll test. We didn't have as nice of a test to
base this off of, but the idea is the same.

Differential Revision: https://reviews.llvm.org/D68043

llvm-svn: 373226

											
										
										
											2019-10-01 00:49:13 +08:00
 								    if (!HasMustTailInVarArgFn)
 								      HasMustTailInVarArgFn = checkForMustTailInVarArgFn(IsVarArg, BB);
-												[GlobalISel] Preserve IR block layout.

It makes the output function layout more predictable;  the layout has
an effect on performance, we don't want it to be at the mercy of the
translator's visitation order and such.
The predictable output is also easier to digest.

getOrCreateBB isn't appropriately named anymore, as it never needs to
create anything.  Rename it and extract the MBB creation logic out of it.

A couple tests were sensitive to the order. Update them.

llvm-svn: 297868

											
										
										
											2017-03-16 02:22:33 +08:00
+								  }
-												[AArch64][GlobalISel] Support lowering variadic musttail calls

This adds support for lowering variadic musttail calls. To do this, we have
to...

- Detect a musttail call in a variadic function before attempting to lower the
  call's formal arguments. This is done in the IRTranslator.
- Compute forwarded registers in `lowerFormalArguments`, and add copies for
  those registers.
- Restore the forwarded registers in `lowerTailCall`.

Because there doesn't seem to be any nice way to wrap these up into the outgoing
argument handler, the restore code in `lowerTailCall` is done separately.

Also, irritatingly, you have to make sure that the registers don't overlap with
any passed parameters. Otherwise, the scheduler doesn't know what to do with the
extra copies and asserts.

Add call-translator-variadic-musttail.ll to test this. This is pretty much the
same as the X86 musttail-varargs.ll test. We didn't have as nice of a test to
base this off of, but the idea is the same.

Differential Revision: https://reviews.llvm.org/D68043

llvm-svn: 373226

											
										
										
											2019-10-01 00:49:13 +08:00
+								  MF->getFrameInfo().setHasMustTailInVarArgFunc(HasMustTailInVarArgFn);
-												[GlobalISel] Preserve IR block layout.

It makes the output function layout more predictable;  the layout has
an effect on performance, we don't want it to be at the mercy of the
translator's visitation order and such.
The predictable output is also easier to digest.

getOrCreateBB isn't appropriately named anymore, as it never needs to
create anything.  Rename it and extract the MBB creation logic out of it.

A couple tests were sensitive to the order. Update them.

llvm-svn: 297868

											
										
										
											2017-03-16 02:22:33 +08:00
+								  // Make our arguments/constants entry block fallthrough to the IR entry block.
 								  EntryBB->addSuccessor(&getMBB(F.front()));
-												[AArch64][GlobalISel] Fall back if disabling neon/fp in the translator.

The previous technique relied on early-exiting the legalizer predicate
initialization, leaving an empty rule table. That causes a fallback
for most instructions, but some have legacy rules defined like G_ZEXT
which can try to continue, but then crash.

We should fall back earlier, in the translator, to avoid this issue.

Differential Revision: https://reviews.llvm.org/D98730

											
										
										
											2021-03-17 02:56:32 +08:00
+								  if (CLI->fallBackToDAGISel(*MF)) {
-												[SVE][CodeGen] Fix bug when falling back to DAG ISel

In an earlier commit 584d0d5c1749c13625a5d322178ccb4121eea610 I
added functionality to allow AArch64 CodeGen support for falling
back to DAG ISel when Global ISel encounters scalable vector
types. However, it seems that we were not falling back early
enough as llvm::getLLTForType was still being invoked for scalable
vector types.

I've added a new fallback function to the call lowering class in
order to catch this problem early enough, rather than wait for
lowerFormalArguments to reject scalable vector types.

Differential Revision: https://reviews.llvm.org/D82524

											
										
										
											2020-06-25 15:19:49 +08:00
+								    OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
 								                               F.getSubprogram(), &F.getEntryBlock());
 								    R << "unable to lower function: " << ore::NV("Prototype", F.getType());
 								    reportTranslationError(*MF, *TPC, *ORE, R);
 								    return false;
 								  }
-												GlobalISel: simplify MachineIRBuilder interface.

MachineIRBuilder had weird before/after and beginning/end flags for the insert
point. Unfortunately the non-default means that instructions will be inserted
in reverse order which is almost never what anyone wants.

Really, I think we just want (like IRBuilder has) the ability to insert at any
C++ iterator-style point (i.e. before any instruction or before MBB.end()). So
this fixes MIRBuilders to behave like IRBuilders in this respect.

llvm-svn: 288980

											
										
										
											2016-12-08 05:05:38 +08:00
+								  // Lower the actual args into this basic block.
-												[GlobalISel] Accept multiple vregs in lowerFormalArgs

Change the interface of CallLowering::lowerFormalArguments to accept
several virtual registers for each formal argument, instead of just one.
This is a follow-up to D46018.

CallLowering::lowerReturn was similarly refactored in D49660. lowerCall
will be refactored in the same way in follow-up patches.

With this change, we forward the virtual registers generated for
aggregates to CallLowering. Therefore, the target can decide itself
whether it wants to handle them as separate pieces or use one big
register. We also copy the pack/unpackRegs helpers to CallLowering to
facilitate this.

ARM and AArch64 have been updated to use the passed in virtual registers
directly, which means we no longer need to generate so many
merge/extract instructions.

AArch64 seems to have had a bug when lowering e.g. [1 x i8*], which was
put into a s64 instead of a p0. Added a test-case which illustrates the
problem more clearly (it crashes without this patch) and fixed the
existing test-case to expect p0.

AMDGPU has been updated to unpack into the virtual registers for
kernels. I think the other code paths fall back for aggregates, so this
should be NFC.

Mips doesn't support aggregates yet, so it's also NFC.

x86 seems to have code for dealing with aggregates, but I couldn't find
the tests for it, so I just added a fallback to DAGISel if we get more
than one virtual register for an argument.

Differential Revision: https://reviews.llvm.org/D63549

llvm-svn: 364510

											
										
										
											2019-06-27 16:54:17 +08:00
+								  SmallVector<ArrayRef<Register>, 8> VRegArgs;
-												[GlobalISel][IRTranslator] Fix crash during translation of zero sized loads/stores/args/returns.

This fixes PR35358.

rdar://35619533

Differential Revision: https://reviews.llvm.org/D40604

llvm-svn: 319465

											
										
										
											2017-12-01 04:06:02 +08:00
+								  for (const Argument &Arg: F.args()) {
-												[CodeGen][SVE] Don't drop scalable flag in DAGCombiner::visitEXTRACT_SUBVECTOR

There was a rogue 'assert' in AArch64ISelLowering for the tuple.get intrinsics,
that shouldn't really have been there (I suspect this was a remnant from when
we expected the wider vector always to have come from a vector CONCAT).

When I tried to create a more minimal reproducer, I found a bug in
DAGCombiner where it drops the scalable flag when trying to fold:

      extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')

This patch fixes both issues.

Reviewers: david-arm, efriedma, spatel

Reviewed By: efriedma

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D82910

											
										
										
											2020-07-02 17:06:41 +08:00
+								    if (DL->getTypeStoreSize(Arg.getType()).isZero())
-												[GlobalISel][IRTranslator] Fix crash during translation of zero sized loads/stores/args/returns.

This fixes PR35358.

rdar://35619533

Differential Revision: https://reviews.llvm.org/D40604

llvm-svn: 319465

											
										
										
											2017-12-01 04:06:02 +08:00
+								      continue; // Don't handle zero sized types.
-												[GlobalISel] Accept multiple vregs in lowerFormalArgs

Change the interface of CallLowering::lowerFormalArguments to accept
several virtual registers for each formal argument, instead of just one.
This is a follow-up to D46018.

CallLowering::lowerReturn was similarly refactored in D49660. lowerCall
will be refactored in the same way in follow-up patches.

With this change, we forward the virtual registers generated for
aggregates to CallLowering. Therefore, the target can decide itself
whether it wants to handle them as separate pieces or use one big
register. We also copy the pack/unpackRegs helpers to CallLowering to
facilitate this.

ARM and AArch64 have been updated to use the passed in virtual registers
directly, which means we no longer need to generate so many
merge/extract instructions.

AArch64 seems to have had a bug when lowering e.g. [1 x i8*], which was
put into a s64 instead of a p0. Added a test-case which illustrates the
problem more clearly (it crashes without this patch) and fixed the
existing test-case to expect p0.

AMDGPU has been updated to unpack into the virtual registers for
kernels. I think the other code paths fall back for aggregates, so this
should be NFC.

Mips doesn't support aggregates yet, so it's also NFC.

x86 seems to have code for dealing with aggregates, but I couldn't find
the tests for it, so I just added a fallback to DAGISel if we get more
than one virtual register for an argument.

Differential Revision: https://reviews.llvm.org/D63549

llvm-svn: 364510

											
										
										
											2019-06-27 16:54:17 +08:00
+								    ArrayRef<Register> VRegs = getOrCreateVRegs(Arg);
 								    VRegArgs.push_back(VRegs);
-												GlobalISel: support swifterror attribute on AArch64.

swifterror marks an argument as a register pretending to be a pointer, so we
need a guaranteed mem2reg-like analysis of its uses. Fortunately most of the
infrastructure can be reused from the DAG world.

llvm-svn: 361608

											
										
										
											2019-05-24 16:40:13 +08:00
-												[GlobalISel] Accept multiple vregs in lowerFormalArgs

Change the interface of CallLowering::lowerFormalArguments to accept
several virtual registers for each formal argument, instead of just one.
This is a follow-up to D46018.

CallLowering::lowerReturn was similarly refactored in D49660. lowerCall
will be refactored in the same way in follow-up patches.

With this change, we forward the virtual registers generated for
aggregates to CallLowering. Therefore, the target can decide itself
whether it wants to handle them as separate pieces or use one big
register. We also copy the pack/unpackRegs helpers to CallLowering to
facilitate this.

ARM and AArch64 have been updated to use the passed in virtual registers
directly, which means we no longer need to generate so many
merge/extract instructions.

AArch64 seems to have had a bug when lowering e.g. [1 x i8*], which was
put into a s64 instead of a p0. Added a test-case which illustrates the
problem more clearly (it crashes without this patch) and fixed the
existing test-case to expect p0.

AMDGPU has been updated to unpack into the virtual registers for
kernels. I think the other code paths fall back for aggregates, so this
should be NFC.

Mips doesn't support aggregates yet, so it's also NFC.

x86 seems to have code for dealing with aggregates, but I couldn't find
the tests for it, so I just added a fallback to DAGISel if we get more
than one virtual register for an argument.

Differential Revision: https://reviews.llvm.org/D63549

llvm-svn: 364510

											
										
										
											2019-06-27 16:54:17 +08:00
+								    if (Arg.hasSwiftErrorAttr()) {
 								      assert(VRegs.size() == 1 && "Too many vregs for Swift error");
 								      SwiftError.setCurrentVReg(EntryBB, SwiftError.getFunctionArg(), VRegs[0]);
 								    }
-												[GlobalISel][IRTranslator] Fix crash during translation of zero sized loads/stores/args/returns.

This fixes PR35358.

rdar://35619533

Differential Revision: https://reviews.llvm.org/D40604

llvm-svn: 319465

											
										
										
											2017-12-01 04:06:02 +08:00
+								  }
-												[GlobalISel][IRTranslator] Split aggregates during IR translation.

We currently handle all aggregates by creating one large LLT, and letting the
legalizer deal with splitting them up. However using this approach means that
we can't support big endian code correctly.

This patch changes the way that the IRTranslator deals with aggregate values,
by splitting them up into their constituent element values. To do this, parts
of the translator need to be modified to deal with multiple VRegs for a single
Value.

A new Value to VReg mapper is introduced to help keep compile time under
control, currently there is no measurable impact on CTMark despite the extra
code being generated in some cases.

Patch is based on the original work of Tim Northover.

Differential Revision: https://reviews.llvm.org/D46018

llvm-svn: 332449

											
										
										
											2018-05-16 18:32:02 +08:00
-												[GlobalISel] Base implementation for sret demotion.

If the return values can't be lowered to registers,
SelectionDAG performs the sret demotion. This patch
contains the basic implementation for the same in
the GlobalISel pipeline.

Furthermore, targets should bring relevant changes
during lowerFormalArguments, lowerReturn and
lowerCall to make use of this feature.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D92953

											
										
										
											2020-12-23 14:52:36 +08:00
+								  if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs, FuncInfo)) {
-												[GlobalISel] Use the DISubprogram for translation failure remarks.

Justin added support for DISubprogram locs in r295531 and r296052.
Use that instead of no-loc for constants and arguments.

llvm-svn: 296058

											
										
										
											2017-02-24 08:34:44 +08:00
+								    OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
-												MachineFunction: Return reference from getFunction(); NFC

The Function can never be nullptr so we can return a reference.

llvm-svn: 320884

											
										
										
											2017-12-16 06:22:58 +08:00
+								                               F.getSubprogram(), &F.getEntryBlock());
-												[GlobalISel] Emit opt remarks on isel fallbacks.

Having more fine-grained information on the specific construct that
caused us to fallback is valuable for large-scale data collection.

We still have the fallback warning, that's also used for FastISel.
We still need to remove the fallback warning, and teach FastISel to also
emit remarks (it currently has a combination of the warning, stats, and
debug prints: the remarks could unify all three).

The abort-on-fallback path could also be better handled using remarks:
one could imagine a "-Rpass-error", analogous to "-Werror", which would
promote missed/failed remarks to errors.  It's not clear whether that
would be useful for other remarks though, so we're not there yet.

llvm-svn: 296013

											
										
										
											2017-02-24 05:05:42 +08:00
+								    R << "unable to lower arguments: " << ore::NV("Prototype", F.getType());
 								    reportTranslationError(*MF, *TPC, *ORE, R);
 								    return false;
-												[IRTranslator] Do not abort when the target wants to fall back.

Every pass in the GlobalISel pipeline will need to do something similar.

llvm-svn: 279886

											
										
										
											2016-08-27 07:49:05 +08:00
+								  }
-												[GlobalISel] Add the necessary plumbing to lower formal arguments.

llvm-svn: 260579

											
										
										
											2016-02-12 03:59:41 +08:00
-												[GlobalISel][IRTranslator] Use RPO traversal when visiting blocks to translate.

Previously we were just visiting the blocks in the function in IR order, which
is rather arbitrary. Therefore we wouldn't always visit defs before uses, but
the translation code relies on this assumption in some places.

Only codegen change seen in tests is an elision of a redundant copy.

Fixes PR38396

llvm-svn: 338476

											
										
										
											2018-08-01 10:17:42 +08:00
+								  // Need to visit defs before uses when translating instructions.
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								  GISelObserverWrapper WrapperObserver;
 								  if (EnableCSE && CSEInfo)
 								    WrapperObserver.addObserver(CSEInfo);
-												[globalisel][irtranslator] Verify that DILocations aren't lost in translation

Summary:
Also fix a couple bugs where DILocations are lost. EntryBuilder wasn't passing
on debug locations for PHI's, constants, GLOBAL_VALUE, etc.

Reviewers: aprantl, vsk, bogner, aditya_nandakumar, volkan, rtereshin, aemerson

Reviewed By: aemerson

Subscribers: aemerson, rovka, kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D53740

llvm-svn: 345743

											
										
										
											2018-11-01 01:31:23 +08:00
+								  {
 								    ReversePostOrderTraversal<const Function *> RPOT(&F);
 								#ifndef NDEBUG
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								    DILocationVerifier Verifier;
 								    WrapperObserver.addObserver(&Verifier);
-												[globalisel][irtranslator] Verify that DILocations aren't lost in translation

Summary:
Also fix a couple bugs where DILocations are lost. EntryBuilder wasn't passing
on debug locations for PHI's, constants, GLOBAL_VALUE, etc.

Reviewers: aprantl, vsk, bogner, aditya_nandakumar, volkan, rtereshin, aemerson

Reviewed By: aemerson

Subscribers: aemerson, rovka, kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D53740

llvm-svn: 345743

											
										
										
											2018-11-01 01:31:23 +08:00
+								#endif // ifndef NDEBUG
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								    RAIIDelegateInstaller DelInstall(*MF, &WrapperObserver);
-												[GlobalISel]: Fix some non determinism exposed in CSE due to not notifying observers about mutations + add verification for CSE

https://reviews.llvm.org/D67133

While investigating some nondeterminism (CSE doesn't produce wrong
code, it just doesn't CSE sometimes) in GISel CSE on an out-of-tree
target, I realized that the core issue was that there was lots of code
that mutates (setReg, setRegClass etc.), but doesn't notify observers
(CSE in this case but this could be any other observer). In order to
make the Observer available in various parts of the code and to avoid
having to thread it through various APIs, the MachineFunction now has the
observer as a field. This allows it to be easily used in helper functions
such as constrainOperandRegClass.
Also added an invariant verification method in CSEInfo which can
catch these issues (when CSE is enabled).

											
										
										
											2020-02-19 06:42:49 +08:00
+								    RAIIMFObserverInstaller ObsInstall(*MF, WrapperObserver);
-												[globalisel][irtranslator] Verify that DILocations aren't lost in translation

Summary:
Also fix a couple bugs where DILocations are lost. EntryBuilder wasn't passing
on debug locations for PHI's, constants, GLOBAL_VALUE, etc.

Reviewers: aprantl, vsk, bogner, aditya_nandakumar, volkan, rtereshin, aemerson

Reviewed By: aemerson

Subscribers: aemerson, rovka, kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D53740

llvm-svn: 345743

											
										
										
											2018-11-01 01:31:23 +08:00
+								    for (const BasicBlock *BB : RPOT) {
 								      MachineBasicBlock &MBB = getMBB(*BB);
 								      // Set the insertion point of all the following translations to
 								      // the end of this basic block.
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								      CurBuilder->setMBB(MBB);
-												[GlobalISel] When a tail call is emitted in a block, stop translating it

This fixes a crash in tail call translation caused by assume and lifetime_end
intrinsics.

It's possible to have instructions other than a return after a tail call which
will still have `Analysis::isInTailCallPosition` return true. (Namely,
lifetime_end and assume intrinsics.)

If we emit a tail call, we should stop translating instructions in the block.
Otherwise, we can end up emitting an extra return, or dead instructions in
general. This makes the verifier unhappy, and is generally unfortunate for
codegen.

This also removes the code from AArch64CallLowering that checks if we have a
tail call when lowering a return. This is covered by the new code now.

Also update call-translator-tail-call.ll to show that we now properly tail call
in the presence of lifetime_end and assume.

Differential Revision: https://reviews.llvm.org/D67415

llvm-svn: 371572

											
										
										
											2019-09-11 07:34:45 +08:00
+								      HasTailCall = false;
-												[globalisel][irtranslator] Verify that DILocations aren't lost in translation

Summary:
Also fix a couple bugs where DILocations are lost. EntryBuilder wasn't passing
on debug locations for PHI's, constants, GLOBAL_VALUE, etc.

Reviewers: aprantl, vsk, bogner, aditya_nandakumar, volkan, rtereshin, aemerson

Reviewed By: aemerson

Subscribers: aemerson, rovka, kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D53740

llvm-svn: 345743

											
										
										
											2018-11-01 01:31:23 +08:00
+								      for (const Instruction &Inst : *BB) {
-												[GlobalISel] When a tail call is emitted in a block, stop translating it

This fixes a crash in tail call translation caused by assume and lifetime_end
intrinsics.

It's possible to have instructions other than a return after a tail call which
will still have `Analysis::isInTailCallPosition` return true. (Namely,
lifetime_end and assume intrinsics.)

If we emit a tail call, we should stop translating instructions in the block.
Otherwise, we can end up emitting an extra return, or dead instructions in
general. This makes the verifier unhappy, and is generally unfortunate for
codegen.

This also removes the code from AArch64CallLowering that checks if we have a
tail call when lowering a return. This is covered by the new code now.

Also update call-translator-tail-call.ll to show that we now properly tail call
in the presence of lifetime_end and assume.

Differential Revision: https://reviews.llvm.org/D67415

llvm-svn: 371572

											
										
										
											2019-09-11 07:34:45 +08:00
+								        // If we translated a tail call in the last step, then we know
 								        // everything after the call is either a return, or something that is
 								        // handled by the call itself. (E.g. a lifetime marker or assume
 								        // intrinsic.) In this case, we should stop translating the block and
 								        // move on.
 								        if (HasTailCall)
 								          break;
-												[globalisel][irtranslator] Verify that DILocations aren't lost in translation

Summary:
Also fix a couple bugs where DILocations are lost. EntryBuilder wasn't passing
on debug locations for PHI's, constants, GLOBAL_VALUE, etc.

Reviewers: aprantl, vsk, bogner, aditya_nandakumar, volkan, rtereshin, aemerson

Reviewed By: aemerson

Subscribers: aemerson, rovka, kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D53740

llvm-svn: 345743

											
										
										
											2018-11-01 01:31:23 +08:00
+								#ifndef NDEBUG
 								        Verifier.setCurrentInst(&Inst);
 								#endif // ifndef NDEBUG
 								        if (translate(Inst))
 								          continue;
 								        OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
 								                                   Inst.getDebugLoc(), BB);
 								        R << "unable to translate instruction: " << ore::NV("Opcode", &Inst);
 								        if (ORE->allowExtraAnalysis("gisel-irtranslator")) {
 								          std::string InstStrStorage;
 								          raw_string_ostream InstStr(InstStrStorage);
 								          InstStr << Inst;
 								          R << ": '" << InstStr.str() << "'";
 								        }
-												[GlobalISel] Only build expensive remarks if they're enabled. NFC.

r313390 taught 'allowExtraAnalysis' to check whether remarks are
enabled at all.  Use that to only do the expensive instruction printing
if they are.

llvm-svn: 313552

											
										
										
											2017-09-19 02:50:09 +08:00
-												[globalisel][irtranslator] Verify that DILocations aren't lost in translation

Summary:
Also fix a couple bugs where DILocations are lost. EntryBuilder wasn't passing
on debug locations for PHI's, constants, GLOBAL_VALUE, etc.

Reviewers: aprantl, vsk, bogner, aditya_nandakumar, volkan, rtereshin, aemerson

Reviewed By: aemerson

Subscribers: aemerson, rovka, kristof.beyls, javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D53740

llvm-svn: 345743

											
										
										
											2018-11-01 01:31:23 +08:00
+								        reportTranslationError(*MF, *TPC, *ORE, R);
 								        return false;
-												[GlobalISel] Only build expensive remarks if they're enabled. NFC.

r313390 taught 'allowExtraAnalysis' to check whether remarks are
enabled at all.  Use that to only do the expensive instruction printing
if they are.

llvm-svn: 313552

											
										
										
											2017-09-19 02:50:09 +08:00
+								      }
-												[GlobalISel][IRTranslator] Change switch table translation to generate jump tables and range checks.

This change makes use of the newly refactored SwitchLoweringUtils code from
SelectionDAG in order to generate jump tables and range checks where appropriate.

Much of this code is ported from SDAG with some modifications. We generate
G_JUMP_TABLE and G_BRJT instructions when JT opportunities are found. This means
that targets which previously relied on the naive one MBB per case stmt
translation will now start falling back until they add support for the new opcodes.

For range checks, we don't generate any previously unused operations. This
just recognizes contiguous ranges of case values and generates a single block per
range. Single case value blocks are just a special case of ranges so we get that
support almost for free.

There are still some optimizations missing that I haven't ported over, and
bit-tests are also unimplemented. This patch series is already complex enough.

Actual arm64 support for selection of jump tables is coming in a later patch.

Differential Revision: https://reviews.llvm.org/D63169

llvm-svn: 364085

											
										
										
											2019-06-22 02:10:38 +08:00
 								      finalizeBasicBlock();
-												[GlobalISel] More detailed skeleton for the IRTranslator.

llvm-svn: 260456

											
										
										
											2016-02-11 06:59:27 +08:00
+								    }
-												[GISel]: Add support for CSEing continuously during GISel passes.

https://reviews.llvm.org/D52803

This patch adds support to continuously CSE instructions during
each of the GISel passes. It consists of a GISelCSEInfo analysis pass
that can be used by the CSEMIRBuilder.

llvm-svn: 351283

											
										
										
											2019-01-16 08:40:37 +08:00
+								#ifndef NDEBUG
 								    WrapperObserver.removeObserver(&Verifier);
 								#endif
-												[GlobalISel] More detailed skeleton for the IRTranslator.

llvm-svn: 260456

											
										
										
											2016-02-11 06:59:27 +08:00
+								  }
-												GlobalISel: freeze reserved regs after IRTranslator.

We can freeze the registers after the MachineFrameInfo has been configured (by
telling it about calls, inline asm, ...). This doesn't happen at all yet, but
will be part of IR translation.

Fixes -verify-machineinstrs assertion.

llvm-svn: 275221

											
										
										
											2016-07-13 06:23:42 +08:00
-												[GlobalISel] Don't translate other blocks when one failed.

We were stopping the translation of the parent block when the
translation of an instruction failed, but we were still trying to
translate the other blocks of the parent function.

Don't do that.

llvm-svn: 296047

											
										
										
											2017-02-24 07:57:36 +08:00
+								  finishPendingPhis();
-												GlobalISel: support swifterror attribute on AArch64.

swifterror marks an argument as a register pretending to be a pointer, so we
need a guaranteed mem2reg-like analysis of its uses. Fortunately most of the
infrastructure can be reused from the DAG world.

llvm-svn: 361608

											
										
										
											2019-05-24 16:40:13 +08:00
+								  SwiftError.propagateVRegs();
-												[GlobalISel] Don't translate other blocks when one failed.

We were stopping the translation of the parent block when the
translation of an instruction failed, but we were still trying to
translate the other blocks of the parent function.

Don't do that.

llvm-svn: 296047

											
										
										
											2017-02-24 07:57:36 +08:00
+								  // Merge the argument lowering and constants block with its single
 								  // successor, the LLVM-IR entry block.  We want the basic block to
 								  // be maximal.
 								  assert(EntryBB->succ_size() == 1 &&
 								         "Custom BB used for lowering should have only one successor");
 								  // Get the successor of the current entry block.
 								  MachineBasicBlock &NewEntryBB = **EntryBB->succ_begin();
 								  assert(NewEntryBB.pred_size() == 1 &&
 								         "LLVM-IR entry block has a predecessor!?");
 								  // Move all the instruction from the current entry block to the
 								  // new entry block.
 								  NewEntryBB.splice(NewEntryBB.begin(), EntryBB, EntryBB->begin(),
 								                    EntryBB->end());
 								  // Update the live-in information for the new entry block.
 								  for (const MachineBasicBlock::RegisterMaskPair &LiveIn : EntryBB->liveins())
 								    NewEntryBB.addLiveIn(LiveIn);
 								  NewEntryBB.sortUniqueLiveIns();
 								  // Get rid of the now empty basic block.
 								  EntryBB->removeSuccessor(&NewEntryBB);
 								  MF->remove(EntryBB);
 								  MF->DeleteMachineBasicBlock(EntryBB);
 								  assert(&MF->front() == &NewEntryBB &&
 								         "New entry wasn't next in the list of basic block!");
-												GlobalISel: IRTranslate PHI instructions

llvm-svn: 277835

											
										
										
											2016-08-06 01:16:40 +08:00
-												CodeGen: Remove pipeline dependencies on StackProtector; NFC

This re-applies r336929 with a fix to accommodate the Mips target
scheduling multiple SelectionDAG instances into the pass pipeline.

PrologEpilogInserter and StackColoring depend on the StackProtector analysis
being alive from the point it is run until PEI, which requires that they are all
scheduled in the same FunctionPassManager. Inserting a (machine) ModulePass
between StackProtector and PEI results in these passes being in separate
FunctionPassManagers and the StackProtector is not available for PEI.

PEI and StackColoring don't use much information from the StackProtector pass,
so transferring the required information to MachineFrameInfo is cleaner than
keeping the StackProtector pass around. This commit moves the SSP layout
information to MFI instead of keeping it in the pass.

This patch set (D37580, D37581, D37582, D37583, D37584, D37585, D37586, D37587)
is a first draft of the pagerando implementation described in
http://lists.llvm.org/pipermail/llvm-dev/2017-June/113794.html.

Patch by Stephen Crane <sjc@immunant.com>

Differential Revision: https://reviews.llvm.org/D49256

llvm-svn: 336964

											
										
										
											2018-07-13 08:08:38 +08:00
+								  // Initialize stack protector information.
 								  StackProtector &SP = getAnalysis<StackProtector>();
 								  SP.copyToMachineFrameInfo(MF->getFrameInfo());
-												[GlobalISel] Add the proper cmake plumbing.

This patch adds the necessary plumbing to cmake to build the sources related to
GlobalISel.

To build the sources related to GlobalISel, we need to add -DBUILD_GLOBAL_ISEL=ON.
By default, this is OFF, thus GlobalISel sources will not impact people that do
not explicitly opt-in.

Differential Revision: http://reviews.llvm.org/D15983

llvm-svn: 258344

											
										
										
											2016-01-21 04:58:56 +08:00
+								  return false;
 								}