llvm-project/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

//===-- SIFoldOperands.cpp - Fold operands --- ----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "si-fold-operands"
using namespace llvm;

namespace {

/// Machine-function pass that tries to fold the source operand of move and
/// copy instructions directly into the instructions that use their result.
class SIFoldOperands : public MachineFunctionPass {
public:
  /// Pass identifier; defined out of line.
  static char ID;

  /// Constructs the pass and makes sure it is registered.
  SIFoldOperands() : MachineFunctionPass(ID) {
    initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
  }

  /// Human-readable pass name.
  const char *getPassName() const override { return "SI Fold Operands"; }

  /// Declares the analyses this pass needs and what it preserves.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // The pass declares the control-flow graph as preserved.
    AU.setPreservesCFG();
    // NOTE(review): the dominator tree is requested here, but nothing in the
    // visible part of this file queries it -- confirm it is still needed.
    AU.addRequired<MachineDominatorTree>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  /// Entry point; implemented out of line.
  bool runOnMachineFunction(MachineFunction &MF) override;
};

/// Describes one pending fold: operand \c UseOpNo of \c UseMI is to be
/// replaced either by an immediate value or by a register operand.
///
/// For immediates only the value is stored, so the MachineOperand passed to
/// the constructor does not have to outlive the candidate.  For registers the
/// pointer itself is kept and must stay valid until the fold is applied.
struct FoldCandidate {
  /// Instruction whose operand will be rewritten.
  MachineInstr *UseMI;
  /// Index of the operand inside \c UseMI.
  unsigned UseOpNo;
  /// Register operand to fold, or null when folding an immediate.
  MachineOperand *OpToFold;
  /// Immediate to fold; only meaningful when isImm() is true.
  uint64_t ImmToFold;

  /// \param MI     instruction that uses the value being folded.
  /// \param OpNo   operand index inside \p MI.
  /// \param FoldOp operand to fold; must be an immediate or a register.
  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
                UseMI(MI), UseOpNo(OpNo), OpToFold(nullptr), ImmToFold(0) {
    // Both payload members start out in a defined state so that copying a
    // candidate never reads an indeterminate value.
    if (FoldOp->isImm()) {
      ImmToFold = FoldOp->getImm();
    } else {
      assert(FoldOp->isReg());
      OpToFold = FoldOp;
    }
  }

  /// True when this candidate folds an immediate rather than a register.
  bool isImm() const {
    return !OpToFold;
  }
};

} // End anonymous namespace.

// Pass registration.  The command-line name is DEBUG_TYPE
// ("si-fold-operands") and MachineDominatorTree is listed as a dependency.
INITIALIZE_PASS_BEGIN(SIFoldOperands, DEBUG_TYPE,
                      "SI Fold Operands", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(SIFoldOperands, DEBUG_TYPE,
                    "SI Fold Operands", false, false)

// Storage for the pass identifier declared in the class.
char SIFoldOperands::ID = 0;

// Publishes the pass identifier under the llvm namespace.
char &llvm::SIFoldOperandsID = SIFoldOperands::ID;

/// Factory: returns a newly allocated SIFoldOperands pass.
FunctionPass *llvm::createSIFoldOperandsPass() {
  return new SIFoldOperands();
}

/// Returns true if \p Opcode is one of the move-like instructions whose
/// source operand this pass is willing to fold into users.
static bool isSafeToFold(unsigned Opcode) {
  const bool IsVectorMove = Opcode == AMDGPU::V_MOV_B32_e32 ||
                            Opcode == AMDGPU::V_MOV_B32_e64 ||
                            Opcode == AMDGPU::V_MOV_B64_PSEUDO;
  const bool IsScalarMove = Opcode == AMDGPU::S_MOV_B32 ||
                            Opcode == AMDGPU::S_MOV_B64;
  const bool IsCopy = Opcode == AMDGPU::COPY;

  return IsVectorMove || IsScalarMove || IsCopy;
}

/// Applies one recorded fold to the instruction it refers to.
///
/// \param Fold the candidate describing which operand to rewrite and with
///             what (immediate or register).
/// \param TRI  register info forwarded to the virtual-register substitution.
/// \returns true if the operand was rewritten, false if the fold was skipped.
static bool updateOperand(FoldCandidate &Fold,
                          const TargetRegisterInfo &TRI) {
  MachineOperand &Target = Fold.UseMI->getOperand(Fold.UseOpNo);
  assert(Target.isReg());

  // Immediate folds just turn the register operand into an immediate.
  if (Fold.isImm()) {
    Target.ChangeToImmediate(Fold.ImmToFold);
    return true;
  }

  // Register folds are only performed between two virtual registers.
  // FIXME: Handle physical registers.
  MachineOperand *Replacement = Fold.OpToFold;
  const bool BothVirtual =
      TargetRegisterInfo::isVirtualRegister(Target.getReg()) &&
      TargetRegisterInfo::isVirtualRegister(Replacement->getReg());
  if (!BothVirtual)
    return false;

  Target.substVirtReg(Replacement->getReg(), Replacement->getSubReg(), TRI);
  return true;
}

static bool isUseMIInFoldList(const std::vector<FoldCandidate> &FoldList,
                              const MachineInstr *MI) {
  for (auto Candidate : FoldList) {
    if (Candidate.UseMI == MI)
      return true;
  }
  return false;
}

/// Tries to record a fold of \p OpToFold into operand \p OpNo of \p MI.
///
/// If the operand is not legal as-is, two rescue strategies are attempted in
/// order: rewriting v_mac_f32_e64 to v_mad_f32 (only when folding into src2),
/// and commuting the instruction.  Both strategies modify \p MI in place.
///
/// \param FoldList list that receives the new candidate on success.
/// \param MI       instruction to fold into; may be rewritten by this call.
/// \param OpNo     operand index to fold into; taken by value, the index
///                 stored in the candidate may differ after a commute.
/// \param OpToFold operand to fold (immediate or register).
/// \param TII      instruction info used for legality checks and commuting.
/// \returns true if a candidate was appended to \p FoldList.
static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
                             MachineInstr *MI, unsigned OpNo,
                             MachineOperand *OpToFold,
                             const SIInstrInfo *TII) {
  if (!TII->isOperandLegal(MI, OpNo, OpToFold)) {

    // Special case for v_mac_f32_e64 if we are trying to fold into src2
    unsigned Opc = MI->getOpcode();
    if (Opc == AMDGPU::V_MAC_F32_e64 &&
        (int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) {
      // Check if changing this to a v_mad_f32 instruction will allow us to
      // fold the operand.  The descriptor is swapped first so that the
      // recursive call performs its legality check against v_mad_f32.
      MI->setDesc(TII->get(AMDGPU::V_MAD_F32));
      bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII);
      if (FoldAsMAD) {
        // Keep the v_mad_f32 form and drop the tie on the folded operand.
        MI->untieRegOperand(OpNo);
        return true;
      }
      // The fold did not work out as v_mad_f32 either: restore the opcode.
      MI->setDesc(TII->get(Opc));
    }

    // If we are already folding into another operand of MI, then
    // we can't commute the instruction, otherwise we risk making the
    // other fold illegal.
    if (isUseMIInFoldList(FoldList, MI))
      return false;

    // Operand is not legal, so try to commute the instruction to
    // see if this makes it possible to fold.
    unsigned CommuteIdx0;
    unsigned CommuteIdx1;
    bool CanCommute = TII->findCommutedOpIndices(MI, CommuteIdx0, CommuteIdx1);

    // Follow the operand to its position after the commute.  OpNo is left
    // unchanged when it is neither of the two commuted indices.
    if (CanCommute) {
      if (CommuteIdx0 == OpNo)
        OpNo = CommuteIdx1;
      else if (CommuteIdx1 == OpNo)
        OpNo = CommuteIdx0;
    }

    if (!CanCommute || !TII->commuteInstruction(MI))
      return false;

    // NOTE(review): when this check fails the instruction stays commuted;
    // nothing here swaps the operands back -- confirm this is intended.
    if (!TII->isOperandLegal(MI, OpNo, OpToFold))
      return false;
  }

  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
  return true;
}

/// Examines one use of a foldable value and, if possible, records a fold
/// candidate for it in \p FoldList.
///
/// \param OpToFold source operand of the move/copy being folded.
/// \param UseMI    instruction that uses the move/copy result; its
///                 descriptor may be changed by this call.
/// \param UseOpIdx index of the using operand inside \p UseMI.
/// \param FoldList receives any candidates that are found.
/// \param TII      target instruction info.
/// \param TRI      target register info.
/// \param MRI      register info of the enclosing function.
///
/// Uses that are REG_SEQUENCE instructions are not folded into directly;
/// instead the users of the matching sub-register of the REG_SEQUENCE result
/// are visited recursively.
static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
                        unsigned UseOpIdx,
                        std::vector<FoldCandidate> &FoldList,
                        const SIInstrInfo *TII, const SIRegisterInfo &TRI,
                        MachineRegisterInfo &MRI) {
  const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);

  // FIXME: Fold operands with subregs.
  if (UseOp.isReg()) {
    if (UseOp.isImplicit())
      return;
    if (UseOp.getSubReg() && OpToFold.isReg())
      return;
  }

  // Register class of a virtual or physical register.
  auto RegClassOf = [&](unsigned Reg) -> const TargetRegisterClass * {
    if (TargetRegisterInfo::isVirtualRegister(Reg))
      return MRI.getRegClass(Reg);
    return TRI.getPhysRegClass(Reg);
  };

  const bool FoldingImm = OpToFold.isImm();
  APInt Imm;

  if (FoldingImm) {
    const TargetRegisterClass *UseRC = RegClassOf(UseOp.getReg());

    Imm = APInt(64, OpToFold.getImm());

    // Register class of the result operand of the instruction that
    // materializes the immediate.
    const MCInstrDesc &FoldDesc = TII->get(OpToFold.getParent()->getOpcode());
    const TargetRegisterClass *FoldRC =
        TRI.getRegClass(FoldDesc.OpInfo[0].RegClass);

    // Split 64-bit constants into 32-bits for folding.
    const unsigned UseSubReg = UseOp.getSubReg();
    if (FoldRC->getSize() == 8 && UseSubReg) {
      if (UseRC->getSize() != 8)
        return;

      const bool WantLowHalf = UseSubReg == AMDGPU::sub0;
      assert(WantLowHalf || UseSubReg == AMDGPU::sub1);
      Imm = WantLowHalf ? Imm.getLoBits(32) : Imm.getHiBits(32);
    }

    // In order to fold immediates into copies, we need to change the
    // copy to a MOV.  This has to happen before the descriptor of UseMI is
    // inspected further down.
    if (UseMI->getOpcode() == AMDGPU::COPY) {
      const TargetRegisterClass *DestRC =
          RegClassOf(UseMI->getOperand(0).getReg());

      const unsigned MovOp = TII->getMovOpcode(DestRC);
      if (MovOp == AMDGPU::COPY)
        return;

      UseMI->setDesc(TII->get(MovOp));
    }
  }

  // Special case for REG_SEQUENCE: We can't fold literals into
  // REG_SEQUENCE instructions, so we have to fold them into the
  // uses of REG_SEQUENCE.
  if (UseMI->getOpcode() == AMDGPU::REG_SEQUENCE) {
    const unsigned SeqReg = UseMI->getOperand(0).getReg();
    // The sub-register index is the immediate that follows the use operand.
    const unsigned SeqSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();

    MachineRegisterInfo::use_iterator SeqUse = MRI.use_begin(SeqReg);
    const MachineRegisterInfo::use_iterator SeqEnd = MRI.use_end();
    for (; SeqUse != SeqEnd; ++SeqUse) {
      MachineInstr *SeqUseMI = SeqUse->getParent();
      // Only users that read the sub-register we feed are of interest.
      if (SeqUse->getSubReg() == SeqSubReg)
        foldOperand(OpToFold, SeqUseMI, SeqUse.getOperandNo(), FoldList,
                    TII, TRI, MRI);
    }
    return;
  }

  // Don't fold into target independent nodes.  Target independent opcodes
  // don't have defined register classes.
  const MCInstrDesc &UseDesc = UseMI->getDesc();
  if (UseDesc.isVariadic() ||
      UseDesc.OpInfo[UseOpIdx].RegClass == -1)
    return;

  // FIXME: We could try to change the instruction from 64-bit to 32-bit
  // to enable more folding opportunities.  The shrink operands pass
  // already does this.
  if (!FoldingImm) {
    tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
    return;
  }

  // The candidate keeps only the immediate's value, so a temporary operand
  // is sufficient here.
  MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
}

/// Scans every instruction of \p MF for the move-like opcodes accepted by
/// isSafeToFold() and tries to substitute the moved value (operand 1)
/// directly into the users of the result register (operand 0).
///
/// \returns true if at least one use was examined for folding.  Examining a
/// use can already rewrite the using instruction (descriptor change,
/// commuted operands) even when no fold is applied in the end, so the result
/// is deliberately conservative; false means the function was not modified.
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  bool Changed = false;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
                                                  BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (!isSafeToFold(MI.getOpcode()))
        continue;

      unsigned OpSize = TII->getOpSize(MI, 1);
      MachineOperand &OpToFold = MI.getOperand(1);
      bool FoldingImm = OpToFold.isImm();

      // FIXME: We could also be folding things like FrameIndexes and
      // TargetIndexes.
      if (!FoldingImm && !OpToFold.isReg())
        continue;

      // Folding immediates with more than one use will increase program size.
      // FIXME: This will also reduce register usage, which may be better
      // in some cases.  A better heuristic is needed.
      if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) &&
          !MRI.hasOneUse(MI.getOperand(0).getReg()))
        continue;

      // FIXME: Fold operands with subregs.
      if (OpToFold.isReg() &&
          (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
           OpToFold.getSubReg()))
        continue;

      // First collect every fold that is possible, then apply them; the
      // operands themselves are only rewritten in the second loop.
      // NOTE(review): foldOperand may commute UseMI while this use list is
      // being walked; if that re-registers the operand Use refers to, the
      // walk may not cover all uses -- worth confirming.
      std::vector<FoldCandidate> FoldList;
      for (MachineRegisterInfo::use_iterator
           Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
           Use != E; ++Use) {

        MachineInstr *UseMI = Use->getParent();

        foldOperand(OpToFold, UseMI, Use.getOperandNo(), FoldList,
                    TII, TRI, MRI);

        // The call above may have changed UseMI's descriptor or commuted it
        // regardless of whether a candidate was recorded.
        Changed = true;
      }

      for (FoldCandidate &Fold : FoldList) {
        if (updateOperand(Fold, TRI)) {
          // Clear kill flags.  The folded register is now also read at the
          // use, which may come after other instructions that were marked
          // as killing it, so every kill flag of the register is dropped
          // rather than just the one on the mov's own source operand.
          if (!Fold.isImm()) {
            assert(Fold.OpToFold && Fold.OpToFold->isReg());
            MRI.clearKillFlags(Fold.OpToFold->getReg());
          }
          DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
                Fold.UseOpNo << " of " << *Fold.UseMI << '\n');
        }
      }
    }
  }
  return Changed;
}
R600/SI: Add SIFoldOperands pass This pass attempts to fold the source operands of mov and copy instructions into their uses. llvm-svn: 222581 2014-11-22 06:06:37 +08:00			`//===-- SIFoldOperands.cpp - Fold operands --- ----------------------------===//`
			`//`
			`// The LLVM Compiler Infrastructure`
			`//`
			`// This file is distributed under the University of Illinois Open Source`
			`// License. See LICENSE.TXT for details.`
			`//`
			`/// \file`
			`//===----------------------------------------------------------------------===//`
			`//`

			`#include "AMDGPU.h"`
			`#include "AMDGPUSubtarget.h"`
			`#include "SIInstrInfo.h"`
			`#include "llvm/CodeGen/LiveIntervalAnalysis.h"`
			`#include "llvm/CodeGen/MachineDominators.h"`
			`#include "llvm/CodeGen/MachineFunctionPass.h"`
			`#include "llvm/CodeGen/MachineInstrBuilder.h"`
			`#include "llvm/CodeGen/MachineRegisterInfo.h"`
			`#include "llvm/IR/Function.h"`
Re-sort includes with sort-includes.py and insert raw_ostream.h where it's used. llvm-svn: 232998 2015-03-24 03:32:43 +08:00			`#include "llvm/IR/LLVMContext.h"`
R600/SI: Add SIFoldOperands pass This pass attempts to fold the source operands of mov and copy instructions into their uses. llvm-svn: 222581 2014-11-22 06:06:37 +08:00			`#include "llvm/Support/Debug.h"`
Re-sort includes with sort-includes.py and insert raw_ostream.h where it's used. llvm-svn: 232998 2015-03-24 03:32:43 +08:00			`#include "llvm/Support/raw_ostream.h"`
R600/SI: Add SIFoldOperands pass This pass attempts to fold the source operands of mov and copy instructions into their uses. llvm-svn: 222581 2014-11-22 06:06:37 +08:00			`#include "llvm/Target/TargetMachine.h"`

			`#define DEBUG_TYPE "si-fold-operands"`
			`using namespace llvm;`

			`namespace {`

			`class SIFoldOperands : public MachineFunctionPass {`
			`public:`
			`static char ID;`

			`public:`
			`SIFoldOperands() : MachineFunctionPass(ID) {`
			`initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());`
			`}`

			`bool runOnMachineFunction(MachineFunction &MF) override;`

			`const char *getPassName() const override {`
			`return "SI Fold Operands";`
			`}`

			`void getAnalysisUsage(AnalysisUsage &AU) const override {`
			`AU.addRequired<MachineDominatorTree>();`
			`AU.setPreservesCFG();`
			`MachineFunctionPass::getAnalysisUsage(AU);`
			`}`
			`};`

R600/SI: Refactor SIFoldOperands to simplify immediate folding This will make a future patch much less intrusive. llvm-svn: 225358 2015-01-08 01:42:16 +08:00			`struct FoldCandidate {`
			`MachineInstr *UseMI;`
			`unsigned UseOpNo;`
			`MachineOperand *OpToFold;`
			`uint64_t ImmToFold;`

			`FoldCandidate(MachineInstr MI, unsigned OpNo, MachineOperand FoldOp) :`
R600/SI: Commute instructions to enable more folding opportunities llvm-svn: 225410 2015-01-08 06:44:19 +08:00			`UseMI(MI), UseOpNo(OpNo) {`

			`if (FoldOp->isImm()) {`
			`OpToFold = nullptr;`
			`ImmToFold = FoldOp->getImm();`
			`} else {`
			`assert(FoldOp->isReg());`
			`OpToFold = FoldOp;`
			`}`
			`}`
R600/SI: Refactor SIFoldOperands to simplify immediate folding This will make a future patch much less intrusive. llvm-svn: 225358 2015-01-08 01:42:16 +08:00
			`bool isImm() const {`
			`return !OpToFold;`
			`}`
			`};`

R600/SI: Add SIFoldOperands pass This pass attempts to fold the source operands of mov and copy instructions into their uses. llvm-svn: 222581 2014-11-22 06:06:37 +08:00			`} // End anonymous namespace.`

			`INITIALIZE_PASS_BEGIN(SIFoldOperands, DEBUG_TYPE,`
			`"SI Fold Operands", false, false)`
			`INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)`
			`INITIALIZE_PASS_END(SIFoldOperands, DEBUG_TYPE,`
			`"SI Fold Operands", false, false)`

			`char SIFoldOperands::ID = 0;`

			`char &llvm::SIFoldOperandsID = SIFoldOperands::ID;`

			`FunctionPass *llvm::createSIFoldOperandsPass() {`
			`return new SIFoldOperands();`
			`}`

			`static bool isSafeToFold(unsigned Opcode) {`
			`switch(Opcode) {`
			`case AMDGPU::V_MOV_B32_e32:`
			`case AMDGPU::V_MOV_B32_e64:`
R600/SI: Add a V_MOV_B64 pseudo instruction This is used to simplify the SIFoldOperands pass and make it easier to fold immediates. llvm-svn: 225373 2015-01-08 04:27:25 +08:00			`case AMDGPU::V_MOV_B64_PSEUDO:`
R600/SI: Add SIFoldOperands pass This pass attempts to fold the source operands of mov and copy instructions into their uses. llvm-svn: 222581 2014-11-22 06:06:37 +08:00			`case AMDGPU::S_MOV_B32:`
			`case AMDGPU::S_MOV_B64:`
			`case AMDGPU::COPY:`
			`return true;`
			`default:`
			`return false;`
			`}`
			`}`

R600/SI: Refactor SIFoldOperands to simplify immediate folding This will make a future patch much less intrusive. llvm-svn: 225358 2015-01-08 01:42:16 +08:00			`static bool updateOperand(FoldCandidate &Fold,`
R600/SI: Add SIFoldOperands pass This pass attempts to fold the source operands of mov and copy instructions into their uses. llvm-svn: 222581 2014-11-22 06:06:37 +08:00			`const TargetRegisterInfo &TRI) {`
R600/SI: Refactor SIFoldOperands to simplify immediate folding This will make a future patch much less intrusive. llvm-svn: 225358 2015-01-08 01:42:16 +08:00			`MachineInstr *MI = Fold.UseMI;`
			`MachineOperand &Old = MI->getOperand(Fold.UseOpNo);`
R600/SI: Add SIFoldOperands pass This pass attempts to fold the source operands of mov and copy instructions into their uses. llvm-svn: 222581 2014-11-22 06:06:37 +08:00			`assert(Old.isReg());`

R600/SI: Refactor SIFoldOperands to simplify immediate folding This will make a future patch much less intrusive. llvm-svn: 225358 2015-01-08 01:42:16 +08:00			`if (Fold.isImm()) {`
			`Old.ChangeToImmediate(Fold.ImmToFold);`
R600/SI: Add SIFoldOperands pass This pass attempts to fold the source operands of mov and copy instructions into their uses. llvm-svn: 222581 2014-11-22 06:06:37 +08:00			`return true;`
			`}`

R600/SI: Refactor SIFoldOperands to simplify immediate folding This will make a future patch much less intrusive. llvm-svn: 225358 2015-01-08 01:42:16 +08:00			`MachineOperand *New = Fold.OpToFold;`
			`if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&`
			`TargetRegisterInfo::isVirtualRegister(New->getReg())) {`
			`Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);`
R600/SI: Add SIFoldOperands pass This pass attempts to fold the source operands of mov and copy instructions into their uses. llvm-svn: 222581 2014-11-22 06:06:37 +08:00			`return true;`
			`}`

			`// FIXME: Handle physical registers.`

			`return false;`
			`}`

AMDGPU/SI: Select mad patterns to v_mac_f32 The two-address instruction pass will convert these back to v_mad_f32 if necessary. Differential Revision: http://reviews.llvm.org/D11060 llvm-svn: 242038 2015-07-13 23:47:57 +08:00			`static bool isUseMIInFoldList(const std::vector<FoldCandidate> &FoldList,`
			`const MachineInstr *MI) {`
			`for (auto Candidate : FoldList) {`
			`if (Candidate.UseMI == MI)`
			`return true;`
			`}`
			`return false;`
			`}`

R600/SI: Commute instructions to enable more folding opportunities llvm-svn: 225410 2015-01-08 06:44:19 +08:00			`static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,`
			`MachineInstr *MI, unsigned OpNo,`
			`MachineOperand *OpToFold,`
			`const SIInstrInfo *TII) {`
			`if (!TII->isOperandLegal(MI, OpNo, OpToFold)) {`
AMDGPU/SI: Select mad patterns to v_mac_f32 The two-address instruction pass will convert these back to v_mad_f32 if necessary. Differential Revision: http://reviews.llvm.org/D11060 llvm-svn: 242038 2015-07-13 23:47:57 +08:00
			`// Special case for v_mac_f32_e64 if we are trying to fold into src2`
			`unsigned Opc = MI->getOpcode();`
			`if (Opc == AMDGPU::V_MAC_F32_e64 &&`
			`(int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) {`
			`// Check if changing this to a v_mad_f32 instruction will allow us to`
			`// fold the operand.`
			`MI->setDesc(TII->get(AMDGPU::V_MAD_F32));`
			`bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII);`
			`if (FoldAsMAD) {`
			`MI->untieRegOperand(OpNo);`
			`return true;`
			`}`
			`MI->setDesc(TII->get(Opc));`
			`}`

			`// If we are already folding into another operand of MI, then`
			`// we can't commute the instruction, otherwise we risk making the`
			`// other fold illegal.`
			`if (isUseMIInFoldList(FoldList, MI))`
			`return false;`

R600/SI: Commute instructions to enable more folding opportunities llvm-svn: 225410 2015-01-08 06:44:19 +08:00			`// Operand is not legal, so try to commute the instruction to`
			`// see if this makes it possible to fold.`
			`unsigned CommuteIdx0;`
			`unsigned CommuteIdx1;`
			`bool CanCommute = TII->findCommutedOpIndices(MI, CommuteIdx0, CommuteIdx1);`

			`if (CanCommute) {`
			`if (CommuteIdx0 == OpNo)`
			`OpNo = CommuteIdx1;`
			`else if (CommuteIdx1 == OpNo)`
			`OpNo = CommuteIdx0;`
			`}`

			`if (!CanCommute \|\| !TII->commuteInstruction(MI))`
			`return false;`

			`if (!TII->isOperandLegal(MI, OpNo, OpToFold))`
			`return false;`
			`}`

			`FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));`
			`return true;`
			`}`

AMDGPU/SI: Factor operand folding code into its own function Reviewers: arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D12254 llvm-svn: 246353 2015-08-29 07:45:19 +08:00			`static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,`
			`unsigned UseOpIdx,`
			`std::vector<FoldCandidate> &FoldList,`
			`const SIInstrInfo *TII, const SIRegisterInfo &TRI,`
			`MachineRegisterInfo &MRI) {`
			`const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);`

			`// FIXME: Fold operands with subregs.`
			`if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) \|\|`
			`UseOp.isImplicit())) {`
			`return;`
			`}`

			`bool FoldingImm = OpToFold.isImm();`
			`APInt Imm;`

			`if (FoldingImm) {`
			`unsigned UseReg = UseOp.getReg();`
			`const TargetRegisterClass *UseRC`
			`= TargetRegisterInfo::isVirtualRegister(UseReg) ?`
			`MRI.getRegClass(UseReg) :`
			`TRI.getPhysRegClass(UseReg);`

			`Imm = APInt(64, OpToFold.getImm());`

AMDGPU/SI: Fix some invaild assumptions when folding 64-bit immediates Summary: We were assuming tha if the use operand had a sub-register that the immediate was 64-bits, but this was breaking the case of folding a 64-bit immediate into another 64-bit instruction. Reviewers: arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D12255 llvm-svn: 246354 2015-08-29 09:58:21 +08:00			`const MCInstrDesc &FoldDesc = TII->get(OpToFold.getParent()->getOpcode());`
			`const TargetRegisterClass *FoldRC =`
			`TRI.getRegClass(FoldDesc.OpInfo[0].RegClass);`

AMDGPU/SI: Factor operand folding code into its own function Reviewers: arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D12254 llvm-svn: 246353 2015-08-29 07:45:19 +08:00			`// Split 64-bit constants into 32-bits for folding.`
AMDGPU/SI: Fix some invaild assumptions when folding 64-bit immediates Summary: We were assuming tha if the use operand had a sub-register that the immediate was 64-bits, but this was breaking the case of folding a 64-bit immediate into another 64-bit instruction. Reviewers: arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D12255 llvm-svn: 246354 2015-08-29 09:58:21 +08:00			`if (FoldRC->getSize() == 8 && UseOp.getSubReg()) {`
AMDGPU/SI: Factor operand folding code into its own function Reviewers: arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D12254 llvm-svn: 246353 2015-08-29 07:45:19 +08:00			`if (UseRC->getSize() != 8)`
			`return;`

			`if (UseOp.getSubReg() == AMDGPU::sub0) {`
			`Imm = Imm.getLoBits(32);`
			`} else {`
			`assert(UseOp.getSubReg() == AMDGPU::sub1);`
			`Imm = Imm.getHiBits(32);`
			`}`
			`}`

			`// In order to fold immediates into copies, we need to change the`
			`// copy to a MOV.`
			`if (UseMI->getOpcode() == AMDGPU::COPY) {`
			`unsigned DestReg = UseMI->getOperand(0).getReg();`
			`const TargetRegisterClass *DestRC`
			`= TargetRegisterInfo::isVirtualRegister(DestReg) ?`
			`MRI.getRegClass(DestReg) :`
			`TRI.getPhysRegClass(DestReg);`

			`unsigned MovOp = TII->getMovOpcode(DestRC);`
			`if (MovOp == AMDGPU::COPY)`
			`return;`

			`UseMI->setDesc(TII->get(MovOp));`
			`}`
			`}`

AMDGPU/SI: Fold operands through REG_SEQUENCE instructions Summary: This helps mostly when we use add instructions for address calculations that contain immediates. Reviewers: arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D12256 llvm-svn: 247157 2015-09-09 23:43:26 +08:00			`// Special case for REG_SEQUENCE: We can't fold literals into`
			`// REG_SEQUENCE instructions, so we have to fold them into the`
			`// uses of REG_SEQUENCE.`
			`if (UseMI->getOpcode() == AMDGPU::REG_SEQUENCE) {`
			`unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();`
			`unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();`

			`for (MachineRegisterInfo::use_iterator`
			`RSUse = MRI.use_begin(RegSeqDstReg),`
			`RSE = MRI.use_end(); RSUse != RSE; ++RSUse) {`

			`MachineInstr *RSUseMI = RSUse->getParent();`
			`if (RSUse->getSubReg() != RegSeqDstSubReg)`
			`continue;`

			`foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,`
			`TII, TRI, MRI);`
			`}`
			`return;`
			`}`

AMDGPU/SI: Factor operand folding code into its own function Reviewers: arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D12254 llvm-svn: 246353 2015-08-29 07:45:19 +08:00			`const MCInstrDesc &UseDesc = UseMI->getDesc();`

			`// Don't fold into target independent nodes. Target independent opcodes`
			`// don't have defined register classes.`
			`if (UseDesc.isVariadic() \|\|`
			`UseDesc.OpInfo[UseOpIdx].RegClass == -1)`
			`return;`

			`if (FoldingImm) {`
			`MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());`
			`tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);`
			`return;`
			`}`

			`tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);`

			`// FIXME: We could try to change the instruction from 64-bit to 32-bit`
			`// to enable more folding opportunites. The shrink operands pass`
			`// already does this.`
			`return;`
			`}`

R600/SI: Add SIFoldOperands pass This pass attempts to fold the source operands of mov and copy instructions into their uses. llvm-svn: 222581 2014-11-22 06:06:37 +08:00			`bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {`
			`MachineRegisterInfo &MRI = MF.getRegInfo();`
			`const SIInstrInfo *TII =`
			`static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());`
			`const SIRegisterInfo &TRI = TII->getRegisterInfo();`

			`for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();`
			`BI != BE; ++BI) {`

			`MachineBasicBlock &MBB = *BI;`
			`MachineBasicBlock::iterator I, Next;`
			`for (I = MBB.begin(); I != MBB.end(); I = Next) {`
			`Next = std::next(I);`
			`MachineInstr &MI = *I;`

			`if (!isSafeToFold(MI.getOpcode()))`
			`continue;`

R600/SI: Allow f64 inline immediates in i64 operands This requires considering the size of the operand when checking immediate legality. llvm-svn: 229135 2015-02-14 03:05:03 +08:00			`unsigned OpSize = TII->getOpSize(MI, 1);`
R600/SI: Add SIFoldOperands pass This pass attempts to fold the source operands of mov and copy instructions into their uses. llvm-svn: 222581 2014-11-22 06:06:37 +08:00			`MachineOperand &OpToFold = MI.getOperand(1);`
R600/SI: Add pattern for bitcasting fp immediates to integers The backend now assumes that all immediates are integers. This allows us to simplify immediate handling code, becasue we no longer need to handle fp and integer immediates differently. llvm-svn: 225844 2015-01-14 06:59:41 +08:00			`bool FoldingImm = OpToFold.isImm();`
R600/SI: Only fold immediates that have one use Folding the same immediate into multiple instruction will increase program size, which can hurt performance. llvm-svn: 225405 2015-01-08 06:18:27 +08:00
R600/SI: Commute instructions to enable more folding opportunities llvm-svn: 225410 2015-01-08 06:44:19 +08:00			`// FIXME: We could also be folding things like FrameIndexes and`
			`// TargetIndexes.`
			`if (!FoldingImm && !OpToFold.isReg())`
			`continue;`

Fix typo llvm-svn: 227697 2015-02-01 07:37:27 +08:00			`// Folding immediates with more than one use will increase program size.`
R600/SI: Only fold immediates that have one use Folding the same immediate into multiple instruction will increase program size, which can hurt performance. llvm-svn: 225405 2015-01-08 06:18:27 +08:00			`// FIXME: This will also reduce register usage, which may be better`
			`// in some cases. A better heuristic is needed.`
R600/SI: Allow f64 inline immediates in i64 operands This requires considering the size of the operand when checking immediate legality. llvm-svn: 229135 2015-02-14 03:05:03 +08:00			`if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) &&`
R600/SI: Only fold immediates that have one use Folding the same immediate into multiple instruction will increase program size, which can hurt performance. llvm-svn: 225405 2015-01-08 06:18:27 +08:00			`!MRI.hasOneUse(MI.getOperand(0).getReg()))`
			`continue;`
R600/SI: Add SIFoldOperands pass This pass attempts to fold the source operands of mov and copy instructions into their uses. llvm-svn: 222581 2014-11-22 06:06:37 +08:00
			`// FIXME: Fold operands with subregs.`
			`if (OpToFold.isReg() &&`
			`(!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) \|\|`
			`OpToFold.getSubReg()))`
			`continue;`

R600/SI: Refactor SIFoldOperands to simplify immediate folding This will make a future patch much less intrusive. llvm-svn: 225358 2015-01-08 01:42:16 +08:00			`std::vector<FoldCandidate> FoldList;`
R600/SI: Add SIFoldOperands pass This pass attempts to fold the source operands of mov and copy instructions into their uses. llvm-svn: 222581 2014-11-22 06:06:37 +08:00			`for (MachineRegisterInfo::use_iterator`
			`Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();`
			`Use != E; ++Use) {`

			`MachineInstr *UseMI = Use->getParent();`

AMDGPU/SI: Factor operand folding code into its own function Reviewers: arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D12254 llvm-svn: 246353 2015-08-29 07:45:19 +08:00			`foldOperand(OpToFold, UseMI, Use.getOperandNo(), FoldList,`
			`TII, TRI, MRI);`
R600/SI: Add SIFoldOperands pass This pass attempts to fold the source operands of mov and copy instructions into their uses. llvm-svn: 222581 2014-11-22 06:06:37 +08:00			`}`

R600/SI: Refactor SIFoldOperands to simplify immediate folding This will make a future patch much less intrusive. llvm-svn: 225358 2015-01-08 01:42:16 +08:00			`for (FoldCandidate &Fold : FoldList) {`
			`if (updateOperand(Fold, TRI)) {`
R600/SI: Add SIFoldOperands pass This pass attempts to fold the source operands of mov and copy instructions into their uses. llvm-svn: 222581 2014-11-22 06:06:37 +08:00			`// Clear kill flags.`
R600/SI: Refactor SIFoldOperands to simplify immediate folding This will make a future patch much less intrusive. llvm-svn: 225358 2015-01-08 01:42:16 +08:00			`if (!Fold.isImm()) {`
			`assert(Fold.OpToFold && Fold.OpToFold->isReg());`
			`Fold.OpToFold->setIsKill(false);`
			`}`
R600/SI: Add SIFoldOperands pass This pass attempts to fold the source operands of mov and copy instructions into their uses. llvm-svn: 222581 2014-11-22 06:06:37 +08:00			`DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<`
R600/SI: Refactor SIFoldOperands to simplify immediate folding This will make a future patch much less intrusive. llvm-svn: 225358 2015-01-08 01:42:16 +08:00			`Fold.UseOpNo << " of " << *Fold.UseMI << '\n');`
R600/SI: Add SIFoldOperands pass This pass attempts to fold the source operands of mov and copy instructions into their uses. llvm-svn: 222581 2014-11-22 06:06:37 +08:00			`}`
			`}`
			`}`
			`}`
			`return false;`
			`}`