AMDGPU: Cleanup immediate folding code

Move code down to use, reorder to avoid hard to follow immediate folding logic. llvm-svn: 287818
2016-11-23 21:51:07 +00:00 · 2016-11-23 21:51:07 +00:00 · a24d84beb9
parent 391c3ea9bc
commit a24d84beb9
1 changed files with 62 additions and 64 deletions
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@ -249,63 +249,16 @@ static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
      return;
  }

-  bool FoldingImm = OpToFold.isImm();
-  APInt Imm;
-
-  if (FoldingImm) {
-    unsigned UseReg = UseOp.getReg();
-    const TargetRegisterClass *UseRC
-      = TargetRegisterInfo::isVirtualRegister(UseReg) ?
-      MRI.getRegClass(UseReg) :
-      TRI.getPhysRegClass(UseReg);
-
-    Imm = APInt(64, OpToFold.getImm());
-
-    const MCInstrDesc &FoldDesc = TII->get(OpToFold.getParent()->getOpcode());
-    const TargetRegisterClass *FoldRC =
-        TRI.getRegClass(FoldDesc.OpInfo[0].RegClass);
-
-    // Split 64-bit constants into 32-bits for folding.
-    if (FoldRC->getSize() == 8 && UseOp.getSubReg()) {
-      if (UseRC->getSize() != 8)
-        return;
-
-      if (UseOp.getSubReg() == AMDGPU::sub0) {
-        Imm = Imm.getLoBits(32);
-      } else {
-        assert(UseOp.getSubReg() == AMDGPU::sub1);
-        Imm = Imm.getHiBits(32);
-      }
-    }
-
-    // In order to fold immediates into copies, we need to change the
-    // copy to a MOV.
-    if (UseMI->getOpcode() == AMDGPU::COPY) {
-      unsigned DestReg = UseMI->getOperand(0).getReg();
-      const TargetRegisterClass *DestRC
-        = TargetRegisterInfo::isVirtualRegister(DestReg) ?
-        MRI.getRegClass(DestReg) :
-        TRI.getPhysRegClass(DestReg);
-
-      unsigned MovOp = TII->getMovOpcode(DestRC);
-      if (MovOp == AMDGPU::COPY)
-        return;
-
-      UseMI->setDesc(TII->get(MovOp));
-      CopiesToReplace.push_back(UseMI);
-    }
-  }
-
  // Special case for REG_SEQUENCE: We can't fold literals into
  // REG_SEQUENCE instructions, so we have to fold them into the
  // uses of REG_SEQUENCE.
-  if (UseMI->getOpcode() == AMDGPU::REG_SEQUENCE) {
+  if (UseMI->isRegSequence()) {
    unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
    unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();

    for (MachineRegisterInfo::use_iterator
-         RSUse = MRI.use_begin(RegSeqDstReg),
-         RSE = MRI.use_end(); RSUse != RSE; ++RSUse) {
+           RSUse = MRI.use_begin(RegSeqDstReg), RSE = MRI.use_end();
+         RSUse != RSE; ++RSUse) {

      MachineInstr *RSUseMI = RSUse->getParent();
      if (RSUse->getSubReg() != RegSeqDstSubReg)
@ -314,29 +267,74 @@ static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
      foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
                  CopiesToReplace, TII, TRI, MRI);
    }
+
    return;
  }

-  const MCInstrDesc &UseDesc = UseMI->getDesc();

-  // Don't fold into target independent nodes.  Target independent opcodes
-  // don't have defined register classes.
-  if (UseDesc.isVariadic() ||
-      UseDesc.OpInfo[UseOpIdx].RegClass == -1)
-    return;
+  bool FoldingImm = OpToFold.isImm();

-  if (FoldingImm) {
-    MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
-    tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
+  // In order to fold immediates into copies, we need to change the
+  // copy to a MOV.
+  if (FoldingImm && UseMI->isCopy()) {
+    unsigned DestReg = UseMI->getOperand(0).getReg();
+    const TargetRegisterClass *DestRC
+      = TargetRegisterInfo::isVirtualRegister(DestReg) ?
+      MRI.getRegClass(DestReg) :
+      TRI.getPhysRegClass(DestReg);
+
+    unsigned MovOp = TII->getMovOpcode(DestRC);
+    if (MovOp == AMDGPU::COPY)
+      return;
+
+    UseMI->setDesc(TII->get(MovOp));
+    CopiesToReplace.push_back(UseMI);
+  } else {
+    const MCInstrDesc &UseDesc = UseMI->getDesc();
+
+    // Don't fold into target independent nodes.  Target independent opcodes
+    // don't have defined register classes.
+    if (UseDesc.isVariadic() ||
+        UseDesc.OpInfo[UseOpIdx].RegClass == -1)
+      return;
+  }
+
+  if (!FoldingImm) {
+    tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
+
+    // FIXME: We could try to change the instruction from 64-bit to 32-bit
+    // to enable more folding opportunites.  The shrink operands pass
+    // already does this.
    return;
  }

-  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
+  APInt Imm(64, OpToFold.getImm());

-  // FIXME: We could try to change the instruction from 64-bit to 32-bit
-  // to enable more folding opportunites.  The shrink operands pass
-  // already does this.
-  return;
+  const MCInstrDesc &FoldDesc = OpToFold.getParent()->getDesc();
+  const TargetRegisterClass *FoldRC =
+    TRI.getRegClass(FoldDesc.OpInfo[0].RegClass);
+
+  // Split 64-bit constants into 32-bits for folding.
+  if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
+    unsigned UseReg = UseOp.getReg();
+    const TargetRegisterClass *UseRC
+      = TargetRegisterInfo::isVirtualRegister(UseReg) ?
+      MRI.getRegClass(UseReg) :
+      TRI.getPhysRegClass(UseReg);
+
+    if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
+      return;
+
+    if (UseOp.getSubReg() == AMDGPU::sub0) {
+      Imm = Imm.getLoBits(32);
+    } else {
+      assert(UseOp.getSubReg() == AMDGPU::sub1);
+      Imm = Imm.getHiBits(32);
+    }
+  }
+
+  MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
+  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
 }

 static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result,