R600/SI: Do abs/neg folding with ComplexPatterns

Abs/neg folding has moved out of foldOperands and into the instruction selection phase using complex patterns. As a consequence of this change, we now prefer to select the 64-bit encoding for most instructions and the modifier operands have been dropped from integer VOP3 instructions. llvm-svn: 214467
2014-08-01 00:32:39 +00:00 · 2014-08-01 00:32:39 +00:00 · b4a313a76f
parent 6655dd699f
commit b4a313a76f
16 changed files with 1069 additions and 696 deletions
--- a/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@ -96,6 +96,9 @@ private:
                         SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                         SDValue &Idxen, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
+  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+                       SDValue &Clamp, SDValue &Omod) const;

  SDNode *SelectADD_SUB_I64(SDNode *N);
  SDNode *SelectDIV_SCALE(SDNode *N);
@ -879,6 +882,38 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr32(SDValue Addr, SDValue &SRsrc,
  return SelectMUBUFScratch(Addr, SRsrc, VAddr, SOffset, Offset);
 }

+bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
+                                        SDValue &SrcMods) const {
+  // Strip an outer FNEG and then an FABS from In, recording each as a modifier bit.
+  unsigned Mods = 0;
+  // Start from the unmodified input; Src is narrowed as wrappers are peeled.
+  Src = In;
+
+  if (Src.getOpcode() == ISD::FNEG) {
+    Mods |= SISrcMods::NEG;
+    Src = Src.getOperand(0);
+  }
+  // FABS is tested after FNEG, so only the fneg(fabs(x)) nesting folds both bits.
+  if (Src.getOpcode() == ISD::FABS) {
+    Mods |= SISrcMods::ABS;
+    Src = Src.getOperand(0);
+  }
+  // Hand the collected bits back as an i32 target constant.
+  SrcMods = CurDAG->getTargetConstant(Mods, MVT::i32);
+  // Always returns true: with no wrapper, Src is In and the modifier constant is 0.
+  return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
+                                         SDValue &SrcMods, SDValue &Clamp,
+                                         SDValue &Omod) const {
+  // FIXME: Handle Clamp and Omod; for now both are always the i32 constant 0.
+  Clamp = CurDAG->getTargetConstant(0, MVT::i32);
+  Omod = CurDAG->getTargetConstant(0, MVT::i32);
+  // Src and SrcMods are filled in by SelectVOP3Mods, whose result is returned.
+  return SelectVOP3Mods(In, Src, SrcMods);
+}
+
 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
    *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
--- a/llvm/lib/Target/R600/AMDGPUInstructions.td
+++ b/llvm/lib/Target/R600/AMDGPUInstructions.td
@ -323,6 +323,14 @@ def atomic_cmp_swap_64_local :
         AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
 }]>;

+//===----------------------------------------------------------------------===//
+// Misc Pattern Fragments
+//===----------------------------------------------------------------------===//
+
+def fmad : PatFrag <  // Matches a multiply-add written as fadd(fmul(src0, src1), src2).
+  (ops node:$src0, node:$src1, node:$src2),
+  (fadd (fmul node:$src0, node:$src1), node:$src2)
+>;

 class Constants {
 int TWO_PI = 0x40c90fdb;
--- a/llvm/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/llvm/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
@ -14,6 +14,7 @@
 //===----------------------------------------------------------------------===//

 #include "AMDGPU.h"
+#include "SIDefines.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
 #include "MCTargetDesc/AMDGPUFixupKinds.h"
@ -84,6 +85,15 @@ MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,

 bool SIMCCodeEmitter::isSrcOperand(const MCInstrDesc &Desc,
                                   unsigned OpNo) const {
+  // FIXME: We need a better way to figure out which operands can be immediate
+  // values
+  //
+  // Some VOP* instructions like ADDC use VReg32 as the register class
+  // for source 0, because they read VCC and can't take an SGPR as an
+  // argument due to constant bus restrictions.
+  if (OpNo == 1 && (Desc.TSFlags & (SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
+                                    SIInstrFlags::VOPC)))
+    return true;

  unsigned RegClass = Desc.OpInfo[OpNo].RegClass;
  return (AMDGPU::SSrc_32RegClassID == RegClass) ||
--- a/llvm/lib/Target/R600/SIISelLowering.cpp
+++ b/llvm/lib/Target/R600/SIISelLowering.cpp
@ -566,8 +566,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
      .addReg(MI->getOperand(1).getReg())
      .addImm(1)  // SRC1 modifiers
      .addReg(MI->getOperand(2).getReg())
-      .addImm(0)  // SRC2 modifiers
-      .addImm(0)  // src2
      .addImm(0)  // CLAMP
      .addImm(0); // OMOD
    MI->eraseFromParent();
@ -1636,40 +1634,24 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
      continue;
    if (!Operand.isMachineOpcode())
      continue;
-    if (Operand.getMachineOpcode() == AMDGPU::FNEG_SI) {
-      Ops.pop_back();
-      Ops.push_back(Operand.getOperand(0));
-      InputModifiers[i] = 1;
-      Promote2e64 = true;
-      if (!DescE64)
-        continue;
-      Desc = DescE64;
-      DescE64 = nullptr;
-    }
-    else if (Operand.getMachineOpcode() == AMDGPU::FABS_SI) {
-      Ops.pop_back();
-      Ops.push_back(Operand.getOperand(0));
-      InputModifiers[i] = 2;
-      Promote2e64 = true;
-      if (!DescE64)
-        continue;
-      Desc = DescE64;
-      DescE64 = nullptr;
-    }
  }

  if (Promote2e64) {
    std::vector<SDValue> OldOps(Ops);
    Ops.clear();
+    bool HasModifiers = TII->hasModifiers(Desc->Opcode);
    for (unsigned i = 0; i < OldOps.size(); ++i) {
      // src_modifier
+      if (HasModifiers)
        Ops.push_back(DAG.getTargetConstant(InputModifiers[i], MVT::i32));
      Ops.push_back(OldOps[i]);
    }
    // Add the modifier flags while promoting
+    if (HasModifiers) {
      for (unsigned i = 0; i < 2; ++i)
        Ops.push_back(DAG.getTargetConstant(0, MVT::i32));
    }
+  }

  // Add optional chain and glue
  for (unsigned i = NumOps - NumDefs, e = Node->getNumOperands(); i < e; ++i)
--- a/llvm/lib/Target/R600/SIInstrFormats.td
+++ b/llvm/lib/Target/R600/SIInstrFormats.td
@ -61,9 +61,16 @@ class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
  let mayStore = 0;
  let hasSideEffects = 0;
  let UseNamedOperandTable = 1;
+  // Using complex patterns gives VOP3 patterns a very high complexity rating,
+  // but standalone patterns are almost always preferred, so we need to adjust the
+  // priority lower.  The goal is to use a high number to reduce complexity to
+  // zero (or less than zero).
+  let AddedComplexity = -1000;
+
  let VOP3 = 1;

  int Size = 8;
+  let Uses = [EXEC];
 }

 //===----------------------------------------------------------------------===//
--- a/llvm/lib/Target/R600/SIInstrInfo.cpp
+++ b/llvm/lib/Target/R600/SIInstrInfo.cpp
@ -488,12 +488,19 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
      return nullptr;
    }

-    // XXX: Commute VOP3 instructions with abs and neg set.
-    if (isVOP3(MI->getOpcode()) &&
-        (MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
-                        AMDGPU::OpName::abs)).getImm() ||
-         MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
-                        AMDGPU::OpName::neg)).getImm()))
+    // XXX: Commute VOP3 instructions with abs and neg set.
+    const MachineOperand *Abs = getNamedOperand(*MI, AMDGPU::OpName::abs);
+    const MachineOperand *Neg = getNamedOperand(*MI, AMDGPU::OpName::neg);
+    const MachineOperand *Src0Mods = getNamedOperand(*MI,
+                                          AMDGPU::OpName::src0_modifiers);
+    const MachineOperand *Src1Mods = getNamedOperand(*MI,
+                                          AMDGPU::OpName::src1_modifiers);
+    const MachineOperand *Src2Mods = getNamedOperand(*MI,
+                                          AMDGPU::OpName::src2_modifiers);
+
+    if ((Abs && Abs->getImm()) || (Neg && Neg->getImm()) ||
+        (Src0Mods && Src0Mods->getImm()) || (Src1Mods && Src1Mods->getImm()) ||
+        (Src2Mods && Src2Mods->getImm()))
      return nullptr;

    unsigned Reg = MI->getOperand(1).getReg();
@ -672,6 +679,14 @@ bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
  return AMDGPU::getVOPe32(Opcode) != -1;
 }

+bool SIInstrInfo::hasModifiers(unsigned Opcode) const {
+  // The src0_modifier operand is present on all instructions
+  // that have modifiers.
+  // A named-operand index of -1 is treated as "operand absent", so the result is false.
+  return AMDGPU::getNamedOperandIdx(Opcode,
+                                    AMDGPU::OpName::src0_modifiers) != -1;
+}
+
 bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
                                    StringRef &ErrInfo) const {
  uint16_t Opcode = MI->getOpcode();
@ -688,16 +703,22 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
  }

  // Make sure the register classes are correct
-  for (unsigned i = 0, e = Desc.getNumOperands(); i != e; ++i) {
+  for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
    switch (Desc.OpInfo[i].OperandType) {
    case MCOI::OPERAND_REGISTER: {
      int RegClass = Desc.OpInfo[i].RegClass;
      if (!RI.regClassCanUseImmediate(RegClass) &&
          (MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm())) {
+        // Handle some special cases:
+        // Src0 of VOP1, VOP2, VOPC can be an immediate no matter what
+        // the register class.
+        if (i != Src0Idx || (!isVOP1(Opcode) && !isVOP2(Opcode) &&
+                                  !isVOPC(Opcode))) {
          ErrInfo = "Expected register, but got immediate";
          return false;
        }
      }
+    }
      break;
    case MCOI::OPERAND_IMMEDIATE:
      // Check if this operand is an immediate.
@ -1423,17 +1444,9 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
      // We are converting these to a BFE, so we need to add the missing
      // operands for the size and offset.
      unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
-      Inst->addOperand(Inst->getOperand(1));
-      Inst->getOperand(1).ChangeToImmediate(0);
-      Inst->addOperand(MachineOperand::CreateImm(0));
-      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(Size));

-      // XXX - Other pointless operands. There are 4, but it seems you only need
-      // 3 to not hit an assertion later in MCInstLower.
-      Inst->addOperand(MachineOperand::CreateImm(0));
-      Inst->addOperand(MachineOperand::CreateImm(0));
    } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
      // The VALU version adds the second operand to the result, so insert an
      // extra 0 operand.
@ -1452,16 +1465,9 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {

      uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
      uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
-
      Inst->RemoveOperand(2); // Remove old immediate.
-      Inst->addOperand(Inst->getOperand(1));
-      Inst->getOperand(1).ChangeToImmediate(0);
-      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(Offset));
-      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(BitWidth));
-      Inst->addOperand(MachineOperand::CreateImm(0));
-      Inst->addOperand(MachineOperand::CreateImm(0));
    }

    // Update the destination register class.
--- a/llvm/lib/Target/R600/SIInstrInfo.h
+++ b/llvm/lib/Target/R600/SIInstrInfo.h
@ -119,6 +119,9 @@ public:
  /// This function will return false if you pass it a 32-bit instruction.
  bool hasVALU32BitEncoding(unsigned Opcode) const;

+  /// \brief Return true if this instruction has any modifiers.
+  ///  e.g. src[012]_mod, omod, clamp.
+  bool hasModifiers(unsigned Opcode) const;
  bool verifyInstruction(const MachineInstr *MI,
                         StringRef &ErrInfo) const override;

@ -219,4 +222,11 @@ namespace SIInstrFlags {
  };
 }

+namespace SISrcMods {  // Bit flags OR'ed together to form a source-modifier operand.
+  enum {
+   NEG = 1 << 0,  // bit 0: the source was wrapped in an FNEG
+   ABS = 1 << 1   // bit 1: the source was wrapped in an FABS
+  };
+}
+
 #endif //SIINSTRINFO_H
--- a/llvm/lib/Target/R600/SIInstrInfo.td
+++ b/llvm/lib/Target/R600/SIInstrInfo.td
@ -159,6 +159,8 @@ def sopp_brtarget : Operand<OtherVT> {
  let OperandType = "OPERAND_PCREL";
 }

+include "SIInstrFormats.td"
+
 //===----------------------------------------------------------------------===//
 // Complex patterns
 //===----------------------------------------------------------------------===//
@ -167,6 +169,9 @@ def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
 def MUBUFAddr64 : ComplexPattern<i64, 3, "SelectMUBUFAddr64">;
 def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;

+def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
+def VOP3Mods  : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
+
 //===----------------------------------------------------------------------===//
 // SI assembler operands
 //===----------------------------------------------------------------------===//
@ -176,7 +181,17 @@ def SIOperand {
  int VCC = 0x6A;
 }

-include "SIInstrFormats.td"
+def SRCMODS {
+  int NONE = 0;
+}
+
+def DSTCLAMP {
+  int NONE = 0;
+}
+
+def DSTOMOD {
+  int NONE = 0;
+}

 //===----------------------------------------------------------------------===//
 //
@ -270,6 +285,195 @@ multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass baseClass,
 // Vector ALU classes
 //===----------------------------------------------------------------------===//

+// This must always be right before the operand being input modified.
+def InputMods : OperandWithDefaultOps <i32, (ops (i32 0))> {
+  let PrintMethod = "printOperandAndMods";
+}
+def InputModsNoDefault : Operand <i32> {
+  let PrintMethod = "printOperandAndMods";
+}
+
+class getNumSrcArgs<ValueType Src1, ValueType Src2> {
+  int ret =
+    !if (!eq(Src1.Value, untyped.Value),      1,   // VOP1
+         !if (!eq(Src2.Value, untyped.Value), 2,   // VOP2
+                                              3)); // VOP3
+}
+
+// Returns the register class to use for the destination of VOP[123C]
+// instructions for the given VT.
+class getVALUDstForVT<ValueType VT> {
+  RegisterClass ret = !if(!eq(VT.Size, 32), VReg_32, VReg_64);
+}
+
+// Returns the register class to use for source 0 of VOP[12C]
+// instructions for the given VT.
+class getVOPSrc0ForVT<ValueType VT> {
+  RegisterClass ret = !if(!eq(VT.Size, 32), VSrc_32, VSrc_64);
+}
+
+// Returns the register class to use for source 1 of VOP[12C] for the
+// given VT.
+class getVOPSrc1ForVT<ValueType VT> {
+  RegisterClass ret = !if(!eq(VT.Size, 32), VReg_32, VReg_64);
+}
+
+// Returns the register classes for the source arguments of a VOP[12C]
+// instruction for the given SrcVTs.
+class getInRC32 <list<ValueType> SrcVT> {
+  list<RegisterClass> ret = [
+    getVOPSrc0ForVT<SrcVT[0]>.ret,
+    getVOPSrc1ForVT<SrcVT[1]>.ret
+  ];
+}
+
+// Returns the register class to use for sources of VOP3 instructions for the
+// given VT.
+class getVOP3SrcForVT<ValueType VT> {
+  RegisterClass ret = !if(!eq(VT.Size, 32), VSrc_32, VSrc_64);
+}
+
+// Returns the register classes for the source arguments of a VOP3
+// instruction for the given SrcVTs.
+class getInRC64 <list<ValueType> SrcVT> {
+  list<RegisterClass> ret = [
+    getVOP3SrcForVT<SrcVT[0]>.ret,
+    getVOP3SrcForVT<SrcVT[1]>.ret,
+    getVOP3SrcForVT<SrcVT[2]>.ret
+  ];
+}
+
+// Returns 1 if the source arguments have modifiers, 0 if they do not.
+class hasModifiers<ValueType SrcVT> {
+  bit ret = !if(!eq(SrcVT.Value, f32.Value), 1,
+            !if(!eq(SrcVT.Value, f64.Value), 1, 0));
+}
+
+// Returns the input arguments for VOP[12C] instructions for the given SrcVT.
+class getIns32 <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs> {
+  dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0),               // VOP1
+            !if(!eq(NumSrcArgs, 2), (ins Src0RC:$src0, Src1RC:$src1), // VOP2
+                                    (ins)));
+}
+
+// Returns the input arguments for VOP3 instructions for the given SrcVT.
+class getIns64 <RegisterClass Src0RC, RegisterClass Src1RC,
+                RegisterClass Src2RC, int NumSrcArgs,
+                bit HasModifiers> {
+  // Picks one of six operand lists keyed on NumSrcArgs (1, 2, otherwise 3) and HasModifiers.
+  dag ret =
+    !if (!eq(NumSrcArgs, 1),
+      !if (!eq(HasModifiers, 1),
+        // VOP1 with modifiers
+        (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
+             i32imm:$clamp, i32imm:$omod)
+      /* else */,
+        // VOP1 without modifiers
+        (ins Src0RC:$src0)
+      /* endif */ ),
+    !if (!eq(NumSrcArgs, 2),
+      !if (!eq(HasModifiers, 1),
+        // VOP 2 with modifiers
+        (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
+             InputModsNoDefault:$src1_modifiers, Src1RC:$src1,
+             i32imm:$clamp, i32imm:$omod)
+      /* else */,
+        // VOP2 without modifiers
+        (ins Src0RC:$src0, Src1RC:$src1)
+      /* endif */ )
+    /* NumSrcArgs == 3 */,
+      !if (!eq(HasModifiers, 1),
+        // VOP3 with modifiers
+        (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
+             InputModsNoDefault:$src1_modifiers, Src1RC:$src1,
+             InputModsNoDefault:$src2_modifiers, Src2RC:$src2,
+             i32imm:$clamp, i32imm:$omod)
+      /* else */,
+        // VOP3 without modifiers
+        (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2)
+      /* endif */ )));
+}
+
+// Returns the assembly string for the inputs and outputs of a VOP[12C]
+// instruction.  This does not add the _e32 suffix, so it can be reused
+// by getAsm64.
+class getAsm32 <int NumSrcArgs> {
+  string src1 = ", $src1";
+  string src2 = ", $src2";
+  string ret = " $dst, $src0"#
+               !if(!eq(NumSrcArgs, 1), "", src1)#
+               !if(!eq(NumSrcArgs, 3), src2, "");
+}
+
+// Returns the assembly string for the inputs and outputs of a VOP3
+// instruction.
+class getAsm64 <int NumSrcArgs, bit HasModifiers> {
+  string src0 = "$src0_modifiers,";
+  string src1 = !if(!eq(NumSrcArgs, 1), "", " $src1_modifiers,");
+  string src2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers,", "");
+  string ret =
+  !if(!eq(HasModifiers, 0),
+      getAsm32<NumSrcArgs>.ret,
+      " $dst, "#src0#src1#src2#" $clamp, $omod");
+}
+
+
+class VOPProfile <list<ValueType> _ArgVT> {
+  // _ArgVT is laid out as [dst, src0, src1, src2]; every other field is derived from it.
+  field list<ValueType> ArgVT = _ArgVT;
+
+  field ValueType DstVT = ArgVT[0];
+  field ValueType Src0VT = ArgVT[1];
+  field ValueType Src1VT = ArgVT[2];
+  field ValueType Src2VT = ArgVT[3];
+  field RegisterClass DstRC = getVALUDstForVT<DstVT>.ret;
+  field RegisterClass Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret;
+  field RegisterClass Src1RC32 = getVOPSrc1ForVT<Src1VT>.ret;
+  field RegisterClass Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
+  field RegisterClass Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
+  field RegisterClass Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
+  // The source count comes from Src1VT/Src2VT; HasModifiers looks at Src0VT only.
+  field int NumSrcArgs = getNumSrcArgs<Src1VT, Src2VT>.ret;
+  field bit HasModifiers = hasModifiers<Src0VT>.ret;
+
+  field dag Outs = (outs DstRC:$dst);
+
+  field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
+  field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
+                             HasModifiers>.ret;
+  // Asm32 already begins with the "_e32 " suffix; Asm64 has no suffix prepended here.
+  field string Asm32 = "_e32 "#getAsm32<NumSrcArgs>.ret;
+  field string Asm64 = getAsm64<NumSrcArgs, HasModifiers>.ret;
+}
+
+def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
+def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>;
+def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>;
+def VOP_F64_F32 : VOPProfile <[f64, f32, untyped, untyped]>;
+def VOP_F64_F64 : VOPProfile <[f64, f64, untyped, untyped]>;
+def VOP_F64_I32 : VOPProfile <[f64, i32, untyped, untyped]>;
+def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>;
+def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>;
+def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
+
+def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;
+def VOP_F32_F32_I32 : VOPProfile <[f32, f32, i32, untyped]>;
+def VOP_F64_F64_F64 : VOPProfile <[f64, f64, f64, untyped]>;
+def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
+def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
+def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
+def VOP_I32_I32_I32_VCC : VOPProfile <[i32, i32, i32, untyped]> {
+  let Src0RC32 = VReg_32;
+}
+def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
+def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
+
+def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
+def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
+def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
+def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
+
+
 class VOP <string opName> {
  string OpName = opName;
 }
@ -284,6 +488,17 @@ class SIMCInstr <string pseudo, int subtarget> {
  int Subtarget = subtarget;
 }

+class VOP3DisableFields <bit HasSrc1, bit HasSrc2, bit HasModifiers> {
+  // Each field is left unset (?) when its operand/modifier exists and is forced to 0 otherwise.
+  bits<2> src0_modifiers = !if(HasModifiers, ?, 0);
+  bits<2> src1_modifiers = !if(HasModifiers, !if(HasSrc1, ?, 0), 0);
+  bits<2> src2_modifiers = !if(HasModifiers, !if(HasSrc2, ? ,0) ,0);
+  bits<2> omod = !if(HasModifiers, ?, 0);
+  bits<1> clamp = !if(HasModifiers, ?, 0);
+  bits<9> src1 = !if(HasSrc1, ?, 0);
+  bits<9> src2 = !if(HasSrc2, ?, 0);
+}
+
 class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
  VOP3Common <outs, ins, "", pattern>,
  VOP <opName>,
@ -296,212 +511,259 @@ class VOP3_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
  SIMCInstr<opName, SISubtarget.SI>;

 multiclass VOP3_m <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern,
-                   string opName> {
+                   string opName, int NumSrcArgs, bit HasMods = 1> {

  def "" : VOP3_Pseudo <outs, ins, pattern, opName>;

-  def _si : VOP3_Real_si <op, outs, ins, asm, opName>;
+  def _si : VOP3_Real_si <op, outs, ins, asm, opName>,
+            VOP3DisableFields<!if(!eq(NumSrcArgs, 1), 0, 1),
+                              !if(!eq(NumSrcArgs, 2), 0, 1),
+                              HasMods>;

 }

 multiclass VOP3_1_m <bits<8> op, dag outs, dag ins, string asm,
-                     list<dag> pattern, string opName> {
+                     list<dag> pattern, string opName, bit HasMods = 1> {

  def "" : VOP3_Pseudo <outs, ins, pattern, opName>;

-  let src1 = 0, src1_modifiers = 0, src2 = 0, src2_modifiers = 0 in {
-
  def _si : VOP3_Real_si <
              {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
-      outs, ins, asm, opName
-    >;
-
-  } // src1 = 0, src1_modifiers = 0, src2 = 0, src2_modifiers = 0
+              outs, ins, asm, opName>,
+            VOP3DisableFields<0, 0, HasMods>;
 }

-multiclass VOP3_2_m <bits<6> op, dag outs, dag ins, string asm,
-                     list<dag> pattern, string opName, string revOp> {
+multiclass VOP3_2_m <bits<9> op, dag outs, dag ins, string asm,
+                     list<dag> pattern, string opName, string revOp,
+                     bit HasMods = 1, bit UseFullOp = 0> {
+
+  def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
+           VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
+
+  def _si : VOP3_Real_si <op,
+              outs, ins, asm, opName>,
+            VOP2_REV<revOp#"_e64_si", !eq(revOp, opName)>,
+            VOP3DisableFields<1, 0, HasMods>;
+}
+
+multiclass VOP3b_2_m <bits<9> op, dag outs, dag ins, string asm,
+                      list<dag> pattern, string opName, string revOp,
+                      bit HasMods = 1, bit UseFullOp = 0> {
+  def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
+           VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
+
+  // The VOP2 variant puts the carry out into VCC, the VOP3 variant
+  // can write it into any SGPR. We currently don't use the carry out,
+  // so for now hardcode it to VCC as well.
+  let sdst = SIOperand.VCC, Defs = [VCC] in {
+    def _si : VOP3b <op, outs, ins, asm, pattern>,
+              VOP3DisableFields<1, 0, HasMods>,
+              SIMCInstr<opName, SISubtarget.SI>,
+              VOP2_REV<revOp#"_e64_si", !eq(revOp, opName)>;
+  } // End sdst = SIOperand.VCC, Defs = [VCC]
+}
+
+multiclass VOP3_C_m <bits<8> op, dag outs, dag ins, string asm,
+                     list<dag> pattern, string opName,
+                     bit HasMods, bit defExec> {

  def "" : VOP3_Pseudo <outs, ins, pattern, opName>;

-  let src2 = 0, src2_modifiers = 0 in {
-
    def _si : VOP3_Real_si <
-        {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
-        outs, ins, asm, opName>,
-        VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
-
-  } // src2 = 0, src2_modifiers = 0
-}
-
-// This must always be right before the operand being input modified.
-def InputMods : OperandWithDefaultOps <i32, (ops (i32 0))> {
-  let PrintMethod = "printOperandAndMods";
-}
-
-multiclass VOP1_Helper <bits<8> op, RegisterClass drc, RegisterClass src,
-                        string opName, list<dag> pattern> {
-
-  def _e32 : VOP1 <
-    op, (outs drc:$dst), (ins src:$src0),
-    opName#"_e32 $dst, $src0", pattern
-  >, VOP <opName>;
-
-  defm _e64 : VOP3_1_m <
-    op,
-    (outs drc:$dst),
-    (ins InputMods:$src0_modifiers, src:$src0, i32imm:$clamp, i32imm:$omod),
-    opName#"_e64 $dst, $src0_modifiers, $clamp, $omod", [], opName>;
-}
-
-multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern>
-  : VOP1_Helper <op, VReg_32, VSrc_32, opName, pattern>;
-
-multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern>
-  : VOP1_Helper <op, VReg_64, VSrc_64, opName, pattern>;
-
-multiclass VOP1_32_64 <bits<8> op, string opName, list<dag> pattern>
-  : VOP1_Helper <op, VReg_32, VSrc_64, opName, pattern>;
-
-multiclass VOP1_64_32 <bits<8> op, string opName, list<dag> pattern>
-  : VOP1_Helper <op, VReg_64, VSrc_32, opName, pattern>;
-
-multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
-                        string opName, list<dag> pattern, string revOp> {
-  def _e32 : VOP2 <
-    op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1),
-    opName#"_e32 $dst, $src0, $src1", pattern
-  >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
-
-  defm _e64 : VOP3_2_m <
-    op,
-    (outs vrc:$dst),
-    (ins InputMods:$src0_modifiers, arc:$src0,
-         InputMods:$src1_modifiers, arc:$src1,
-         i32imm:$clamp, i32imm:$omod),
-    opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod", [],
-    opName, revOp>;
-}
-
-multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern,
-                    string revOp = opName>
-  : VOP2_Helper <op, VReg_32, VSrc_32, opName, pattern, revOp>;
-
-multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern,
-                    string revOp = opName>
-  : VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern, revOp>;
-
-multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern,
-                     RegisterClass src0_rc, string revOp = opName> {
-
-  def _e32 : VOP2 <
-    op, (outs VReg_32:$dst), (ins src0_rc:$src0, VReg_32:$src1),
-    opName#"_e32 $dst, $src0, $src1", pattern
-  >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
-
-  def _e64 : VOP3b <
-    {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
-    (outs VReg_32:$dst),
-    (ins InputMods: $src0_modifiers, VSrc_32:$src0,
-         InputMods:$src1_modifiers, VSrc_32:$src1,
-         i32imm:$clamp, i32imm:$omod),
-    opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod", []
-  >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
-    let src2 = 0;
-    let src2_modifiers = 0;
-    /* the VOP2 variant puts the carry out into VCC, the VOP3 variant
-       can write it into any SGPR. We currently don't use the carry out,
-       so for now hardcode it to VCC as well */
-    let sdst = SIOperand.VCC;
-  }
-}
-
-multiclass VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
-                        string opName, ValueType vt, PatLeaf cond, bit defExec = 0> {
-  def _e32 : VOPC <
-    op, (ins arc:$src0, vrc:$src1),
-    opName#"_e32 $dst, $src0, $src1", []
-  >, VOP <opName> {
-    let Defs = !if(defExec, [EXEC], []);
-  }
-
-  def _e64 : VOP3 <
                {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
-    (outs SReg_64:$dst),
-    (ins InputMods:$src0_modifiers, arc:$src0,
-         InputMods:$src1_modifiers, arc:$src1,
-         InstFlag:$clamp, InstFlag:$omod),
-    opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod",
-    !if(!eq(!cast<string>(cond), "COND_NULL"), []<dag>,
-      [(set SReg_64:$dst, (i1 (setcc (vt arc:$src0), arc:$src1, cond)))]
-    )
-  >, VOP <opName> {
+                outs, ins, asm, opName>,
+              VOP3DisableFields<1, 0, HasMods> {
  let Defs = !if(defExec, [EXEC], []);
-    let src2 = 0;
-    let src2_modifiers = 0;
  }
 }

-multiclass VOPC_32 <bits<8> op, string opName,
-  ValueType vt = untyped, PatLeaf cond = COND_NULL>
-  : VOPC_Helper <op, VReg_32, VSrc_32, opName, vt, cond>;
+multiclass VOP1_Helper <bits<8> op, string opName, dag outs,
+                        dag ins32, string asm32, list<dag> pat32,
+                        dag ins64, string asm64, list<dag> pat64,
+                        bit HasMods> {

-multiclass VOPC_64 <bits<8> op, string opName,
-  ValueType vt = untyped, PatLeaf cond = COND_NULL>
-  : VOPC_Helper <op, VReg_64, VSrc_64, opName, vt, cond>;
+  def _e32 : VOP1 <op, outs, ins32, opName#asm32, pat32>, VOP<opName>;

-multiclass VOPCX_32 <bits<8> op, string opName,
-  ValueType vt = untyped, PatLeaf cond = COND_NULL>
-  : VOPC_Helper <op, VReg_32, VSrc_32, opName, vt, cond, 1>;
+  defm _e64 : VOP3_1_m <op, outs, ins64, opName#"_e64"#asm64, pat64, opName, HasMods>;
+}

-multiclass VOPCX_64 <bits<8> op, string opName,
-  ValueType vt = untyped, PatLeaf cond = COND_NULL>
-  : VOPC_Helper <op, VReg_64, VSrc_64, opName, vt, cond, 1>;
-
-multiclass VOP3_32 <bits<9> op, string opName, list<dag> pattern> : VOP3_m <
-  op, (outs VReg_32:$dst),
-  (ins InputMods: $src0_modifiers, VSrc_32:$src0, InputMods:$src1_modifiers,
-   VSrc_32:$src1, InputMods:$src2_modifiers, VSrc_32:$src2,
-   InstFlag:$clamp, InstFlag:$omod),
-  opName#" $dst, $src0_modifiers, $src1, $src2, $clamp, $omod", pattern, opName
+multiclass VOP1Inst <bits<8> op, string opName, VOPProfile P,
+                     SDPatternOperator node = null_frag> : VOP1_Helper <
+  op, opName, P.Outs,
+  P.Ins32, P.Asm32, [],
+  P.Ins64, P.Asm64,
+  !if(P.HasModifiers,
+      [(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
+                                i32:$src0_modifiers, i32:$clamp, i32:$omod))))],
+      [(set P.DstVT:$dst, (node P.Src0VT:$src0))]),
+  P.HasModifiers
 >;

-class VOP3_64_32 <bits <9> op, string opName, list<dag> pattern> : VOP3 <
-  op, (outs VReg_64:$dst),
-  (ins VSrc_64:$src0, VSrc_32:$src1),
-  opName#" $dst, $src0, $src1", pattern
->, VOP <opName> {
+class VOP2_e32 <bits<6> op, string opName, dag outs, dag ins, string asm,
+                list<dag> pattern, string revOp> :
+  VOP2 <op, outs, ins, opName#asm, pattern>,
+  VOP <opName>,
+  VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;

-  let src2 = 0;
-  let src2_modifiers = 0;
-  let src0_modifiers = 0;
-  let clamp = 0;
-  let omod = 0;
+multiclass VOP2_Helper <bits<6> op, string opName, dag outs,
+                        dag ins32, string asm32, list<dag> pat32,
+                        dag ins64, string asm64, list<dag> pat64,
+                        string revOp, bit HasMods> {
+  def _e32 : VOP2_e32 <op, opName, outs, ins32, asm32, pat32, revOp>;
+
+  defm _e64 : VOP3_2_m <
+    {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+    outs, ins64, opName#"_e64"#asm64, pat64, opName, revOp, HasMods
+  >;
 }

-class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
-  op, (outs VReg_64:$dst),
-  (ins InputMods:$src0_modifiers, VSrc_64:$src0,
-       InputMods:$src1_modifiers, VSrc_64:$src1,
-       InputMods:$src2_modifiers, VSrc_64:$src2,
-       InstFlag:$clamp, InstFlag:$omod),
-  opName#" $dst, $src0_modifiers, $src1_modifiers, $src2_modifiers, $clamp, $omod", pattern
->, VOP <opName>;
+// VOP2Inst - Declares a two-source instruction described by profile P by
+// forwarding to VOP2_Helper.  The 32-bit pattern list is empty, so the
+// selection pattern built here is attached to the 64-bit form only.
+//
+// When P.HasModifiers is set, src0 is matched through VOP3Mods0 (binding
+// $src0, $src0_modifiers, $clamp and $omod) and src1 through VOP3Mods (binding
+// $src1 and $src1_modifiers); otherwise `node` is applied to the two sources
+// directly.  `node` defaults to null_frag and `revOp` defaults to opName.
+multiclass VOP2Inst <bits<6> op, string opName, VOPProfile P,
+                     SDPatternOperator node = null_frag,
+                     string revOp = opName> : VOP2_Helper <
+  op, opName, P.Outs,
+  P.Ins32, P.Asm32, [],
+  P.Ins64, P.Asm64,
+  !if(P.HasModifiers,
+      [(set P.DstVT:$dst,
+           (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+                                      i32:$clamp, i32:$omod)),
+                 (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
+      [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
+  revOp, P.HasModifiers
+>;
+
+// VOP2b_Helper - Same layout as VOP2_Helper, except that the _e64 form is
+// expanded through VOP3b_2_m instead of VOP3_2_m.
+//   _e32 : a VOP2_e32 record built from ins32 / asm32 / pat32.
+//   _e64 : a VOP3b_2_m expansion built from ins64 / asm64 / pat64.
+multiclass VOP2b_Helper <bits<6> op, string opName, dag outs,
+                         dag ins32, string asm32, list<dag> pat32,
+                         dag ins64, string asm64, list<dag> pat64,
+                         string revOp, bit HasMods> {
+
+  def _e32 : VOP2_e32 <op, opName, outs, ins32, asm32, pat32, revOp>;
+
+  // The opcode handed to VOP3b_2_m is the 6-bit VOP2 opcode with the bits
+  // 1, 0, 0 placed in front of it.
+  defm _e64 : VOP3b_2_m <
+    {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
+    outs, ins64, opName#"_e64"#asm64, pat64, opName, revOp, HasMods
+  >;
+}
+
+// VOP2bInst - Counterpart of VOP2Inst that forwards to VOP2b_Helper.  The
+// arguments and the pattern construction are otherwise the same: the 32-bit
+// pattern list is empty, and the 64-bit pattern matches src0 through VOP3Mods0
+// and src1 through VOP3Mods when P.HasModifiers is set, or applies `node` to
+// the plain sources when it is not.
+multiclass VOP2bInst <bits<6> op, string opName, VOPProfile P,
+                      SDPatternOperator node = null_frag,
+                      string revOp = opName> : VOP2b_Helper <
+  op, opName, P.Outs,
+  P.Ins32, P.Asm32, [],
+  P.Ins64, P.Asm64,
+  !if(P.HasModifiers,
+      [(set P.DstVT:$dst,
+           (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+                                      i32:$clamp, i32:$omod)),
+                 (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
+      [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
+  revOp, P.HasModifiers
+>;
+
+// VOPC_Helper - Emits both forms of a compare instruction:
+//   _e32 : a VOPC record built from ins32 / asm32 / pat32.
+//   _e64 : a VOP3_C_m expansion built from out64 / ins64 / asm64 / pat64,
+//          with "_e64" inserted between opName and asm64 in the asm string.
+// DefExec controls whether the _e32 form lists EXEC in its Defs; it is also
+// forwarded, together with HasMods, to VOP3_C_m.
+multiclass VOPC_Helper <bits<8> op, string opName,
+                        dag ins32, string asm32, list<dag> pat32,
+                        dag out64, dag ins64, string asm64, list<dag> pat64,
+                        bit HasMods, bit DefExec> {
+  def _e32 : VOPC <op, ins32, opName#asm32, pat32>, VOP <opName> {
+    // EXEC is recorded as an implicit def only when DefExec is set.
+    let Defs = !if(DefExec, [EXEC], []);
+  }
+
+  defm _e64 : VOP3_C_m <op, out64, ins64, opName#"_e64"#asm64, pat64, opName,
+                        HasMods, DefExec>;
+}
+
+// VOPCInst - Declares a compare instruction described by profile P by
+// forwarding to VOPC_Helper.  The 32-bit pattern list is empty; the 64-bit
+// form writes $dst in SReg_64 and carries a setcc pattern producing i1 with
+// condition `cond`.
+//
+// When P.HasModifiers is set, src0 is matched through VOP3Mods0 (binding
+// $src0, $src0_modifiers, $clamp and $omod) and src1 through VOP3Mods (binding
+// $src1 and $src1_modifiers); otherwise setcc is matched on the plain sources.
+// `cond` defaults to COND_NULL and DefExec defaults to 0.
+multiclass VOPCInst <bits<8> op, string opName,
+                     VOPProfile P, PatLeaf cond = COND_NULL,
+                     bit DefExec = 0> : VOPC_Helper <
+  op, opName,
+  P.Ins32, P.Asm32, [],
+  (outs SReg_64:$dst), P.Ins64, P.Asm64,
+  !if(P.HasModifiers,
+      [(set i1:$dst,
+          (setcc (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+                                      i32:$clamp, i32:$omod)),
+                 (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
+                 cond))],
+      [(set i1:$dst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))]),
+  P.HasModifiers, DefExec
+>;
+
+// Compare shorthands: each one instantiates VOPCInst with a fixed VOPProfile
+// (named in its body) and leaves DefExec at VOPCInst's default of 0.  `cond`
+// defaults to COND_NULL.
+multiclass VOPC_F32 <bits<8> op, string opName, PatLeaf cond = COND_NULL> :
+  VOPCInst <op, opName, VOP_F32_F32_F32, cond>;
+
+multiclass VOPC_F64 <bits<8> op, string opName, PatLeaf cond = COND_NULL> :
+  VOPCInst <op, opName, VOP_F64_F64_F64, cond>;
+
+multiclass VOPC_I32 <bits<8> op, string opName, PatLeaf cond = COND_NULL> :
+  VOPCInst <op, opName, VOP_I32_I32_I32, cond>;
+
+multiclass VOPC_I64 <bits<8> op, string opName, PatLeaf cond = COND_NULL> :
+  VOPCInst <op, opName, VOP_I64_I64_I64, cond>;


-class VOP3b_Helper <bits<9> op, RegisterClass vrc, RegisterClass arc,
-                    string opName, list<dag> pattern> : VOP3 <
+// VOPCX - VOPCInst with its DefExec argument fixed to 1, so the resulting
+// _e32 form lists EXEC in its Defs.  `cond` defaults to COND_NULL.
+multiclass VOPCX <bits<8> op, string opName, VOPProfile P,
+                  PatLeaf cond = COND_NULL>
+  : VOPCInst <op, opName, P, cond, 1>;
+
+// VOPCX shorthands: each one instantiates VOPCX with a fixed VOPProfile
+// (named in its body).  `cond` defaults to COND_NULL.
+multiclass VOPCX_F32 <bits<8> op, string opName, PatLeaf cond = COND_NULL> :
+  VOPCX <op, opName, VOP_F32_F32_F32, cond>;
+
+multiclass VOPCX_F64 <bits<8> op, string opName, PatLeaf cond = COND_NULL> :
+  VOPCX <op, opName, VOP_F64_F64_F64, cond>;
+
+multiclass VOPCX_I32 <bits<8> op, string opName, PatLeaf cond = COND_NULL> :
+  VOPCX <op, opName, VOP_I32_I32_I32, cond>;
+
+multiclass VOPCX_I64 <bits<8> op, string opName, PatLeaf cond = COND_NULL> :
+  VOPCX <op, opName, VOP_I64_I64_I64, cond>;
+
+
+// VOP3_Helper - Thin forwarder to VOP3_m: passes every argument through and
+// builds the asm string by concatenating opName with `asm`.
+multiclass VOP3_Helper <bits<9> op, string opName, dag outs, dag ins, string asm,
+                        list<dag> pat, int NumSrcArgs, bit HasMods> : VOP3_m <
+    op, outs, ins, opName#asm, pat, opName, NumSrcArgs, HasMods
+>;
+
+// VOP3Inst - Declares an instruction described by profile P by forwarding to
+// VOP3_Helper with P's outs and its 64-bit ins/asm.
+//
+// The selection pattern is chosen first by P.NumSrcArgs (3, then 2, with every
+// other value falling through to the one-source form) and then by
+// P.HasModifiers.  In the modifier forms src0 is matched through VOP3Mods0,
+// which also binds $clamp and $omod, while src1/src2 are matched through
+// VOP3Mods; in the plain forms `node` is applied to the sources directly.
+// `node` defaults to null_frag.
+multiclass VOP3Inst <bits<9> op, string opName, VOPProfile P,
+                     SDPatternOperator node = null_frag> : VOP3_Helper <
+  op, opName, P.Outs, P.Ins64, P.Asm64,
+  // Three-source patterns.
+  !if(!eq(P.NumSrcArgs, 3),
+    !if(P.HasModifiers,
+        [(set P.DstVT:$dst,
+            (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+                                       i32:$clamp, i32:$omod)),
+                  (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
+                  (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))],
+        [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1,
+                                  P.Src2VT:$src2))]),
+  // Two-source patterns.
+  !if(!eq(P.NumSrcArgs, 2),
+    !if(P.HasModifiers,
+        [(set P.DstVT:$dst,
+            (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+                                       i32:$clamp, i32:$omod)),
+                  (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
+        [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))])
+  /* P.NumSrcArgs == 1 */,
+    !if(P.HasModifiers,
+        [(set P.DstVT:$dst,
+            (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+                                       i32:$clamp, i32:$omod))))],
+        [(set P.DstVT:$dst, (node P.Src0VT:$src0))]))),
+  P.NumSrcArgs, P.HasModifiers
+>;
+
+// VOP3b_Helper - Expands VOP3b_2_m for an instruction with two results
+// ($dst0 in `vrc`, $dst1 in SReg_64) and three sources in `arc`, followed by
+// the InstFlag operands $abs, $clamp, $omod and $neg.
+// NOTE(review): the trailing `opName, opName, 1, 1` arguments are positional;
+// VOP3b_2_m's parameter list is outside this view, so confirm what the two
+// 1s select.
+multiclass VOP3b_Helper <bits<9> op, RegisterClass vrc, RegisterClass arc,
+                    string opName, list<dag> pattern> :
+  VOP3b_2_m <
  op, (outs vrc:$dst0, SReg_64:$dst1),
  (ins arc:$src0, arc:$src1, arc:$src2,
   InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
-  opName#" $dst0, $dst1, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
->, VOP <opName>;
+  opName#" $dst0, $dst1, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern,
+  opName, opName, 1, 1
+>;

-
-class VOP3b_64 <bits<9> op, string opName, list<dag> pattern> :
+// VOP3b_64 - VOP3b_Helper with VReg_64 as the $dst0 class and VSrc_64 as the
+// source class.
+multiclass VOP3b_64 <bits<9> op, string opName, list<dag> pattern> :
  VOP3b_Helper <op, VReg_64, VSrc_64, opName, pattern>;

-class VOP3b_32 <bits<9> op, string opName, list<dag> pattern> :
+// VOP3b_32 - VOP3b_Helper with VReg_32 as the $dst0 class and VSrc_32 as the
+// source class.
+multiclass VOP3b_32 <bits<9> op, string opName, list<dag> pattern> :
  VOP3b_Helper <op, VReg_32, VSrc_32, opName, pattern>;

 //===----------------------------------------------------------------------===//
--- a/llvm/lib/Target/R600/SIInstructions.td
+++ b/llvm/lib/Target/R600/SIInstructions.td
--- a/llvm/lib/Target/R600/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/R600/SILowerI1Copies.cpp
@ -136,11 +136,7 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
                 SrcRC == &AMDGPU::VReg_1RegClass) {
        BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CMP_NE_I32_e64))
                .addOperand(MI.getOperand(0))
-                .addImm(0)
                .addOperand(MI.getOperand(1))
-                .addImm(0)
-                .addImm(0)
-                .addImm(0)
                .addImm(0);
        MI.eraseFromParent();
      }
--- a/llvm/lib/Target/R600/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/R600/SIShrinkInstructions.cpp
@ -93,7 +93,7 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
  const MachineOperand *Src1Mod =
      TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);

-  if (Src1 && (!isVGPR(Src1, TRI, MRI) || Src1Mod->getImm() != 0))
+  if (Src1 && (!isVGPR(Src1, TRI, MRI) || (Src1Mod && Src1Mod->getImm() != 0)))
    return false;

  // We don't need to check src0, all input types are legal, so just make
--- a/llvm/test/CodeGen/R600/fabs.ll
+++ b/llvm/test/CodeGen/R600/fabs.ll
@ -50,8 +50,9 @@ entry:
 }

 ; SI-CHECK-LABEL: @fabs_fold
+; SI-CHECK: S_LOAD_DWORD [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
 ; SI-CHECK-NOT: V_AND_B32_e32
-; SI-CHECK: V_MUL_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, |v{{[0-9]+}}|
+; SI-CHECK: V_MUL_F32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
 define void @fabs_fold(float addrspace(1)* %out, float %in0, float %in1) {
 entry:
  %0 = call float @fabs(float %in0)
--- a/llvm/test/CodeGen/R600/fneg.ll
+++ b/llvm/test/CodeGen/R600/fneg.ll
@ -61,8 +61,9 @@ entry:
 }

 ; SI-CHECK-LABEL: @fneg_fold
+; SI-CHECK: S_LOAD_DWORD [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
 ; SI-CHECK-NOT: V_XOR_B32
-; SI-CHECK: V_MUL_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
+; SI-CHECK: V_MUL_F32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], v{{[0-9]+}}
 define void @fneg_fold(float addrspace(1)* %out, float %in) {
 entry:
  %0 = fsub float -0.0, %in
--- a/llvm/test/CodeGen/R600/fsub.ll
+++ b/llvm/test/CodeGen/R600/fsub.ll
@ -20,8 +20,8 @@ declare void @llvm.AMDGPU.store.output(float, i32)
 ; R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, -KC0[3].Z
 ; R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, -KC0[3].Y
 ; SI-CHECK: @fsub_v2f32
-; SI-CHECK: V_SUB_F32
-; SI-CHECK: V_SUB_F32
+; SI-CHECK: V_SUBREV_F32
+; SI-CHECK: V_SUBREV_F32
 define void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
 entry:
  %0 = fsub <2 x float> %a, %b
@ -35,10 +35,10 @@ entry:
 ; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
 ; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
 ; SI-CHECK: @fsub_v4f32
-; SI-CHECK: V_SUB_F32
-; SI-CHECK: V_SUB_F32
-; SI-CHECK: V_SUB_F32
-; SI-CHECK: V_SUB_F32
+; SI-CHECK: V_SUBREV_F32
+; SI-CHECK: V_SUBREV_F32
+; SI-CHECK: V_SUBREV_F32
+; SI-CHECK: V_SUBREV_F32
 define void @fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
  %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
  %a = load <4 x float> addrspace(1) * %in
--- a/llvm/test/CodeGen/R600/mul_uint24.ll
+++ b/llvm/test/CodeGen/R600/mul_uint24.ll
@ -23,7 +23,7 @@ entry:
 ; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
 ; EG: 16
 ; SI: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 16,
+; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 16
 define void @i16_mul24(i32 addrspace(1)* %out, i16 %a, i16 %b) {
 entry:
  %0 = mul i16 %a, %b
@ -37,7 +37,7 @@ entry:
 ; The result must be sign-extended
 ; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
 ; SI: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 8,
+; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 8

 define void @i8_mul24(i32 addrspace(1)* %out, i8 %a, i8 %b) {
 entry:
--- a/llvm/test/CodeGen/R600/vop-shrink.ll
+++ b/llvm/test/CodeGen/R600/vop-shrink.ll
@ -1,9 +1,4 @@
 ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; XXX: This testis for a bug in the SIShrinkInstruction pass and it will be
-;       relevant once we are selecting 64-bit instructions.  We are
-;       currently selecting mostly 32-bit instruction, so the
-;       SIShrinkInstructions pass isn't doing much.
-; XFAIL: *

 ; Test that we correctly commute a sub instruction
 ; FUNC-LABEL: @sub_rev