Initial Thumb2 support. The majority of the work was done by David Goodwin. There are

also some contributions from Jim Grosbach, Bob Wilson, and Evan Cheng. I've done my best to consolidate the patches with those that were done by Viktor Kutuzov and Anton Korzh from Access Softek, Inc. Let me know if I missed anything. I've completely reorganized the thumb2 td file, made more extensive use of multiclasses, etc. Test cases will be contributed later after I reorganize what's in svn first. llvm-svn: 73965
2009-06-23 17:48:47 +00:00 · 2009-06-23 17:48:47 +00:00 · 431cf567de
parent cdf2b2dfea
commit 431cf567de
5 changed files with 524 additions and 131 deletions
--- a/llvm/lib/Target/ARM/ARMAddressingModes.h
+++ b/llvm/lib/Target/ARM/ARMAddressingModes.h
@ -248,12 +248,122 @@ namespace ARM_AM {
    return V == 0;
  }

+  /// getThumbImm16ValShift - Try to handle Imm with a 16-bit immediate followed
+  /// by a left shift. Returns the shift amount to use.
+  static inline unsigned getThumbImm16ValShift(unsigned Imm) {
+    // 16-bit (or less) immediates are trivially immediate operand with a shift
+    // of zero.
+    if ((Imm & ~65535U) == 0) return 0;
+
+    // Use CTZ to compute the shift amount.
+    return CountTrailingZeros_32(Imm);
+  }
+
+  /// isThumbImm16ShiftedVal - Return true if the specified value can be
+  /// obtained by left shifting a 16-bit immediate.
+  static inline bool isThumbImm16ShiftedVal(unsigned V) {
+    // Mask off the shifted 16-bit window; V qualifies if no bits remain set.
+    V = (~65535U << getThumbImm16ValShift(V)) & V;
+    return V == 0;
+  }
+
  /// getThumbImmNonShiftedVal - If V is a value that satisfies
  /// isThumbImmShiftedVal, return the non-shiftd value.
  static inline unsigned getThumbImmNonShiftedVal(unsigned V) {
    return V >> getThumbImmValShift(V);
  }

+  /// getT2SOImmValDecode - Given a 12-bit encoded Thumb-2 modified immediate,
+  /// return the corresponding 32-bit immediate value.
+  /// See ARM Reference Manual A6.3.2.
+  static inline unsigned getT2SOImmValDecode(unsigned Imm) {
+    unsigned Base = Imm & 0xff;
+    switch ((Imm >> 8) & 0xf) {
+    case 0:
+      return Base;
+    case 1:
+      return Base | (Base << 16);
+    case 2:
+      return (Base << 8) | (Base << 24);
+    case 3:
+      return Base | (Base << 8) | (Base << 16) | (Base << 24);
+    default:
+      break;
+    }
+    
+    // shifted immediate
+    unsigned RotAmount = ((Imm >> 7) & 0x1f) - 8;
+    return (Base | 0x80) << (24 - RotAmount);
+  }
+
+  /// getT2SOImmValSplat - Return the 12-bit encoded representation
+  /// if the specified value can be obtained by splatting the low 8 bits
+  /// into every other byte or every byte of a 32-bit value. i.e.,
+  ///     00000000 00000000 00000000 abcdefgh    control = 0
+  ///     00000000 abcdefgh 00000000 abcdefgh    control = 1
+  ///     abcdefgh 00000000 abcdefgh 00000000    control = 2
+  ///     abcdefgh abcdefgh abcdefgh abcdefgh    control = 3
+  /// Return -1 if none of the above apply.
+  /// See ARM Reference Manual A6.3.2.
+  static inline int getT2SOImmValSplat (unsigned V) {
+    unsigned u, Vs, Imm;
+    // control = 0
+    if ((V & 0xffffff00) == 0) 
+      return V;
+    
+    // If the value is zeroes in the first byte, just shift those off
+    Vs = ((V & 0xff) == 0) ? V >> 8 : V;
+    // Any passing value only has 8 bits of payload, splatted across the word
+    Imm = Vs & 0xff;
+    // Likewise, any passing values have the payload splatted into the 3rd byte
+    u = Imm | (Imm << 16);
+
+    // control = 1 or 2
+    if (Vs == u)
+      return (((Vs == V) ? 1 : 2) << 8) | Imm;
+
+    // control = 3
+    if (Vs == (u | (u << 8)))
+      return (3 << 8) | Imm;
+
+    return -1;
+  }
+
+  /// getT2SOImmValRotate - Return the 12-bit encoded representation if the
+  /// specified value is a rotated 8-bit value. Return -1 if no rotation
+  /// encoding is possible.
+  /// See ARM Reference Manual A6.3.2.
+  static inline int getT2SOImmValRotate (unsigned V) {
+    unsigned RotAmt = CountLeadingZeros_32(V);
+    if (RotAmt >= 24)
+      return -1;
+
+    // If V fits in the 8 bits starting at its top set bit, encode it.
+    if ((rotr32(0xff000000U, RotAmt) & V) == V)
+      return (rotr32(V, 24 - RotAmt) & 0x7f) | ((RotAmt + 8) << 7);
+
+    return -1;
+  }
+
+  /// getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit
+  /// into a Thumb-2 shifter_operand immediate operand, return the 12-bit 
+  /// encoding for it.  If not, return -1.
+  /// See ARM Reference Manual A6.3.2.
+  static inline int getT2SOImmVal(unsigned Arg) {
+    // If 'Arg' is an 8-bit splat, then get the encoded value.
+    int Splat = getT2SOImmValSplat(Arg);
+    if (Splat != -1)
+      return Splat;
+    
+    // If 'Arg' can be handled with a single shifter_op return the value.
+    int Rot = getT2SOImmValRotate(Arg);
+    if (Rot != -1)
+      return Rot;
+
+    return -1;
+  }
+  
+
  //===--------------------------------------------------------------------===//
  // Addressing Mode #2
  //===--------------------------------------------------------------------===//
--- a/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/llvm/lib/Target/ARM/ARMInstrFormats.td
@ -751,6 +751,26 @@ class ThumbV5Pat<dag pattern, dag result> : Pat<pattern, result> {
  list<Predicate> Predicates = [IsThumb, HasV5T];
 }

+// T2I - Thumb2 instruction.
+
+class Thumb2I<dag outs, dag ins, AddrMode am, SizeFlagVal sz,
+             string asm, string cstr, list<dag> pattern>
+  : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr> {
+  let OutOperandList = outs;
+  let InOperandList = ins;
+  let AsmString   = asm;
+  let Pattern = pattern;
+  list<Predicate> Predicates = [IsThumb, HasThumb2];
+}
+
+class T2I<dag outs, dag ins, string asm, list<dag> pattern>
+  : Thumb2I<outs, ins, AddrModeNone, Size4Bytes, asm, "", pattern>;
+
+// Thumb2Pat - Same as Pat<>, but requires that the compiler be in Thumb2 mode.
+class Thumb2Pat<dag pattern, dag result> : Pat<pattern, result> {
+  list<Predicate> Predicates = [IsThumb, HasThumb2];
+}
+
 //===----------------------------------------------------------------------===//

 //===----------------------------------------------------------------------===//
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@ -98,6 +98,7 @@ def HasVFP2   : Predicate<"Subtarget->hasVFP2()">;
 def HasVFP3   : Predicate<"Subtarget->hasVFP3()">;
 def HasNEON   : Predicate<"Subtarget->hasNEON()">;
 def IsThumb   : Predicate<"Subtarget->isThumb()">;
+def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
 def HasThumb2 : Predicate<"Subtarget->hasThumb2()">;
 def IsARM     : Predicate<"!Subtarget->isThumb()">;
 def IsDarwin    : Predicate<"Subtarget->isTargetDarwin()">;
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@ -20,131 +20,233 @@ def t2_so_reg : Operand<i32>,    // reg imm
  let MIOperandInfo = (ops GPR, i32imm);
 }

-def LO16 : SDNodeXForm<imm, [{
-  // Transformation function: shift the immediate value down into the low bits.
-  return getI32Imm((unsigned short)N->getZExtValue());
+// t2_so_imm_XFORM - Return a t2_so_imm value packed into the format 
+// described for t2_so_imm def below.
+def t2_so_imm_XFORM : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(
+        ARM_AM::getT2SOImmVal(N->getZExtValue()), MVT::i32);
 }]>;

-def HI16 : SDNodeXForm<imm, [{
-  // Transformation function: shift the immediate value down into the low bits.
-  return getI32Imm((unsigned)N->getZExtValue() >> 16);
+// t2_so_imm_not_XFORM - Return the complement of a t2_so_imm value
+def t2_so_imm_not_XFORM : SDNodeXForm<imm, [{
+    return CurDAG->getTargetConstant(
+        ARM_AM::getT2SOImmVal(~((uint32_t)N->getZExtValue())), MVT::i32);
 }]>;

-def imm16high : PatLeaf<(i32 imm), [{
-  // Returns true if all bits out of the [31..16] range are 0.
-  return ((N->getZExtValue() & 0xFFFF0000ULL) == N->getZExtValue());
-}], HI16>;
+// t2_so_imm_neg_XFORM - Return the negation of a t2_so_imm value
+def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
+    return CurDAG->getTargetConstant(
+        ARM_AM::getT2SOImmVal(-((int)N->getZExtValue())), MVT::i32);
+}]>;

-def imm16high0xffff : PatLeaf<(i32 imm), [{
-  // Returns true if lo 16 bits are set and this is a 32-bit value. 
-  return ((N->getZExtValue() & 0x0000FFFFULL) == 0xFFFFULL);
-}], HI16>;
+// t2_so_imm - Match a 32-bit immediate operand, which is an
+// 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit
+// immediate splatted into multiple bytes of the word. t2_so_imm values are
+// represented in the imm field in the same 12-bit form that they are encoded
+// into t2_so_imm instructions: the 8-bit immediate is the least significant bits
+// [bits 0-7], the 4-bit shift/splat amount is the next 4 bits [bits 8-11].
+def t2_so_imm : Operand<i32>,
+                PatLeaf<(imm), [{
+       return ARM_AM::getT2SOImmVal((uint32_t)N->getZExtValue()) != -1;
+     }], t2_so_imm_XFORM> {
+  let PrintMethod = "printT2SOImmOperand";
+}

-def imm0_4095 : PatLeaf<(i32 imm), [{ 
-  return (uint32_t)N->getZExtValue() < 4096; 
-}]>; 
+// t2_so_imm_not - Match an immediate that is a complement 
+// of a t2_so_imm.
+def t2_so_imm_not : Operand<i32>,
+                    PatLeaf<(imm), [{
+       return ARM_AM::getT2SOImmVal(~((uint32_t)N->getZExtValue())) != -1;
+     }], t2_so_imm_not_XFORM> {
+  let PrintMethod = "printT2SOImmOperand";
+}
+
+// t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm.
+def t2_so_imm_neg : Operand<i32>,
+                    PatLeaf<(imm), [{
+       return ARM_AM::getT2SOImmVal(-((int)N->getZExtValue())) != -1;
+     }], t2_so_imm_neg_XFORM> {
+  let PrintMethod = "printT2SOImmOperand";
+}
+
+/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0,4095].
+def imm0_4095 : PatLeaf<(i32 imm), [{
+  return (uint32_t)N->getZExtValue() < 4096;
+}]>;

 def imm0_4095_neg : PatLeaf<(i32 imm), [{ 
- return (uint32_t)-N->getZExtValue() < 4096; 
+ return (uint32_t)(-N->getZExtValue()) < 4096; 
 }], imm_neg_XFORM>; 

-def imm0_65535 : PatLeaf<(i32 imm), [{ 
-  return N->getZExtValue() < 65536; 
-}]>; 
-
-// A6.3.2 Modified immediate constants in Thumb instructions (#<const>)
-// FIXME: Move it the the addrmode matcher code.
-def t2_so_imm : PatLeaf<(i32 imm), [{
-  uint64_t v = N->getZExtValue();
-  if (v == 0 || v > 0xffffffffUL) return false;
-  // variant1 - 0b0000x - 8-bit which could be zero (not supported for now)
-
-  // variant2 - 0b00nnx - 8-bit repeated inside the 32-bit room
-  unsigned hi16 = (unsigned)(v >> 16);
-  unsigned lo16 = (unsigned)(v & 0xffffUL);
-  bool valid = (hi16 == lo16) && (
-    (v & 0x00ff00ffUL) == 0 ||        // type 0001x
-    (v & 0xff00ff00UL) == 0 ||        // type 0010x
-    ((lo16 >> 8) == (lo16 & 0xff)));  // type 0011x
-  if (valid) return true;
-
-  // variant3 - 0b01000..0b11111 - 8-bit shifted inside the 32-bit room
-  unsigned shift = CountLeadingZeros_32(v);
-  uint64_t mask = (0xff000000ULL >> shift);
-  // If valid, it is type 01000 + shift
-  return ((shift < 24) && (v & mask) > 0) && ((v & (~mask)) == 0);
+/// imm0_65535 predicate - True if the 32-bit immediate is in the range
+/// [0,65535].
+def imm0_65535 : PatLeaf<(i32 imm), [{
+  return (uint32_t)N->getZExtValue() < 65536;
 }]>;


+/// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield
+/// e.g., 0xf000ffff
+def bf_inv_mask_imm : Operand<i32>,
+                      PatLeaf<(imm), [{ 
+  uint32_t v = (uint32_t)N->getZExtValue();
+  if (v == 0xffffffff)
+    return 0;               // an all-ones mask clears nothing; reject it
+  // naive checker. should do better, but simple is best for now since it's
+  // more likely to be correct.
+  while (v & 1) v >>= 1;    // shift off the low-order 1's
+  if (v)
+    {
+      while (!(v & 1)) v >>=1;  // shift off the cleared field (the 0's)
+      while (v & 1) v >>= 1;    // shift off the high-order 1's
+    }
+  // if this is a mask for clearing a bitfield, what's left should be zero.
+  return (v == 0);
+}] > {
+  let PrintMethod = "printBitfieldInvMaskImmOperand";
+}
+
+/// Split a 32-bit immediate into two 16 bit parts.
+def t2_lo16 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() & 0xffff,
+                                   MVT::i32);
+}]>;
+
+def t2_hi16 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() >> 16, MVT::i32);
+}]>;
+
+def t2_lo16AllZero : PatLeaf<(i32 imm), [{
+  // Returns true if all low 16-bits are 0.
+  return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0;
+  }], t2_hi16>;
+
 //===----------------------------------------------------------------------===//
-//  Thumb-2 to cover the functionality of the ARM instruction set.
+//  Thumb2 to cover the functionality of the ARM instruction set.
 //

-/// T2I_bin_irs - Defines a set of (op reg, {so_imm|reg|so_reg}) patterns for a
+/// T2I_bin_is - Defines a set of (op reg, {so_imm|so_reg}) patterns for a
 //  binary operation that produces a value.
-multiclass T2I_bin_irs<string opc, PatFrag opnode> {
+multiclass T2I_bin_is<string opc, PatFrag opnode> {
   // shifted imm
-   def ri : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
-                       !strconcat(opc, " $dst, $lhs, $rhs"),
-                       [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
-                      Requires<[HasThumb2]>;
-   // register
-   def rr : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
-                       !strconcat(opc, " $dst, $lhs, $rhs"),
-                       [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, 
-                      Requires<[HasThumb2]>;
+   def ri : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs),
+                !strconcat(opc, " $dst, $lhs, $rhs"),
+                [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>;
   // shifted register
-   def rs : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
-                       !strconcat(opc, " $dst, $lhs, $rhs"),
-                       [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, 
-                      Requires<[HasThumb2]>;
+   def rs : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
+                !strconcat(opc, " $dst, $lhs, $rhs"),
+                [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>;
 }

-/// T2I_bin_s_irs - Similar to T2I_bin_irs except it sets the 's' bit so the
+/// T2I_rbin_is - Same as T2I_bin_is, but the pattern operands are reversed.
+multiclass T2I_rbin_is<string opc, PatFrag opnode> {
+   // shifted imm: matches (op imm, reg); the register is printed first
+   def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs),
+                !strconcat(opc, " $dst, $rhs, $lhs"),
+                [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>;
+   // shifted register: matches (op so_reg, reg); the plain register is first
+   def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs),
+                !strconcat(opc, " $dst, $rhs, $lhs"),
+                [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>;
+}
+
+/// T2I_bin_s_is - Similar to T2I_bin_is except it sets the 's' bit so the
 /// instruction modifies the CPSR register.
 let Defs = [CPSR] in {
-multiclass T2I_bin_s_irs<string opc, PatFrag opnode> {
+multiclass T2I_bin_s_is<string opc, PatFrag opnode> {
   // shifted imm
-   def ri : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
-                       !strconcat(opc, "s $dst, $lhs, $rhs"),
-                       [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
-                      Requires<[HasThumb2]>;
-
-   // register
-   def rr : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
-                       !strconcat(opc, "s $dst, $lhs, $rhs"),
-                       [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, 
-                      Requires<[HasThumb2]>;
+   def ri : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs),
+                !strconcat(opc, "s $dst, $lhs, $rhs"),
+                [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>;

   // shifted register
-   def rs : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
-                       !strconcat(opc, "s $dst, $lhs, $rhs"),
-                       [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, 
-                      Requires<[HasThumb2]>;
+   def rs : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
+                !strconcat(opc, "s $dst, $lhs, $rhs"),
+                [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>;
 }
 }

-/// T2I_bin_c_irs - Similar to T2I_bin_irs except it uses the 's' bit. Also the
-/// instruction can optionally set the CPSR register.
+/// T2I_rbin_s_is - Same as T2I_bin_s_is, but the pattern operands are
+/// reversed. The assembly still prints the plain register before $lhs.
+let Defs = [CPSR] in {
+multiclass T2I_rbin_s_is<string opc, PatFrag opnode> {
+   // shifted imm: matches (op imm, reg)
+   def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs),
+                !strconcat(opc, "s $dst, $rhs, $lhs"),
+                [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>;
+
+   // shifted register: matches (op so_reg, reg)
+   def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs),
+                !strconcat(opc, "s $dst, $rhs, $lhs"),
+                [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>;
+}
+}
+
+/// T2I_bin_ii12s - Defines a set of (op reg, {so_imm|imm0_4095|so_reg}) patterns
+/// for a binary operation that produces a value.
+multiclass T2I_bin_ii12s<string opc, PatFrag opnode> {
+   // shifted imm
+   def ri : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs),
+                !strconcat(opc, " $dst, $lhs, $rhs"),
+                [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>;
+   // 12-bit imm
+   def ri12 : T2I<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
+                !strconcat(opc, "w $dst, $lhs, $rhs"),
+                [(set GPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]>;
+   // shifted register
+   def rs : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
+                !strconcat(opc, " $dst, $lhs, $rhs"),
+                [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>;
+}
+
+/// T2I_bin_c_is - Defines a set of (op reg, {so_imm|so_reg}) patterns for a
+/// binary operation that produces a value and uses the carry bit. It can also
+/// optionally set CPSR.
 let Uses = [CPSR] in {
-multiclass T2I_bin_c_irs<string opc, PatFrag opnode> {
+multiclass T2I_bin_c_is<string opc, PatFrag opnode> {
   // shifted imm
-   def ri : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs, cc_out:$s),
-                       !strconcat(opc, "${s} $dst, $lhs, $rhs"),
-                       [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
-                      Requires<[HasThumb2]>;
-
-   // register
-   def rr : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs, cc_out:$s),
-                       !strconcat(opc, "${s} $dst, $lhs, $rhs"),
-                       [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, 
-                      Requires<[HasThumb2]>;
+   def ri : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs, cc_out:$s),
+                !strconcat(opc, "${s} $dst, $lhs, $rhs"),
+                [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>;

   // shifted register
-   def rs : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs, cc_out:$s),
-                       !strconcat(opc, "${s} $dst, $lhs, $rhs"),
-                       [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, 
-                      Requires<[HasThumb2]>;
+   def rs : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs, cc_out:$s),
+                !strconcat(opc, "${s} $dst, $lhs, $rhs"),
+                [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>;
+}
+}
+
+/// T2I_rbin_c_is - Same as T2I_bin_c_is, but the pattern operands are
+/// reversed. The assembly still prints the plain register before $lhs.
+let Uses = [CPSR] in {
+multiclass T2I_rbin_c_is<string opc, PatFrag opnode> {
+   // shifted imm: matches (op imm, reg)
+   def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs, cc_out:$s),
+                !strconcat(opc, "${s} $dst, $rhs, $lhs"),
+                [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>;
+
+   // shifted register: matches (op so_reg, reg)
+   def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs, cc_out:$s),
+                !strconcat(opc, "${s} $dst, $rhs, $lhs"),
+                [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>;
+}
+}
+
+
+/// T2I_cmp_is - Defines a set of (op r, {so_imm|so_reg}) cmp / test
+/// patterns. Similar to T2I_bin_is except the instruction does not produce
+/// an explicit result; it only implicitly sets CPSR (hence Defs, not Uses).
+let Defs = [CPSR] in {
+multiclass T2I_cmp_is<string opc, PatFrag opnode> {
+   // shifted imm
+   def ri : T2I<(outs), (ins GPR:$lhs, t2_so_imm:$rhs),
+                !strconcat(opc, " $lhs, $rhs"),
+                [(opnode GPR:$lhs, t2_so_imm:$rhs)]>;
+
+   // shifted register
+   def rs : T2I<(outs), (ins GPR:$lhs, t2_so_reg:$rhs),
+                !strconcat(opc, " $lhs, $rhs"),
+                [(opnode GPR:$lhs, t2_so_reg:$rhs)]>;
 }
 }

@ -155,54 +257,184 @@ multiclass T2I_bin_c_irs<string opc, PatFrag opnode> {
 //===----------------------------------------------------------------------===//
 //  Move Instructions.
 //
-def tMOVi16  : PseudoInst<(outs GPR:$dst), (ins i32imm:$src),
-                          "movw $dst, $src",
-                          [(set GPR:$dst, imm0_65535:$src)]>, 
-                         Requires<[HasThumb2]>;

+let neverHasSideEffects = 1 in
+def t2MOVr : T2I<(outs GPR:$dst), (ins GPR:$src),
+                  "mov $dst, $src", []>;
+
+def t2MOVi16 : T2I<(outs GPR:$dst), (ins i32imm:$src),
+                   "movw $dst, $src",
+                   [(set GPR:$dst, imm0_65535:$src)]>;
+
+
+// FIXME: Move (shifted register) is a pseudo-instruction for ASR, LSL, LSR,
+// ROR, and RRX. Consider splitting into multiple instructions.
+def t2MOVs  : T2I<(outs GPR:$dst), (ins so_reg:$src),
+                  "mov $dst, $src",
+                  [(set GPR:$dst, so_reg:$src)]>;
+def t2MOVrx : T2I<(outs GPR:$dst), (ins GPR:$src),
+                  "mov $dst, $src, rrx",
+                  [(set GPR:$dst, (ARMrrx GPR:$src))]>;
+
+
+// FIXME: Also available in ARM mode.
 let Constraints = "$src = $dst" in
-def tMOVTi16 : PseudoInst<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm),
-                          "movt $dst, $imm",
-                          [(set GPR:$dst, (or (and GPR:$src, 0xffff), 
-                                              imm16high:$imm))]>,
-                         Requires<[HasThumb2]>;
-
-def : Pat<(and (or GPR:$src, imm16high:$imm1), imm16high0xffff:$imm2),
-          (tMOVTi16 GPR:$src, (HI16 imm16high:$imm1))>,
-         Requires<[HasThumb2]>;
-
-def : Pat<(i32 imm:$imm),
-          (tMOVTi16 (tMOVi16 (LO16 imm:$imm)),(HI16 imm:$imm))>,
-         Requires<[HasThumb2]>;
+def t2MOVTi16 : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm),
+                   "movt $dst, $imm",
+                   [(set GPR:$dst,
+                         (or (and GPR:$src, 0xffff), t2_lo16AllZero:$imm))]>;

 //===----------------------------------------------------------------------===//
 //  Arithmetic Instructions.
 //
-defm t2ADD  : T2I_bin_irs <"add", BinOpFrag<(add node:$LHS, node:$RHS)>>;
-defm t2SUB  : T2I_bin_irs <"sub", BinOpFrag<(sub node:$LHS, node:$RHS)>>;

-def tADDri12 : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
-                          "add $dst, $lhs, $rhs", 
-                          [(set GPR:$dst, (add GPR:$lhs, imm0_4095:$rhs))]>,
-                         Requires<[HasThumb2]>; 
-def tSUBri12 : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), 
-                          "sub $dst, $lhs, $rhs",
-                          [(set GPR:$dst, (add GPR:$lhs, imm0_4095_neg:$rhs))]>,
-                         Requires<[HasThumb2]>;
+defm t2ADD  : T2I_bin_ii12s<"add", BinOpFrag<(add  node:$LHS, node:$RHS)>>;
+defm t2SUB  : T2I_bin_ii12s<"sub", BinOpFrag<(sub  node:$LHS, node:$RHS)>>;

-defm t2ADDS : T2I_bin_s_irs<"add", BinOpFrag<(addc node:$LHS, node:$RHS)>>;
-defm t2SUBS : T2I_bin_s_irs<"sub", BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+// ADD and SUB with 's' bit set. No 12-bit immediate (T4) variants.
+defm t2ADDS : T2I_bin_s_is<"add", BinOpFrag<(addc node:$LHS, node:$RHS)>>;
+defm t2SUBS : T2I_bin_s_is<"sub", BinOpFrag<(subc node:$LHS, node:$RHS)>>;

-defm t2ADC : T2I_bin_c_irs<"adc", BinOpFrag<(adde node:$LHS, node:$RHS)>>;
-defm t2SBC : T2I_bin_c_irs<"sbc", BinOpFrag<(sube node:$LHS, node:$RHS)>>;
+// FIXME: predication support
+defm t2ADC  : T2I_bin_c_is<"adc", BinOpFrag<(adde node:$LHS, node:$RHS)>>;
+defm t2SBC  : T2I_bin_c_is<"sbc", BinOpFrag<(sube node:$LHS, node:$RHS)>>;
+
+// RSB, RSBS (flag-setting, Defs CPSR) and RSC (carry-using, Uses CPSR)
+defm t2RSB  : T2I_rbin_is  <"rsb", BinOpFrag<(sub  node:$LHS, node:$RHS)>>;
+defm t2RSBS : T2I_rbin_s_is<"rsb", BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+defm t2RSC  : T2I_rbin_c_is<"rsc", BinOpFrag<(sube node:$LHS, node:$RHS)>>;
+
+// (sub X, imm) gets canonicalized to (add X, -imm).  Match this form.
+def : Thumb2Pat<(add       GPR:$src, t2_so_imm_neg:$imm),
+                (t2SUBri   GPR:$src, t2_so_imm_neg:$imm)>;
+def : Thumb2Pat<(add       GPR:$src, imm0_4095_neg:$imm),
+                (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>;


-def tMLS : PseudoInst<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), 
-                      "mls $dst, $a, $b, $c", 
-                      [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>,
-                     Requires<[HasThumb2]>;
+//===----------------------------------------------------------------------===//
+//  Bitwise Instructions.
+//

-def tORNrs : PseudoInst<(outs GPR:$dst), (ins GPR:$src1, t2_so_reg:$src2),
-                        "orn $dst, $src1, $src2",
-                        [(set GPR:$dst, (or GPR:$src1, (not t2_so_reg: $src2)))]>,
-                       Requires<[HasThumb2]>;
+defm t2AND  : T2I_bin_is  <"and", BinOpFrag<(and node:$LHS, node:$RHS)>>;
+defm t2ORR  : T2I_bin_is  <"orr", BinOpFrag<(or  node:$LHS, node:$RHS)>>;
+defm t2EOR  : T2I_bin_is  <"eor", BinOpFrag<(xor node:$LHS, node:$RHS)>>;
+
+defm t2BIC  : T2I_bin_is  <"bic", BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
+
+def : Thumb2Pat<(and     GPR:$src, t2_so_imm_not:$imm),
+                (t2BICri GPR:$src, t2_so_imm_not:$imm)>;
+
+defm t2ORN  : T2I_bin_is  <"orn", BinOpFrag<(or  node:$LHS, (not node:$RHS))>>;
+
+def : Thumb2Pat<(or      GPR:$src, t2_so_imm_not:$imm),
+                (t2ORNri GPR:$src, t2_so_imm_not:$imm)>;
+
+
+def t2MVNr : T2I<(outs GPR:$dst), (ins t2_so_reg:$rhs),
+                  "mvn $dst, $rhs",
+                 [(set GPR:$dst, (not t2_so_reg:$rhs))]>;
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def t2MVNi : T2I<(outs GPR:$dst), (ins t2_so_imm_not:$rhs),
+                  "mvn $dst, $rhs",
+                 [(set GPR:$dst, t2_so_imm_not:$rhs)]>;
+
+// A8.6.17  BFC - Bitfield clear
+// FIXME: Also available in ARM mode.
+let Constraints = "$src = $dst" in
+def t2BFC : T2I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
+                "bfc $dst, $imm",
+                [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]>;
+
+// FIXME: A8.6.18  BFI - Bitfield insert (Encoding T1)
+
+//===----------------------------------------------------------------------===//
+//  Multiply Instructions.
+//
+def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b),
+                "mul $dst, $a, $b",
+                [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>;
+
+def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
+		"mla $dst, $a, $b, $c",
+		[(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>;
+
+def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
+		"mls $dst, $a, $b, $c",
+                [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>;
+
+// FIXME: SMULL, etc.
+
+//===----------------------------------------------------------------------===//
+//  Misc. Arithmetic Instructions.
+//
+
+/////
+/// A8.6.31  CLZ
+/////
+// FIXME not firing? but ARM version does...
+def t2CLZ : T2I<(outs GPR:$dst), (ins GPR:$src),
+                "clz $dst, $src",
+                [(set GPR:$dst, (ctlz GPR:$src))]>;
+
+def t2REV : T2I<(outs GPR:$dst), (ins GPR:$src),
+                "rev $dst, $src",
+                [(set GPR:$dst, (bswap GPR:$src))]>;
+
+def t2REV16 : T2I<(outs GPR:$dst), (ins GPR:$src),
+                "rev16 $dst, $src",
+                [(set GPR:$dst,
+                    (or (and (srl GPR:$src, (i32 8)), 0xFF),
+                        (or (and (shl GPR:$src, (i32 8)), 0xFF00),
+                            (or (and (srl GPR:$src, (i32 8)), 0xFF0000),
+                                (and (shl GPR:$src, (i32 8)), 0xFF000000)))))]>;
+
+/////
+/// A8.6.137  REVSH
+/////
+def t2REVSH : T2I<(outs GPR:$dst), (ins GPR:$src),
+                 "revsh $dst, $src",
+                 [(set GPR:$dst,
+                    (sext_inreg
+                      (or (srl (and GPR:$src, 0xFFFF), (i32 8)),
+                          (shl GPR:$src, (i32 8))), i16))]>;
+
+// FIXME: PKHxx etc.
+
+//===----------------------------------------------------------------------===//
+//  Comparison Instructions...
+//
+
+defm t2CMP   : T2I_cmp_is<"cmp",
+                          BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
+defm t2CMPnz : T2I_cmp_is<"cmp",
+                          BinOpFrag<(ARMcmpNZ node:$LHS, node:$RHS)>>;
+
+defm t2CMN   : T2I_cmp_is<"cmn",
+                          BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
+defm t2CMNnz : T2I_cmp_is<"cmn",
+                          BinOpFrag<(ARMcmpNZ node:$LHS,(ineg node:$RHS))>>;
+
+def : Thumb2Pat<(ARMcmp  GPR:$src, t2_so_imm_neg:$imm),
+                (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>;
+
+def : Thumb2Pat<(ARMcmpNZ  GPR:$src, t2_so_imm_neg:$imm),
+                (t2CMNri   GPR:$src, t2_so_imm_neg:$imm)>;
+
+// FIXME: TST, TEQ, etc.
+
+// A8.6.27  CBNZ, CBZ - Compare and branch on (non)zero.
+// Short range conditional branch. Looks awesome for loops. Need to figure
+// out how to use this one.
+
+// FIXME: Conditional moves
+
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// Large immediate handling.
+
+def : Thumb2Pat<(i32 imm:$src),
+                (t2MOVTi16 (t2MOVi16 (t2_lo16 imm:$src)),
+                           (t2_hi16 imm:$src))>;
--- a/llvm/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@ -98,6 +98,7 @@ namespace {
    void printSOImm2PartOperand(const MachineInstr *MI, int opNum);
    void printSOOperand(const MachineInstr *MI, int OpNum);
    void printSORegOperand(const MachineInstr *MI, int opNum);
+    void printT2SOImmOperand(const MachineInstr *MI, int opNum);
    void printAddrMode2Operand(const MachineInstr *MI, int OpNo);
    void printAddrMode2OffsetOperand(const MachineInstr *MI, int OpNo);
    void printAddrMode3Operand(const MachineInstr *MI, int OpNo);
@ -108,6 +109,7 @@ namespace {
                               const char *Modifier = 0);
    void printAddrModePCOperand(const MachineInstr *MI, int OpNo,
                                const char *Modifier = 0);
+    void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int OpNo);
    void printThumbAddrModeRROperand(const MachineInstr *MI, int OpNo);
    void printThumbAddrModeRI5Operand(const MachineInstr *MI, int OpNo,
                                      unsigned Scale);
@ -455,6 +457,24 @@ void ARMAsmPrinter::printSORegOperand(const MachineInstr *MI, int Op) {
  }
 }

+static void printT2SOImm(raw_ostream &O, int64_t V) {
+  unsigned Imm = ARM_AM::getT2SOImmValDecode(V);
+  
+  // Always print the immediate directly, as the "rotate" form
+  // is deprecated in some contexts.
+  O << "#" << Imm;
+}
+
+/// printT2SOImmOperand - T2SOImm is:
+///  1. a 4-bit splat control value and 8-bit immediate value
+///  2. a 5-bit rotate amount and a non-zero 8-bit immediate value
+///     represented by a normalized 7-bit value (msb is always 1)
+void ARMAsmPrinter::printT2SOImmOperand(const MachineInstr *MI, int OpNum) {
+  const MachineOperand &MO = MI->getOperand(OpNum);
+  assert(MO.isImm() && "Not a valid so_imm value!");
+  printT2SOImm(O, MO.getImm());
+}
+
 void ARMAsmPrinter::printAddrMode2Operand(const MachineInstr *MI, int Op) {
  const MachineOperand &MO1 = MI->getOperand(Op);
  const MachineOperand &MO2 = MI->getOperand(Op+1);
@ -619,6 +639,16 @@ void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op,
  O << "[pc, +" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName << "]";
 }

+void  // Prints a bf_inv_mask_imm operand as "#lsb, #width".
+ARMAsmPrinter::printBitfieldInvMaskImmOperand(const MachineInstr *MI, int Op) {
+  const MachineOperand &MO = MI->getOperand(Op);
+  assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!");  // check first
+  uint32_t v = ~MO.getImm();      // complement: set bits mark the cleared field
+  int32_t lsb = ffs (v) - 1;      // zero-based index of the lowest set bit
+  int32_t width = fls (v) - lsb;  // NOTE(review): fls() is not POSIX - confirm
+  O << "#" << lsb << ", #" << width;
+}
+
 void
 ARMAsmPrinter::printThumbAddrModeRROperand(const MachineInstr *MI, int Op) {
  const MachineOperand &MO1 = MI->getOperand(Op);