[X86] Separate imm from relocImm handling.

relocImm was a ComplexPattern that handled both ConstantSDNode and X86Wrapper. But it was only applied selectively because using it would prevent patterns from being imported into FastISel or GlobalISel. So it only got applied to flag setting instructions, stores, RMW arithmetic instructions, and rotates. Most of the test changes are a result of making patterns available to GlobalISel or FastISel. The absolute-cmp.ll change is due to this fixing a pattern ordering issue so that an absolute symbol is matched to an 8-bit immediate before trying a 32-bit immediate. I tried to use PatFrags to reduce the repetition, but I was getting errors from TableGen.
2020-06-13 11:29:28 -07:00 · 2020-06-13 11:29:28 -07:00 · 8885a7640b
parent 6973125cb7
commit 8885a7640b
9 changed files with 246 additions and 59 deletions
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@ -472,14 +472,6 @@ namespace {

    bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const;

-    /// Returns whether this is a relocatable immediate in the range
-    /// [-2^Width .. 2^Width-1].
-    template <unsigned Width> bool isSExtRelocImm(SDNode *N) const {
-      if (auto *CN = dyn_cast<ConstantSDNode>(N))
-        return isInt<Width>(CN->getSExtValue());
-      return isSExtAbsoluteSymbolRef(Width, N);
-    }
-
    // Indicates we should prefer to use a non-temporal load for this load.
    bool useNonTemporalLoad(LoadSDNode *N) const {
      if (!N->isNonTemporal())
@ -2651,12 +2643,6 @@ bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
 }

 bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) {
-  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
-    Op = CurDAG->getTargetConstant(CN->getAPIntValue(), SDLoc(CN),
-                                   N.getValueType());
-    return true;
-  }
-
  // Keep track of the original value type and whether this value was
  // truncated. If we see a truncation from pointer type to VT that truncates
  // bits that are known to be zero, we can use a narrow reference.
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@ -605,16 +605,16 @@ def invalid_node : SDNode<"<<invalid_node>>", SDTIntLeaf,[],"<<invalid_node>>">;


 def Xi8  : X86TypeInfo<i8, "b", GR8, loadi8, i8mem,
-                       Imm8, i8imm, relocImm8_su, i8imm, invalid_node,
+                       Imm8, i8imm, imm_su, i8imm, invalid_node,
                       0, OpSizeFixed, 0>;
 def Xi16 : X86TypeInfo<i16, "w", GR16, loadi16, i16mem,
-                       Imm16, i16imm, relocImm16_su, i16i8imm, i16immSExt8_su,
+                       Imm16, i16imm, imm_su, i16i8imm, i16immSExt8_su,
                       1, OpSize16, 0>;
 def Xi32 : X86TypeInfo<i32, "l", GR32, loadi32, i32mem,
-                       Imm32, i32imm, relocImm32_su, i32i8imm, i32immSExt8_su,
+                       Imm32, i32imm, imm_su, i32i8imm, i32immSExt8_su,
                       1, OpSize32, 0>;
 def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem,
-                       Imm32S, i64i32imm, i64relocImmSExt32_su, i64i8imm, i64immSExt8_su,
+                       Imm32S, i64i32imm, i64immSExt32_su, i64i8imm, i64immSExt8_su,
                       1, OpSizeFixed, 1>;

 /// ITy - This instruction base class takes the type info for the instruction.
@ -1217,6 +1217,146 @@ def : Pat<(store (X86adc_flag GR64:$src, (loadi64 addr:$dst), EFLAGS),
                 addr:$dst),
          (ADC64mr addr:$dst, GR64:$src)>;

+// Patterns for basic arithmetic ops with relocImm for the immediate field.
+multiclass ArithBinOp_RF_relocImm_Pats<SDNode OpNodeFlag, SDNode OpNode> {
+  def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2),
+            (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>;
+  def : Pat<(OpNodeFlag GR16:$src1, i16relocImmSExt8_su:$src2),
+            (!cast<Instruction>(NAME#"16ri8") GR16:$src1, i16relocImmSExt8_su:$src2)>;
+  def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2),
+            (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>;
+  def : Pat<(OpNodeFlag GR32:$src1, i32relocImmSExt8_su:$src2),
+            (!cast<Instruction>(NAME#"32ri8") GR32:$src1, i32relocImmSExt8_su:$src2)>;
+  def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2),
+            (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>;
+  def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt8_su:$src2),
+            (!cast<Instruction>(NAME#"64ri8") GR64:$src1, i64relocImmSExt8_su:$src2)>;
+  def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2),
+            (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>;
+
+  def : Pat<(store (OpNode (load addr:$dst), relocImm8_su:$src), addr:$dst),
+            (!cast<Instruction>(NAME#"8mi") addr:$dst, relocImm8_su:$src)>;
+  def : Pat<(store (OpNode (load addr:$dst), i16relocImmSExt8_su:$src), addr:$dst),
+            (!cast<Instruction>(NAME#"16mi8") addr:$dst, i16relocImmSExt8_su:$src)>;
+  def : Pat<(store (OpNode (load addr:$dst), relocImm16_su:$src), addr:$dst),
+            (!cast<Instruction>(NAME#"16mi") addr:$dst, relocImm16_su:$src)>;
+  def : Pat<(store (OpNode (load addr:$dst), i32relocImmSExt8_su:$src), addr:$dst),
+            (!cast<Instruction>(NAME#"32mi8") addr:$dst, i32relocImmSExt8_su:$src)>;
+  def : Pat<(store (OpNode (load addr:$dst), relocImm32_su:$src), addr:$dst),
+            (!cast<Instruction>(NAME#"32mi") addr:$dst, relocImm32_su:$src)>;
+  def : Pat<(store (OpNode (load addr:$dst), i64relocImmSExt8_su:$src), addr:$dst),
+            (!cast<Instruction>(NAME#"64mi8") addr:$dst, i64relocImmSExt8_su:$src)>;
+  def : Pat<(store (OpNode (load addr:$dst), i64relocImmSExt32_su:$src), addr:$dst),
+            (!cast<Instruction>(NAME#"64mi32") addr:$dst, i64relocImmSExt32_su:$src)>;
+}
+
+multiclass ArithBinOp_RFF_relocImm_Pats<SDNode OpNodeFlag> {
+  def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2, EFLAGS),
+            (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>;
+  def : Pat<(OpNodeFlag GR16:$src1, i16relocImmSExt8_su:$src2, EFLAGS),
+            (!cast<Instruction>(NAME#"16ri8") GR16:$src1, i16relocImmSExt8_su:$src2)>;
+  def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2, EFLAGS),
+            (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>;
+  def : Pat<(OpNodeFlag GR32:$src1, i32relocImmSExt8_su:$src2, EFLAGS),
+            (!cast<Instruction>(NAME#"32ri8") GR32:$src1, i32relocImmSExt8_su:$src2)>;
+  def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2, EFLAGS),
+            (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>;
+  def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt8_su:$src2, EFLAGS),
+            (!cast<Instruction>(NAME#"64ri8") GR64:$src1, i64relocImmSExt8_su:$src2)>;
+  def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2, EFLAGS),
+            (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>;
+
+  def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm8_su:$src, EFLAGS), addr:$dst),
+            (!cast<Instruction>(NAME#"8mi") addr:$dst, relocImm8_su:$src)>;
+  def : Pat<(store (OpNodeFlag (load addr:$dst), i16relocImmSExt8_su:$src, EFLAGS), addr:$dst),
+            (!cast<Instruction>(NAME#"16mi8") addr:$dst, i16relocImmSExt8_su:$src)>;
+  def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm16_su:$src, EFLAGS), addr:$dst),
+            (!cast<Instruction>(NAME#"16mi") addr:$dst, relocImm16_su:$src)>;
+  def : Pat<(store (OpNodeFlag (load addr:$dst), i32relocImmSExt8_su:$src, EFLAGS), addr:$dst),
+            (!cast<Instruction>(NAME#"32mi8") addr:$dst, i32relocImmSExt8_su:$src)>;
+  def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm32_su:$src, EFLAGS), addr:$dst),
+            (!cast<Instruction>(NAME#"32mi") addr:$dst, relocImm32_su:$src)>;
+  def : Pat<(store (OpNodeFlag (load addr:$dst), i64relocImmSExt8_su:$src, EFLAGS), addr:$dst),
+            (!cast<Instruction>(NAME#"64mi8") addr:$dst, i64relocImmSExt8_su:$src)>;
+  def : Pat<(store (OpNodeFlag (load addr:$dst), i64relocImmSExt32_su:$src, EFLAGS), addr:$dst),
+            (!cast<Instruction>(NAME#"64mi32") addr:$dst, i64relocImmSExt32_su:$src)>;
+}
+
+multiclass ArithBinOp_F_relocImm_Pats<SDNode OpNodeFlag> {
+  def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2),
+            (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>;
+  def : Pat<(OpNodeFlag GR16:$src1, i16relocImmSExt8_su:$src2),
+            (!cast<Instruction>(NAME#"16ri8") GR16:$src1, i16relocImmSExt8_su:$src2)>;
+  def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2),
+            (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>;
+  def : Pat<(OpNodeFlag GR32:$src1, i32relocImmSExt8_su:$src2),
+            (!cast<Instruction>(NAME#"32ri8") GR32:$src1, i32relocImmSExt8_su:$src2)>;
+  def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2),
+            (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>;
+  def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt8_su:$src2),
+            (!cast<Instruction>(NAME#"64ri8") GR64:$src1, i64relocImmSExt8_su:$src2)>;
+  def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2),
+            (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>;
+
+  def : Pat<(OpNodeFlag (loadi8 addr:$src1), relocImm8_su:$src2),
+            (!cast<Instruction>(NAME#"8mi") addr:$src1, relocImm8_su:$src2)>;
+  def : Pat<(OpNodeFlag (loadi16 addr:$src1), i16relocImmSExt8_su:$src2),
+            (!cast<Instruction>(NAME#"16mi8") addr:$src1, i16relocImmSExt8_su:$src2)>;
+  def : Pat<(OpNodeFlag (loadi16 addr:$src1), relocImm16_su:$src2),
+            (!cast<Instruction>(NAME#"16mi") addr:$src1, relocImm16_su:$src2)>;
+  def : Pat<(OpNodeFlag (loadi32 addr:$src1), i32relocImmSExt8_su:$src2),
+            (!cast<Instruction>(NAME#"32mi8") addr:$src1, i32relocImmSExt8_su:$src2)>;
+  def : Pat<(OpNodeFlag (loadi32 addr:$src1), relocImm32_su:$src2),
+            (!cast<Instruction>(NAME#"32mi") addr:$src1, relocImm32_su:$src2)>;
+  def : Pat<(OpNodeFlag (loadi64 addr:$src1), i64relocImmSExt8_su:$src2),
+            (!cast<Instruction>(NAME#"64mi8") addr:$src1, i64relocImmSExt8_su:$src2)>;
+  def : Pat<(OpNodeFlag (loadi64 addr:$src1), i64relocImmSExt32_su:$src2),
+            (!cast<Instruction>(NAME#"64mi32") addr:$src1, i64relocImmSExt32_su:$src2)>;
+}
+
+defm AND : ArithBinOp_RF_relocImm_Pats<X86and_flag, and>;
+defm OR  : ArithBinOp_RF_relocImm_Pats<X86or_flag, or>;
+defm XOR : ArithBinOp_RF_relocImm_Pats<X86xor_flag, xor>;
+defm ADD : ArithBinOp_RF_relocImm_Pats<X86add_flag, add>;
+defm SUB : ArithBinOp_RF_relocImm_Pats<X86sub_flag, sub>;
+
+defm ADC : ArithBinOp_RFF_relocImm_Pats<X86adc_flag>;
+defm SBB : ArithBinOp_RFF_relocImm_Pats<X86sbb_flag>;
+
+defm CMP : ArithBinOp_F_relocImm_Pats<X86cmp>;
+
+// ADC is commutable, but we can't indicate that to tablegen. So manually
+// reverse the operands: match the relocImm as the first source operand and
+// emit it in the instruction's immediate (second) operand position.
+def : Pat<(X86adc_flag relocImm8_su:$src2, GR8:$src1, EFLAGS),
+          (ADC8ri GR8:$src1, relocImm8_su:$src2)>;
+def : Pat<(X86adc_flag i16relocImmSExt8_su:$src2, GR16:$src1, EFLAGS),
+          (ADC16ri8 GR16:$src1, i16relocImmSExt8_su:$src2)>;
+def : Pat<(X86adc_flag relocImm16_su:$src2, GR16:$src1, EFLAGS),
+          (ADC16ri GR16:$src1, relocImm16_su:$src2)>;
+def : Pat<(X86adc_flag i32relocImmSExt8_su:$src2, GR32:$src1, EFLAGS),
+          (ADC32ri8 GR32:$src1, i32relocImmSExt8_su:$src2)>;
+def : Pat<(X86adc_flag relocImm32_su:$src2, GR32:$src1, EFLAGS),
+          (ADC32ri GR32:$src1, relocImm32_su:$src2)>;
+def : Pat<(X86adc_flag i64relocImmSExt8_su:$src2, GR64:$src1, EFLAGS),
+          (ADC64ri8 GR64:$src1, i64relocImmSExt8_su:$src2)>;
+def : Pat<(X86adc_flag i64relocImmSExt32_su:$src2, GR64:$src1, EFLAGS),
+          (ADC64ri32 GR64:$src1, i64relocImmSExt32_su:$src2)>;
+
+def : Pat<(store (X86adc_flag relocImm8_su:$src, (load addr:$dst), EFLAGS), addr:$dst),
+          (ADC8mi addr:$dst, relocImm8_su:$src)>;
+def : Pat<(store (X86adc_flag i16relocImmSExt8_su:$src, (load addr:$dst), EFLAGS), addr:$dst),
+          (ADC16mi8 addr:$dst, i16relocImmSExt8_su:$src)>;
+def : Pat<(store (X86adc_flag relocImm16_su:$src, (load addr:$dst), EFLAGS), addr:$dst),
+          (ADC16mi addr:$dst, relocImm16_su:$src)>;
+def : Pat<(store (X86adc_flag i32relocImmSExt8_su:$src, (load addr:$dst), EFLAGS), addr:$dst),
+          (ADC32mi8 addr:$dst, i32relocImmSExt8_su:$src)>;
+def : Pat<(store (X86adc_flag relocImm32_su:$src, (load addr:$dst), EFLAGS), addr:$dst),
+          (ADC32mi addr:$dst, relocImm32_su:$src)>;
+def : Pat<(store (X86adc_flag i64relocImmSExt8_su:$src, (load addr:$dst), EFLAGS), addr:$dst),
+          (ADC64mi8 addr:$dst, i64relocImmSExt8_su:$src)>;
+def : Pat<(store (X86adc_flag i64relocImmSExt32_su:$src, (load addr:$dst), EFLAGS), addr:$dst),
+          (ADC64mi32 addr:$dst, i64relocImmSExt32_su:$src)>;
+
 //===----------------------------------------------------------------------===//
 // Semantically, test instructions are similar like AND, except they don't
 // generate a result.  From an encoding perspective, they are very different:
@ -1266,6 +1406,25 @@ let isCompare = 1 in {
                             "{$src, %rax|rax, $src}">;
 } // isCompare

+// Patterns to match a relocImm into the immediate field. The first result
+// operand mirrors what the source pattern bound to $src1: a GR register for
+// the ri forms and an addr for the mi forms.
+def : Pat<(X86testpat GR8:$src1, relocImm8_su:$src2),
+          (TEST8ri GR8:$src1, relocImm8_su:$src2)>;
+def : Pat<(X86testpat GR16:$src1, relocImm16_su:$src2),
+          (TEST16ri GR16:$src1, relocImm16_su:$src2)>;
+def : Pat<(X86testpat GR32:$src1, relocImm32_su:$src2),
+          (TEST32ri GR32:$src1, relocImm32_su:$src2)>;
+def : Pat<(X86testpat GR64:$src1, i64relocImmSExt32_su:$src2),
+          (TEST64ri32 GR64:$src1, i64relocImmSExt32_su:$src2)>;
+
+def : Pat<(X86testpat (loadi8 addr:$src1), relocImm8_su:$src2),
+          (TEST8mi addr:$src1, relocImm8_su:$src2)>;
+def : Pat<(X86testpat (loadi16 addr:$src1), relocImm16_su:$src2),
+          (TEST16mi addr:$src1, relocImm16_su:$src2)>;
+def : Pat<(X86testpat (loadi32 addr:$src1), relocImm32_su:$src2),
+          (TEST32mi addr:$src1, relocImm32_su:$src2)>;
+def : Pat<(X86testpat (loadi64 addr:$src1), i64relocImmSExt32_su:$src2),
+          (TEST64mi32 addr:$src1, i64relocImmSExt32_su:$src2)>;
+
 //===----------------------------------------------------------------------===//
 // ANDN Instruction
 //
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@ -1964,10 +1964,6 @@ def : Pat<(X86sub_flag 0, GR16:$src), (NEG16r GR16:$src)>;
 def : Pat<(X86sub_flag 0, GR32:$src), (NEG32r GR32:$src)>;
 def : Pat<(X86sub_flag 0, GR64:$src), (NEG64r GR64:$src)>;

-// sub reg, relocImm
-def : Pat<(X86sub_flag GR64:$src1, i64relocImmSExt8_su:$src2),
-          (SUB64ri8 GR64:$src1, i64relocImmSExt8_su:$src2)>;
-
 // mul reg, reg
 def : Pat<(mul GR16:$src1, GR16:$src2),
          (IMUL16rr GR16:$src1, GR16:$src2)>;
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@ -837,11 +837,10 @@ def tls64baseaddr : ComplexPattern<i64, 5, "selectTLSADDRAddr",

 def vectoraddr : ComplexPattern<iPTR, 5, "selectVectorAddr", [],[SDNPWantParent]>;

-// A relocatable immediate is either an immediate operand or an operand that can
-// be relocated by the linker to an immediate, such as a regular symbol in
-// non-PIC code.
-def relocImm : ComplexPattern<iAny, 1, "selectRelocImm", [imm, X86Wrapper], [],
-                              0>;
+// A relocatable immediate is an operand that can be relocated by the linker to
+// an immediate, such as a regular symbol in non-PIC code.
+def relocImm : ComplexPattern<iAny, 1, "selectRelocImm",
+                              [X86Wrapper], [], 0>;

 //===----------------------------------------------------------------------===//
 // X86 Instruction Predicate Definitions.
@ -1042,13 +1041,17 @@ def i32immSExt8  : ImmLeaf<i32, [{ return isInt<8>(Imm); }]>;
 def i64immSExt8  : ImmLeaf<i64, [{ return isInt<8>(Imm); }]>;
 def i64immSExt32 : ImmLeaf<i64, [{ return isInt<32>(Imm); }]>;

-// FIXME: Ideally we would just replace the above i*immSExt* matchers with
-// relocImm-based matchers, but then FastISel would be unable to use them.
+def i16relocImmSExt8 : PatLeaf<(i16 relocImm), [{
+  return isSExtAbsoluteSymbolRef(8, N);
+}]>;
+def i32relocImmSExt8 : PatLeaf<(i32 relocImm), [{
+  return isSExtAbsoluteSymbolRef(8, N);
+}]>;
 def i64relocImmSExt8 : PatLeaf<(i64 relocImm), [{
-  return isSExtRelocImm<8>(N);
+  return isSExtAbsoluteSymbolRef(8, N);
 }]>;
 def i64relocImmSExt32 : PatLeaf<(i64 relocImm), [{
-  return isSExtRelocImm<32>(N);
+  return isSExtAbsoluteSymbolRef(32, N);
 }]>;

 // If we have multiple users of an immediate, it's much smaller to reuse
@ -1068,6 +1071,13 @@ def i64relocImmSExt32 : PatLeaf<(i64 relocImm), [{
 // Eventually, it would be nice to allow ConstantHoisting to merge constants
 // globally for potentially added savings.
 //
+def imm_su : PatLeaf<(imm), [{
+    return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def i64immSExt32_su : PatLeaf<(i64immSExt32), [{
+    return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+
 def relocImm8_su : PatLeaf<(i8 relocImm), [{
    return !shouldAvoidImmediateInstFormsForSize(N);
 }]>;
@ -1078,6 +1088,19 @@ def relocImm32_su : PatLeaf<(i32 relocImm), [{
    return !shouldAvoidImmediateInstFormsForSize(N);
 }]>;

+def i16relocImmSExt8_su : PatLeaf<(i16relocImmSExt8), [{
+    return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def i32relocImmSExt8_su : PatLeaf<(i32relocImmSExt8), [{
+    return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def i64relocImmSExt8_su : PatLeaf<(i64relocImmSExt8), [{
+    return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def i64relocImmSExt32_su : PatLeaf<(i64relocImmSExt32), [{
+    return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+
 def i16immSExt8_su : PatLeaf<(i16immSExt8), [{
    return !shouldAvoidImmediateInstFormsForSize(N);
 }]>;
@ -1088,13 +1111,6 @@ def i64immSExt8_su : PatLeaf<(i64immSExt8), [{
    return !shouldAvoidImmediateInstFormsForSize(N);
 }]>;

-def i64relocImmSExt8_su : PatLeaf<(i64relocImmSExt8), [{
-    return !shouldAvoidImmediateInstFormsForSize(N);
-}]>;
-def i64relocImmSExt32_su : PatLeaf<(i64relocImmSExt32), [{
-    return !shouldAvoidImmediateInstFormsForSize(N);
-}]>;
-
 // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
 // unsigned field.
 def i64immZExt32 : ImmLeaf<i64, [{ return isUInt<32>(Imm); }]>;
@ -1559,7 +1575,7 @@ def MOV16ri : Ii16<0xB8, AddRegFrm, (outs GR16:$dst), (ins i16imm:$src),
                   [(set GR16:$dst, imm:$src)]>, OpSize16;
 def MOV32ri : Ii32<0xB8, AddRegFrm, (outs GR32:$dst), (ins i32imm:$src),
                   "mov{l}\t{$src, $dst|$dst, $src}",
-                   [(set GR32:$dst, relocImm:$src)]>, OpSize32;
+                   [(set GR32:$dst, imm:$src)]>, OpSize32;
 def MOV64ri32 : RIi32S<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
                       "mov{q}\t{$src, $dst|$dst, $src}",
                       [(set GR64:$dst, i64immSExt32:$src)]>;
@ -1567,7 +1583,7 @@ def MOV64ri32 : RIi32S<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
 let isReMaterializable = 1, isMoveImm = 1 in {
 def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
                    "movabs{q}\t{$src, $dst|$dst, $src}",
-                    [(set GR64:$dst, relocImm:$src)]>;
+                    [(set GR64:$dst, imm:$src)]>;
 }

 // Longer forms that use a ModR/M byte. Needed for disassembler
@ -1587,19 +1603,31 @@ def MOV32ri_alt : Ii32<0xC7, MRM0r, (outs GR32:$dst), (ins i32imm:$src),
 let SchedRW = [WriteStore] in {
 def MOV8mi  : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
                   "mov{b}\t{$src, $dst|$dst, $src}",
-                   [(store (i8 relocImm8_su:$src), addr:$dst)]>;
+                   [(store (i8 imm_su:$src), addr:$dst)]>;
 def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src),
                   "mov{w}\t{$src, $dst|$dst, $src}",
-                   [(store (i16 relocImm16_su:$src), addr:$dst)]>, OpSize16;
+                   [(store (i16 imm_su:$src), addr:$dst)]>, OpSize16;
 def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
                   "mov{l}\t{$src, $dst|$dst, $src}",
-                   [(store (i32 relocImm32_su:$src), addr:$dst)]>, OpSize32;
+                   [(store (i32 imm_su:$src), addr:$dst)]>, OpSize32;
 def MOV64mi32 : RIi32S<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
                       "mov{q}\t{$src, $dst|$dst, $src}",
-                       [(store i64relocImmSExt32_su:$src, addr:$dst)]>,
+                       [(store i64immSExt32_su:$src, addr:$dst)]>,
                       Requires<[In64BitMode]>;
 } // SchedRW

+def : Pat<(i32 relocImm:$src), (MOV32ri relocImm:$src)>;
+def : Pat<(i64 relocImm:$src), (MOV64ri relocImm:$src)>;
+
+// Stores of a relocImm select the MOV*mi forms. Each result names the same
+// relocImm leaf that the source pattern matched.
+def : Pat<(store (i8 relocImm8_su:$src), addr:$dst),
+          (MOV8mi addr:$dst, relocImm8_su:$src)>;
+def : Pat<(store (i16 relocImm16_su:$src), addr:$dst),
+          (MOV16mi addr:$dst, relocImm16_su:$src)>;
+def : Pat<(store (i32 relocImm32_su:$src), addr:$dst),
+          (MOV32mi addr:$dst, relocImm32_su:$src)>;
+def : Pat<(store (i64 i64relocImmSExt32_su:$src), addr:$dst),
+          (MOV64mi32 addr:$dst, i64relocImmSExt32_su:$src)>;
+
 let hasSideEffects = 0 in {

 /// Memory offset versions of moves. The immediate is an address mode sized
--- a/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/llvm/lib/Target/X86/X86InstrShiftRotate.td
@ -472,19 +472,19 @@ def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1),

 def ROL8ri   : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2),
                   "rol{b}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR8:$dst, (rotl GR8:$src1, (i8 relocImm:$src2)))]>;
+                   [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>;
 def ROL16ri  : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2),
                   "rol{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, (rotl GR16:$src1, (i8 relocImm:$src2)))]>,
+                   [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>,
                   OpSize16;
 def ROL32ri  : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$src2),
                   "rol{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, (rotl GR32:$src1, (i8 relocImm:$src2)))]>,
+                   [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>,
                   OpSize32;
 def ROL64ri  : RIi8<0xC1, MRM0r, (outs GR64:$dst),
                    (ins GR64:$src1, u8imm:$src2),
                    "rol{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, (rotl GR64:$src1, (i8 relocImm:$src2)))]>;
+                    [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>;

 // Rotate by 1
 def ROL8r1   : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
@ -570,19 +570,19 @@ def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1),

 def ROR8ri   : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2),
                   "ror{b}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR8:$dst, (rotr GR8:$src1, (i8 relocImm:$src2)))]>;
+                   [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>;
 def ROR16ri  : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2),
                   "ror{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, (rotr GR16:$src1, (i8 relocImm:$src2)))]>,
+                   [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))]>,
                   OpSize16;
 def ROR32ri  : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$src2),
                   "ror{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, (rotr GR32:$src1, (i8 relocImm:$src2)))]>,
+                   [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))]>,
                   OpSize32;
 def ROR64ri  : RIi8<0xC1, MRM1r, (outs GR64:$dst),
                    (ins GR64:$src1, u8imm:$src2),
                    "ror{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, (rotr GR64:$src1, (i8 relocImm:$src2)))]>;
+                    [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))]>;

 // Rotate by 1
 def ROR8r1   : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
@ -1013,3 +1013,21 @@ let Predicates = [HasBMI2] in {
                      (INSERT_SUBREG
                        (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
 }
+
+def : Pat<(rotl GR8:$src1, (i8 relocImm:$src2)),
+          (ROL8ri GR8:$src1, relocImm:$src2)>;
+def : Pat<(rotl GR16:$src1, (i8 relocImm:$src2)),
+          (ROL16ri GR16:$src1, relocImm:$src2)>;
+def : Pat<(rotl GR32:$src1, (i8 relocImm:$src2)),
+          (ROL32ri GR32:$src1, relocImm:$src2)>;
+def : Pat<(rotl GR64:$src1, (i8 relocImm:$src2)),
+          (ROL64ri GR64:$src1, relocImm:$src2)>;
+
+def : Pat<(rotr GR8:$src1, (i8 relocImm:$src2)),
+          (ROR8ri GR8:$src1, relocImm:$src2)>;
+def : Pat<(rotr GR16:$src1, (i8 relocImm:$src2)),
+          (ROR16ri GR16:$src1, relocImm:$src2)>;
+def : Pat<(rotr GR32:$src1, (i8 relocImm:$src2)),
+          (ROR32ri GR32:$src1, relocImm:$src2)>;
+def : Pat<(rotr GR64:$src1, (i8 relocImm:$src2)),
+          (ROR64ri GR64:$src1, relocImm:$src2)>;
--- a/llvm/test/CodeGen/X86/GlobalISel/callingconv.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/callingconv.ll
@ -18,7 +18,7 @@ define i32 @test_ret_i32() {
 define i64 @test_ret_i64() {
 ; X32-LABEL: test_ret_i64:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
+; X32-NEXT:    movl $-1, %eax
 ; X32-NEXT:    movl $15, %edx
 ; X32-NEXT:    retl
 ;
--- a/llvm/test/CodeGen/X86/GlobalISel/select-blsr.mir
+++ b/llvm/test/CodeGen/X86/GlobalISel/select-blsr.mir
@ -48,7 +48,7 @@ body:             |

    ; CHECK-LABEL: name: test_blsr32rr_nomatch
    ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi
-    ; CHECK: [[MOV32ri:%[0-9]+]]:gr32 = MOV32ri 4294967295
+    ; CHECK: [[MOV32ri:%[0-9]+]]:gr32 = MOV32ri -1
    ; CHECK: [[DEC32r:%[0-9]+]]:gr32 = DEC32r [[MOV32ri]], implicit-def $eflags
    ; CHECK: [[AND32rr:%[0-9]+]]:gr32 = AND32rr [[DEC32r]], [[COPY]], implicit-def $eflags
    ; CHECK: $edi = COPY [[AND32rr]]
--- a/llvm/test/CodeGen/X86/absolute-cmp.ll
+++ b/llvm/test/CodeGen/X86/absolute-cmp.ll
@ -15,8 +15,8 @@ define void @foo8(i64 %val) {
 ; NOPIC:       # %bb.0:
 ; NOPIC-NEXT:    pushq %rax # encoding: [0x50]
 ; NOPIC-NEXT:    .cfi_def_cfa_offset 16
-; NOPIC-NEXT:    cmpq $cmp8@ABS8, %rdi # encoding: [0x48,0x81,0xff,A,A,A,A]
-; NOPIC-NEXT:    # fixup A - offset: 3, value: cmp8@ABS8, kind: reloc_signed_4byte
+; NOPIC-NEXT:    cmpq $cmp8@ABS8, %rdi # encoding: [0x48,0x83,0xff,A]
+; NOPIC-NEXT:    # fixup A - offset: 3, value: cmp8@ABS8, kind: FK_Data_1
 ; NOPIC-NEXT:    ja .LBB0_2 # encoding: [0x77,A]
 ; NOPIC-NEXT:    # fixup A - offset: 1, value: .LBB0_2-1, kind: FK_PCRel_1
 ; NOPIC-NEXT:  # %bb.1: # %t
@ -31,8 +31,8 @@ define void @foo8(i64 %val) {
 ; PIC:       # %bb.0:
 ; PIC-NEXT:    pushq %rax # encoding: [0x50]
 ; PIC-NEXT:    .cfi_def_cfa_offset 16
-; PIC-NEXT:    cmpq $cmp8@ABS8, %rdi # encoding: [0x48,0x81,0xff,A,A,A,A]
-; PIC-NEXT:    # fixup A - offset: 3, value: cmp8@ABS8, kind: reloc_signed_4byte
+; PIC-NEXT:    cmpq $cmp8@ABS8, %rdi # encoding: [0x48,0x83,0xff,A]
+; PIC-NEXT:    # fixup A - offset: 3, value: cmp8@ABS8, kind: FK_Data_1
 ; PIC-NEXT:    ja .LBB0_2 # encoding: [0x77,A]
 ; PIC-NEXT:    # fixup A - offset: 1, value: .LBB0_2-1, kind: FK_PCRel_1
 ; PIC-NEXT:  # %bb.1: # %t
--- a/llvm/test/CodeGen/X86/fast-isel-fneg-kill.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-fneg-kill.ll
@ -11,7 +11,7 @@ define void @goo(double* %x, double* %y) nounwind {
 ; CHECK-NEXT: %[[REG10:.*]]:fr64 = MOVSDrm_alt %[[REG1]], 1, $noreg, 0, $noreg :: (load 8 from %ir.x)
 ; CHECK-NEXT: %[[REG6:.*]]:gr64 = MOVSDto64rr killed %[[REG10]]
 ; CHECK-NEXT: %[[REG7:.*]]:gr64 = MOV64ri -9223372036854775808
-; CHECK-NEXT: %[[REG8:.*]]:gr64 = XOR64rr killed %[[REG6]], %[[REG7]], implicit-def $eflags
+; CHECK-NEXT: %[[REG8:.*]]:gr64 = XOR64rr killed %[[REG6]], killed %[[REG7]], implicit-def $eflags
 ; CHECK-NEXT: %[[REG9:.*]]:fr64 = MOV64toSDrr killed %[[REG8]]
 ; CHECK-NEXT: MOVSDmr %[[REG3]], 1, $noreg, 0, $noreg, killed %[[REG9]] :: (store 8 into %ir.y)
 ; CHECK-NEXT: RETQ