[SystemZ] Prefer LHI;ST... over LAY;MV...

If we had a store of an integer to memory, and the integer and store size were suitable for a form of MV..., we used MV... no matter what. We could then have sequences like:

    lay %r2, 0(%r3,%r4)
    mvi 0(%r2), 4

In these cases it seems better to force the constant into a register and use a normal store:

    lhi %r2, 4
    stc %r2, 0(%r3, %r4)

since %r2 is more likely to be hoisted and is easier to rematerialize.

llvm-svn: 189098
2013-08-23 11:18:53 +00:00 · 2013-08-23 11:18:53 +00:00 · a481f58542
parent 37cd6cfba2
commit a481f58542
9 changed files with 93 additions and 58 deletions
--- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@ -159,6 +159,12 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
  bool selectBDAddr(SystemZAddressingMode::DispRange DR, SDValue Addr,
                    SDValue &Base, SDValue &Disp);

+  // Try to match Addr as a FormBDX address with displacement type DR.
+  // Return true only if the match succeeds and the result has no index,
+  // storing the base and displacement in Base and Disp respectively.
+  bool selectMVIAddr(SystemZAddressingMode::DispRange DR, SDValue Addr,
+                     SDValue &Base, SDValue &Disp);
+
  // Try to match Addr as a FormBDX* address of form Form with
  // displacement type DR.  Return true on success, storing the base,
  // displacement and index in Base, Disp and Index respectively.
@ -189,6 +195,14 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
    return selectBDAddr(SystemZAddressingMode::Disp20Pair, Addr, Base, Disp);
  }

+  // MVI matching routines used by SystemZOperands.td.  Each one forwards to
+  // selectMVIAddr with the displacement range named in its suffix
+  // (Disp12Pair or Disp20Pair).
+  bool selectMVIAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) {
+    return selectMVIAddr(SystemZAddressingMode::Disp12Pair, Addr, Base, Disp);
+  }
+  bool selectMVIAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) {
+    return selectMVIAddr(SystemZAddressingMode::Disp20Pair, Addr, Base, Disp);
+  }
+
  // BDX matching routines used by SystemZOperands.td.
  bool selectBDXAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp,
                           SDValue &Index) {
@ -575,6 +589,17 @@ bool SystemZDAGToDAGISel::selectBDAddr(SystemZAddressingMode::DispRange DR,
  return true;
 }

+// Match Addr for the store-immediate (MVI-style) address operands.  The
+// address is matched in FormBDXNormal form with displacement range DR, and
+// the match is rejected if selectAddress fails or if the matched address
+// has an index.  On success the base and displacement operands are stored
+// in Base and Disp.
+bool SystemZDAGToDAGISel::selectMVIAddr(SystemZAddressingMode::DispRange DR,
+                                        SDValue Addr, SDValue &Base,
+                                        SDValue &Disp) {
+  SystemZAddressingMode AM(SystemZAddressingMode::FormBDXNormal, DR);
+  // A non-null index node means the address is not a plain
+  // base + displacement, so report no match.
+  if (!selectAddress(Addr, AM) || AM.Index.getNode())
+    return false;
+
+  getAddressOperands(AM, Addr.getValueType(), Base, Disp);
+  return true;
+}
+
 bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form,
                                        SystemZAddressingMode::DispRange DR,
                                        SDValue Addr, SDValue &Base,
--- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@ -627,27 +627,33 @@ class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls>
  let mayStore = 1;
 }

+// StoreSI* instructions are used to store an integer to memory, but the
+// addresses are more restricted than for normal stores.  If we are in the
+// situation of having to force either the address into a register or the
+// constant into a register, it's usually better to do the latter.
+// We therefore match the address in the same way as a normal store and
+// only use the StoreSI* instruction if the matched address is suitable.
 class StoreSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
-              Immediate imm, AddressingMode mode = bdaddr12only>
-  : InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2),
+              Immediate imm>
+  : InstSI<opcode, (outs), (ins mviaddr12pair:$BD1, imm:$I2),
           mnemonic#"\t$BD1, $I2",
-           [(operator imm:$I2, mode:$BD1)]> {
+           [(operator imm:$I2, mviaddr12pair:$BD1)]> {
  let mayStore = 1;
 }

 class StoreSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-               Immediate imm, AddressingMode mode = bdaddr20only>
-  : InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2),
+               Immediate imm>
+  : InstSIY<opcode, (outs), (ins mviaddr20pair:$BD1, imm:$I2),
            mnemonic#"\t$BD1, $I2",
-            [(operator imm:$I2, mode:$BD1)]> {
+            [(operator imm:$I2, mviaddr20pair:$BD1)]> {
  let mayStore = 1;
 }

 class StoreSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
               Immediate imm>
-  : InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2),
+  : InstSIL<opcode, (outs), (ins mviaddr12pair:$BD1, imm:$I2),
            mnemonic#"\t$BD1, $I2",
-            [(operator imm:$I2, bdaddr12only:$BD1)]> {
+            [(operator imm:$I2, mviaddr12pair:$BD1)]> {
  let mayStore = 1;
 }

@ -655,9 +661,9 @@ multiclass StoreSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
                       SDPatternOperator operator, Immediate imm> {
  let DispKey = mnemonic in {
    let DispSize = "12" in
-      def "" : StoreSI<mnemonic, siOpcode, operator, imm, bdaddr12pair>;
+      def "" : StoreSI<mnemonic, siOpcode, operator, imm>;
    let DispSize = "20" in
-      def Y  : StoreSIY<mnemonic#"y", siyOpcode, operator, imm, bdaddr20pair>;
+      def Y  : StoreSIY<mnemonic#"y", siyOpcode, operator, imm>;
  }
 }

--- a/llvm/lib/Target/SystemZ/SystemZOperands.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperands.td
@ -435,6 +435,7 @@ def BDLAddr64Disp12Len8 : AddressAsmOperand<"BDLAddr",  "64", "12", "Len8">;
 // <type> is one of:
 //   shift    : base + displacement (32-bit)
 //   bdaddr   : base + displacement
+//   mviaddr  : like bdaddr, but reject cases with a natural index
 //   bdxaddr  : base + displacement + index
 //   laaddr   : like bdxaddr, but used for Load Address operations
 //   dynalloc : base + displacement + index + ADJDYNALLOC
@ -460,6 +461,8 @@ def bdaddr12only      : BDMode <"BDAddr",   "64", "12", "Only">;
 def bdaddr12pair      : BDMode <"BDAddr",   "64", "12", "Pair">;
 def bdaddr20only      : BDMode <"BDAddr",   "64", "20", "Only">;
 def bdaddr20pair      : BDMode <"BDAddr",   "64", "20", "Pair">;
+def mviaddr12pair     : BDMode <"MVIAddr",  "64", "12", "Pair">;
+def mviaddr20pair     : BDMode <"MVIAddr",  "64", "20", "Pair">;
 def bdxaddr12only     : BDXMode<"BDXAddr",  "64", "12", "Only">;
 def bdxaddr12pair     : BDXMode<"BDXAddr",  "64", "12", "Pair">;
 def bdxaddr20only     : BDXMode<"BDXAddr",  "64", "20", "Only">;
--- a/llvm/test/CodeGen/SystemZ/alloca-02.ll
+++ b/llvm/test/CodeGen/SystemZ/alloca-02.ll
@ -21,18 +21,21 @@ define i64 @f1(i64 %length, i64 %index) {
 ;
 ; CHECK-C-LABEL: f1:
 ; CHECK-C: lgr %r15, [[ADDR:%r[1-5]]]
-; CHECK-C: la [[TMP:%r[1-5]]], 160(%r3,[[ADDR]])
-; CHECK-C: mvi 0([[TMP]]), 2
+; CHECK-C-DAG: la %r2, 160([[ADDR]])
+; CHECK-C-DAG: lhi [[TMP:%r[0-5]]], 2
+; CHECK-C: stc [[TMP]], 0({{%r3,%r2|%r2,%r3}})
 ;
 ; CHECK-D-LABEL: f1:
 ; CHECK-D: lgr %r15, [[ADDR:%r[1-5]]]
-; CHECK-D: la [[TMP:%r[1-5]]], 160(%r3,[[ADDR]])
-; CHECK-D: mvi 4095([[TMP]]), 3
+; CHECK-D-DAG: la %r2, 160([[ADDR]])
+; CHECK-D-DAG: lhi [[TMP:%r[0-5]]], 3
+; CHECK-D: stc [[TMP]], 4095({{%r3,%r2|%r2,%r3}})
 ;
 ; CHECK-E-LABEL: f1:
 ; CHECK-E: lgr %r15, [[ADDR:%r[1-5]]]
-; CHECK-E: la [[TMP:%r[1-5]]], 160(%r3,[[ADDR]])
-; CHECK-E: mviy 4096([[TMP]]), 4
+; CHECK-E-DAG: la %r2, 160([[ADDR]])
+; CHECK-E-DAG: lhi [[TMP:%r[0-5]]], 4
+; CHECK-E: stcy [[TMP]], 4096({{%r3,%r2|%r2,%r3}})
  %a = alloca i8, i64 %length
  store volatile i8 0, i8 *%a
  %b = getelementptr i8 *%a, i64 4095
--- a/llvm/test/CodeGen/SystemZ/frame-13.ll
+++ b/llvm/test/CodeGen/SystemZ/frame-13.ll
@ -182,17 +182,16 @@ define void @f8() {
 }

 ; Check a case where the original displacement is out of range.  The backend
-; should force an LAY from the outset.  We don't yet do any kind of anchor
-; optimization, so there should be no offset on the MVHI itself.
+; should load the constant into a register and use STY instead of MVHI.
 define void @f9() {
 ; CHECK-NOFP-LABEL: f9:
-; CHECK-NOFP: lay %r1, 12296(%r15)
-; CHECK-NOFP: mvhi 0(%r1), 42
+; CHECK-NOFP: lhi [[TMP:%r[0-5]]], 42
+; CHECK-NOFP: sty [[TMP]], 12296(%r15)
 ; CHECK-NOFP: br %r14
 ;
 ; CHECK-FP-LABEL: f9:
-; CHECK-FP: lay %r1, 12296(%r11)
-; CHECK-FP: mvhi 0(%r1), 42
+; CHECK-FP: lhi [[TMP:%r[0-5]]], 42
+; CHECK-FP: sty [[TMP]], 12296(%r11)
 ; CHECK-FP: br %r14
  %region1 = alloca [2006 x i32], align 8
  %region2 = alloca [2006 x i32], align 8
--- a/llvm/test/CodeGen/SystemZ/int-const-03.ll
+++ b/llvm/test/CodeGen/SystemZ/int-const-03.ll
@ -139,11 +139,11 @@ define void @f14(i8 *%src) {
  ret void
 }

-; Check that MVI does not allow an index
+; Check that MVI does not allow an index.  We prefer STC in that case.
 define void @f15(i64 %src, i64 %index) {
 ; CHECK-LABEL: f15:
-; CHECK: agr %r2, %r3
-; CHECK: mvi 4095(%r2), 42
+; CHECK: lhi [[TMP:%r[0-5]]], 42
+; CHECK: stc [[TMP]], 4095({{%r2,%r3|%r3,%r2}})
 ; CHECK: br %r14
  %add1 = add i64 %src, %index
  %add2 = add i64 %add1, 4095
@ -152,11 +152,11 @@ define void @f15(i64 %src, i64 %index) {
  ret void
 }

-; Check that MVIY does not allow an index
+; Check that MVIY does not allow an index.  We prefer STCY in that case.
 define void @f16(i64 %src, i64 %index) {
 ; CHECK-LABEL: f16:
-; CHECK: agr %r2, %r3
-; CHECK: mviy 4096(%r2), 42
+; CHECK: lhi [[TMP:%r[0-5]]], 42
+; CHECK: stcy [[TMP]], 4096({{%r2,%r3|%r3,%r2}})
 ; CHECK: br %r14
  %add1 = add i64 %src, %index
  %add2 = add i64 %add1, 4096
--- a/llvm/test/CodeGen/SystemZ/int-const-04.ll
+++ b/llvm/test/CodeGen/SystemZ/int-const-04.ll
@ -75,34 +75,34 @@ define void @f8(i16 *%a) {
  ret void
 }

-; Check the next halfword up, which needs separate address logic.
-; Other sequences besides this one would be OK.
+; Check the next halfword up, which is out of range.  We prefer STHY
+; in that case.
 define void @f9(i16 *%a) {
 ; CHECK-LABEL: f9:
-; CHECK: aghi %r2, 4096
-; CHECK: mvhhi 0(%r2), 42
+; CHECK: lhi [[TMP:%r[0-5]]], 42
+; CHECK: sthy [[TMP]], 4096(%r2)
 ; CHECK: br %r14
  %ptr = getelementptr i16 *%a, i64 2048
  store i16 42, i16 *%ptr
  ret void
 }

-; Check negative displacements, which also need separate address logic.
+; Check negative displacements, for which we again prefer STHY.
 define void @f10(i16 *%a) {
 ; CHECK-LABEL: f10:
-; CHECK: aghi %r2, -2
-; CHECK: mvhhi 0(%r2), 42
+; CHECK: lhi [[TMP:%r[0-5]]], 42
+; CHECK: sthy [[TMP]], -2(%r2)
 ; CHECK: br %r14
  %ptr = getelementptr i16 *%a, i64 -1
  store i16 42, i16 *%ptr
  ret void
 }

-; Check that MVHHI does not allow an index
+; Check that MVHHI does not allow an index.
 define void @f11(i64 %src, i64 %index) {
 ; CHECK-LABEL: f11:
-; CHECK: agr %r2, %r3
-; CHECK: mvhhi 0(%r2), 42
+; CHECK: lhi [[TMP:%r[0-5]]], 42
+; CHECK: sth [[TMP]], 0({{%r2,%r3|%r3,%r2}})
 ; CHECK: br %r14
  %add = add i64 %src, %index
  %ptr = inttoptr i64 %add to i16 *
--- a/llvm/test/CodeGen/SystemZ/int-const-05.ll
+++ b/llvm/test/CodeGen/SystemZ/int-const-05.ll
@ -66,34 +66,33 @@ define void @f7(i32 *%a) {
  ret void
 }

-; Check the next word up, which needs separate address logic.
-; Other sequences besides this one would be OK.
+; Check the next word up, which is out of range.  We prefer STY in that case.
 define void @f8(i32 *%a) {
 ; CHECK-LABEL: f8:
-; CHECK: aghi %r2, 4096
-; CHECK: mvhi 0(%r2), 42
+; CHECK: lhi [[TMP:%r[0-5]]], 42
+; CHECK: sty [[TMP]], 4096(%r2)
 ; CHECK: br %r14
  %ptr = getelementptr i32 *%a, i64 1024
  store i32 42, i32 *%ptr
  ret void
 }

-; Check negative displacements, which also need separate address logic.
+; Check negative displacements, for which we again prefer STY.
 define void @f9(i32 *%a) {
 ; CHECK-LABEL: f9:
-; CHECK: aghi %r2, -4
-; CHECK: mvhi 0(%r2), 42
+; CHECK: lhi [[TMP:%r[0-5]]], 42
+; CHECK: sty [[TMP]], -4(%r2)
 ; CHECK: br %r14
  %ptr = getelementptr i32 *%a, i64 -1
  store i32 42, i32 *%ptr
  ret void
 }

-; Check that MVHI does not allow an index
+; Check that MVHI does not allow an index.
 define void @f10(i64 %src, i64 %index) {
 ; CHECK-LABEL: f10:
-; CHECK: agr %r2, %r3
-; CHECK: mvhi 0(%r2), 42
+; CHECK: lhi [[TMP:%r[0-5]]], 42
+; CHECK: st [[TMP]], 0({{%r2,%r3|%r3,%r2}})
 ; CHECK: br %r14
  %add = add i64 %src, %index
  %ptr = inttoptr i64 %add to i32 *
--- a/llvm/test/CodeGen/SystemZ/int-const-06.ll
+++ b/llvm/test/CodeGen/SystemZ/int-const-06.ll
@ -66,34 +66,34 @@ define void @f7(i64 *%a) {
  ret void
 }

-; Check the next doubleword up, which needs separate address logic.
-; Other sequences besides this one would be OK.
+; Check the next doubleword up, which is out of range.  We prefer STG
+; in that case.
 define void @f8(i64 *%a) {
 ; CHECK-LABEL: f8:
-; CHECK: aghi %r2, 4096
-; CHECK: mvghi 0(%r2), 42
+; CHECK: lghi [[TMP:%r[0-5]]], 42
+; CHECK: stg [[TMP]], 4096(%r2)
 ; CHECK: br %r14
  %ptr = getelementptr i64 *%a, i64 512
  store i64 42, i64 *%ptr
  ret void
 }

-; Check negative displacements, which also need separate address logic.
+; Check negative displacements, for which we again prefer STG.
 define void @f9(i64 *%a) {
 ; CHECK-LABEL: f9:
-; CHECK: aghi %r2, -8
-; CHECK: mvghi 0(%r2), 42
+; CHECK: lghi [[TMP:%r[0-5]]], 42
+; CHECK: stg [[TMP]], -8(%r2)
 ; CHECK: br %r14
  %ptr = getelementptr i64 *%a, i64 -1
  store i64 42, i64 *%ptr
  ret void
 }

-; Check that MVGHI does not allow an index
+; Check that MVGHI does not allow an index.
 define void @f10(i64 %src, i64 %index) {
 ; CHECK-LABEL: f10:
-; CHECK: agr %r2, %r3
-; CHECK: mvghi 0(%r2), 42
+; CHECK: lghi [[TMP:%r[0-5]]], 42
+; CHECK: stg [[TMP]], 0({{%r2,%r3|%r3,%r2}})
 ; CHECK: br %r14
  %add = add i64 %src, %index
  %ptr = inttoptr i64 %add to i64 *