Micro-optimization:

This code: float floatingPointComparison(float x, float y) { double product = (double)x * y; if (product == 0.0) return product; return product - 1.0; } produces this: _floatingPointComparison: 0000000000000000 cvtss2sd %xmm1,%xmm1 0000000000000004 cvtss2sd %xmm0,%xmm0 0000000000000008 mulsd %xmm1,%xmm0 000000000000000c pxor %xmm1,%xmm1 0000000000000010 ucomisd %xmm1,%xmm0 0000000000000014 jne 0x00000004 0000000000000016 jp 0x00000002 0000000000000018 jmp 0x00000008 000000000000001a addsd 0x00000006(%rip),%xmm0 0000000000000022 cvtsd2ss %xmm0,%xmm0 0000000000000026 ret The "jne/jp/jmp" sequence can be reduced to this instead: _floatingPointComparison: 0000000000000000 cvtss2sd %xmm1,%xmm1 0000000000000004 cvtss2sd %xmm0,%xmm0 0000000000000008 mulsd %xmm1,%xmm0 000000000000000c pxor %xmm1,%xmm1 0000000000000010 ucomisd %xmm1,%xmm0 0000000000000014 jp 0x00000002 0000000000000016 je 0x00000008 0000000000000018 addsd 0x00000006(%rip),%xmm0 0000000000000020 cvtsd2ss %xmm0,%xmm0 0000000000000024 ret for a savings of 2 bytes. This xform can happen when we recognize that jne and jp jump to the same "true" MBB, the unconditional jump would jump to the "false" MBB, and the "true" branch is the fall-through MBB. llvm-svn: 97766
2010-03-05 00:24:26 +00:00 · 2010-03-05 00:24:26 +00:00 · 6517f88f25
parent 2061c84109
commit 6517f88f25
2 changed files with 74 additions and 13 deletions
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@ -1786,6 +1786,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                           const SmallVectorImpl<MachineOperand> &Cond) const {
  // FIXME this should probably have a DebugLoc operand
  DebugLoc dl = DebugLoc::getUnknownLoc();
+
  // Shouldn't be a fall through.
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 1 || Cond.size() == 0) &&
@ -1799,34 +1800,72 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
  }

  // Conditional branch.
+  const MachineBasicBlock *NextBB = next(&MBB);
  unsigned Count = 0;
  X86::CondCode CC = (X86::CondCode)Cond[0].getImm();
+
+  // In a two-way conditional branch, if the fall-through block is the
+  // "false" branch of the conditional jumps, we can cut out the
+  // unconditional jump by rearranging the conditional jumps. This saves a
+  // few bytes and improves performance. I.e., for COND_NE_OR_P:
+  //
+  //     JNE L1
+  //     JP  L1
+  //     JMP L2
+  // L1:
+  //     ...
+  // L2:
+  //     ...
+  //
+  // to:
+  // 
+  //     JP  L1
+  //     JE  L2
+  // L1:
+  //     ...
+  // L2:
+  //     ...
+  //
+  // Similarly for COND_NP_OR_E.
  switch (CC) {
+  default:
+    BuildMI(&MBB, dl, get(GetCondBranchFromCond(CC))).addMBB(TBB);
+    ++Count;
+    break;
  case X86::COND_NP_OR_E:
    // Synthesize NP_OR_E with two branches.
-    BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB);
-    ++Count;
-    BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(TBB);
-    ++Count;
+    if (FBB && FBB == NextBB) {
+      BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB);
+      BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(FBB);
+      FBB = 0;
+    } else {
+      BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB);
+      BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(TBB);
+    }
+
+    Count += 2;
    break;
  case X86::COND_NE_OR_P:
    // Synthesize NE_OR_P with two branches.
-    BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(TBB);
-    ++Count;
-    BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB);
-    ++Count;
+    if (FBB && FBB == NextBB) {
+      BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB);
+      BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(FBB);
+      FBB = 0;
+    } else {
+      BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(TBB);
+      BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB);
+    }
+
+    Count += 2;
    break;
-  default: {
-    unsigned Opc = GetCondBranchFromCond(CC);
-    BuildMI(&MBB, dl, get(Opc)).addMBB(TBB);
-    ++Count;
-  }
  }
+
  if (FBB) {
    // Two-way Conditional branch. Insert the second branch.
    BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(FBB);
    ++Count;
  }
+
  return Count;
 }

--- a/llvm/test/CodeGen/X86/jump-opt.ll
+++ b/llvm/test/CodeGen/X86/jump-opt.ll
@ -0,0 +1,22 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s
+
+; <rdar://problem/7598384>
+define float @test1(float %x, float %y) nounwind readnone optsize ssp {
+; CHECK:      jp
+; CHECK-NEXT: je
+entry:
+  %0 = fpext float %x to double
+  %1 = fpext float %y to double
+  %2 = fmul double %0, %1
+  %3 = fcmp oeq double %2, 0.000000e+00
+  br i1 %3, label %bb2, label %bb1
+
+bb1:
+  %4 = fadd double %2, -1.000000e+00
+  br label %bb2
+
+bb2:
+  %.0.in = phi double [ %4, %bb1 ], [ %2, %entry ]
+  %.0 = fptrunc double %.0.in to float
+  ret float %.0
+}