[AArch64][GlobalISel] Select FCMPSri/FCMPDri when comparing against 0.0

Add support for selecting FCMPSri and FCMPDri when comparing against 0.0, and factor out opcode selection for G_FCMP into its own function. Add a test to show that we don't do this with other immediates. Differential Revision: https://reviews.llvm.org/D62539 llvm-svn: 361888
2019-05-28 22:52:49 +00:00 · 2019-05-28 22:52:49 +00:00 · b73ea75b38
parent 1efbe67414
commit b73ea75b38
2 changed files with 83 additions and 13 deletions
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@ -741,6 +741,20 @@ static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
  return GenericOpc;
 }

+/// Helper function to select the opcode for a G_FCMP.
+static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
+  // If this is a compare against +0.0, then we don't have to explicitly
+  // materialize a constant.
+  const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
+  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
+  unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
+  if (OpSize != 32 && OpSize != 64)
+    return 0;
+  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
+                              {AArch64::FCMPSri, AArch64::FCMPDri}};
+  return CmpOpcTbl[ShouldUseImm][OpSize == 64];
+}
+
 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
  switch (P) {
  default:
@ -1845,15 +1859,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
      return false;
    }

-    unsigned CmpOpc = 0;
-    LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
-    if (CmpTy == LLT::scalar(32)) {
-      CmpOpc = AArch64::FCMPSrr;
-    } else if (CmpTy == LLT::scalar(64)) {
-      CmpOpc = AArch64::FCMPDrr;
-    } else {
+    unsigned CmpOpc = selectFCMPOpc(I, MRI);
+    if (!CmpOpc)
      return false;
-    }

    // FIXME: regbank

@ -1861,9 +1869,16 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
    changeFCMPPredToAArch64CC(
        (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);

-    MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
-                               .addUse(I.getOperand(2).getReg())
-                               .addUse(I.getOperand(3).getReg());
+    // Partially build the compare. Decide if we need to add a use for the
+    // third operand based off whether or not we're comparing against 0.0.
+    auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
+                     .addUse(I.getOperand(2).getReg());
+
+    // If we don't have an immediate compare, then we need to add a use of the
+    // register which wasn't used for the immediate.
+    // Note that the immediate will always be the last operand.
+    if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
+      CmpMI = CmpMI.addUse(I.getOperand(3).getReg());

    const unsigned DefReg = I.getOperand(0).getReg();
    unsigned Def1Reg = DefReg;
@ -1893,8 +1908,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
      constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
      constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
    }
-
-    constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
+    constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
    constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);

    I.eraseFromParent();
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-fcmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-fcmp.mir
@ -0,0 +1,56 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+# Verify that we get FCMPSri when we compare against 0.0 and that we get
+# FCMPSrr otherwise.
+
+...
+---
+name:            zero
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $s0, $s1
+
+    ; CHECK-LABEL: name: zero
+    ; CHECK: liveins: $s0, $s1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: FCMPSri [[COPY]], implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+    ; CHECK: $s0 = COPY [[CSINCWr]]
+    ; CHECK: RET_ReallyLR implicit $s0
+    %0:fpr(s32) = COPY $s0
+    %1:fpr(s32) = COPY $s1
+    %2:fpr(s32) = G_FCONSTANT float 0.000000e+00
+    %3:gpr(s32) = G_FCMP floatpred(oeq), %0(s32), %2
+    $s0 = COPY %3(s32)
+    RET_ReallyLR implicit $s0
+
+...
+---
+name:            notzero
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.1:
+    liveins: $s0, $s1
+
+    ; CHECK-LABEL: name: notzero
+    ; CHECK: liveins: $s0, $s1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: [[FMOVSi:%[0-9]+]]:fpr32 = FMOVSi 112
+    ; CHECK: FCMPSrr [[COPY]], [[FMOVSi]], implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+    ; CHECK: $s0 = COPY [[CSINCWr]]
+    ; CHECK: RET_ReallyLR implicit $s0
+    %0:fpr(s32) = COPY $s0
+    %1:fpr(s32) = COPY $s1
+    %2:fpr(s32) = G_FCONSTANT float 1.000000e+00
+    %3:gpr(s32) = G_FCMP floatpred(oeq), %0(s32), %2
+    $s0 = COPY %3(s32)
+    RET_ReallyLR implicit $s0