[AArch64][GlobalISel] If a G_BUILD_VECTOR's operands are all G_CONSTANT then assign to gpr bank.

Even if the type is s8/s16, assigning to gpr is preferable with constants because, in the worst case, we can select via a constant pool load, and, without cross-bank copies to the FPR bank, more patterns can be imported later.
2020-09-25 17:38:10 -07:00 · 2020-09-25 17:38:10 -07:00 · 546e460a00
parent 83e3ea2cfc
commit 546e460a00
3 changed files with 49 additions and 4 deletions
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@ -13,6 +13,7 @@

 #include "AArch64RegisterBankInfo.h"
 #include "AArch64InstrInfo.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
@ -837,10 +838,17 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {

    // Get the instruction that defined the source operand reg, and check if
    // it's a floating point operation. Or, if it's a type like s16 which
-    // doesn't have a exact size gpr register class.
+    // doesn't have a exact size gpr register class. The exception is if the
+    // build_vector has all constant operands, which may be better to leave as
+    // gpr without copies, so it can be matched in imported patterns.
    MachineInstr *DefMI = MRI.getVRegDef(VReg);
    unsigned DefOpc = DefMI->getOpcode();
    const LLT SrcTy = MRI.getType(VReg);
+    if (all_of(MI.operands(), [&](const MachineOperand &Op) {
+          return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() ==
+                                   TargetOpcode::G_CONSTANT;
+        }))
+      break;
    if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
        SrcTy.getSizeInBits() < 32) {
      // Have a floating point op.
--- a/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-build-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-build-vector.mir
@ -32,3 +32,40 @@ body:             |
    RET_ReallyLR implicit $q0

 ...
+---
+name:            g_constant_operands_on_gpr
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    ; Check that we assign GPR to the operands even though they're < 32b in size.
+    ; They're all constant, so we can select it via a constant-pool load if needed
+    ; and this form is more amenable to selection by patterns (without x-bank copies).
+    ; CHECK-LABEL: name: g_constant_operands_on_gpr
+    ; CHECK: [[C:%[0-9]+]]:gpr(s8) = G_CONSTANT i8 4
+    ; CHECK: [[C1:%[0-9]+]]:gpr(s8) = G_CONSTANT i8 10
+    ; CHECK: [[C2:%[0-9]+]]:gpr(s8) = G_CONSTANT i8 3
+    ; CHECK: [[C3:%[0-9]+]]:gpr(s8) = G_CONSTANT i8 11
+    ; CHECK: [[C4:%[0-9]+]]:gpr(s8) = G_CONSTANT i8 15
+    ; CHECK: [[C5:%[0-9]+]]:gpr(s8) = G_CONSTANT i8 44
+    ; CHECK: [[C6:%[0-9]+]]:gpr(s8) = G_CONSTANT i8 22
+    ; CHECK: [[C7:%[0-9]+]]:gpr(s8) = G_CONSTANT i8 19
+    ; CHECK: [[C8:%[0-9]+]]:gpr(s8) = G_CONSTANT i8 55
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:fpr(<16 x s8>) = G_BUILD_VECTOR [[C]](s8), [[C1]](s8), [[C2]](s8), [[C3]](s8), [[C4]](s8), [[C]](s8), [[C1]](s8), [[C5]](s8), [[C6]](s8), [[C4]](s8), [[C]](s8), [[C7]](s8), [[C2]](s8), [[C3]](s8), [[C4]](s8), [[C8]](s8)
+    ; CHECK: $q0 = COPY [[BUILD_VECTOR]](<16 x s8>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %1:_(s8) = G_CONSTANT i8 4
+    %2:_(s8) = G_CONSTANT i8 10
+    %3:_(s8) = G_CONSTANT i8 3
+    %4:_(s8) = G_CONSTANT i8 11
+    %5:_(s8) = G_CONSTANT i8 15
+    %6:_(s8) = G_CONSTANT i8 44
+    %7:_(s8) = G_CONSTANT i8 22
+    %8:_(s8) = G_CONSTANT i8 19
+    %9:_(s8) = G_CONSTANT i8 55
+    %0:_(<16 x s8>) = G_BUILD_VECTOR %1(s8), %2(s8), %3(s8), %4(s8), %5(s8), %1(s8), %2(s8), %6(s8), %7(s8), %5(s8), %1(s8), %8(s8), %3(s8), %4(s8), %5(s8), %9(s8)
+    $q0 = COPY %0(<16 x s8>)
+    RET_ReallyLR implicit $q0
+
+...
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@ -972,7 +972,7 @@ define <4 x i16> @abspattern2(<4 x i16> %a) nounwind {
 ; DAG-NEXT: ret

 ; For GlobalISel, this generates terrible code until we can pattern match this to abs.
-; GISEL-DAG: sub.4h
+; GISEL-DAG: neg.4h
 ; GISEL-DAG: cmgt.4h
 ; GISEL: csel
 ; GISEL: csel
@ -1015,8 +1015,8 @@ define <8 x i16> @abspattern5(<8 x i16> %a) nounwind {
 ; DAG: abs.8h
 ; DAG-NEXT: ret

-; GISEL: cmgt.8h
-; GISEL: sub.8h
+; GISEL-DAG: cmgt.8h
+; GISEL-DAG: neg.8h
 ; GISEL: csel
 ; GISEL: csel
 ; GISEL: csel