From 67ab9eb193ce74b0a28bd2413189a34d929cc46c Mon Sep 17 00:00:00 2001
From: Jessica Paquette <jpaquette@apple.com>
Date: Fri, 26 Apr 2019 18:00:01 +0000
Subject: [PATCH] [AArch64][GlobalISel] Select G_BSWAP for vectors of s32 and
 s64

There are instructions for these, so mark them as legal. Select the correct
instruction in AArch64InstructionSelector.cpp.

Update select-bswap.mir and arm64-rev.ll to reflect the changes.

llvm-svn: 359331
---
 .../AArch64/AArch64InstructionSelector.cpp    | 37 +++++++++
 .../Target/AArch64/AArch64LegalizerInfo.cpp   |  2 +-
 .../AArch64/GlobalISel/select-bswap.mir       | 76 +++++++++++++++++--
 llvm/test/CodeGen/AArch64/arm64-rev.ll        |  4 +
 4 files changed, 112 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 8a3310bf4e31..108658d1580b 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -1087,6 +1087,43 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
   }
 
+  case TargetOpcode::G_BSWAP: {
+    // Handle vector types for G_BSWAP directly.
+    unsigned DstReg = I.getOperand(0).getReg();
+    LLT DstTy = MRI.getType(DstReg);
+
+    // We should only get vector types here; everything else is handled by the
+    // importer right now.
+    if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
+      LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
+      return false;
+    }
+
+    // Only handle 4 and 2 element vectors for now.
+    // TODO: 16-bit elements.
+    unsigned NumElts = DstTy.getNumElements();
+    if (NumElts != 4 && NumElts != 2) {
+      LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
+      return false;
+    }
+
+    // Choose the correct opcode for the supported types. Right now, that's
+    // v2s32, v4s32, and v2s64.
+    unsigned Opc = 0;
+    unsigned EltSize = DstTy.getElementType().getSizeInBits();
+    if (EltSize == 32)
+      Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
+                                          : AArch64::REV32v16i8;
+    else if (EltSize == 64)
+      Opc = AArch64::REV64v16i8;
+
+    // We should always get something by the time we get here...
+    assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
+
+    I.setDesc(TII.get(Opc));
+    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+  }
+
   case TargetOpcode::G_FCONSTANT:
   case TargetOpcode::G_CONSTANT: {
     const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
diff --git a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index 01661fbdaeb4..aeedeba73ab7 100644
--- a/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -73,7 +73,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
       .widenScalarToNextPow2(0);
 
   getActionDefinitionsBuilder(G_BSWAP)
-      .legalFor({s32, s64})
+      .legalFor({s32, s64, v4s32, v2s32, v2s64})
       .clampScalar(0, s16, s64)
       .widenScalarToNextPow2(0);
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-bswap.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-bswap.mir
index 244803976fd6..d1978c2a2156 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-bswap.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-bswap.mir
@@ -1,13 +1,7 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
 
---- |
-  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
-
-  define void @bswap_s32() { ret void }
-  define void @bswap_s64() { ret void }
 ...
-
 ---
 name:            bswap_s32
 legalized:       true
@@ -50,4 +44,74 @@ body:             |
     %0(s64) = COPY $x0
     %1(s64) = G_BSWAP %0
     $x0 = COPY %1
+
+...
+---
+name:            bswap_v4s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: bswap_v4s32
+    ; CHECK: liveins: $q0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[REV32v16i8_:%[0-9]+]]:fpr128 = REV32v16i8 [[COPY]]
+    ; CHECK: $q0 = COPY [[REV32v16i8_]]
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:fpr(<4 x s32>) = COPY $q0
+    %1:fpr(<4 x s32>) = G_BSWAP %0
+    $q0 = COPY %1(<4 x s32>)
+    RET_ReallyLR implicit $q0
+
+...
+---
+name:            bswap_v2s32
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0
+
+    ; CHECK-LABEL: name: bswap_v2s32
+    ; CHECK: liveins: $d0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[REV32v8i8_:%[0-9]+]]:fpr64 = REV32v8i8 [[COPY]]
+    ; CHECK: $d0 = COPY [[REV32v8i8_]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:fpr(<2 x s32>) = COPY $d0
+    %1:fpr(<2 x s32>) = G_BSWAP %0
+    $d0 = COPY %1(<2 x s32>)
+    RET_ReallyLR implicit $d0
+
+...
+---
+name:            bswap_v2s64
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: bswap_v2s64
+    ; CHECK: liveins: $q0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[REV64v16i8_:%[0-9]+]]:fpr128 = REV64v16i8 [[COPY]]
+    ; CHECK: $q0 = COPY [[REV64v16i8_]]
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:fpr(<2 x s64>) = COPY $q0
+    %1:fpr(<2 x s64>) = G_BSWAP %0
+    $q0 = COPY %1(<2 x s64>)
+    RET_ReallyLR implicit $q0
+
 ...
diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
index ef14c7712275..a04fe05137b5 100644
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -396,6 +396,10 @@ define <4 x i32> @test_vrev32_bswap(<4 x i32> %source) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rev32.16b v0, v0
 ; CHECK-NEXT:    ret
+; GISEL-LABEL: test_vrev32_bswap:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    rev32.16b v0, v0
+; GISEL-NEXT:    ret
   %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %source)
   ret <4 x i32> %bswap
 }