forked from OSchip/llvm-project
[AArch64][GlobalISel] Select G_BSWAP for vectors of s32 and s64
There are instructions for these, so mark them as legal. Select the correct instruction in AArch64InstructionSelector.cpp. Update select-bswap.mir and arm64-rev.ll to reflect the changes. llvm-svn: 359331
This commit is contained in:
parent
8d1fb84327
commit
67ab9eb193
|
@@ -1087,6 +1087,43 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
|
||||||
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case TargetOpcode::G_BSWAP: {
|
||||||
|
// Handle vector types for G_BSWAP directly.
|
||||||
|
unsigned DstReg = I.getOperand(0).getReg();
|
||||||
|
LLT DstTy = MRI.getType(DstReg);
|
||||||
|
|
||||||
|
// We should only get vector types here; everything else is handled by the
|
||||||
|
// importer right now.
|
||||||
|
if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
|
||||||
|
LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only handle 4 and 2 element vectors for now.
|
||||||
|
// TODO: 16-bit elements.
|
||||||
|
unsigned NumElts = DstTy.getNumElements();
|
||||||
|
if (NumElts != 4 && NumElts != 2) {
|
||||||
|
LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Choose the correct opcode for the supported types. Right now, that's
|
||||||
|
// v2s32, v4s32, and v2s64.
|
||||||
|
unsigned Opc = 0;
|
||||||
|
unsigned EltSize = DstTy.getElementType().getSizeInBits();
|
||||||
|
if (EltSize == 32)
|
||||||
|
Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
|
||||||
|
: AArch64::REV32v16i8;
|
||||||
|
else if (EltSize == 64)
|
||||||
|
Opc = AArch64::REV64v16i8;
|
||||||
|
|
||||||
|
// We should always get something by the time we get here...
|
||||||
|
assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
|
||||||
|
|
||||||
|
I.setDesc(TII.get(Opc));
|
||||||
|
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
|
||||||
|
}
|
||||||
|
|
||||||
case TargetOpcode::G_FCONSTANT:
|
case TargetOpcode::G_FCONSTANT:
|
||||||
case TargetOpcode::G_CONSTANT: {
|
case TargetOpcode::G_CONSTANT: {
|
||||||
const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
|
const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
|
||||||
|
|
|
@@ -73,7 +73,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
|
||||||
.widenScalarToNextPow2(0);
|
.widenScalarToNextPow2(0);
|
||||||
|
|
||||||
getActionDefinitionsBuilder(G_BSWAP)
|
getActionDefinitionsBuilder(G_BSWAP)
|
||||||
.legalFor({s32, s64})
|
.legalFor({s32, s64, v4s32, v2s32, v2s64})
|
||||||
.clampScalar(0, s16, s64)
|
.clampScalar(0, s16, s64)
|
||||||
.widenScalarToNextPow2(0);
|
.widenScalarToNextPow2(0);
|
||||||
|
|
||||||
|
|
|
@@ -1,13 +1,7 @@
|
||||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||||
# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
|
# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
|
||||||
|
|
||||||
--- |
|
|
||||||
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
|
|
||||||
|
|
||||||
define void @bswap_s32() { ret void }
|
|
||||||
define void @bswap_s64() { ret void }
|
|
||||||
...
|
...
|
||||||
|
|
||||||
---
|
---
|
||||||
name: bswap_s32
|
name: bswap_s32
|
||||||
legalized: true
|
legalized: true
|
||||||
|
@@ -50,4 +44,74 @@ body: |
|
||||||
%0(s64) = COPY $x0
|
%0(s64) = COPY $x0
|
||||||
%1(s64) = G_BSWAP %0
|
%1(s64) = G_BSWAP %0
|
||||||
$x0 = COPY %1
|
$x0 = COPY %1
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: bswap_v4s32
|
||||||
|
alignment: 2
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
machineFunctionInfo: {}
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $q0
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: bswap_v4s32
|
||||||
|
; CHECK: liveins: $q0
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
|
||||||
|
; CHECK: [[REV32v16i8_:%[0-9]+]]:fpr128 = REV32v16i8 [[COPY]]
|
||||||
|
; CHECK: $q0 = COPY [[REV32v16i8_]]
|
||||||
|
; CHECK: RET_ReallyLR implicit $q0
|
||||||
|
%0:fpr(<4 x s32>) = COPY $q0
|
||||||
|
%1:fpr(<4 x s32>) = G_BSWAP %0
|
||||||
|
$q0 = COPY %1(<4 x s32>)
|
||||||
|
RET_ReallyLR implicit $q0
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: bswap_v2s32
|
||||||
|
alignment: 2
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
machineFunctionInfo: {}
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $d0
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: bswap_v2s32
|
||||||
|
; CHECK: liveins: $d0
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
|
||||||
|
; CHECK: [[REV32v8i8_:%[0-9]+]]:fpr64 = REV32v8i8 [[COPY]]
|
||||||
|
; CHECK: $d0 = COPY [[REV32v8i8_]]
|
||||||
|
; CHECK: RET_ReallyLR implicit $d0
|
||||||
|
%0:fpr(<2 x s32>) = COPY $d0
|
||||||
|
%1:fpr(<2 x s32>) = G_BSWAP %0
|
||||||
|
$d0 = COPY %1(<2 x s32>)
|
||||||
|
RET_ReallyLR implicit $d0
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: bswap_v2s64
|
||||||
|
alignment: 2
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
machineFunctionInfo: {}
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $q0
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: bswap_v2s64
|
||||||
|
; CHECK: liveins: $q0
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
|
||||||
|
; CHECK: [[REV64v16i8_:%[0-9]+]]:fpr128 = REV64v16i8 [[COPY]]
|
||||||
|
; CHECK: $q0 = COPY [[REV64v16i8_]]
|
||||||
|
; CHECK: RET_ReallyLR implicit $q0
|
||||||
|
%0:fpr(<2 x s64>) = COPY $q0
|
||||||
|
%1:fpr(<2 x s64>) = G_BSWAP %0
|
||||||
|
$q0 = COPY %1(<2 x s64>)
|
||||||
|
RET_ReallyLR implicit $q0
|
||||||
|
|
||||||
...
|
...
|
||||||
|
|
|
@@ -396,6 +396,10 @@ define <4 x i32> @test_vrev32_bswap(<4 x i32> %source) nounwind {
|
||||||
; CHECK: // %bb.0:
|
; CHECK: // %bb.0:
|
||||||
; CHECK-NEXT: rev32.16b v0, v0
|
; CHECK-NEXT: rev32.16b v0, v0
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
|
; GISEL-LABEL: test_vrev32_bswap:
|
||||||
|
; GISEL: // %bb.0:
|
||||||
|
; GISEL-NEXT: rev32.16b v0, v0
|
||||||
|
; GISEL-NEXT: ret
|
||||||
%bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %source)
|
%bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %source)
|
||||||
ret <4 x i32> %bswap
|
ret <4 x i32> %bswap
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue