AMDGPU/GlobalISel: RegBankSelect for DS ordered add/swap

llvm-svn: 364811
This commit is contained in:
Matt Arsenault 2019-07-01 17:04:57 +00:00
parent 732149b24e
commit 1094e6a814
4 changed files with 174 additions and 2 deletions

View File

@ -395,6 +395,7 @@ class AMDGPULDSF32Intrin<string clang_builtin> :
[IntrArgMemOnly, NoCapture<0>, ImmArg<2>, ImmArg<3>, ImmArg<4>] [IntrArgMemOnly, NoCapture<0>, ImmArg<2>, ImmArg<3>, ImmArg<4>]
>; >;
// FIXME: The m0 argument should be moved after the normal arguments
class AMDGPUDSOrderedIntrinsic : Intrinsic< class AMDGPUDSOrderedIntrinsic : Intrinsic<
[llvm_i32_ty], [llvm_i32_ty],
// M0 = {hi16:address, lo16:waveID}. Allow passing M0 as a pointer, so that // M0 = {hi16:address, lo16:waveID}. Allow passing M0 as a pointer, so that

View File

@ -222,6 +222,20 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects(
const std::array<unsigned, 2> RegSrcOpIdx = { { 2, 3 } }; const std::array<unsigned, 2> RegSrcOpIdx = { { 2, 3 } };
return addMappingFromTable<2>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table)); return addMappingFromTable<2>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
} }
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap: {
// VGPR = M0, VGPR
static const OpRegBankEntry<3> Table[2] = {
// Perfectly legal.
{ { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
// Need a readfirstlane for m0
{ { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
};
const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
}
default: default:
return RegisterBankInfo::getInstrAlternativeMappings(MI); return RegisterBankInfo::getInstrAlternativeMappings(MI);
} }
@ -1042,6 +1056,14 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
executeInWaterfallLoop(MI, MRI, { 2 }); executeInWaterfallLoop(MI, MRI, { 2 });
return; return;
} }
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap: {
// This is only allowed to execute with 1 lane, so readfirstlane is safe.
assert(empty(OpdMapper.getVRegs(0)));
substituteSimpleCopyRegs(OpdMapper, 3);
constrainOpWithReadfirstlane(MI, MRI, 2); // M0
return;
}
default: default:
break; break;
} }
@ -1741,8 +1763,15 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_atomic_dec: case Intrinsic::amdgcn_atomic_dec:
return getDefaultMappingAllVGPR(MI); return getDefaultMappingAllVGPR(MI);
case Intrinsic::amdgcn_ds_ordered_add: case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap: case Intrinsic::amdgcn_ds_ordered_swap: {
return getInvalidInstructionMapping(); unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
unsigned M0Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
AMDGPU::SGPRRegBankID);
OpdsMapping[2] = AMDGPU::getValueMapping(M0Bank, 32);
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
break;
}
case Intrinsic::amdgcn_exp_compr: case Intrinsic::amdgcn_exp_compr:
OpdsMapping[0] = nullptr; // IntrinsicID OpdsMapping[0] = nullptr; // IntrinsicID
// FIXME: These are immediate values which can't be read from registers. // FIXME: These are immediate values which can't be read from registers.

View File

@ -0,0 +1,71 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s
---
name: ds_ordered_add_ss
legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1
; CHECK-LABEL: name: ds_ordered_add_ss
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), [[COPY]](s32), [[COPY2]](s32), 0, 0, 0, 0, 0, 0
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), %0, %1, 0, 0, 0, 0, 0, 0
...
---
name: ds_ordered_add_vs
legalized: true
body: |
bb.0:
liveins: $vgpr0, $sgpr0
; CHECK-LABEL: name: ds_ordered_add_vs
; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), [[V_READFIRSTLANE_B32_]], [[COPY2]](s32), 0, 0, 0, 0, 0, 0
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $sgpr0
%2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), %0, %1, 0, 0, 0, 0, 0, 0
...
---
name: ds_ordered_add_vv
legalized: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1
; CHECK-LABEL: name: ds_ordered_add_vv
; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), [[V_READFIRSTLANE_B32_]], [[COPY1]](s32), 0, 0, 0, 0, 0, 0
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), %0, %1, 0, 0, 0, 0, 0, 0
...
---
name: ds_ordered_add_sv
legalized: true
body: |
bb.0:
liveins: $vgpr0, $sgpr0
; CHECK-LABEL: name: ds_ordered_add_sv
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), [[COPY]](s32), [[COPY1]](s32), 0, 0, 0, 0, 0, 0
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.add), %0, %1, 0, 0, 0, 0, 0, 0
...

View File

@ -0,0 +1,71 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s
---
name: ds_ordered_swap_ss
legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1
; CHECK-LABEL: name: ds_ordered_swap_ss
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), [[COPY]](s32), [[COPY2]](s32), 0, 0, 0, 0, 0, 0
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), %0, %1, 0, 0, 0, 0, 0, 0
...
---
name: ds_ordered_swap_vs
legalized: true
body: |
bb.0:
liveins: $vgpr0, $sgpr0
; CHECK-LABEL: name: ds_ordered_swap_vs
; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), [[V_READFIRSTLANE_B32_]], [[COPY2]](s32), 0, 0, 0, 0, 0, 0
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $sgpr0
%2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), %0, %1, 0, 0, 0, 0, 0, 0
...
---
name: ds_ordered_swap_vv
legalized: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1
; CHECK-LABEL: name: ds_ordered_swap_vv
; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), [[V_READFIRSTLANE_B32_]], [[COPY1]](s32), 0, 0, 0, 0, 0, 0
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), %0, %1, 0, 0, 0, 0, 0, 0
...
---
name: ds_ordered_swap_sv
legalized: true
body: |
bb.0:
liveins: $vgpr0, $sgpr0
; CHECK-LABEL: name: ds_ordered_swap_sv
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), [[COPY]](s32), [[COPY1]](s32), 0, 0, 0, 0, 0, 0
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.ordered.swap), %0, %1, 0, 0, 0, 0, 0, 0
...