[AMDGPU][MC][GFX90A] Corrected DS_GWS opcodes

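Corrected DS_GWS opcodes (ds_gws_init, ds_gws_sema_br, ds_gws_barrier) on GFX90A to require even-aligned registers: the assembler now rejects an odd-numbered VGPR or AGPR operand with the error "vgpr must be even aligned".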

Differential Revision: https://reviews.llvm.org/D103185
This commit is contained in:
Dmitry Preobrazhensky 2021-05-26 21:30:15 +03:00
parent c5c1ec7945
commit 13c6568c6e
5 changed files with 119 additions and 60 deletions

View File

@ -1546,6 +1546,7 @@ private:
bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
bool validateAGPRLdSt(const MCInst &Inst) const;
bool validateVGPRAlign(const MCInst &Inst) const;
// Checks the gfx90a rule that DS_GWS opcodes must use even aligned registers.
// Returns true when Inst is acceptable; otherwise reports
// "vgpr must be even aligned" at the offending register and returns false.
bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
bool validateDivScale(const MCInst &Inst);
bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
const SMLoc &IDLoc);
@ -4108,6 +4109,34 @@ bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
return true;
}
// gfx90a has an undocumented limitation:
// DS_GWS opcodes must use even aligned registers.
bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
const OperandVector &Operands) {
if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
return true;
int Opc = Inst.getOpcode();
if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
return true;
const MCRegisterInfo *MRI = getMRI();
const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
int Data0Pos =
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
assert(Data0Pos != -1);
auto Reg = Inst.getOperand(Data0Pos).getReg();
auto RegIdx = Reg - (VGRP32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
if (RegIdx & 1) {
SMLoc RegLoc = getRegLoc(Reg, Operands);
Error(RegLoc, "vgpr must be even aligned");
return false;
}
return true;
}
bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
const OperandVector &Operands,
const SMLoc &IDLoc) {
@ -4251,6 +4280,9 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
"invalid register class: vgpr tuples must be 64 bit aligned");
return false;
}
if (!validateGWS(Inst, Operands)) {
return false;
}
if (!validateDivScale(Inst)) {
Error(IDLoc, "ABS not allowed in VOP3B instructions");

View File

@ -260,3 +260,21 @@ v_ashrrev_i16 v0, lds_direct, v0
v_add_f32 v5, v1, lds_direct
// GFX90A: error: lds_direct is not supported on this GPU
ds_gws_init a1 offset:65535 gds
// GFX90A: error: vgpr must be even aligned
ds_gws_init a255 offset:65535 gds
// GFX90A: error: vgpr must be even aligned
ds_gws_sema_br v1 offset:65535 gds
// GFX90A: error: vgpr must be even aligned
ds_gws_sema_br v255 offset:65535 gds
// GFX90A: error: vgpr must be even aligned
ds_gws_barrier a3 offset:4 gds
// GFX90A: error: vgpr must be even aligned
ds_gws_barrier a255 offset:4 gds
// GFX90A: error: vgpr must be even aligned

View File

@ -0,0 +1,9 @@
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck %s --implicit-check-not=error: --strict-whitespace
//==============================================================================
// vgpr must be even aligned
ds_gws_init a1 offset:65535 gds
// CHECK: error: vgpr must be even aligned
// CHECK-NEXT:{{^}}ds_gws_init a1 offset:65535 gds
// CHECK-NEXT:{{^}} ^

View File

@ -10225,65 +10225,65 @@ ds_condxchg32_rtn_b64 a[6:7], v1, a[2:3] offset:4
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
ds_condxchg32_rtn_b64 a[6:7], v1, a[2:3] offset:65535 gds
// GFX90A: ds_gws_init a1 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0x01,0x00,0x00,0x00]
// GFX90A: ds_gws_init a0 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0x00,0x00,0x00,0x00]
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
ds_gws_init a1 offset:65535 gds
ds_gws_init a0 offset:65535 gds
// GFX90A: ds_gws_init a255 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0xff,0x00,0x00,0x00]
// GFX90A: ds_gws_init a254 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0xfe,0x00,0x00,0x00]
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
ds_gws_init a255 offset:65535 gds
ds_gws_init a254 offset:65535 gds
// GFX90A: ds_gws_init a1 gds ; encoding: [0x00,0x00,0x33,0xdb,0x01,0x00,0x00,0x00]
// GFX90A: ds_gws_init a2 gds ; encoding: [0x00,0x00,0x33,0xdb,0x02,0x00,0x00,0x00]
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
ds_gws_init a1 gds
ds_gws_init a2 gds
// GFX90A: ds_gws_init a1 gds ; encoding: [0x00,0x00,0x33,0xdb,0x01,0x00,0x00,0x00]
// GFX90A: ds_gws_init a0 gds ; encoding: [0x00,0x00,0x33,0xdb,0x00,0x00,0x00,0x00]
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
ds_gws_init a1 gds
ds_gws_init a0 gds
// GFX90A: ds_gws_init a1 offset:4 gds ; encoding: [0x04,0x00,0x33,0xdb,0x01,0x00,0x00,0x00]
// GFX90A: ds_gws_init a0 offset:4 gds ; encoding: [0x04,0x00,0x33,0xdb,0x00,0x00,0x00,0x00]
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
ds_gws_init a1 offset:4 gds
ds_gws_init a0 offset:4 gds
// GFX90A: ds_gws_sema_br a1 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0x01,0x00,0x00,0x00]
// GFX90A: ds_gws_sema_br a2 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0x02,0x00,0x00,0x00]
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
ds_gws_sema_br a1 offset:65535 gds
ds_gws_sema_br a2 offset:65535 gds
// GFX90A: ds_gws_sema_br a255 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0xff,0x00,0x00,0x00]
// GFX90A: ds_gws_sema_br a254 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0xfe,0x00,0x00,0x00]
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
ds_gws_sema_br a255 offset:65535 gds
ds_gws_sema_br a254 offset:65535 gds
// GFX90A: ds_gws_sema_br a1 gds ; encoding: [0x00,0x00,0x37,0xdb,0x01,0x00,0x00,0x00]
// GFX90A: ds_gws_sema_br a0 gds ; encoding: [0x00,0x00,0x37,0xdb,0x00,0x00,0x00,0x00]
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
ds_gws_sema_br a1 gds
ds_gws_sema_br a0 gds
// GFX90A: ds_gws_sema_br a1 gds ; encoding: [0x00,0x00,0x37,0xdb,0x01,0x00,0x00,0x00]
// GFX90A: ds_gws_sema_br a2 gds ; encoding: [0x00,0x00,0x37,0xdb,0x02,0x00,0x00,0x00]
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
ds_gws_sema_br a1 gds
ds_gws_sema_br a2 gds
// GFX90A: ds_gws_sema_br a1 offset:4 gds ; encoding: [0x04,0x00,0x37,0xdb,0x01,0x00,0x00,0x00]
// GFX90A: ds_gws_sema_br a0 offset:4 gds ; encoding: [0x04,0x00,0x37,0xdb,0x00,0x00,0x00,0x00]
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
ds_gws_sema_br a1 offset:4 gds
ds_gws_sema_br a0 offset:4 gds
// GFX90A: ds_gws_barrier a1 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0x01,0x00,0x00,0x00]
// GFX90A: ds_gws_barrier a2 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0x02,0x00,0x00,0x00]
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
ds_gws_barrier a1 offset:65535 gds
ds_gws_barrier a2 offset:65535 gds
// GFX90A: ds_gws_barrier a255 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0xff,0x00,0x00,0x00]
// GFX90A: ds_gws_barrier a254 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0xfe,0x00,0x00,0x00]
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
ds_gws_barrier a255 offset:65535 gds
ds_gws_barrier a254 offset:65535 gds
// GFX90A: ds_gws_barrier a1 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00]
// GFX90A: ds_gws_barrier a0 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x00,0x00,0x00,0x00]
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
ds_gws_barrier a1 gds
ds_gws_barrier a0 gds
// GFX90A: ds_gws_barrier a1 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00]
// GFX90A: ds_gws_barrier a2 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x02,0x00,0x00,0x00]
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
ds_gws_barrier a1 gds
ds_gws_barrier a2 gds
// GFX90A: ds_gws_barrier a1 offset:4 gds ; encoding: [0x04,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00]
// GFX90A: ds_gws_barrier a0 offset:4 gds ; encoding: [0x04,0x00,0x3b,0xdb,0x00,0x00,0x00,0x00]
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
ds_gws_barrier a1 offset:4 gds
ds_gws_barrier a0 offset:4 gds
// GFX90A: ds_consume a5 offset:65535 ; encoding: [0xff,0xff,0x7a,0xdb,0x00,0x00,0x00,0x05]
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU

View File

@ -7668,50 +7668,50 @@
# GFX90A: ds_condxchg32_rtn_b64 a[6:7], v1, a[2:3] offset:65535 gds ; encoding: [0xff,0xff,0xfd,0xda,0x01,0x02,0x00,0x06]
0xff,0xff,0xfd,0xda,0x01,0x02,0x00,0x06
# GFX90A: ds_gws_init a1 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0x01,0x00,0x00,0x00]
0xff,0xff,0x33,0xdb,0x01,0x00,0x00,0x00
# GFX90A: ds_gws_init a2 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0x02,0x00,0x00,0x00]
0xff,0xff,0x33,0xdb,0x02,0x00,0x00,0x00
# GFX90A: ds_gws_init a255 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0xff,0x00,0x00,0x00]
0xff,0xff,0x33,0xdb,0xff,0x00,0x00,0x00
# GFX90A: ds_gws_init a254 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0xfe,0x00,0x00,0x00]
0xff,0xff,0x33,0xdb,0xfe,0x00,0x00,0x00
# GFX90A: ds_gws_init a1 gds ; encoding: [0x00,0x00,0x33,0xdb,0x01,0x00,0x00,0x00]
0x00,0x00,0x33,0xdb,0x01,0x00,0x00,0x00
# GFX90A: ds_gws_init a0 gds ; encoding: [0x00,0x00,0x33,0xdb,0x00,0x00,0x00,0x00]
0x00,0x00,0x33,0xdb,0x00,0x00,0x00,0x00
# GFX90A: ds_gws_init a1 gds ; encoding: [0x00,0x00,0x33,0xdb,0x01,0x00,0x00,0x00]
0x00,0x00,0x33,0xdb,0x01,0x00,0x00,0x00
# GFX90A: ds_gws_init a0 gds ; encoding: [0x00,0x00,0x33,0xdb,0x00,0x00,0x00,0x00]
0x00,0x00,0x33,0xdb,0x00,0x00,0x00,0x00
# GFX90A: ds_gws_init a1 offset:4 gds ; encoding: [0x04,0x00,0x33,0xdb,0x01,0x00,0x00,0x00]
0x04,0x00,0x33,0xdb,0x01,0x00,0x00,0x00
# GFX90A: ds_gws_init a2 offset:4 gds ; encoding: [0x04,0x00,0x33,0xdb,0x02,0x00,0x00,0x00]
0x04,0x00,0x33,0xdb,0x02,0x00,0x00,0x00
# GFX90A: ds_gws_sema_br a1 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0x01,0x00,0x00,0x00]
0xff,0xff,0x37,0xdb,0x01,0x00,0x00,0x00
# GFX90A: ds_gws_sema_br a2 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0x02,0x00,0x00,0x00]
0xff,0xff,0x37,0xdb,0x02,0x00,0x00,0x00
# GFX90A: ds_gws_sema_br a255 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0xff,0x00,0x00,0x00]
0xff,0xff,0x37,0xdb,0xff,0x00,0x00,0x00
# GFX90A: ds_gws_sema_br a254 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0xfe,0x00,0x00,0x00]
0xff,0xff,0x37,0xdb,0xfe,0x00,0x00,0x00
# GFX90A: ds_gws_sema_br a1 gds ; encoding: [0x00,0x00,0x37,0xdb,0x01,0x00,0x00,0x00]
0x00,0x00,0x37,0xdb,0x01,0x00,0x00,0x00
# GFX90A: ds_gws_sema_br a0 gds ; encoding: [0x00,0x00,0x37,0xdb,0x00,0x00,0x00,0x00]
0x00,0x00,0x37,0xdb,0x00,0x00,0x00,0x00
# GFX90A: ds_gws_sema_br a1 gds ; encoding: [0x00,0x00,0x37,0xdb,0x01,0x00,0x00,0x00]
0x00,0x00,0x37,0xdb,0x01,0x00,0x00,0x00
# GFX90A: ds_gws_sema_br a2 gds ; encoding: [0x00,0x00,0x37,0xdb,0x02,0x00,0x00,0x00]
0x00,0x00,0x37,0xdb,0x02,0x00,0x00,0x00
# GFX90A: ds_gws_sema_br a1 offset:4 gds ; encoding: [0x04,0x00,0x37,0xdb,0x01,0x00,0x00,0x00]
0x04,0x00,0x37,0xdb,0x01,0x00,0x00,0x00
# GFX90A: ds_gws_sema_br a0 offset:4 gds ; encoding: [0x04,0x00,0x37,0xdb,0x00,0x00,0x00,0x00]
0x04,0x00,0x37,0xdb,0x00,0x00,0x00,0x00
# GFX90A: ds_gws_barrier a1 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0x01,0x00,0x00,0x00]
0xff,0xff,0x3b,0xdb,0x01,0x00,0x00,0x00
# GFX90A: ds_gws_barrier a2 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0x02,0x00,0x00,0x00]
0xff,0xff,0x3b,0xdb,0x02,0x00,0x00,0x00
# GFX90A: ds_gws_barrier a255 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0xff,0x00,0x00,0x00]
0xff,0xff,0x3b,0xdb,0xff,0x00,0x00,0x00
# GFX90A: ds_gws_barrier a254 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0xfe,0x00,0x00,0x00]
0xff,0xff,0x3b,0xdb,0xfe,0x00,0x00,0x00
# GFX90A: ds_gws_barrier a1 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00]
0x00,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00
# GFX90A: ds_gws_barrier a0 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x00,0x00,0x00,0x00]
0x00,0x00,0x3b,0xdb,0x00,0x00,0x00,0x00
# GFX90A: ds_gws_barrier a1 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00]
0x00,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00
# GFX90A: ds_gws_barrier a2 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x02,0x00,0x00,0x00]
0x00,0x00,0x3b,0xdb,0x02,0x00,0x00,0x00
# GFX90A: ds_gws_barrier a1 offset:4 gds ; encoding: [0x04,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00]
0x04,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00
# GFX90A: ds_gws_barrier a2 offset:4 gds ; encoding: [0x04,0x00,0x3b,0xdb,0x02,0x00,0x00,0x00]
0x04,0x00,0x3b,0xdb,0x02,0x00,0x00,0x00
# GFX90A: ds_consume a5 offset:65535 ; encoding: [0xff,0xff,0x7a,0xdb,0x00,0x00,0x00,0x05]
0xff,0xff,0x7a,0xdb,0x00,0x00,0x00,0x05