forked from OSchip/llvm-project
[AMDGPU][MC][GFX90A] Corrected DS_GWS opcodes
Corrected DS_GWS opcodes to use even aligned registers. Differential Revision: https://reviews.llvm.org/D103185
This commit is contained in:
parent
c5c1ec7945
commit
13c6568c6e
|
@ -1546,6 +1546,7 @@ private:
|
|||
bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
|
||||
bool validateAGPRLdSt(const MCInst &Inst) const;
|
||||
bool validateVGPRAlign(const MCInst &Inst) const;
|
||||
// With FeatureGFX90AInsts enabled, rejects DS_GWS_INIT/BARRIER/SEMA_BR whose
// data0 register has an odd index; reports the error itself and returns false.
bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
|
||||
// When this returns false, validateInstruction reports
// "ABS not allowed in VOP3B instructions".
bool validateDivScale(const MCInst &Inst);
|
||||
bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
|
||||
const SMLoc &IDLoc);
|
||||
|
@ -4108,6 +4109,34 @@ bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
|
|||
return true;
|
||||
}
|
||||
|
||||
// gfx90a has an undocumented limitation:
|
||||
// DS_GWS opcodes must use even aligned registers.
|
||||
bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
|
||||
const OperandVector &Operands) {
|
||||
if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
|
||||
return true;
|
||||
|
||||
int Opc = Inst.getOpcode();
|
||||
if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
|
||||
Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
|
||||
return true;
|
||||
|
||||
const MCRegisterInfo *MRI = getMRI();
|
||||
const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
|
||||
int Data0Pos =
|
||||
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
|
||||
assert(Data0Pos != -1);
|
||||
auto Reg = Inst.getOperand(Data0Pos).getReg();
|
||||
auto RegIdx = Reg - (VGRP32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
|
||||
if (RegIdx & 1) {
|
||||
SMLoc RegLoc = getRegLoc(Reg, Operands);
|
||||
Error(RegLoc, "vgpr must be even aligned");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
|
||||
const OperandVector &Operands,
|
||||
const SMLoc &IDLoc) {
|
||||
|
@ -4251,6 +4280,9 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
|
|||
"invalid register class: vgpr tuples must be 64 bit aligned");
|
||||
return false;
|
||||
}
|
||||
if (!validateGWS(Inst, Operands)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!validateDivScale(Inst)) {
|
||||
Error(IDLoc, "ABS not allowed in VOP3B instructions");
|
||||
|
|
|
@ -260,3 +260,21 @@ v_ashrrev_i16 v0, lds_direct, v0
|
|||
|
||||
v_add_f32 v5, v1, lds_direct
|
||||
// GFX90A: error: lds_direct is not supported on this GPU
|
||||
|
||||
ds_gws_init a1 offset:65535 gds
|
||||
// GFX90A: error: vgpr must be even aligned
|
||||
|
||||
ds_gws_init a255 offset:65535 gds
|
||||
// GFX90A: error: vgpr must be even aligned
|
||||
|
||||
ds_gws_sema_br v1 offset:65535 gds
|
||||
// GFX90A: error: vgpr must be even aligned
|
||||
|
||||
ds_gws_sema_br v255 offset:65535 gds
|
||||
// GFX90A: error: vgpr must be even aligned
|
||||
|
||||
ds_gws_barrier a3 offset:4 gds
|
||||
// GFX90A: error: vgpr must be even aligned
|
||||
|
||||
ds_gws_barrier a255 offset:4 gds
|
||||
// GFX90A: error: vgpr must be even aligned
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck %s --implicit-check-not=error: --strict-whitespace
|
||||
|
||||
//==============================================================================
|
||||
// vgpr must be even aligned
|
||||
|
||||
ds_gws_init a1 offset:65535 gds
|
||||
// CHECK: error: vgpr must be even aligned
|
||||
// CHECK-NEXT:{{^}}ds_gws_init a1 offset:65535 gds
|
||||
// CHECK-NEXT:{{^}} ^
|
|
@ -10225,65 +10225,65 @@ ds_condxchg32_rtn_b64 a[6:7], v1, a[2:3] offset:4
|
|||
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
|
||||
ds_condxchg32_rtn_b64 a[6:7], v1, a[2:3] offset:65535 gds
|
||||
|
||||
// GFX90A: ds_gws_init a1 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0x01,0x00,0x00,0x00]
|
||||
// GFX90A: ds_gws_init a0 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0x00,0x00,0x00,0x00]
|
||||
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
|
||||
ds_gws_init a1 offset:65535 gds
|
||||
ds_gws_init a0 offset:65535 gds
|
||||
|
||||
// GFX90A: ds_gws_init a255 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0xff,0x00,0x00,0x00]
|
||||
// GFX90A: ds_gws_init a254 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0xfe,0x00,0x00,0x00]
|
||||
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
|
||||
ds_gws_init a255 offset:65535 gds
|
||||
ds_gws_init a254 offset:65535 gds
|
||||
|
||||
// GFX90A: ds_gws_init a1 gds ; encoding: [0x00,0x00,0x33,0xdb,0x01,0x00,0x00,0x00]
|
||||
// GFX90A: ds_gws_init a2 gds ; encoding: [0x00,0x00,0x33,0xdb,0x02,0x00,0x00,0x00]
|
||||
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
|
||||
ds_gws_init a1 gds
|
||||
ds_gws_init a2 gds
|
||||
|
||||
// GFX90A: ds_gws_init a1 gds ; encoding: [0x00,0x00,0x33,0xdb,0x01,0x00,0x00,0x00]
|
||||
// GFX90A: ds_gws_init a0 gds ; encoding: [0x00,0x00,0x33,0xdb,0x00,0x00,0x00,0x00]
|
||||
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
|
||||
ds_gws_init a1 gds
|
||||
ds_gws_init a0 gds
|
||||
|
||||
// GFX90A: ds_gws_init a1 offset:4 gds ; encoding: [0x04,0x00,0x33,0xdb,0x01,0x00,0x00,0x00]
|
||||
// GFX90A: ds_gws_init a0 offset:4 gds ; encoding: [0x04,0x00,0x33,0xdb,0x00,0x00,0x00,0x00]
|
||||
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
|
||||
ds_gws_init a1 offset:4 gds
|
||||
ds_gws_init a0 offset:4 gds
|
||||
|
||||
// GFX90A: ds_gws_sema_br a1 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0x01,0x00,0x00,0x00]
|
||||
// GFX90A: ds_gws_sema_br a2 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0x02,0x00,0x00,0x00]
|
||||
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
|
||||
ds_gws_sema_br a1 offset:65535 gds
|
||||
ds_gws_sema_br a2 offset:65535 gds
|
||||
|
||||
// GFX90A: ds_gws_sema_br a255 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0xff,0x00,0x00,0x00]
|
||||
// GFX90A: ds_gws_sema_br a254 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0xfe,0x00,0x00,0x00]
|
||||
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
|
||||
ds_gws_sema_br a255 offset:65535 gds
|
||||
ds_gws_sema_br a254 offset:65535 gds
|
||||
|
||||
// GFX90A: ds_gws_sema_br a1 gds ; encoding: [0x00,0x00,0x37,0xdb,0x01,0x00,0x00,0x00]
|
||||
// GFX90A: ds_gws_sema_br a0 gds ; encoding: [0x00,0x00,0x37,0xdb,0x00,0x00,0x00,0x00]
|
||||
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
|
||||
ds_gws_sema_br a1 gds
|
||||
ds_gws_sema_br a0 gds
|
||||
|
||||
// GFX90A: ds_gws_sema_br a1 gds ; encoding: [0x00,0x00,0x37,0xdb,0x01,0x00,0x00,0x00]
|
||||
// GFX90A: ds_gws_sema_br a2 gds ; encoding: [0x00,0x00,0x37,0xdb,0x02,0x00,0x00,0x00]
|
||||
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
|
||||
ds_gws_sema_br a1 gds
|
||||
ds_gws_sema_br a2 gds
|
||||
|
||||
// GFX90A: ds_gws_sema_br a1 offset:4 gds ; encoding: [0x04,0x00,0x37,0xdb,0x01,0x00,0x00,0x00]
|
||||
// GFX90A: ds_gws_sema_br a0 offset:4 gds ; encoding: [0x04,0x00,0x37,0xdb,0x00,0x00,0x00,0x00]
|
||||
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
|
||||
ds_gws_sema_br a1 offset:4 gds
|
||||
ds_gws_sema_br a0 offset:4 gds
|
||||
|
||||
// GFX90A: ds_gws_barrier a1 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0x01,0x00,0x00,0x00]
|
||||
// GFX90A: ds_gws_barrier a2 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0x02,0x00,0x00,0x00]
|
||||
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
|
||||
ds_gws_barrier a1 offset:65535 gds
|
||||
ds_gws_barrier a2 offset:65535 gds
|
||||
|
||||
// GFX90A: ds_gws_barrier a255 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0xff,0x00,0x00,0x00]
|
||||
// GFX90A: ds_gws_barrier a254 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0xfe,0x00,0x00,0x00]
|
||||
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
|
||||
ds_gws_barrier a255 offset:65535 gds
|
||||
ds_gws_barrier a254 offset:65535 gds
|
||||
|
||||
// GFX90A: ds_gws_barrier a1 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00]
|
||||
// GFX90A: ds_gws_barrier a0 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x00,0x00,0x00,0x00]
|
||||
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
|
||||
ds_gws_barrier a1 gds
|
||||
ds_gws_barrier a0 gds
|
||||
|
||||
// GFX90A: ds_gws_barrier a1 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00]
|
||||
// GFX90A: ds_gws_barrier a2 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x02,0x00,0x00,0x00]
|
||||
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
|
||||
ds_gws_barrier a1 gds
|
||||
ds_gws_barrier a2 gds
|
||||
|
||||
// GFX90A: ds_gws_barrier a1 offset:4 gds ; encoding: [0x04,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00]
|
||||
// GFX90A: ds_gws_barrier a0 offset:4 gds ; encoding: [0x04,0x00,0x3b,0xdb,0x00,0x00,0x00,0x00]
|
||||
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
|
||||
ds_gws_barrier a1 offset:4 gds
|
||||
ds_gws_barrier a0 offset:4 gds
|
||||
|
||||
// GFX90A: ds_consume a5 offset:65535 ; encoding: [0xff,0xff,0x7a,0xdb,0x00,0x00,0x00,0x05]
|
||||
// NOT-GFX90A: error: invalid register class: agpr loads and stores not supported on this GPU
|
||||
|
|
|
@ -7668,50 +7668,50 @@
|
|||
# GFX90A: ds_condxchg32_rtn_b64 a[6:7], v1, a[2:3] offset:65535 gds ; encoding: [0xff,0xff,0xfd,0xda,0x01,0x02,0x00,0x06]
|
||||
0xff,0xff,0xfd,0xda,0x01,0x02,0x00,0x06
|
||||
|
||||
# GFX90A: ds_gws_init a1 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0x01,0x00,0x00,0x00]
|
||||
0xff,0xff,0x33,0xdb,0x01,0x00,0x00,0x00
|
||||
# GFX90A: ds_gws_init a2 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0x02,0x00,0x00,0x00]
|
||||
0xff,0xff,0x33,0xdb,0x02,0x00,0x00,0x00
|
||||
|
||||
# GFX90A: ds_gws_init a255 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0xff,0x00,0x00,0x00]
|
||||
0xff,0xff,0x33,0xdb,0xff,0x00,0x00,0x00
|
||||
# GFX90A: ds_gws_init a254 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0xfe,0x00,0x00,0x00]
|
||||
0xff,0xff,0x33,0xdb,0xfe,0x00,0x00,0x00
|
||||
|
||||
# GFX90A: ds_gws_init a1 gds ; encoding: [0x00,0x00,0x33,0xdb,0x01,0x00,0x00,0x00]
|
||||
0x00,0x00,0x33,0xdb,0x01,0x00,0x00,0x00
|
||||
# GFX90A: ds_gws_init a0 gds ; encoding: [0x00,0x00,0x33,0xdb,0x00,0x00,0x00,0x00]
|
||||
0x00,0x00,0x33,0xdb,0x00,0x00,0x00,0x00
|
||||
|
||||
# GFX90A: ds_gws_init a1 gds ; encoding: [0x00,0x00,0x33,0xdb,0x01,0x00,0x00,0x00]
|
||||
0x00,0x00,0x33,0xdb,0x01,0x00,0x00,0x00
|
||||
# GFX90A: ds_gws_init a0 gds ; encoding: [0x00,0x00,0x33,0xdb,0x00,0x00,0x00,0x00]
|
||||
0x00,0x00,0x33,0xdb,0x00,0x00,0x00,0x00
|
||||
|
||||
# GFX90A: ds_gws_init a1 offset:4 gds ; encoding: [0x04,0x00,0x33,0xdb,0x01,0x00,0x00,0x00]
|
||||
0x04,0x00,0x33,0xdb,0x01,0x00,0x00,0x00
|
||||
# GFX90A: ds_gws_init a2 offset:4 gds ; encoding: [0x04,0x00,0x33,0xdb,0x02,0x00,0x00,0x00]
|
||||
0x04,0x00,0x33,0xdb,0x02,0x00,0x00,0x00
|
||||
|
||||
# GFX90A: ds_gws_sema_br a1 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0x01,0x00,0x00,0x00]
|
||||
0xff,0xff,0x37,0xdb,0x01,0x00,0x00,0x00
|
||||
# GFX90A: ds_gws_sema_br a2 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0x02,0x00,0x00,0x00]
|
||||
0xff,0xff,0x37,0xdb,0x02,0x00,0x00,0x00
|
||||
|
||||
# GFX90A: ds_gws_sema_br a255 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0xff,0x00,0x00,0x00]
|
||||
0xff,0xff,0x37,0xdb,0xff,0x00,0x00,0x00
|
||||
# GFX90A: ds_gws_sema_br a254 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0xfe,0x00,0x00,0x00]
|
||||
0xff,0xff,0x37,0xdb,0xfe,0x00,0x00,0x00
|
||||
|
||||
# GFX90A: ds_gws_sema_br a1 gds ; encoding: [0x00,0x00,0x37,0xdb,0x01,0x00,0x00,0x00]
|
||||
0x00,0x00,0x37,0xdb,0x01,0x00,0x00,0x00
|
||||
# GFX90A: ds_gws_sema_br a0 gds ; encoding: [0x00,0x00,0x37,0xdb,0x00,0x00,0x00,0x00]
|
||||
0x00,0x00,0x37,0xdb,0x00,0x00,0x00,0x00
|
||||
|
||||
# GFX90A: ds_gws_sema_br a1 gds ; encoding: [0x00,0x00,0x37,0xdb,0x01,0x00,0x00,0x00]
|
||||
0x00,0x00,0x37,0xdb,0x01,0x00,0x00,0x00
|
||||
# GFX90A: ds_gws_sema_br a2 gds ; encoding: [0x00,0x00,0x37,0xdb,0x02,0x00,0x00,0x00]
|
||||
0x00,0x00,0x37,0xdb,0x02,0x00,0x00,0x00
|
||||
|
||||
# GFX90A: ds_gws_sema_br a1 offset:4 gds ; encoding: [0x04,0x00,0x37,0xdb,0x01,0x00,0x00,0x00]
|
||||
0x04,0x00,0x37,0xdb,0x01,0x00,0x00,0x00
|
||||
# GFX90A: ds_gws_sema_br a0 offset:4 gds ; encoding: [0x04,0x00,0x37,0xdb,0x00,0x00,0x00,0x00]
|
||||
0x04,0x00,0x37,0xdb,0x00,0x00,0x00,0x00
|
||||
|
||||
# GFX90A: ds_gws_barrier a1 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0x01,0x00,0x00,0x00]
|
||||
0xff,0xff,0x3b,0xdb,0x01,0x00,0x00,0x00
|
||||
# GFX90A: ds_gws_barrier a2 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0x02,0x00,0x00,0x00]
|
||||
0xff,0xff,0x3b,0xdb,0x02,0x00,0x00,0x00
|
||||
|
||||
# GFX90A: ds_gws_barrier a255 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0xff,0x00,0x00,0x00]
|
||||
0xff,0xff,0x3b,0xdb,0xff,0x00,0x00,0x00
|
||||
# GFX90A: ds_gws_barrier a254 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0xfe,0x00,0x00,0x00]
|
||||
0xff,0xff,0x3b,0xdb,0xfe,0x00,0x00,0x00
|
||||
|
||||
# GFX90A: ds_gws_barrier a1 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00]
|
||||
0x00,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00
|
||||
# GFX90A: ds_gws_barrier a0 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x00,0x00,0x00,0x00]
|
||||
0x00,0x00,0x3b,0xdb,0x00,0x00,0x00,0x00
|
||||
|
||||
# GFX90A: ds_gws_barrier a1 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00]
|
||||
0x00,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00
|
||||
# GFX90A: ds_gws_barrier a2 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x02,0x00,0x00,0x00]
|
||||
0x00,0x00,0x3b,0xdb,0x02,0x00,0x00,0x00
|
||||
|
||||
# GFX90A: ds_gws_barrier a1 offset:4 gds ; encoding: [0x04,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00]
|
||||
0x04,0x00,0x3b,0xdb,0x01,0x00,0x00,0x00
|
||||
# GFX90A: ds_gws_barrier a2 offset:4 gds ; encoding: [0x04,0x00,0x3b,0xdb,0x02,0x00,0x00,0x00]
|
||||
0x04,0x00,0x3b,0xdb,0x02,0x00,0x00,0x00
|
||||
|
||||
# GFX90A: ds_consume a5 offset:65535 ; encoding: [0xff,0xff,0x7a,0xdb,0x00,0x00,0x00,0x05]
|
||||
0xff,0xff,0x7a,0xdb,0x00,0x00,0x00,0x05
|
||||
|
|
Loading…
Reference in New Issue